commit 77adb26a74d2c07215985a818c201fb8b1e91596 Author: 奇武 Date: Thu Mar 2 11:17:26 2023 +0800 add EasyFace diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cf36a20 --- /dev/null +++ b/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +/package +/temp +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +.vscode +.idea + +# custom +*.pkl +*.pkl.json +*.log.json +*.whl +*.tar.gz +*.swp +*.log +*.tar.gz +source.sh +tensorboard.sh +.DS_Store +replace.sh +result.png +result.jpg + +# Pytorch +*.pth +*.pt diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..14cec7d --- /dev/null +++ b/LICENSE @@ -0,0 +1,203 @@ +Copyright 2022-2023 Alibaba ModelScope. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020-2022 Alibaba ModelScope. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f8cc997 --- /dev/null +++ b/README.md @@ -0,0 +1,296 @@ +
+ +
+ +
+ + +[![license](https://img.shields.io/github/license/modelscope/modelscope.svg)](https://github.com/modelscope/modelscope/blob/master/LICENSE) +
+ + +

+ 特性 | + 安装 | + 单模型推理 | + 单模型训练/微调 | + 单模型选型/对比 + +

+ +## EasyFace + +**EasyFace**旨在快速选型/了解/对比/体验人脸相关sota模型,依托于[**Modelscope**](https://modelscope.cn/home)开发库和[**Pytorch**](https://pytorch.org)框架,EasyFace具有以下特性: +- 快速体验/对比/选型Sota的人脸相关模型, 涉及人脸检测,人脸识别,人脸关键点,人脸表情识别,人脸活体检测等领域,目前支持人脸检测相关sota模型。 +- 5行代码即可进行模型推理,10行代码进行模型训练/Finetune, 20行代码对比不同模型在自建/公开数据集上的精度以及可视化结果。 +- 基于现有模型快速搭建[**创空间**](https://modelscope.cn/studios/damo/face_album/summary)应用。 + +## News 📢 + + + +🔥 **`2023-03-10`**:新增DamoFD(ICLR23)人脸检测关键点模型,基于SCRFD框架进一步搜索了FD-friendly backbone结构。 在0.5/2.5/10/34 GFlops VGA分辨率的算力约束条件下性能均超过SCRFD。其中提出的$轻量级的检测器DDSAR-0.5G在VGA分辨率0.5GFlops条件下WiderFace上hard集精度为71.03(超过SCRFD 2.5个点),欢迎大家一键使用(支持训练和推理),[**paper**](https://openreview.net/forum?id=NkJOhtNKX91)。 + +🔥 **`2023-03-10`**:新增4个人脸检测模型,包括DamoFD,MogFace,RetinaFace,Mtcnn。 + +## 支持模型列表 +`**对应模型的推理和训练单元测试放在face_project目录下**` + +### 推理 + +🔥 **`人脸检测`**:DamoFD,MogFace,RetinaFace,Mtcnn。 + +### 训练 +🔥 **`人脸检测`**:DamoFD。 + +## 安装 +``` +conda create --offline -n EasyFace python=3.8 +conda activate EasyFace +# pytorch >= 1.3.0 +pip install torch==1.8.1+cu102 torchvision==0.9.1+cu102 --extra-index-url https://download.pytorch.org/whl/cu102 +git clone https://github.com/ly19965/FaceMaas +cd FaceMaas +pip install -r requirements.txt +mim install mmcv-full +``` + +## 单模型推理 +从支持推理的模型列表里选择想体验的模型, e.g.人脸检测模型DamoFD_0.5g + +### 单张图片推理 +```python +import cv2 +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + +face_detection = pipeline(task=Tasks.face_detection, model='damo/cv_ddsar_face-detection_iclr23-damofd') +# 支持 url image and abs dir image path +img_path = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_detection2.jpeg' +result = face_detection(img_path) + +# 提供可视化结果 +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.preprocessors.image import LoadImage +img = LoadImage.convert_to_ndarray(img_path) +cv2.imwrite('srcImg.jpg', img) +img_draw = draw_face_detection_result('srcImg.jpg', result) +import matplotlib.pyplot as plt +plt.imshow(img_draw) +``` + +### Mini公开数据集推理 +```python +import os.path as osp +import cv2 +import os +import numpy as np +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' +val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) +img_base_path = next(iter(val_set))[1] +img_dir = osp.join(img_base_path, 'val_data') +img_gt = osp.join(img_base_path, 'val_label.txt') +gt_info = gen_gt_info(img_gt) +pred_info = {} +iou_th = 0.5 +thresh_num = 1000 +face_detection_func = pipeline(Tasks.face_detection, model=model_id) +count_face = 0 +pr_curve = np.zeros((thresh_num, 2)).astype('float') +for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + +pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) +propose = pr_curve[:, 0] +recall = pr_curve[:, 1] +for 
srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) +ap = voc_ap(recall, propose) +print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) +``` + +## 单模型训练和微调 +从支持训练的模型列表里选择想体验的模型, e.g.人脸检测模型DamoFD_0.5g + +### 训练 + +```python +import os +import tempfile +from modelscope.msdatasets import MsDataset +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.hub.snapshot_download import snapshot_download + +model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' +ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') # remove '_mini' for full dataset + +data_path = ms_ds_widerface.config_kwargs['split_config'] +train_dir = data_path['train'] +val_dir = data_path['validation'] + +def get_name(dir_name): + names = [i for i in os.listdir(dir_name) if not i.startswith('_')] + return names[0] + +train_root = train_dir + '/' + get_name(train_dir) + '/' +val_root = val_dir + '/' + get_name(val_dir) + '/' +cache_path = snapshot_download(model_id) +tmp_dir = tempfile.TemporaryDirectory().name +if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + +def _cfg_modify_fn(cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 1 + cfg.data.samples_per_gpu = 4 + return cfg + +kwargs = dict( + cfg_file=os.path.join(cache_path, 'DamoFD_lms.py'), + work_dir=tmp_dir, + train_root=train_root, + val_root=val_root, + total_epochs=1, # run #epochs + cfg_modify_fn=_cfg_modify_fn) + +trainer = build_trainer(name=Trainers.face_detection_scrfd, default_args=kwargs) +trainer.train() +``` + +### 模型微调 + +```python +import os +import tempfile +from modelscope.msdatasets import MsDataset +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.utils.constant import ModelFile + +model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' +ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') # remove '_mini' for full dataset + +data_path = ms_ds_widerface.config_kwargs['split_config'] +train_dir = data_path['train'] +val_dir = data_path['validation'] + +def get_name(dir_name): + names = [i for i in os.listdir(dir_name) if not i.startswith('_')] + return names[0] + +train_root = train_dir + '/' + get_name(train_dir) + '/' +val_root = val_dir + '/' + get_name(val_dir) + '/' +cache_path = snapshot_download(model_id) +tmp_dir = tempfile.TemporaryDirectory().name +pretrain_epochs = 640 +ft_epochs = 1 +total_epochs = pretrain_epochs + ft_epochs +if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + +def _cfg_modify_fn(cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 1 + cfg.data.samples_per_gpu = 4 + return cfg + +kwargs = dict( + cfg_file=os.path.join(cache_path, 'DamoFD_lms.py'), + work_dir=tmp_dir, + train_root=train_root, + val_root=val_root, + resume_from=os.path.join(cache_path, ModelFile.TORCH_MODEL_FILE), + total_epochs=total_epochs, # run #epochs + cfg_modify_fn=_cfg_modify_fn) + +trainer = build_trainer(name=Trainers.face_detection_scrfd, default_args=kwargs) +trainer.train() +``` + +## 单模型选型和对比 +```python +import os.path as osp +import cv2 +import os +import numpy as np +from modelscope.msdatasets import MsDataset +from 
modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +model_id_list = ['damo/cv_ddsar_face-detection_iclr23-damofd', 'damo/cv_resnet101_face-detection_cvpr22papermogface', 'damo/cv_resnet50_face-detection_retinaface', 'damo/cv_manual_face-detection_mtcnn'] +val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) +img_base_path = next(iter(val_set))[1] +img_dir = osp.join(img_base_path, 'val_data') +img_gt = osp.join(img_base_path, 'val_label.txt') +gt_info = gen_gt_info(img_gt) +pred_info = {} +iou_th = 0.5 +thresh_num = 1000 +count_face = 0 +conf_th = 0.01 +final_info = "" +pr_curve = np.zeros((thresh_num, 2)).astype('float') +for model_id in model_id_list: + pr_curve = np.zeros((thresh_num, 2)).astype('float') + count_face = 0 + if 'mtcnn' in model_id: + face_detection_func = pipeline(Tasks.face_detection, model=model_id, conf_th=0.7) # Mtcnn only support high conf threshold + elif 'damofd' in model_id: + face_detection_func = pipeline(Tasks.face_detection, model=model_id) # Revise conf_th in DamoFD_lms.py + else: + face_detection_func = pipeline(Tasks.face_detection, model=model_id, conf_th=0.01) + for idx, img_name in enumerate(os.listdir(img_dir)): + print ('model_id: {}, inference img: {} {}/{}'.format(model_id, img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + result_info = 'model_id: {}, ap: {:.5f}, iou_th: {:.2f}'.format(model_id, ap, iou_th) + print(result_info) + final_info += result_info + '\n' +print("Overall Result:") +print(final_info) +``` + + + + + + + + + diff --git a/data/test/images/face_detection.png b/data/test/images/face_detection.png new file mode 100644 index 0000000..a5db37e Binary files /dev/null and b/data/test/images/face_detection.png differ diff --git a/data/test/images/face_detection2.jpeg b/data/test/images/face_detection2.jpeg new file mode 100644 index 0000000..0712eb9 Binary files /dev/null and b/data/test/images/face_detection2.jpeg differ diff --git a/data/test/images/face_liveness_ir.jpg b/data/test/images/face_liveness_ir.jpg new file mode 100644 index 0000000..725986c Binary files /dev/null and b/data/test/images/face_liveness_ir.jpg differ diff --git a/data/test/images/face_liveness_rgb.png b/data/test/images/face_liveness_rgb.png new file mode 100644 index 0000000..5114682 Binary files /dev/null and b/data/test/images/face_liveness_rgb.png differ diff --git a/data/test/images/face_liveness_xc.png b/data/test/images/face_liveness_xc.png new file mode 100644 index 0000000..4a59ad6 Binary files /dev/null and 
b/data/test/images/face_liveness_xc.png differ diff --git a/data/test/images/face_recognition_1.png b/data/test/images/face_recognition_1.png new file mode 100644 index 0000000..ad90841 Binary files /dev/null and b/data/test/images/face_recognition_1.png differ diff --git a/data/test/images/face_recognition_2.png b/data/test/images/face_recognition_2.png new file mode 100644 index 0000000..e701f87 Binary files /dev/null and b/data/test/images/face_recognition_2.png differ diff --git a/data/test/images/face_reconstruction.jpg b/data/test/images/face_reconstruction.jpg new file mode 100644 index 0000000..ca1cf8c Binary files /dev/null and b/data/test/images/face_reconstruction.jpg differ diff --git a/data/test/images/facial_expression_recognition.jpg b/data/test/images/facial_expression_recognition.jpg new file mode 100644 index 0000000..fe8522b Binary files /dev/null and b/data/test/images/facial_expression_recognition.jpg differ diff --git a/data/test/images/ir_face_recognition_1.png b/data/test/images/ir_face_recognition_1.png new file mode 100644 index 0000000..27d3ea0 Binary files /dev/null and b/data/test/images/ir_face_recognition_1.png differ diff --git a/data/test/images/ir_face_recognition_2.png b/data/test/images/ir_face_recognition_2.png new file mode 100644 index 0000000..d98e2e7 Binary files /dev/null and b/data/test/images/ir_face_recognition_2.png differ diff --git a/data/test/images/mask_face_recognition_1.jpg b/data/test/images/mask_face_recognition_1.jpg new file mode 100644 index 0000000..80930f1 Binary files /dev/null and b/data/test/images/mask_face_recognition_1.jpg differ diff --git a/data/test/images/mask_face_recognition_2.jpg b/data/test/images/mask_face_recognition_2.jpg new file mode 100644 index 0000000..eaa66e9 Binary files /dev/null and b/data/test/images/mask_face_recognition_2.jpg differ diff --git a/data/test/images/mog_face_detection.jpg b/data/test/images/mog_face_detection.jpg new file mode 100644 index 0000000..c2ec030 Binary files /dev/null and b/data/test/images/mog_face_detection.jpg differ diff --git a/data/test/images/mtcnn_face_detection.jpg b/data/test/images/mtcnn_face_detection.jpg new file mode 100644 index 0000000..c2ec030 Binary files /dev/null and b/data/test/images/mtcnn_face_detection.jpg differ diff --git a/data/test/images/retina_face_detection.jpg b/data/test/images/retina_face_detection.jpg new file mode 100644 index 0000000..c2ec030 Binary files /dev/null and b/data/test/images/retina_face_detection.jpg differ diff --git a/data/test/images/ulfd_face_detection.jpg b/data/test/images/ulfd_face_detection.jpg new file mode 100644 index 0000000..c2ec030 Binary files /dev/null and b/data/test/images/ulfd_face_detection.jpg differ diff --git a/demo/modelscope.gif b/demo/modelscope.gif new file mode 100644 index 0000000..47629f9 Binary files /dev/null and b/demo/modelscope.gif differ diff --git a/face_project/face_detection/DamoFD/README.md b/face_project/face_detection/DamoFD/README.md new file mode 100644 index 0000000..eec9004 --- /dev/null +++ b/face_project/face_detection/DamoFD/README.md @@ -0,0 +1,276 @@ + +
+ +
+

+ 模型介绍 | + 快速使用 | + 单图片推理 | + 多图片推理/评测 | + 模型训练 | + 模型微调 +

+ +# DamoFD模型介绍 +人脸检测关键点模型DamoFD,被ICLR2023录取([论文地址](https://openreview.net/forum?id=NkJOhtNKX91)), 这个项目中开源的模型是在DamoFD增加了关键点分支,论文原文代码见[项目地址](),论文解析详见[解析]()。 + +## 快速使用 + +DamoFD为当前SOTA的人脸检测关键点方法,论文已被ICLR23录取([论文地址](https://openreview.net/forum?id=NkJOhtNKX91))。DamoFD提供了family-based 人脸检测关键点模型,分别为`DamoFD-0.5G, DamoFD-2.5G, DamoFD-10G, DamoFD-34G`,性能均明显超过[SCRFD](https://arxiv.org/abs/2105.04714)。在这个界面中,我们提供几个有关`推理/评测/训练/微调`脚本帮助大家迅速/一键使用DamoFD, 代码范例中的实例均集成在如下几个unit test脚本里: +- `DamoFD-0.5G: 训练,微调`:train_damofd_500m.py; 推理,评测:test_damofd_500m.py +- `DamoFD-2.5G: 训练,微调`:train_damofd_2500m.py; 推理,评测:test_damofd_2500m.py +- `DamoFD-10G: 训练,微调`:train_damofd_10g.py; 推理,评测:test_damofd_10g.py +- `DamoFD-34G: 训练,微调`:train_damofd_34g.py; 推理,评测:test_damofd_34g.py +- `Usage on DamoFD-0.5G`: +```python +PYTHONPATH=. python face_project/face_detection/DamoFD/train_damofd_500m.py +PYTHONPATH=. python face_project/face_detection/DamoFD/test_damofd_500m.py +``` + +## 代码范例 +我们以DamoFD-0.5G为例,提供了推理/评测/训练/微调代码范例和解析: + +### 单图片推理 +```python +import cv2 +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + +face_detection = pipeline(task=Tasks.face_detection, model='damo/cv_ddsar_face-detection_iclr23-damofd') +# 支持 url image and abs dir image path +img_path = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_detection2.jpeg' +result = face_detection(img_path) + +# 提供可视化结果 +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.preprocessors.image import LoadImage +img = LoadImage.convert_to_ndarray(img_path) +cv2.imwrite('srcImg.jpg', img) +img_draw = draw_face_detection_result('srcImg.jpg', result) +import matplotlib.pyplot as plt +plt.imshow(img_draw) +``` + +### 多图片推理和评测 +- 我们提供了100张测试图片,可运行下面代码一键使用(下载数据集+推理); +- 也支持测试自建数据集,需要按如下格式建立数据集: +``` +img_base_path/ + val_data/ + test_1.jpg + ... + test_N.jpg + val_label.txt + ## val_label.txt format + test_1.jpg + x0 x1 w h + x0 x1 w h + ... + test_N.jpg + x0 x1 w h + x0 x1 w h + ... 
+``` + +```python +import os.path as osp +import cv2 +import os +import numpy as np +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' +val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) +img_base_path = next(iter(val_set))[1] +img_dir = osp.join(img_base_path, 'val_data') +img_gt = osp.join(img_base_path, 'val_label.txt') +gt_info = gen_gt_info(img_gt) +pred_info = {} +iou_th = 0.5 +thresh_num = 1000 +face_detection_func = pipeline(Tasks.face_detection, model=model_id) +count_face = 0 +pr_curve = np.zeros((thresh_num, 2)).astype('float') +for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + +pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) +propose = pr_curve[:, 0] +recall = pr_curve[:, 1] +for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) +ap = voc_ap(recall, propose) +print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) +``` +Result: +``` +Recall-Precision-Thresh: 0.09902038655017209 1.0 0.746 +Recall-Precision-Thresh: 0.19989409584326184 0.993421052631579 0.632 +Recall-Precision-Thresh: 0.2991792427852793 0.9519797809604044 0.499 +Recall-Precision-Thresh: 0.39925867090283296 0.8308539944903581 0.367 +Recall-Precision-Thresh: 0.4495631453534551 0.7237851662404092 0.0010000000000000009 +Recall-Precision-Thresh: 0.4495631453534551 0.7237851662404092 0.0010000000000000009 +Recall-Precision-Thresh: 0.4495631453534551 0.7237851662404092 0.0010000000000000009 +Recall-Precision-Thresh: 0.4495631453534551 0.7237851662404092 0.0010000000000000009 +Recall-Precision-Thresh: 0.4495631453534551 0.7237851662404092 0.0010000000000000009 +Recall-Precision-Thresh: 0.4495631453534551 0.7237851662404092 0.0010000000000000009 +ap: 0.42606, iou_th: 0.50 +``` + +### 模型训练 +- 我们提供了Wider Face 和 Wider Face mini的训练集,可运行下面代码一键使用(下载数据集+训练); +- 也支持训练自建数据集,需要按如下格式建立数据集: +``` +# image_width image_height +bbox_x1 bbox_y1 bbox_x2 bbox_y2 (*N) +... +... +# image_width image_height +bbox_x1 bbox_y1 bbox_x2 bbox_y2 (*N) +... +... 
+``` + +```python +import os +import tempfile +from modelscope.msdatasets import MsDataset +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.hub.snapshot_download import snapshot_download + +model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' +ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') # remove '_mini' for full dataset + +data_path = ms_ds_widerface.config_kwargs['split_config'] +train_dir = data_path['train'] +val_dir = data_path['validation'] + +def get_name(dir_name): + names = [i for i in os.listdir(dir_name) if not i.startswith('_')] + return names[0] + +train_root = train_dir + '/' + get_name(train_dir) + '/' +val_root = val_dir + '/' + get_name(val_dir) + '/' +cache_path = snapshot_download(model_id) +tmp_dir = tempfile.TemporaryDirectory().name +if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + +def _cfg_modify_fn(cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 1 + cfg.data.samples_per_gpu = 4 + return cfg + +kwargs = dict( + cfg_file=os.path.join(cache_path, 'DamoFD_lms.py'), + work_dir=tmp_dir, + train_root=train_root, + val_root=val_root, + total_epochs=1, # run #epochs + cfg_modify_fn=_cfg_modify_fn) + +trainer = build_trainer(name=Trainers.face_detection_scrfd, default_args=kwargs) +trainer.train() +``` + +### 模型微调 +- 我们提供了Wider Face 和 Wider Face mini的训练集,可运行下面代码一键使用(下载数据集+训练); +- 网络结构在'modelscope/modelscope/models/cv/face_detection/scrfd/damofd_detect.py', 训练细节在'trainers/cv/face_detection_scrfd_trainer.py'。可以修改这两个文件中的 +- 也支持微调自建数据集,需要按如下格式建立数据集: +``` +# image_width image_height +bbox_x1 bbox_y1 bbox_x2 bbox_y2 (*N) +... +... +# image_width image_height +bbox_x1 bbox_y1 bbox_x2 bbox_y2 (*N) +... +... 
+``` + +```python +import os +import tempfile +from modelscope.msdatasets import MsDataset +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.utils.constant import ModelFile + +model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' +ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') # remove '_mini' for full dataset + +data_path = ms_ds_widerface.config_kwargs['split_config'] +train_dir = data_path['train'] +val_dir = data_path['validation'] + +def get_name(dir_name): + names = [i for i in os.listdir(dir_name) if not i.startswith('_')] + return names[0] + +train_root = train_dir + '/' + get_name(train_dir) + '/' +val_root = val_dir + '/' + get_name(val_dir) + '/' +cache_path = snapshot_download(model_id) +tmp_dir = tempfile.TemporaryDirectory().name +pretrain_epochs = 640 +ft_epochs = 1 +total_epochs = pretrain_epochs + ft_epochs +if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + +def _cfg_modify_fn(cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 1 + cfg.data.samples_per_gpu = 4 + return cfg + +kwargs = dict( + cfg_file=os.path.join(cache_path, 'DamoFD_lms.py'), + work_dir=tmp_dir, + train_root=train_root, + val_root=val_root, + resume_from=os.path.join(cache_path, ModelFile.TORCH_MODEL_FILE), + total_epochs=total_epochs, # run #epochs + cfg_modify_fn=_cfg_modify_fn) + +trainer = build_trainer(name=Trainers.face_detection_scrfd, default_args=kwargs) +trainer.train() +``` + + +## 模型效果 +![DamoFD性能](demo/DamoFD_ap.jpg) + + + diff --git a/face_project/face_detection/DamoFD/demo/DamoFD.jpg b/face_project/face_detection/DamoFD/demo/DamoFD.jpg new file mode 100644 index 0000000..0a17d0d Binary files /dev/null and b/face_project/face_detection/DamoFD/demo/DamoFD.jpg differ diff --git a/face_project/face_detection/DamoFD/demo/DamoFD_ap.jpg b/face_project/face_detection/DamoFD/demo/DamoFD_ap.jpg new file mode 100644 index 0000000..5376579 Binary files /dev/null and b/face_project/face_detection/DamoFD/demo/DamoFD_ap.jpg differ diff --git a/face_project/face_detection/DamoFD/test_damofd_10g.py b/face_project/face_detection/DamoFD/test_damofd_10g.py new file mode 100644 index 0000000..210052d --- /dev/null +++ b/face_project/face_detection/DamoFD/test_damofd_10g.py @@ -0,0 +1,74 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
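+# Unit test for the DamoFD-10G detector ('damo/cv_ddsar_face-detection_iclr23-damofd-10G'):
+# test_run_modelhub runs single-image inference and writes the visualization to result.png;
+# test_run_with_dataset sweeps the widerface_mini_train_val validation split, accumulating a
+# 1000-point precision/recall curve and printing the VOC AP at IoU threshold 0.5.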
+import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + + +class DamoFDFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd-10G' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/test_damofd_2500m.py b/face_project/face_detection/DamoFD/test_damofd_2500m.py new file mode 100644 index 0000000..58e2e56 --- /dev/null +++ b/face_project/face_detection/DamoFD/test_damofd_2500m.py @@ -0,0 +1,78 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
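+# Tests for the DamoFD-2.5G detector ('damo/cv_ddsar_face-detection_iclr23-damofd-2.5G'):
+# dataset-level PR/AP evaluation on widerface_mini_train_val (test level >= 1), single-image
+# inference from the model hub, and a demo-service compatibility check (test level >= 0).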
+import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + + +class DamoFDFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd-2.5G' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/test_damofd_34g.py b/face_project/face_detection/DamoFD/test_damofd_34g.py new file mode 100644 index 0000000..9f9a93d --- /dev/null +++ b/face_project/face_detection/DamoFD/test_damofd_34g.py @@ -0,0 +1,78 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
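+# Same test structure as the other DamoFD variants, here exercising the 34G model
+# 'damo/cv_ddsar_face-detection_iclr23-damofd-34G' on data/test/images/mog_face_detection.jpg
+# and on the widerface_mini_train_val validation split.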
+import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + + +class DamoFDFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd-34G' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/test_damofd_500m.py b/face_project/face_detection/DamoFD/test_damofd_500m.py new file mode 100644 index 0000000..2e29b94 --- /dev/null +++ b/face_project/face_detection/DamoFD/test_damofd_500m.py @@ -0,0 +1,78 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
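+# Tests for the default 0.5G model 'damo/cv_ddsar_face-detection_iclr23-damofd' (DamoFD-0.5G),
+# the variant used throughout the README examples; covers model-hub inference, widerface_mini
+# evaluation, and demo compatibility, each gated by test_level() as in the other variants.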
+import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + + +class DamoFDFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/train_damofd_10g.py b/face_project/face_detection/DamoFD/train_damofd_10g.py new file mode 100644 index 0000000..fcc382c --- /dev/null +++ b/face_project/face_detection/DamoFD/train_damofd_10g.py @@ -0,0 +1,150 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
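+# Trainer unit tests for DamoFD-10G: _setup() pulls the WIDER_FACE_mini dataset and the
+# pretrained checkpoint, then the single-GPU case trains from scratch or finetunes (resuming
+# from the released weights), and a 2-GPU DistributedTestCase finetunes with launcher='pytorch',
+# all driven through build_trainer(name=Trainers.face_detection_scrfd, ...).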
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd-10G' + # mini dataset only for unit test, remove '_mini' for full dataset. + ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') + + data_path = ms_ds_widerface.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + + +class TestFaceDetectionDamofdTrainerSingleGPU(unittest.TestCase): + + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestFaceDetectionDamofdTrainerMultiGpus(DistributedTestCase): + + def setUp(self): + print(('MultiGPUs 
Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'DamoFD_lms.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/train_damofd_2500m.py b/face_project/face_detection/DamoFD/train_damofd_2500m.py new file mode 100644 index 0000000..2f9c00e --- /dev/null +++ b/face_project/face_detection/DamoFD/train_damofd_2500m.py @@ -0,0 +1,150 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd-2.5G' + # mini dataset only for unit test, remove '_mini' for full dataset. 
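+    # _setup() resolves the train/val image roots from the dataset's split_config, downloads the
+    # pretrained 2.5G snapshot, and returns (train_root, val_root, max_epochs, cache_path, tmp_dir).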
+ ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') + + data_path = ms_ds_widerface.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + + +class TestFaceDetectionDamofdTrainerSingleGPU(unittest.TestCase): + + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestFaceDetectionDamofdTrainerMultiGpus(DistributedTestCase): + + def setUp(self): + print(('MultiGPUs Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'DamoFD_lms.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def 
test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/train_damofd_34g.py b/face_project/face_detection/DamoFD/train_damofd_34g.py new file mode 100644 index 0000000..268a668 --- /dev/null +++ b/face_project/face_detection/DamoFD/train_damofd_34g.py @@ -0,0 +1,150 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd-34G' + # mini dataset only for unit test, remove '_mini' for full dataset. + ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') + + data_path = ms_ds_widerface.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + + +class TestFaceDetectionDamofdTrainerSingleGPU(unittest.TestCase): + + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + 
self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestFaceDetectionDamofdTrainerMultiGpus(DistributedTestCase): + + def setUp(self): + print(('MultiGPUs Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'DamoFD_lms.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/DamoFD/train_damofd_500m.py b/face_project/face_detection/DamoFD/train_damofd_500m.py new file mode 100644 index 0000000..4a36791 --- /dev/null +++ b/face_project/face_detection/DamoFD/train_damofd_500m.py @@ -0,0 +1,150 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' + # mini dataset only for unit test, remove '_mini' for full dataset. 
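+    # For the full WIDER FACE dataset, drop the '_mini' suffix, e.g. (assumed hub
+    # dataset name derived from the comment above; verify it exists on ModelScope
+    # before switching):
+    # ms_ds_widerface = MsDataset.load('WIDER_FACE', namespace='shaoxuan')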
+ ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') + + data_path = ms_ds_widerface.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + + +class TestFaceDetectionDamofdTrainerSingleGPU(unittest.TestCase): + + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestFaceDetectionDamofdTrainerMultiGpus(DistributedTestCase): + + def setUp(self): + print(('MultiGPUs Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'DamoFD_lms.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def 
test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/MogFace/README.md b/face_project/face_detection/MogFace/README.md new file mode 100644 index 0000000..6ad53e7 --- /dev/null +++ b/face_project/face_detection/MogFace/README.md @@ -0,0 +1,146 @@ + +
+模型介绍 | 快速使用 | 单图片推理 | 多图片推理/评测
+ +# MogFace模型介绍 +MogFace为当前SOTA的人脸检测方法,已在Wider Face六项榜单上霸榜一年以上,后续被CVPR2022录取([论文地址](https://openaccess.thecvf.com/content/CVPR2022/papers/Liu_MogFace_Towards_a_Deeper_Appreciation_on_Face_Detection_CVPR_2022_paper.pdf)),该方法的主要贡献是从下面三个角度提升人脸检测器: +- Scale-level Data Augmentation (SSE):SSE是第一个从maximize pyramid layer 表征的角度来控制数据集中gt的尺度分布,而不是intuitive的假想检测器的学习能力,因此会在不同场景下都很鲁棒。 +- Adaptive Online Anchor Mining Strategy(Ali-AMS):减少对超参的依赖, 简单且有效的adpative label assign 方法。 +- Hierarchical Context-aware Module (HCAM): 减少误检是real world人脸检测器面对的最大挑战,HCAM是最近几年第一次在算法侧给出solid solution。 + +## 快速使用 + +在这个界面中,我们提供几个有关`推理/评测`脚本帮助大家迅速/一键使用MogFace, 代码范例中的实例均集成在test_mog_face_detection.py +- `Usage`: +```python +PYTHONPATH=. python face_project/face_detection/MogFace/test_mog_face_detection.py +``` + +## 代码范例 + +### 单图片推理 +```python +import cv2 +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + +face_detection = pipeline(task=Tasks.face_detection, model='damo/cv_resnet101_face-detection_cvpr22papermogface') +# 支持 url image and abs dir image path +img_path = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_detection2.jpeg' +result = face_detection(img_path) + +# 提供可视化结果 +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.preprocessors.image import LoadImage +img = LoadImage.convert_to_ndarray(img_path) +cv2.imwrite('srcImg.jpg', img) +img_draw = draw_face_detection_result('srcImg.jpg', result) +import matplotlib.pyplot as plt +plt.imshow(img_draw) +``` + +### 多图片推理和评测 +- 我们提供了100张测试图片,可运行下面代码一键使用(下载数据集+推理); +- 也支持测试自建数据集,需要按如下格式建立数据集: +``` +img_base_path/ + val_data/ + test_1.jpg + ... + test_N.jpg + val_label.txt + ## val_label.txt format + test_1.jpg + x0 x1 w h + x0 x1 w h + ... + test_N.jpg + x0 x1 w h + x0 x1 w h + ... 
+``` + +```python +import os.path as osp +import cv2 +import os +import numpy as np +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +model_id = 'damo/cv_resnet101_face-detection_cvpr22papermogface' +val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) +img_base_path = next(iter(val_set))[1] +img_dir = osp.join(img_base_path, 'val_data') +img_gt = osp.join(img_base_path, 'val_label.txt') +gt_info = gen_gt_info(img_gt) +pred_info = {} +iou_th = 0.5 +thresh_num = 1000 +face_detection_func = pipeline(Tasks.face_detection, model=self.model_id, conf_th=0.01) +count_face = 0 +pr_curve = np.zeros((thresh_num, 2)).astype('float') +for img_name in os.listdir(img_dir): + print ('inference img: {} {}/{}'.format(img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + +pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) +propose = pr_curve[:, 0] +recall = pr_curve[:, 1] +for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) +ap = voc_ap(recall, propose) +print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) +``` +Result: +``` +Recall-Precision-Thresh: 0.09928514694201747 1.0 0.914 +Recall-Precision-Thresh: 0.19962933545141648 0.9986754966887417 0.841 +Recall-Precision-Thresh: 0.29864972200158857 0.9964664310954063 0.749 +Recall-Precision-Thresh: 0.39899391051098754 0.9947194719471947 0.6619999999999999 +Recall-Precision-Thresh: 0.4996028594122319 0.9823008849557522 0.565 +Recall-Precision-Thresh: 0.598623245962404 0.9548141891891891 0.471 +Recall-Precision-Thresh: 0.6997617156473391 0.9091847265221878 0.384 +Recall-Precision-Thresh: 0.7995763833730474 0.8055481461723126 0.274 +Recall-Precision-Thresh: 0.8988615303150649 0.05734797297297297 0.0010000000000000009 +Recall-Precision-Thresh: 0.8988615303150649 0.05734797297297297 0.0010000000000000009 +ap: 0.83243, iou_th: 0.50 +``` + +## 模型精度 +![MogFace性能](demo/MogFace_result.jpg) + +## 来源说明 +本模型及代码来自达摩院自研技术 + +## 引用 +如果你觉得这个该模型对有所帮助,请考虑引用下面的相关的论文: + +```BibTeX +@inproceedings{liu2022mogface, + title={MogFace: Towards a Deeper Appreciation on Face Detection}, + author={Liu, Yang and Wang, Fei and Deng, Jiankang and Zhou, Zhipeng and Sun, Baigui and Li, Hao}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={4093--4102}, + year={2022} +} +``` + diff --git a/face_project/face_detection/MogFace/demo/MogFace.jpg b/face_project/face_detection/MogFace/demo/MogFace.jpg new file mode 100644 index 0000000..4f29f9e Binary files /dev/null and b/face_project/face_detection/MogFace/demo/MogFace.jpg differ diff --git a/face_project/face_detection/MogFace/demo/MogFace_result.jpg b/face_project/face_detection/MogFace/demo/MogFace_result.jpg new file 
mode 100644 index 0000000..cda2491 Binary files /dev/null and b/face_project/face_detection/MogFace/demo/MogFace_result.jpg differ diff --git a/face_project/face_detection/MogFace/test_mog_face_detection.py b/face_project/face_detection/MogFace/test_mog_face_detection.py new file mode 100644 index 0000000..c5fe34e --- /dev/null +++ b/face_project/face_detection/MogFace/test_mog_face_detection.py @@ -0,0 +1,73 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + +from modelscope.msdatasets import MsDataset +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +class MogFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_resnet101_face-detection_cvpr22papermogface' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id, conf_th=0.01) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for idx, img_name in enumerate(os.listdir(img_dir)): + print ('inference img: {} {}/{}'.format(img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + +if __name__ == '__main__': + unittest.main() diff --git 
a/face_project/face_detection/Mtcnn/README.md b/face_project/face_detection/Mtcnn/README.md new file mode 100644 index 0000000..edbb2df --- /dev/null +++ b/face_project/face_detection/Mtcnn/README.md @@ -0,0 +1,143 @@ +
+模型介绍 | 快速使用 | 单图片推理 | 多图片推理/评测
+ +# Mtcnn模型介绍 +MTCNN是工业界广泛应用的检测关键点二合一模型, ([论文地址](https://arxiv.org/abs/1604.02878), [代码地址](https://github.com/TropComplique/mtcnn-pytorch)),该方法包含下面4个模块: +- Image Pyramid: 首先将图像进行不同尺度的变换,构建图像金字塔,以适应不同大小的人脸的进行检测; +- Proposal Network: 其基本的构造是一个全卷积网络。对上一步构建完成的图像金字塔,通过一个FCN进行初步特征提取与标定边框,并进行Bounding-Box Regression调整窗口与NMS进行大部分窗口的过滤。 +- Refine Network: 其基本的构造是一个卷积神经网络,相对于第一层的P-Net来说,增加了一个全连接层,因此对于输入数据的筛选会更加严格。在图片经过P-Net后,会留下许多预测窗口,我们将所有的预测窗口送入R-Net,这个网络会滤除大量效果比较差的候选框,最后对选定的候选框进行Bounding-Box Regression和NMS进一步优化预测结果; +- Output Network: 基本结构是一个较为复杂的卷积神经网络,相对于R-Net来说多了一个卷积层。O-Net的效果与R-Net的区别在于这一层结构会通过更多的监督来识别面部的区域,而且会对人的面部特征点进行回归,最终输出五个人脸面部特征点。 + +## 快速使用 + +在这个界面中,我们提供几个有关`推理/评测`脚本帮助大家迅速/一键使用Mtcnn, 代码范例中的实例均集成在test_mtcnn_face_detection.py +- `Usage`: +```python +PYTHONPATH=. python face_project/face_detection/Mtcnn/test_mtcnn_face_detection.py +``` + +## 代码范例 + +### 单图片推理 +```python +import cv2 +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + +model_id = 'damo/cv_manual_face-detection_mtcnn' +face_detection = pipeline(task=Tasks.face_detection, model=model_id) +# 支持 url image and abs dir image path +img_path = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_detection2.jpeg' +result = face_detection(img_path) + +# 提供可视化结果 +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.preprocessors.image import LoadImage +img = LoadImage.convert_to_ndarray(img_path) +cv2.imwrite('srcImg.jpg', img) +img_draw = draw_face_detection_result('srcImg.jpg', result) +import matplotlib.pyplot as plt +plt.imshow(img_draw) +``` + +### 多图片推理和评测 +- 我们提供了100张测试图片,可运行下面代码一键使用(下载数据集+推理); +- 也支持测试自建数据集,需要按如下格式建立数据集: +``` +img_base_path/ + val_data/ + test_1.jpg + ... + test_N.jpg + val_label.txt + ## val_label.txt format + test_1.jpg + x0 x1 w h + x0 x1 w h + ... + test_N.jpg + x0 x1 w h + x0 x1 w h + ... 
+``` + +```python +import os.path as osp +import cv2 +import os +import numpy as np +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +model_id = 'damo/cv_manual_face-detection_mtcnn' +val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) +img_base_path = next(iter(val_set))[1] +img_dir = osp.join(img_base_path, 'val_data') +img_gt = osp.join(img_base_path, 'val_label.txt') +gt_info = gen_gt_info(img_gt) +pred_info = {} +iou_th = 0.5 +thresh_num = 1000 +face_detection_func = pipeline(Tasks.face_detection, model=model_id, conf_th=0.7) +count_face = 0 +pr_curve = np.zeros((thresh_num, 2)).astype('float') +for idx, img_name in enumerate(os.listdir(img_dir)): + print ('inference img: {} {}/{}'.format(img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + +pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) +propose = pr_curve[:, 0] +recall = pr_curve[:, 1] +for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) +ap = voc_ap(recall, propose) +print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) +``` +Result: +``` +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 1.001 +Recall-Precision-Thresh: 0.19909981466772572 0.9791666666666666 0.997 +Recall-Precision-Thresh: 0.2997087635689701 0.827485380116959 0.95 +Recall-Precision-Thresh: 0.3995234312946783 0.26216122307157746 0.6579999999999999 +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 0.0010000000000000009 +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 0.0010000000000000009 +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 0.0010000000000000009 +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 0.0010000000000000009 +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 0.0010000000000000009 +Recall-Precision-Thresh: 0.4945724119671697 0.013132364106746154 0.0010000000000000009 +ap: 0.35710, iou_th: 0.50 +``` + + +## 引用 +如果你觉得这个该模型对有所帮助,请考虑引用下面的相关的论文: + +```BibTeX +@inproceedings{xiang2017joint, + title={Joint face detection and facial expression recognition with MTCNN}, + author={Xiang, Jia and Zhu, Gengming}, + booktitle={2017 4th international conference on information science and control engineering (ICISCE)}, + pages={424--427}, + year={2017}, + organization={IEEE} +} +``` + diff --git a/face_project/face_detection/Mtcnn/demo/Mtcnn.jpg b/face_project/face_detection/Mtcnn/demo/Mtcnn.jpg new file mode 100644 index 0000000..fc01072 Binary files /dev/null and b/face_project/face_detection/Mtcnn/demo/Mtcnn.jpg differ diff --git a/face_project/face_detection/Mtcnn/test_mtcnn_face_detection.py b/face_project/face_detection/Mtcnn/test_mtcnn_face_detection.py new file mode 100644 index 
0000000..da3d1a3 --- /dev/null +++ b/face_project/face_detection/Mtcnn/test_mtcnn_face_detection.py @@ -0,0 +1,73 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + +from modelscope.msdatasets import MsDataset +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +class RetinaFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-detection_mtcnn' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id, conf_th=0.01) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for idx, img_name in enumerate(os.listdir(img_dir)): + print ('inference img: {} {}/{}'.format(img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + +if __name__ == '__main__': + unittest.main() diff --git a/face_project/face_detection/RetinaFace/README.md b/face_project/face_detection/RetinaFace/README.md new file mode 100644 index 0000000..516c5c5 --- /dev/null +++ b/face_project/face_detection/RetinaFace/README.md @@ -0,0 +1,143 @@ + +
+模型介绍 | 快速使用 | 单图片推理 | 多图片推理/评测
+ +# RetinaFace模型介绍 +RetinaFace为当前学术界和工业界精度较高的人脸检测和人脸关键点定位二合一的方法,被CVPR 2020 录取([论文地址](https://arxiv.org/abs/1905.00641), [代码地址](https://github.com/biubug6/Pytorch_Retinaface))),该方法的主要贡献是: +- 引入关键点分支,可以在训练阶段引入关键点预测分支进行多任务学习,提供额外的互补特征,inference去掉关键点分支即可,并不会引入额外的计算量。 + +## 快速使用 + +在这个界面中,我们提供几个有关`推理/评测`脚本帮助大家迅速/一键使用RetinaFace, 代码范例中的实例均集成在test_retina_face_detection.py +- `Usage`: +```python +PYTHONPATH=. python face_project/face_detection/RetinaFace/test_retina_face_detection.py +``` + +## 代码范例 + +### 单图片推理 +```python +import cv2 +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + +model_id = 'damo/cv_resnet50_face-detection_retinaface' +face_detection = pipeline(task=Tasks.face_detection, model=model_id) +# 支持 url image and abs dir image path +img_path = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_detection2.jpeg' +result = face_detection(img_path) + +# 提供可视化结果 +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.preprocessors.image import LoadImage +img = LoadImage.convert_to_ndarray(img_path) +cv2.imwrite('srcImg.jpg', img) +img_draw = draw_face_detection_result('srcImg.jpg', result) +import matplotlib.pyplot as plt +plt.imshow(img_draw) +``` + +### 多图片推理和评测 +- 我们提供了100张测试图片,可运行下面代码一键使用(下载数据集+推理); +- 也支持测试自建数据集,需要按如下格式建立数据集: +``` +img_base_path/ + val_data/ + test_1.jpg + ... + test_N.jpg + val_label.txt + ## val_label.txt format + test_1.jpg + x0 x1 w h + x0 x1 w h + ... + test_N.jpg + x0 x1 w h + x0 x1 w h + ... +``` + +```python +import os.path as osp +import cv2 +import os +import numpy as np +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +model_id = 'damo/cv_resnet50_face-detection_retinaface' +val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) +img_base_path = next(iter(val_set))[1] +img_dir = osp.join(img_base_path, 'val_data') +img_gt = osp.join(img_base_path, 'val_label.txt') +gt_info = gen_gt_info(img_gt) +pred_info = {} +iou_th = 0.5 +thresh_num = 1000 +face_detection_func = pipeline(Tasks.face_detection, model=model_id, conf_th=0.01) +count_face = 0 +pr_curve = np.zeros((thresh_num, 2)).astype('float') +for img_name in os.listdir(img_dir): + print ('inference img: {} {}/{}'.format(img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + +pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) +propose = pr_curve[:, 0] +recall = pr_curve[:, 1] +for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) +ap = voc_ap(recall, propose) +print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) +``` +Result: +``` +Recall-Precision-Thresh: 0.09981466772570824 0.9973544973544973 0.979 +Recall-Precision-Thresh: 0.19962933545141648 
0.989501312335958 0.855 +Recall-Precision-Thresh: 0.2994440031771247 0.9576629974597799 0.486 +Recall-Precision-Thresh: 0.3995234312946783 0.7038246268656716 0.11099999999999999 +Recall-Precision-Thresh: 0.4980142970611596 0.3608286974870516 0.029000000000000026 +Recall-Precision-Thresh: 0.5837966640190627 0.17127543886903837 0.0010000000000000009 +Recall-Precision-Thresh: 0.5837966640190627 0.17127543886903837 0.0010000000000000009 +Recall-Precision-Thresh: 0.5837966640190627 0.17127543886903837 0.0010000000000000009 +Recall-Precision-Thresh: 0.5837966640190627 0.17127543886903837 0.0010000000000000009 +Recall-Precision-Thresh: 0.5837966640190627 0.17127543886903837 0.0010000000000000009 +ap: 0.45492, iou_th: 0.50 +``` + +## 模型精度 +![RetinaFace性能](demo/result.png) + + +## 引用 +如果你觉得这个该模型对有所帮助,请考虑引用下面的相关的论文: + +```BibTeX +@inproceedings{deng2020retinaface, + title={Retinaface: Single-shot multi-level face localisation in the wild}, + author={Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos}, + booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages={5203--5212}, + year={2020} +} +``` + diff --git a/face_project/face_detection/RetinaFace/demo/RetinaFace.jpg b/face_project/face_detection/RetinaFace/demo/RetinaFace.jpg new file mode 100644 index 0000000..cea3d10 Binary files /dev/null and b/face_project/face_detection/RetinaFace/demo/RetinaFace.jpg differ diff --git a/face_project/face_detection/RetinaFace/test_retina_face_detection.py b/face_project/face_detection/RetinaFace/test_retina_face_detection.py new file mode 100644 index 0000000..5363f6e --- /dev/null +++ b/face_project/face_detection/RetinaFace/test_retina_face_detection.py @@ -0,0 +1,73 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
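+# Unit tests for the RetinaFace face-detection pipeline: single-image inference
+# plus an AP evaluation loop over the 'widerface_mini_train_val' validation split.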
+import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + +from modelscope.msdatasets import MsDataset +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode +from modelscope.utils.cv.image_utils import voc_ap, image_eval,img_pr_info, gen_gt_info, dataset_pr_info, bbox_overlap + +class RetinaFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_resnet50_face-detection_retinaface' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id, conf_th=0.7) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for idx, img_name in enumerate(os.listdir(img_dir)): + print ('inference img: {} {}/{}'.format(img_name, idx+1, len(os.listdir(img_dir)))) + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + +if __name__ == '__main__': + unittest.main() diff --git a/modelscope/__init__.py b/modelscope/__init__.py new file mode 100644 index 0000000..81fdf50 --- /dev/null +++ b/modelscope/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
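+# Top-level package entry: re-exports the SDK version and release datetime.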
+from .version import __release_datetime__, __version__ + +__all__ = ['__version__', '__release_datetime__'] diff --git a/modelscope/fileio/__init__.py b/modelscope/fileio/__init__.py new file mode 100644 index 0000000..385cd02 --- /dev/null +++ b/modelscope/fileio/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from .file import File, LocalStorage +from .io import dump, dumps, load diff --git a/modelscope/fileio/file.py b/modelscope/fileio/file.py new file mode 100644 index 0000000..c6e2ee3 --- /dev/null +++ b/modelscope/fileio/file.py @@ -0,0 +1,324 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import contextlib +import os +import tempfile +from abc import ABCMeta, abstractmethod +from pathlib import Path +from typing import Generator, Union + +import requests + + +class Storage(metaclass=ABCMeta): + """Abstract class of storage. + + All backends need to implement two apis: ``read()`` and ``read_text()``. + ``read()`` reads the file as a byte stream and ``read_text()`` reads + the file as texts. + """ + @abstractmethod + def read(self, filepath: str): + pass + + @abstractmethod + def read_text(self, filepath: str): + pass + + @abstractmethod + def write(self, obj: bytes, filepath: Union[str, Path]) -> None: + pass + + @abstractmethod + def write_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + pass + + +class LocalStorage(Storage): + """Local hard disk storage""" + def read(self, filepath: Union[str, Path]) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + with open(filepath, 'rb') as f: + content = f.read() + return content + + def read_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + with open(filepath, 'r', encoding=encoding) as f: + value_buf = f.read() + return value_buf + + def write(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``write`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + dirname = os.path.dirname(filepath) + if dirname and not os.path.exists(dirname): + os.makedirs(dirname, exist_ok=True) + + with open(filepath, 'wb') as f: + f.write(obj) + + def write_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``write_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. 
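+
+        Examples:
+            >>> # minimal sketch; the output path below is only a placeholder
+            >>> LocalStorage().write_text('hello world', '/tmp/easyface_demo/out.txt')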
+ """ + dirname = os.path.dirname(filepath) + if dirname and not os.path.exists(dirname): + os.makedirs(dirname, exist_ok=True) + + with open(filepath, 'w', encoding=encoding) as f: + f.write(obj) + + @contextlib.contextmanager + def as_local_path( + self, + filepath: Union[str, + Path]) -> Generator[Union[str, Path], None, None]: + """Only for unified API and do nothing.""" + yield filepath + + +class HTTPStorage(Storage): + """HTTP and HTTPS storage.""" + def read(self, url): + # TODO @wenmeng.zwm add progress bar if file is too large + r = requests.get(url) + r.raise_for_status() + return r.content + + def read_text(self, url): + r = requests.get(url) + r.raise_for_status() + return r.text + + @contextlib.contextmanager + def as_local_path( + self, filepath: str) -> Generator[Union[str, Path], None, None]: + """Download a file from ``filepath``. + + ``as_local_path`` is decorated by :meth:`contextlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> storage = HTTPStorage() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with storage.get_local_path('http://path/to/file') as path: + ... # do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.read(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def write(self, obj: bytes, url: Union[str, Path]) -> None: + raise NotImplementedError('write is not supported by HTTP Storage') + + def write_text(self, + obj: str, + url: Union[str, Path], + encoding: str = 'utf-8') -> None: + raise NotImplementedError( + 'write_text is not supported by HTTP Storage') + + +class OSSStorage(Storage): + """OSS storage.""" + def __init__(self, oss_config_file=None): + # read from config file or env var + raise NotImplementedError( + 'OSSStorage.__init__ to be implemented in the future') + + def read(self, filepath): + raise NotImplementedError( + 'OSSStorage.read to be implemented in the future') + + def read_text(self, filepath, encoding='utf-8'): + raise NotImplementedError( + 'OSSStorage.read_text to be implemented in the future') + + @contextlib.contextmanager + def as_local_path( + self, filepath: str) -> Generator[Union[str, Path], None, None]: + """Download a file from ``filepath``. + + ``as_local_path`` is decorated by :meth:`contextlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> storage = OSSStorage() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with storage.get_local_path('http://path/to/file') as path: + ... 
# do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.read(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def write(self, obj: bytes, filepath: Union[str, Path]) -> None: + raise NotImplementedError( + 'OSSStorage.write to be implemented in the future') + + def write_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + raise NotImplementedError( + 'OSSStorage.write_text to be implemented in the future') + + +G_STORAGES = {} + + +class File(object): + _prefix_to_storage: dict = { + 'oss': OSSStorage, + 'http': HTTPStorage, + 'https': HTTPStorage, + 'local': LocalStorage, + } + + @staticmethod + def _get_storage(uri): + assert isinstance(uri, + str), f'uri should be str type, but got {type(uri)}' + + if '://' not in uri: + # local path + storage_type = 'local' + else: + prefix, _ = uri.split('://') + storage_type = prefix + + assert storage_type in File._prefix_to_storage, \ + f'Unsupported uri {uri}, valid prefixs: '\ + f'{list(File._prefix_to_storage.keys())}' + + if storage_type not in G_STORAGES: + G_STORAGES[storage_type] = File._prefix_to_storage[storage_type]() + + return G_STORAGES[storage_type] + + @staticmethod + def read(uri: str) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + storage = File._get_storage(uri) + return storage.read(uri) + + @staticmethod + def read_text(uri: Union[str, Path], encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + storage = File._get_storage(uri) + return storage.read_text(uri) + + @staticmethod + def write(obj: bytes, uri: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``write`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + storage = File._get_storage(uri) + return storage.write(obj, uri) + + @staticmethod + def write_text(obj: str, uri: str, encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``write_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + """ + storage = File._get_storage(uri) + return storage.write_text(obj, uri) + + @contextlib.contextmanager + def as_local_path(uri: str) -> Generator[Union[str, Path], None, None]: + """Only for unified API and do nothing.""" + storage = File._get_storage(uri) + with storage.as_local_path(uri) as local_path: + yield local_path diff --git a/modelscope/fileio/format/__init__.py b/modelscope/fileio/format/__init__.py new file mode 100644 index 0000000..6851826 --- /dev/null +++ b/modelscope/fileio/format/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
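+# Serialization format handlers (JSON via jsonplus, YAML via PyYAML) used by
+# modelscope.fileio.io for the unified load/dump API.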
+ +from .base import FormatHandler +from .json import JsonHandler +from .yaml import YamlHandler diff --git a/modelscope/fileio/format/base.py b/modelscope/fileio/format/base.py new file mode 100644 index 0000000..6303c3b --- /dev/null +++ b/modelscope/fileio/format/base.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from abc import ABCMeta, abstractmethod + + +class FormatHandler(metaclass=ABCMeta): + # if `text_format` is True, file + # should use text mode otherwise binary mode + text_mode = True + + @abstractmethod + def load(self, file, **kwargs): + pass + + @abstractmethod + def dump(self, obj, file, **kwargs): + pass + + @abstractmethod + def dumps(self, obj, **kwargs): + pass diff --git a/modelscope/fileio/format/json.py b/modelscope/fileio/format/json.py new file mode 100644 index 0000000..811d9b2 --- /dev/null +++ b/modelscope/fileio/format/json.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import numpy as np + +from .base import FormatHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. + It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(FormatHandler): + """Use jsonplus, serialization of Python types to JSON that "just works".""" + def load(self, file): + import jsonplus + return jsonplus.loads(file.read()) + + def dump(self, obj, file, **kwargs): + file.write(self.dumps(obj, **kwargs)) + + def dumps(self, obj, **kwargs): + import jsonplus + kwargs.setdefault('default', set_default) + return jsonplus.dumps(obj, **kwargs) diff --git a/modelscope/fileio/format/yaml.py b/modelscope/fileio/format/yaml.py new file mode 100644 index 0000000..6d2955d --- /dev/null +++ b/modelscope/fileio/format/yaml.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import yaml + +try: + from yaml import CDumper as Dumper + from yaml import CLoader as Loader +except ImportError: + from yaml import Loader, Dumper # type: ignore + +from .base import FormatHandler # isort:skip + + +class YamlHandler(FormatHandler): + def load(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dumps(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/modelscope/fileio/io.py b/modelscope/fileio/io.py new file mode 100644 index 0000000..5dc4a83 --- /dev/null +++ b/modelscope/fileio/io.py @@ -0,0 +1,127 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +# Copyright (c) OpenMMLab. All rights reserved. +from io import BytesIO, StringIO +from pathlib import Path + +from .file import File +from .format import JsonHandler, YamlHandler + +format_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), +} + + +def load(file, file_format=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. 
+ + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml". + + Examples: + >>> load('/path/of/your/file') # file is stored in disk + >>> load('https://path/of/your/file') # file is stored on internet + >>> load('oss://path/of/your/file') # file is stored in petrel + + Returns: + The content from the file. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None and isinstance(file, str): + file_format = file.split('.')[-1] + if file_format not in format_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = format_handlers[file_format] + if isinstance(file, str): + if handler.text_mode: + with StringIO(File.read_text(file)) as f: + obj = handler.load(f, **kwargs) + else: + with BytesIO(File.read(file)) as f: + obj = handler.load(f, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, **kwargs): + """Dump data to json/yaml strings or files. + + This method provides a unified api for dumping data as strings or to files. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dumped to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + + Examples: + >>> dump('hello world', '/path/of/your/file') # disk + >>> dump('hello world', 'oss://path/of/your/file') # oss + + Returns: + bool: True for success, False otherwise. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if isinstance(file, str): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError( + 'file_format must be specified since file is None') + if file_format not in format_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = format_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif isinstance(file, str): + if handler.text_mode: + with StringIO() as f: + handler.dump(obj, f, **kwargs) + File.write_text(f.getvalue(), file) + else: + with BytesIO() as f: + handler.dump(obj, f, **kwargs) + File.write(f.getvalue(), file) + elif hasattr(file, 'write'): + handler.dump(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def dumps(obj, format, **kwargs): + """Dump data to json/yaml strings or files. + + This method provides a unified api for dumping data as strings or to files. + + Args: + obj (any): The python object to be dumped. + format (str, optional): Same as file_format :func:`load`. + + Examples: + >>> dumps('hello world', 'json') # json + >>> dumps('hello world', 'yaml') # yaml + + Returns: + bool: True for success, False otherwise. 
+ """ + if format not in format_handlers: + raise TypeError(f'Unsupported format: {format}') + + handler = format_handlers[format] + return handler.dumps(obj, **kwargs) diff --git a/modelscope/hub/__init__.py b/modelscope/hub/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py new file mode 100644 index 0000000..beb6990 --- /dev/null +++ b/modelscope/hub/api.py @@ -0,0 +1,906 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# yapf: disable + +import datetime +import functools +import os +import pickle +import platform +import shutil +import tempfile +import uuid +from collections import defaultdict +from http import HTTPStatus +from http.cookiejar import CookieJar +from os.path import expanduser +from typing import Dict, List, Optional, Tuple, Union + +from requests import Session +from requests.adapters import HTTPAdapter, Retry + +from modelscope import __version__ +from modelscope.hub.constants import (API_HTTP_CLIENT_TIMEOUT, + API_RESPONSE_FIELD_DATA, + API_RESPONSE_FIELD_EMAIL, + API_RESPONSE_FIELD_GIT_ACCESS_TOKEN, + API_RESPONSE_FIELD_MESSAGE, + API_RESPONSE_FIELD_USERNAME, + DEFAULT_CREDENTIALS_PATH, + MODELSCOPE_CLOUD_ENVIRONMENT, + MODELSCOPE_CLOUD_USERNAME, + ONE_YEAR_SECONDS, + REQUESTS_API_HTTP_METHOD, Licenses, + ModelVisibility) +from modelscope.hub.errors import (InvalidParameter, NotExistError, + NotLoginException, NoValidRevisionError, + RequestError, datahub_raise_on_error, + handle_http_post_error, + handle_http_response, is_ok, + raise_for_http_status, raise_on_error) +from modelscope.hub.git import GitCommandWrapper +from modelscope.hub.repository import Repository +from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, + DEFAULT_MODEL_REVISION, + DEFAULT_REPOSITORY_REVISION, + MASTER_MODEL_BRANCH, DatasetFormations, + DatasetMetaFormats, + DatasetVisibilityMap, DownloadChannel, + ModelFile) +from modelscope.utils.logger import get_logger + +from .utils.utils import (get_endpoint, get_release_datetime, + model_id_to_group_owner_name) + +logger = get_logger() + + +class HubApi: + """Model hub api interface. + """ + def __init__(self, endpoint: Optional[str] = None): + """The ModelScope HubApi。 + + Args: + endpoint (str, optional): The modelscope server http|https address. Defaults to None. + """ + self.endpoint = endpoint if endpoint is not None else get_endpoint() + self.headers = {'user-agent': ModelScopeConfig.get_user_agent()} + self.session = Session() + retry = Retry( + total=2, + read=2, + connect=2, + backoff_factor=1, + status_forcelist=(500, 502, 503, 504), + ) + adapter = HTTPAdapter(max_retries=retry) + self.session.mount('http://', adapter) + self.session.mount('https://', adapter) + # set http timeout + for method in REQUESTS_API_HTTP_METHOD: + setattr( + self.session, method, + functools.partial( + getattr(self.session, method), + timeout=API_HTTP_CLIENT_TIMEOUT)) + + def login( + self, + access_token: str, + ) -> tuple(): + """Login with your SDK access token, which can be obtained from + https://www.modelscope.cn user center. + + Args: + access_token (str): user access token on modelscope. + + Returns: + cookies: to authenticate yourself to ModelScope open-api + git_token: token to access your git repository. + + Note: + You only have to login once within 30 days. 
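+
+        Examples:
+            >>> # minimal sketch; the token string below is only a placeholder
+            >>> from modelscope.hub.api import HubApi
+            >>> api = HubApi()
+            >>> git_token, cookies = api.login('your-sdk-access-token')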
+ """ + path = f'{self.endpoint}/api/v1/login' + r = self.session.post( + path, json={'AccessToken': access_token}, headers=self.headers) + raise_for_http_status(r) + d = r.json() + raise_on_error(d) + + token = d[API_RESPONSE_FIELD_DATA][API_RESPONSE_FIELD_GIT_ACCESS_TOKEN] + cookies = r.cookies + + # save token and cookie + ModelScopeConfig.save_token(token) + ModelScopeConfig.save_cookies(cookies) + ModelScopeConfig.save_user_info( + d[API_RESPONSE_FIELD_DATA][API_RESPONSE_FIELD_USERNAME], + d[API_RESPONSE_FIELD_DATA][API_RESPONSE_FIELD_EMAIL]) + + return d[API_RESPONSE_FIELD_DATA][ + API_RESPONSE_FIELD_GIT_ACCESS_TOKEN], cookies + + def create_model(self, + model_id: str, + visibility: Optional[int] = ModelVisibility.PUBLIC, + license: Optional[str] = Licenses.APACHE_V2, + chinese_name: Optional[str] = None) -> str: + """Create model repo at ModelScopeHub. + + Args: + model_id (str): The model id + visibility (int, optional): visibility of the model(1-private, 5-public), default 5. + license (str, optional): license of the model, default none. + chinese_name (str, optional): chinese name of the model. + + Returns: + Name of the model created + + Raises: + InvalidParameter: If model_id is invalid. + ValueError: If not login. + + Note: + model_id = {owner}/{name} + """ + if model_id is None: + raise InvalidParameter('model_id is required!') + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise ValueError('Token does not exist, please login first.') + + path = f'{self.endpoint}/api/v1/models' + owner_or_group, name = model_id_to_group_owner_name(model_id) + body = { + 'Path': owner_or_group, + 'Name': name, + 'ChineseName': chinese_name, + 'Visibility': visibility, # server check + 'License': license + } + r = self.session.post( + path, json=body, cookies=cookies, headers=self.headers) + handle_http_post_error(r, path, body) + raise_on_error(r.json()) + model_repo_url = f'{get_endpoint()}/{model_id}' + return model_repo_url + + def delete_model(self, model_id: str): + """Delete model_id from ModelScope. + + Args: + model_id (str): The model id. + + Raises: + ValueError: If not login. + + Note: + model_id = {owner}/{name} + """ + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise ValueError('Token does not exist, please login first.') + path = f'{self.endpoint}/api/v1/models/{model_id}' + + r = self.session.delete(path, cookies=cookies, headers=self.headers) + raise_for_http_status(r) + raise_on_error(r.json()) + + def get_model_url(self, model_id: str): + return f'{self.endpoint}/api/v1/models/{model_id}.git' + + def get_model( + self, + model_id: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + ) -> str: + """Get model information at ModelScope + + Args: + model_id (str): The model id. + revision (str optional): revision of model. + + Returns: + The model detail information. 
+ + Raises: + NotExistError: If the model is not exist, will throw NotExistError + + Note: + model_id = {owner}/{name} + """ + cookies = ModelScopeConfig.get_cookies() + owner_or_group, name = model_id_to_group_owner_name(model_id) + if revision: + path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}?Revision={revision}' + else: + path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}' + + r = self.session.get(path, cookies=cookies, headers=self.headers) + handle_http_response(r, logger, cookies, model_id) + if r.status_code == HTTPStatus.OK: + if is_ok(r.json()): + return r.json()[API_RESPONSE_FIELD_DATA] + else: + raise NotExistError(r.json()[API_RESPONSE_FIELD_MESSAGE]) + else: + raise_for_http_status(r) + + def push_model(self, + model_id: str, + model_dir: str, + visibility: Optional[int] = ModelVisibility.PUBLIC, + license: Optional[str] = Licenses.APACHE_V2, + chinese_name: Optional[str] = None, + commit_message: Optional[str] = 'upload model', + revision: Optional[str] = DEFAULT_REPOSITORY_REVISION): + """Upload model from a given directory to given repository. A valid model directory + must contain a configuration.json file. + + This function upload the files in given directory to given repository. If the + given repository is not exists in remote, it will automatically create it with + given visibility, license and chinese_name parameters. If the revision is also + not exists in remote repository, it will create a new branch for it. + + This function must be called before calling HubApi's login with a valid token + which can be obtained from ModelScope's website. + + Args: + model_id (str): + The model id to be uploaded, caller must have write permission for it. + model_dir(str): + The Absolute Path of the finetune result. + visibility(int, optional): + Visibility of the new created model(1-private, 5-public). If the model is + not exists in ModelScope, this function will create a new model with this + visibility and this parameter is required. You can ignore this parameter + if you make sure the model's existence. + license(`str`, defaults to `None`): + License of the new created model(see License). If the model is not exists + in ModelScope, this function will create a new model with this license + and this parameter is required. You can ignore this parameter if you + make sure the model's existence. + chinese_name(`str`, *optional*, defaults to `None`): + chinese name of the new created model. + commit_message(`str`, *optional*, defaults to `None`): + commit message of the push request. + revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION): + which branch to push. If the branch is not exists, It will create a new + branch and push to it. + + Raises: + InvalidParameter: Parameter invalid. + NotLoginException: Not login + ValueError: No configuration.json + Exception: Create failed. 
+ """ + if model_id is None: + raise InvalidParameter('model_id cannot be empty!') + if model_dir is None: + raise InvalidParameter('model_dir cannot be empty!') + if not os.path.exists(model_dir) or os.path.isfile(model_dir): + raise InvalidParameter('model_dir must be a valid directory.') + cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) + if not os.path.exists(cfg_file): + raise ValueError(f'{model_dir} must contain a configuration.json.') + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise NotLoginException('Must login before upload!') + files_to_save = os.listdir(model_dir) + try: + self.get_model(model_id=model_id) + except Exception: + if visibility is None or license is None: + raise InvalidParameter( + 'visibility and license cannot be empty if want to create new repo' + ) + logger.info('Create new model %s' % model_id) + self.create_model( + model_id=model_id, + visibility=visibility, + license=license, + chinese_name=chinese_name) + tmp_dir = tempfile.mkdtemp() + git_wrapper = GitCommandWrapper() + try: + repo = Repository(model_dir=tmp_dir, clone_from=model_id) + branches = git_wrapper.get_remote_branches(tmp_dir) + if revision not in branches: + logger.info('Create new branch %s' % revision) + git_wrapper.new_branch(tmp_dir, revision) + git_wrapper.checkout(tmp_dir, revision) + files_in_repo = os.listdir(tmp_dir) + for f in files_in_repo: + if f[0] != '.': + src = os.path.join(tmp_dir, f) + if os.path.isfile(src): + os.remove(src) + else: + shutil.rmtree(src, ignore_errors=True) + for f in files_to_save: + if f[0] != '.': + src = os.path.join(model_dir, f) + if os.path.isdir(src): + shutil.copytree(src, os.path.join(tmp_dir, f)) + else: + shutil.copy(src, tmp_dir) + if not commit_message: + date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + commit_message = '[automsg] push model %s to hub at %s' % ( + model_id, date) + repo.push( + commit_message=commit_message, + local_branch=revision, + remote_branch=revision) + except Exception: + raise + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + def list_models(self, + owner_or_group: str, + page_number: Optional[int] = 1, + page_size: Optional[int] = 10) -> dict: + """List models in owner or group. + + Args: + owner_or_group(str): owner or group. + page_number(int, optional): The page number, default: 1 + page_size(int, optional): The page size, default: 10 + + Raises: + RequestError: The request error. 
+ + Returns: + dict: {"models": "list of models", "TotalCount": total_number_of_models_in_owner_or_group} + """ + cookies = ModelScopeConfig.get_cookies() + path = f'{self.endpoint}/api/v1/models/' + r = self.session.put( + path, + data='{"Path":"%s", "PageNumber":%s, "PageSize": %s}' % + (owner_or_group, page_number, page_size), + cookies=cookies, + headers=self.headers) + handle_http_response(r, logger, cookies, 'list_model') + if r.status_code == HTTPStatus.OK: + if is_ok(r.json()): + data = r.json()[API_RESPONSE_FIELD_DATA] + return data + else: + raise RequestError(r.json()[API_RESPONSE_FIELD_MESSAGE]) + else: + raise_for_http_status(r) + return None + + def _check_cookie(self, + use_cookies: Union[bool, + CookieJar] = False) -> CookieJar: + cookies = None + if isinstance(use_cookies, CookieJar): + cookies = use_cookies + elif use_cookies: + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise ValueError('Token does not exist, please login first.') + return cookies + + def list_model_revisions( + self, + model_id: str, + cutoff_timestamp: Optional[int] = None, + use_cookies: Union[bool, CookieJar] = False) -> List[str]: + """Get model branch and tags. + + Args: + model_id (str): The model id + cutoff_timestamp (int): Tags created before the cutoff will be included. + The timestamp is represented by the seconds elapsed from the epoch time. + use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, + will load cookie from local. Defaults to False. + + Returns: + Tuple[List[str], List[str]]: Return list of branch name and tags + """ + cookies = self._check_cookie(use_cookies) + if cutoff_timestamp is None: + cutoff_timestamp = get_release_datetime() + path = f'{self.endpoint}/api/v1/models/{model_id}/revisions?EndTime=%s' % cutoff_timestamp + r = self.session.get(path, cookies=cookies, headers=self.headers) + handle_http_response(r, logger, cookies, model_id) + d = r.json() + raise_on_error(d) + info = d[API_RESPONSE_FIELD_DATA] + # tags returned from backend are guaranteed to be ordered by create-time + tags = [x['Revision'] for x in info['RevisionMap']['Tags'] + ] if info['RevisionMap']['Tags'] else [] + return tags + + def get_valid_revision(self, + model_id: str, + revision=None, + cookies: Optional[CookieJar] = None): + release_timestamp = get_release_datetime() + current_timestamp = int(round(datetime.datetime.now().timestamp())) + # for active development in library codes (non-release-branches), release_timestamp + # is set to be a far-away-time-in-the-future, to ensure that we shall + # get the master-HEAD version from model repo by default (when no revision is provided) + if release_timestamp > current_timestamp + ONE_YEAR_SECONDS: + branches, tags = self.get_model_branches_and_tags( + model_id, use_cookies=False if cookies is None else cookies) + if revision is None: + revision = MASTER_MODEL_BRANCH + logger.info( + 'Model revision not specified, use default: %s in development mode' + % revision) + if revision not in branches and revision not in tags: + raise NotExistError('The model: %s has no revision : %s .' 
% (model_id, revision)) + logger.info('Development mode use revision: %s' % revision) + else: + if revision is None: # user not specified revision, use latest revision before release time + revisions = self.list_model_revisions( + model_id, + cutoff_timestamp=release_timestamp, + use_cookies=False if cookies is None else cookies) + if len(revisions) == 0: + raise NoValidRevisionError( + 'The model: %s has no valid revision!' % model_id) + # tags (revisions) returned from backend are guaranteed to be ordered by create-time + # we shall obtain the latest revision created earlier than release version of this branch + revision = revisions[0] + logger.info( + 'Model revision not specified, use the latest revision: %s' + % revision) + else: + # use user-specified revision + revisions = self.list_model_revisions( + model_id, + cutoff_timestamp=current_timestamp, + use_cookies=False if cookies is None else cookies) + if revision not in revisions: + raise NotExistError('The model: %s has no revision: %s !' % + (model_id, revision)) + logger.info('Use user-specified model revision: %s' % revision) + return revision + + def get_model_branches_and_tags( + self, + model_id: str, + use_cookies: Union[bool, CookieJar] = False, + ) -> Tuple[List[str], List[str]]: + """Get model branch and tags. + + Args: + model_id (str): The model id + use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, + will load cookie from local. Defaults to False. + + Returns: + Tuple[List[str], List[str]]: Return list of branch name and tags + """ + cookies = self._check_cookie(use_cookies) + + path = f'{self.endpoint}/api/v1/models/{model_id}/revisions' + r = self.session.get(path, cookies=cookies, headers=self.headers) + handle_http_response(r, logger, cookies, model_id) + d = r.json() + raise_on_error(d) + info = d[API_RESPONSE_FIELD_DATA] + branches = [x['Revision'] for x in info['RevisionMap']['Branches'] + ] if info['RevisionMap']['Branches'] else [] + tags = [x['Revision'] for x in info['RevisionMap']['Tags'] + ] if info['RevisionMap']['Tags'] else [] + return branches, tags + + def get_model_files(self, + model_id: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + root: Optional[str] = None, + recursive: Optional[str] = False, + use_cookies: Union[bool, CookieJar] = False, + headers: Optional[dict] = {}) -> List[dict]: + """List the models files. + + Args: + model_id (str): The model id + revision (Optional[str], optional): The branch or tag name. + root (Optional[str], optional): The root path. Defaults to None. + recursive (Optional[str], optional): Is recursive list files. Defaults to False. + use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, + will load cookie from local. Defaults to False. + headers: request headers + + Returns: + List[dict]: Model file list. 
+ """ + if revision: + path = '%s/api/v1/models/%s/repo/files?Revision=%s&Recursive=%s' % ( + self.endpoint, model_id, revision, recursive) + else: + path = '%s/api/v1/models/%s/repo/files?Recursive=%s' % ( + self.endpoint, model_id, recursive) + cookies = self._check_cookie(use_cookies) + if root is not None: + path = path + f'&Root={root}' + headers = self.headers if headers is None else headers + r = self.session.get( + path, cookies=cookies, headers=headers) + + handle_http_response(r, logger, cookies, model_id) + d = r.json() + raise_on_error(d) + + files = [] + for file in d[API_RESPONSE_FIELD_DATA]['Files']: + if file['Name'] == '.gitignore' or file['Name'] == '.gitattributes': + continue + + files.append(file) + return files + + def list_datasets(self): + path = f'{self.endpoint}/api/v1/datasets' + params = {} + r = self.session.get(path, params=params, headers=self.headers) + raise_for_http_status(r) + dataset_list = r.json()[API_RESPONSE_FIELD_DATA] + return [x['Name'] for x in dataset_list] + + def get_dataset_id_and_type(self, dataset_name: str, namespace: str): + """ Get the dataset id and type. """ + datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}' + cookies = ModelScopeConfig.get_cookies() + r = self.session.get(datahub_url, cookies=cookies) + resp = r.json() + datahub_raise_on_error(datahub_url, resp) + dataset_id = resp['Data']['Id'] + dataset_type = resp['Data']['Type'] + return dataset_id, dataset_type + + def get_dataset_meta_file_list(self, dataset_name: str, namespace: str, dataset_id: str, revision: str): + """ Get the meta file-list of the dataset. """ + datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={revision}' + cookies = ModelScopeConfig.get_cookies() + r = self.session.get(datahub_url, cookies=cookies, headers=self.headers) + r = self.session.get( + datahub_url, cookies=cookies, headers=self.headers) + resp = r.json() + datahub_raise_on_error(datahub_url, resp) + file_list = resp['Data'] + if file_list is None: + raise NotExistError( + f'The modelscope dataset [dataset_name = {dataset_name}, namespace = {namespace}, ' + f'version = {revision}] dose not exist') + + file_list = file_list['Files'] + return file_list + + def get_dataset_meta_files_local_paths(self, dataset_name: str, + namespace: str, + revision: str, + meta_cache_dir: str, dataset_type: int, file_list: list): + local_paths = defaultdict(list) + dataset_formation = DatasetFormations(dataset_type) + dataset_meta_format = DatasetMetaFormats[dataset_formation] + cookies = ModelScopeConfig.get_cookies() + + # Dump the data_type as a local file + dataset_type_file_path = os.path.join(meta_cache_dir, + f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}') + with open(dataset_type_file_path, 'w') as fp: + fp.write('*** Automatically-generated file, do not modify ***') + + for file_info in file_list: + file_path = file_info['Path'] + extension = os.path.splitext(file_path)[-1] + if extension in dataset_meta_format: + datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/repo?' 
\ + f'Revision={revision}&FilePath={file_path}' + r = self.session.get(datahub_url, cookies=cookies) + raise_for_http_status(r) + local_path = os.path.join(meta_cache_dir, file_path) + if os.path.exists(local_path): + logger.warning( + f"Reusing dataset {dataset_name}'s python file ({local_path})" + ) + local_paths[extension].append(local_path) + continue + with open(local_path, 'wb') as f: + f.write(r.content) + local_paths[extension].append(local_path) + + return local_paths, dataset_formation + + def fetch_single_csv_script(self, script_url: str): + cookies = ModelScopeConfig.get_cookies() + resp = self.session.get(script_url, cookies=cookies, headers=self.headers) + if not resp or not resp.text: + raise 'The meta-csv file cannot be empty when the meta-args `big_data` is true.' + text_list = resp.text.strip().split('\n') + text_headers = text_list[0] + text_content = text_list[1:] + + return text_headers, text_content + + def get_dataset_file_url( + self, + file_name: str, + dataset_name: str, + namespace: str, + revision: Optional[str] = DEFAULT_DATASET_REVISION): + if file_name.endswith('.csv'): + file_name = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/repo?' \ + f'Revision={revision}&FilePath={file_name}' + return file_name + + def get_dataset_access_config( + self, + dataset_name: str, + namespace: str, + revision: Optional[str] = DEFAULT_DATASET_REVISION): + datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/' \ + f'ststoken?Revision={revision}' + return self.datahub_remote_call(datahub_url) + + def get_dataset_access_config_session( + self, + dataset_name: str, + namespace: str, + check_cookie: bool, + revision: Optional[str] = DEFAULT_DATASET_REVISION): + + datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/' \ + f'ststoken?Revision={revision}' + if check_cookie: + cookies = self._check_cookie(use_cookies=True) + else: + cookies = ModelScopeConfig.get_cookies() + r = self.session.get(url=datahub_url, cookies=cookies, headers=self.headers) + + r = self.session.get( + url=datahub_url, cookies=cookies, headers=self.headers) + resp = r.json() + raise_on_error(resp) + return resp['Data'] + + def get_dataset_access_config_for_unzipped(self, + dataset_name: str, + namespace: str, + revision: str, + zip_file_name: str): + datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}' + cookies = ModelScopeConfig.get_cookies() + r = self.session.get(url=datahub_url, cookies=cookies, headers=self.headers) + resp = r.json() + # get visibility of the dataset + raise_on_error(resp) + data = resp['Data'] + visibility = DatasetVisibilityMap.get(data['Visibility']) + + datahub_sts_url = f'{datahub_url}/ststoken?Revision={revision}' + r_sts = self.session.get(url=datahub_sts_url, cookies=cookies, headers=self.headers) + resp_sts = r_sts.json() + raise_on_error(resp_sts) + data_sts = resp_sts['Data'] + file_dir = visibility + '-unzipped' + '/' + namespace + '_' + dataset_name + '_' + zip_file_name + data_sts['Dir'] = file_dir + return data_sts + + def list_oss_dataset_objects(self, dataset_name, namespace, max_limit, + is_recursive, is_filter_dir, revision): + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' 
\ + f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}' + + cookies = ModelScopeConfig.get_cookies() + resp = self.session.get(url=url, cookies=cookies, timeout=1800) + resp = resp.json() + raise_on_error(resp) + resp = resp['Data'] + return resp + + def delete_oss_dataset_object(self, object_name: str, dataset_name: str, + namespace: str, revision: str) -> str: + if not object_name or not dataset_name or not namespace or not revision: + raise ValueError('Args cannot be empty!') + + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss?Path={object_name}&Revision={revision}' + + cookies = ModelScopeConfig.get_cookies() + resp = self.session.delete(url=url, cookies=cookies) + resp = resp.json() + raise_on_error(resp) + resp = resp['Message'] + return resp + + def delete_oss_dataset_dir(self, object_name: str, dataset_name: str, + namespace: str, revision: str) -> str: + if not object_name or not dataset_name or not namespace or not revision: + raise ValueError('Args cannot be empty!') + + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/prefix?Prefix={object_name}/' \ + f'&Revision={revision}' + + cookies = ModelScopeConfig.get_cookies() + resp = self.session.delete(url=url, cookies=cookies) + resp = resp.json() + raise_on_error(resp) + resp = resp['Message'] + return resp + + def datahub_remote_call(self, url): + cookies = ModelScopeConfig.get_cookies() + r = self.session.get( + url, + cookies=cookies, + headers={'user-agent': ModelScopeConfig.get_user_agent()}) + resp = r.json() + datahub_raise_on_error(url, resp) + return resp['Data'] + + def dataset_download_statistics(self, dataset_name: str, namespace: str, use_streaming: bool) -> None: + is_ci_test = os.getenv('CI_TEST') == 'True' + if dataset_name and namespace and not is_ci_test and not use_streaming: + try: + cookies = ModelScopeConfig.get_cookies() + + # Download count + download_count_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/increase' + download_count_resp = self.session.post(download_count_url, cookies=cookies, headers=self.headers) + raise_for_http_status(download_count_resp) + + # Download uv + channel = DownloadChannel.LOCAL.value + user_name = '' + if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ: + channel = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT] + if MODELSCOPE_CLOUD_USERNAME in os.environ: + user_name = os.environ[MODELSCOPE_CLOUD_USERNAME] + download_uv_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/' \ + f'{channel}?user={user_name}' + download_uv_resp = self.session.post(download_uv_url, cookies=cookies, headers=self.headers) + download_uv_resp = download_uv_resp.json() + raise_on_error(download_uv_resp) + + except Exception as e: + logger.error(e) + + +class ModelScopeConfig: + path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) + COOKIES_FILE_NAME = 'cookies' + GIT_TOKEN_FILE_NAME = 'git_token' + USER_INFO_FILE_NAME = 'user' + USER_SESSION_ID_FILE_NAME = 'session' + + @staticmethod + def make_sure_credential_path_exist(): + os.makedirs(ModelScopeConfig.path_credential, exist_ok=True) + + @staticmethod + def save_cookies(cookies: CookieJar): + ModelScopeConfig.make_sure_credential_path_exist() + with open( + os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.COOKIES_FILE_NAME), 'wb+') as f: + pickle.dump(cookies, f) + + @staticmethod + def get_cookies(): + cookies_path = os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.COOKIES_FILE_NAME) 
+ if os.path.exists(cookies_path): + with open(cookies_path, 'rb') as f: + cookies = pickle.load(f) + for cookie in cookies: + if cookie.is_expired(): + logger.warning( + 'Authentication has expired, ' + 'please re-login if you need to access private models or datasets.') + return None + return cookies + return None + + @staticmethod + def get_user_session_id(): + session_path = os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.USER_SESSION_ID_FILE_NAME) + session_id = '' + if os.path.exists(session_path): + with open(session_path, 'rb') as f: + session_id = str(f.readline().strip(), encoding='utf-8') + return session_id + if session_id == '' or len(session_id) != 32: + session_id = str(uuid.uuid4().hex) + ModelScopeConfig.make_sure_credential_path_exist() + with open(session_path, 'w+') as wf: + wf.write(session_id) + + return session_id + + @staticmethod + def save_token(token: str): + ModelScopeConfig.make_sure_credential_path_exist() + with open( + os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.GIT_TOKEN_FILE_NAME), 'w+') as f: + f.write(token) + + @staticmethod + def save_user_info(user_name: str, user_email: str): + ModelScopeConfig.make_sure_credential_path_exist() + with open( + os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.USER_INFO_FILE_NAME), 'w+') as f: + f.write('%s:%s' % (user_name, user_email)) + + @staticmethod + def get_user_info() -> Tuple[str, str]: + try: + with open( + os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.USER_INFO_FILE_NAME), + 'r', + encoding='utf-8') as f: + info = f.read() + return info.split(':')[0], info.split(':')[1] + except FileNotFoundError: + pass + return None, None + + @staticmethod + def get_token() -> Optional[str]: + """ + Get token or None if not existent. + + Returns: + `str` or `None`: The token, `None` if it doesn't exist. + + """ + token = None + try: + with open( + os.path.join(ModelScopeConfig.path_credential, + ModelScopeConfig.GIT_TOKEN_FILE_NAME), + 'r', + encoding='utf-8') as f: + token = f.read() + except FileNotFoundError: + pass + return token + + @staticmethod + def get_user_agent(user_agent: Union[Dict, str, None] = None, ) -> str: + """Formats a user-agent string with basic info about a request. + + Args: + user_agent (`str`, `dict`, *optional*): + The user agent info in the form of a dictionary or a single string. + + Returns: + The formatted user-agent string. + """ + + # include some more telemetrics when executing in dedicated + # cloud containers + env = 'custom' + if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ: + env = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT] + user_name = 'unknown' + if MODELSCOPE_CLOUD_USERNAME in os.environ: + user_name = os.environ[MODELSCOPE_CLOUD_USERNAME] + + ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % ( + __version__, + platform.python_version(), + ModelScopeConfig.get_user_session_id(), + platform.platform(), + platform.processor(), + env, + user_name, + ) + if isinstance(user_agent, dict): + ua += '; ' + '; '.join(f'{k}/{v}' for k, v in user_agent.items()) + elif isinstance(user_agent, str): + ua += '; ' + user_agent + return ua diff --git a/modelscope/hub/check_model.py b/modelscope/hub/check_model.py new file mode 100644 index 0000000..ad9593d --- /dev/null +++ b/modelscope/hub/check_model.py @@ -0,0 +1,93 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
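The `HubApi` and `ModelScopeConfig` classes added in `modelscope/hub/api.py` above are the main entry points for hub operations. The following is a minimal usage sketch (not part of the patch): the access token, model ids and local directory are hypothetical placeholders, and login is only required for private repositories or write operations.

from modelscope.hub.api import HubApi
from modelscope.hub.constants import Licenses, ModelVisibility

api = HubApi()

# Authenticate once with an SDK token from the ModelScope website; the git token
# and cookies are cached under ~/.modelscope/credentials for later calls.
api.login('YOUR_SDK_ACCESS_TOKEN')  # hypothetical token

# Create a public repo and push a local directory that contains configuration.json.
api.create_model(model_id='your-namespace/your-model',   # hypothetical model id
                 visibility=ModelVisibility.PUBLIC,
                 license=Licenses.APACHE_V2)
api.push_model(model_id='your-namespace/your-model',
               model_dir='/path/to/finetune/output',      # must contain configuration.json
               commit_message='upload model')

# Read-only queries work without login for public models.
info = api.get_model('your-namespace/your-model')
branches, tags = api.get_model_branches_and_tags('your-namespace/your-model')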
+ +import os +from typing import Dict, Optional, Union +from urllib.parse import urlparse + +from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.hub.constants import FILE_HASH +from modelscope.hub.git import GitCommandWrapper +from modelscope.hub.utils.caching import ModelFileSystemCache +from modelscope.hub.utils.utils import compute_hash +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def check_local_model_is_latest( + model_root_path: str, + user_agent: Optional[Union[Dict, str]] = None, +): + """Check local model repo is latest. + Check local model repo is same as hub latest version. + """ + model_cache = None + # download with git + if os.path.exists(os.path.join(model_root_path, '.git')): + git_cmd_wrapper = GitCommandWrapper() + git_url = git_cmd_wrapper.get_repo_remote_url(model_root_path) + if git_url.endswith('.git'): + git_url = git_url[:-4] + u_parse = urlparse(git_url) + model_id = u_parse.path[1:] + else: # snapshot_download + model_cache = ModelFileSystemCache(model_root_path) + model_id = model_cache.get_model_id() + + try: + # make headers + headers = { + 'user-agent': + ModelScopeConfig.get_user_agent(user_agent=user_agent, ) + } + cookies = ModelScopeConfig.get_cookies() + + snapshot_header = headers if 'CI_TEST' in os.environ else { + **headers, + **{ + 'Snapshot': 'True' + } + } + _api = HubApi() + try: + _, revisions = _api.get_model_branches_and_tags( + model_id=model_id, use_cookies=cookies) + if len(revisions) > 0: + latest_revision = revisions[0] + else: + latest_revision = 'master' + except: # noqa: E722 + latest_revision = 'master' + + model_files = _api.get_model_files( + model_id=model_id, + revision=latest_revision, + recursive=True, + headers=snapshot_header, + use_cookies=cookies, + ) + for model_file in model_files: + if model_file['Type'] == 'tree': + continue + # check model_file updated + if model_cache is not None: + if model_cache.exists(model_file): + continue + else: + logger.info( + 'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.' + ) + break + else: + if FILE_HASH in model_file: + local_file_hash = compute_hash( + os.path.join(model_root_path, model_file['Path'])) + if local_file_hash == model_file[FILE_HASH]: + continue + else: + logger.info( + 'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.' + ) + break + except: # noqa: E722 + pass # ignore diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py new file mode 100644 index 0000000..3cde867 --- /dev/null +++ b/modelscope/hub/constants.py @@ -0,0 +1,46 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
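`check_local_model_is_latest` in `modelscope/hub/check_model.py` above only logs a notice when the local copy is behind the hub; it returns None and swallows lookup errors by design. A minimal sketch of how it might be called, with a hypothetical local path:

from modelscope.hub.check_model import check_local_model_is_latest

# Point at a directory previously populated by git clone or snapshot_download.
local_model_dir = '/path/to/local/model'  # hypothetical path
check_local_model_is_latest(local_model_dir, user_agent={'invoked_by': 'example'})
# If newer files exist on the hub, an informational log line is emitted;
# network or lookup failures are ignored (bare except), so this call never raises.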
+ +from pathlib import Path + +MODELSCOPE_URL_SCHEME = 'http://' +DEFAULT_MODELSCOPE_DOMAIN = 'www.modelscope.cn' +DEFAULT_MODELSCOPE_DATA_ENDPOINT = MODELSCOPE_URL_SCHEME + DEFAULT_MODELSCOPE_DOMAIN + +DEFAULT_MODELSCOPE_GROUP = 'damo' +MODEL_ID_SEPARATOR = '/' +FILE_HASH = 'Sha256' +LOGGER_NAME = 'ModelScopeHub' +DEFAULT_CREDENTIALS_PATH = Path.home().joinpath('.modelscope', 'credentials') +REQUESTS_API_HTTP_METHOD = ['get', 'head', 'post', 'put', 'patch', 'delete'] +API_HTTP_CLIENT_TIMEOUT = 60 +API_RESPONSE_FIELD_DATA = 'Data' +API_FILE_DOWNLOAD_RETRY_TIMES = 5 +API_FILE_DOWNLOAD_TIMEOUT = 60 * 5 +API_FILE_DOWNLOAD_CHUNK_SIZE = 4096 +API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken' +API_RESPONSE_FIELD_USERNAME = 'Username' +API_RESPONSE_FIELD_EMAIL = 'Email' +API_RESPONSE_FIELD_MESSAGE = 'Message' +MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' +MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME' +MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG' +ONE_YEAR_SECONDS = 24 * 365 * 60 * 60 +MODEL_META_FILE_NAME = '.mdl' +MODEL_META_MODEL_ID = 'id' + + +class Licenses(object): + APACHE_V2 = 'Apache License 2.0' + GPL_V2 = 'GPL-2.0' + GPL_V3 = 'GPL-3.0' + LGPL_V2_1 = 'LGPL-2.1' + LGPL_V3 = 'LGPL-3.0' + AFL_V3 = 'AFL-3.0' + ECL_V2 = 'ECL-2.0' + MIT = 'MIT' + + +class ModelVisibility(object): + PRIVATE = 1 + INTERNAL = 3 + PUBLIC = 5 diff --git a/modelscope/hub/deploy.py b/modelscope/hub/deploy.py new file mode 100644 index 0000000..9780dbe --- /dev/null +++ b/modelscope/hub/deploy.py @@ -0,0 +1,338 @@ +import json +import urllib +from abc import ABC +from http import HTTPStatus +from typing import Optional + +import requests +from attrs import asdict, define, field, validators + +from modelscope.hub.api import ModelScopeConfig +from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA, + API_RESPONSE_FIELD_MESSAGE) +from modelscope.hub.errors import (NotLoginException, NotSupportError, + RequestError, handle_http_response, is_ok, + raise_for_http_status) +from modelscope.hub.utils.utils import get_endpoint +from modelscope.utils.logger import get_logger + +# yapf: enable + +logger = get_logger() + + +class Accelerator(object): + CPU = 'cpu' + GPU = 'gpu' + + +class Vendor(object): + EAS = 'eas' + + +class EASRegion(object): + beijing = 'cn-beijing' + hangzhou = 'cn-hangzhou' + + +class EASCpuInstanceType(object): + """EAS Cpu Instance Type, ref(https://help.aliyun.com/document_detail/144261.html) + """ + tiny = 'ecs.c6.2xlarge' + small = 'ecs.c6.4xlarge' + medium = 'ecs.c6.6xlarge' + large = 'ecs.c6.8xlarge' + + +class EASGpuInstanceType(object): + """EAS Gpu Instance Type, ref(https://help.aliyun.com/document_detail/144261.html) + """ + tiny = 'ecs.gn5-c28g1.7xlarge' + small = 'ecs.gn5-c8g1.4xlarge' + medium = 'ecs.gn6i-c24g1.12xlarge' + large = 'ecs.gn6e-c12g1.3xlarge' + + +def min_smaller_than_max(instance, attribute, value): + if value > instance.max_replica: + raise ValueError( + "'min_replica' value: %s has to be smaller than 'max_replica' value: %s!" + % (value, instance.max_replica)) + + +@define +class ServiceScalingConfig(object): + """Resource scaling config + Currently we ignore max_replica + Args: + max_replica: maximum replica + min_replica: minimum replica + """ + max_replica: int = field(default=1, validator=validators.ge(1)) + min_replica: int = field( + default=1, validator=[validators.ge(1), min_smaller_than_max]) + + +@define +class ServiceResourceConfig(object): + """Eas Resource request. 
+ + Args: + accelerator: the accelerator(cpu|gpu) + instance_type: the instance type. + scaling: The instance scaling config. + """ + instance_type: str + scaling: ServiceScalingConfig + accelerator: str = field(default=Accelerator.CPU, + validator=validators.in_( + [Accelerator.CPU, Accelerator.GPU])) + + +@define +class ServiceProviderParameters(ABC): + pass + + +@define +class EASDeployParameters(ServiceProviderParameters): + """Parameters for EAS Deployment. + + Args: + resource_group: the resource group to deploy, current default. + region: The eas instance region(eg: cn-hangzhou). + access_key_id: The eas account access key id. + access_key_secret: The eas account access key secret. + vendor: must be 'eas' + """ + region: str + access_key_id: str + access_key_secret: str + resource_group: Optional[str] = None + vendor: str = field(default=Vendor.EAS, + validator=validators.in_([Vendor.EAS])) + + +@define +class EASListParameters(ServiceProviderParameters): + """EAS instance list parameters. + + Args: + resource_group: the resource group to deploy, current default. + region: The eas instance region(eg: cn-hangzhou). + access_key_id: The eas account access key id. + access_key_secret: The eas account access key secret. + vendor: must be 'eas' + """ + access_key_id: str + access_key_secret: str + region: str = None + resource_group: str = None + vendor: str = field(default=Vendor.EAS, + validator=validators.in_([Vendor.EAS])) + + +@define +class DeployServiceParameters(object): + """Deploy service parameters + + Args: + instance_name: the name of the service. + model_id: the modelscope model_id + revision: the modelscope model revision + resource: the resource requirement. + provider: the cloud service provider. + """ + instance_name: str + model_id: str + revision: str + resource: ServiceResourceConfig + provider: ServiceProviderParameters + + +class AttrsToQueryString(ABC): + """Convert the attrs class to json string. + + Args: + """ + def to_query_str(self): + self_dict = asdict(self.provider, + filter=lambda attr, value: value is not None) + json_str = json.dumps(self_dict) + print(json_str) + safe_str = urllib.parse.quote_plus(json_str) + print(safe_str) + query_param = 'provider=%s' % safe_str + return query_param + + +@define +class ListServiceParameters(AttrsToQueryString): + provider: ServiceProviderParameters + skip: int = 0 + limit: int = 100 + + +@define +class GetServiceParameters(AttrsToQueryString): + provider: ServiceProviderParameters + + +@define +class DeleteServiceParameters(AttrsToQueryString): + provider: ServiceProviderParameters + + +class ServiceDeployer(object): + """Facilitate model deployment on to supported service provider(s). + """ + def __init__(self, endpoint=None): + self.endpoint = endpoint if endpoint is not None else get_endpoint() + self.headers = {'user-agent': ModelScopeConfig.get_user_agent()} + self.cookies = ModelScopeConfig.get_cookies() + if self.cookies is None: + raise NotLoginException( + 'Token does not exist, please login with HubApi first.') + + # deploy_model + def create(self, model_id: str, revision: str, instance_name: str, + resource: ServiceResourceConfig, + provider: ServiceProviderParameters): + """Deploy model to cloud, current we only support PAI EAS, this is an async API , + and the deployment could take a while to finish remotely. Please check deploy instance + status separately via checking the status. 
+ + Args: + model_id (str): The deployed model id + revision (str): The model revision + instance_name (str): The deployed model instance name. + resource (ServiceResourceConfig): The service resource information. + provider (ServiceProviderParameters): The service provider parameter + + Raises: + NotSupportError: Not supported platform. + RequestError: The server return error. + + Returns: + ServiceInstanceInfo: The information of the deployed service instance. + """ + if provider.vendor != Vendor.EAS: + raise NotSupportError( + 'Not support vendor: %s ,only support EAS current.' % + (provider.vendor)) + create_params = DeployServiceParameters(instance_name=instance_name, + model_id=model_id, + revision=revision, + resource=resource, + provider=provider) + path = f'{self.endpoint}/api/v1/deployer/endpoint' + body = asdict(create_params) + r = requests.post(path, + json=body, + cookies=self.cookies, + headers=self.headers) + handle_http_response(r, logger, self.cookies, 'create_service') + if r.status_code >= HTTPStatus.OK and r.status_code < HTTPStatus.MULTIPLE_CHOICES: + if is_ok(r.json()): + data = r.json()[API_RESPONSE_FIELD_DATA] + return data + else: + raise RequestError(r.json()[API_RESPONSE_FIELD_MESSAGE]) + else: + raise_for_http_status(r) + return None + + def get(self, instance_name: str, provider: ServiceProviderParameters): + """Query the specified instance information. + + Args: + instance_name (str): The deployed instance name. + provider (ServiceProviderParameters): The cloud provider information, for eas + need region(eg: ch-hangzhou), access_key_id and access_key_secret. + + Raises: + RequestError: The request is failed from server. + + Returns: + Dict: The information of the requested service instance. + """ + params = GetServiceParameters(provider=provider) + path = '%s/api/v1/deployer/endpoint/%s?%s' % ( + self.endpoint, instance_name, params.to_query_str()) + r = requests.get(path, cookies=self.cookies, headers=self.headers) + handle_http_response(r, logger, self.cookies, 'get_service') + if r.status_code == HTTPStatus.OK: + if is_ok(r.json()): + data = r.json()[API_RESPONSE_FIELD_DATA] + return data + else: + raise RequestError(r.json()[API_RESPONSE_FIELD_MESSAGE]) + else: + raise_for_http_status(r) + return None + + def delete(self, instance_name: str, provider: ServiceProviderParameters): + """Delete deployed model, this api send delete command and return, it will take + some to delete, please check through the cloud console. + + Args: + instance_name (str): The instance name you want to delete. + provider (ServiceProviderParameters): The cloud provider information, for eas + need region(eg: ch-hangzhou), access_key_id and access_key_secret. + + Raises: + RequestError: The request is failed. + + Returns: + Dict: The deleted instance information. + """ + params = DeleteServiceParameters(provider=provider) + path = '%s/api/v1/deployer/endpoint/%s?%s' % ( + self.endpoint, instance_name, params.to_query_str()) + r = requests.delete(path, cookies=self.cookies, headers=self.headers) + handle_http_response(r, logger, self.cookies, 'delete_service') + if r.status_code == HTTPStatus.OK: + if is_ok(r.json()): + data = r.json()[API_RESPONSE_FIELD_DATA] + return data + else: + raise RequestError(r.json()[API_RESPONSE_FIELD_MESSAGE]) + else: + raise_for_http_status(r) + return None + + def list(self, + provider: ServiceProviderParameters, + skip: Optional[int] = 0, + limit: Optional[int] = 100): + """List deployed model instances. 
+ + Args: + provider (ServiceProviderParameters): The cloud service provider parameter, + for eas, need access_key_id and access_key_secret. + skip (int, optional): start of the list, current not support. + limit (int, optional): maximum number of instances return, current not support + + Raises: + RequestError: The request is failed from server. + + Returns: + List: List of instance information + """ + + params = ListServiceParameters(provider=provider, + skip=skip, + limit=limit) + path = '%s/api/v1/deployer/endpoint?%s' % (self.endpoint, + params.to_query_str()) + r = requests.get(path, cookies=self.cookies, headers=self.headers) + handle_http_response(r, logger, self.cookies, 'list_service_instances') + if r.status_code == HTTPStatus.OK: + if is_ok(r.json()): + data = r.json()[API_RESPONSE_FIELD_DATA] + return data + else: + raise RequestError(r.json()[API_RESPONSE_FIELD_MESSAGE]) + else: + raise_for_http_status(r) + return None diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py new file mode 100644 index 0000000..be94d7f --- /dev/null +++ b/modelscope/hub/errors.py @@ -0,0 +1,153 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from http import HTTPStatus + +from requests.exceptions import HTTPError + +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class NotSupportError(Exception): + pass + + +class NoValidRevisionError(Exception): + pass + + +class NotExistError(Exception): + pass + + +class RequestError(Exception): + pass + + +class GitError(Exception): + pass + + +class InvalidParameter(Exception): + pass + + +class NotLoginException(Exception): + pass + + +class FileIntegrityError(Exception): + pass + + +class FileDownloadError(Exception): + pass + + +def is_ok(rsp): + """ Check the request is ok + + Args: + rsp (Response): The request response body + + Returns: + bool: `True` if success otherwise `False`. + """ + return rsp['Code'] == HTTPStatus.OK and rsp['Success'] + + +def handle_http_post_error(response, url, request_body): + try: + response.raise_for_status() + except HTTPError as error: + logger.error('Request %s with body: %s exception' % + (url, request_body)) + logger.error('Response details: %s' % response.content) + raise error + + +def handle_http_response(response, logger, cookies, model_id): + try: + response.raise_for_status() + except HTTPError as error: + if cookies is None: # code in [403] and + logger.error( + f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \ + private. Please login first.') + logger.error('Response details: %s' % response.content) + raise error + + +def raise_on_error(rsp): + """If response error, raise exception + + Args: + rsp (_type_): The server response + + Raises: + RequestError: the response error message. + + Returns: + bool: True if request is OK, otherwise raise `RequestError` exception. + """ + if rsp['Code'] == HTTPStatus.OK: + return True + else: + raise RequestError(rsp['Message']) + + +def datahub_raise_on_error(url, rsp): + """If response error, raise exception + + Args: + url (str): The request url + rsp (HTTPResponse): The server response. + + Raises: + RequestError: the http request error. + + Returns: + bool: `True` if request is OK, otherwise raise `RequestError` exception. + """ + if rsp.get('Code') == HTTPStatus.OK: + return True + else: + raise RequestError( + f"Url = {url}, Message = {rsp.get('Message')}, Please specify correct dataset_name and namespace." 
+ ) + + +def raise_for_http_status(rsp): + """Attempt to decode utf-8 first since some servers + localize reason strings, for invalid utf-8, fall back + to decoding with iso-8859-1. + + Args: + rsp: The http response. + + Raises: + HTTPError: The http error info. + """ + http_error_msg = '' + if isinstance(rsp.reason, bytes): + try: + reason = rsp.reason.decode('utf-8') + except UnicodeDecodeError: + reason = rsp.reason.decode('iso-8859-1') + else: + reason = rsp.reason + + if 400 <= rsp.status_code < 500: + http_error_msg = u'%s Client Error: %s for url: %s' % (rsp.status_code, + reason, rsp.url) + + elif 500 <= rsp.status_code < 600: + http_error_msg = u'%s Server Error: %s for url: %s' % (rsp.status_code, + reason, rsp.url) + + if http_error_msg: + req = rsp.request + if req.method == 'POST': + http_error_msg = u'%s, body: %s' % (http_error_msg, req.body) + raise HTTPError(http_error_msg, response=rsp) diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py new file mode 100644 index 0000000..c712759 --- /dev/null +++ b/modelscope/hub/file_download.py @@ -0,0 +1,261 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import copy +import os +import tempfile +from functools import partial +from http.cookiejar import CookieJar +from pathlib import Path +from typing import Dict, Optional, Union + +import requests +from requests.adapters import Retry +from tqdm import tqdm + +from modelscope import __version__ +from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.hub.constants import (API_FILE_DOWNLOAD_CHUNK_SIZE, + API_FILE_DOWNLOAD_RETRY_TIMES, + API_FILE_DOWNLOAD_TIMEOUT, FILE_HASH) +from modelscope.utils.constant import DEFAULT_MODEL_REVISION +from modelscope.utils.logger import get_logger + +from .errors import FileDownloadError, NotExistError +from .utils.caching import ModelFileSystemCache +from .utils.utils import (file_integrity_validation, get_cache_dir, + get_endpoint, model_id_to_group_owner_name) + +logger = get_logger() + + +def model_file_download( + model_id: str, + file_path: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + cache_dir: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + local_files_only: Optional[bool] = False, + cookies: Optional[CookieJar] = None, +) -> Optional[str]: # pragma: no cover + """Download from a given URL and cache it if it's not already present in the local cache. + + Given a URL, this function looks for the corresponding file in the local + cache. If it's not there, download it. Then return the path to the cached + file. + + Args: + model_id (str): The model to whom the file to be downloaded belongs. + file_path(str): Path of the file to be downloaded, relative to the root of model repo. + revision(str, optional): revision of the model file to be downloaded. + Can be any of a branch, tag or commit hash. + cache_dir (str, Path, optional): Path to the folder where cached files are stored. + user_agent (dict, str, optional): The user-agent info in the form of a dictionary or a string. + local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the + local cached file if it exists. if `False`, download the file anyway even it exists. + cookies (CookieJar, optional): The cookie of download request. + + Returns: + string: string of local file or if networking is off, last version of + file cached on disk. + + Raises: + NotExistError: The file is not exist. + ValueError: The request parameter error. 
+ + Note: + Raises the following errors: + + - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + if `use_auth_token=True` and the token cannot be found. + - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) + if ETag cannot be determined. + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + """ + if cache_dir is None: + cache_dir = get_cache_dir() + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + temporary_cache_dir = os.path.join(cache_dir, 'temp') + os.makedirs(temporary_cache_dir, exist_ok=True) + + group_or_owner, name = model_id_to_group_owner_name(model_id) + + cache = ModelFileSystemCache(cache_dir, group_or_owner, name) + + # if local_files_only is `True` and the file already exists in cached_path + # return the cached path + if local_files_only: + cached_file_path = cache.get_file_by_path(file_path) + if cached_file_path is not None: + logger.warning( + "File exists in local cache, but we're not sure it's up to date" + ) + return cached_file_path + else: + raise ValueError( + 'Cannot find the requested files in the cached path and outgoing' + ' traffic has been disabled. To enable model look-ups and downloads' + " online, set 'local_files_only' to False.") + + _api = HubApi() + headers = { + 'user-agent': ModelScopeConfig.get_user_agent(user_agent=user_agent, ) + } + if cookies is None: + cookies = ModelScopeConfig.get_cookies() + + revision = _api.get_valid_revision(model_id, + revision=revision, + cookies=cookies) + file_to_download_info = None + # we need to confirm the version is up-to-date + # we need to get the file list to check if the latest version is cached, if so return, otherwise download + model_files = _api.get_model_files( + model_id=model_id, + revision=revision, + recursive=True, + use_cookies=False if cookies is None else cookies) + + for model_file in model_files: + if model_file['Type'] == 'tree': + continue + + if model_file['Path'] == file_path: + if cache.exists(model_file): + logger.info( + f'File {model_file["Name"]} already in cache, skip downloading!' + ) + return cache.get_file_by_info(model_file) + else: + file_to_download_info = model_file + break + + if file_to_download_info is None: + raise NotExistError('The file path: %s not exist in: %s' % + (file_path, model_id)) + + # we need to download again + url_to_download = get_file_download_url(model_id, file_path, revision) + file_to_download_info = { + 'Path': file_path, + 'Revision': file_to_download_info['Revision'], + FILE_HASH: file_to_download_info[FILE_HASH] + } + + temp_file_name = next(tempfile._get_candidate_names()) + http_get_file(url_to_download, + temporary_cache_dir, + temp_file_name, + headers=headers, + cookies=None if cookies is None else cookies.get_dict()) + temp_file_path = os.path.join(temporary_cache_dir, temp_file_name) + # for download with commit we can't get Sha256 + if file_to_download_info[FILE_HASH] is not None: + file_integrity_validation(temp_file_path, + file_to_download_info[FILE_HASH]) + return cache.put_file(file_to_download_info, + os.path.join(temporary_cache_dir, temp_file_name)) + + +def get_file_download_url(model_id: str, file_path: str, revision: str): + """Format file download url according to `model_id`, `revision` and `file_path`. 
+ e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`, + the resulted download url is: https://modelscope.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md + + Args: + model_id (str): The model_id. + file_path (str): File path + revision (str): File revision. + + Returns: + str: The file url. + """ + download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}' + return download_url_template.format( + endpoint=get_endpoint(), + model_id=model_id, + revision=revision, + file_path=file_path, + ) + + +def http_get_file( + url: str, + local_dir: str, + file_name: str, + cookies: CookieJar, + headers: Optional[Dict[str, str]] = None, +): + """Download remote file, will retry 5 times before giving up on errors. + + Args: + url(str): + actual download url of the file + local_dir(str): + local directory where the downloaded file stores + file_name(str): + name of the file stored in `local_dir` + cookies(CookieJar): + cookies used to authentication the user, which is used for downloading private repos + headers(Dict[str, str], optional): + http headers to carry necessary info when requesting the remote file + + Raises: + FileDownloadError: File download failed. + + """ + total = -1 + temp_file_manager = partial(tempfile.NamedTemporaryFile, + mode='wb', + dir=local_dir, + delete=False) + get_headers = {} if headers is None else copy.deepcopy(headers) + with temp_file_manager() as temp_file: + logger.info('downloading %s to %s', url, temp_file.name) + # retry sleep 0.5s, 1s, 2s, 4s + retry = Retry(total=API_FILE_DOWNLOAD_RETRY_TIMES, + backoff_factor=1, + allowed_methods=['GET']) + while True: + try: + downloaded_size = temp_file.tell() + get_headers['Range'] = 'bytes=%d-' % downloaded_size + r = requests.get(url, + stream=True, + headers=get_headers, + cookies=cookies, + timeout=API_FILE_DOWNLOAD_TIMEOUT) + r.raise_for_status() + content_length = r.headers.get('Content-Length') + total = int( + content_length) if content_length is not None else None + progress = tqdm( + unit='B', + unit_scale=True, + unit_divisor=1024, + total=total, + initial=downloaded_size, + desc='Downloading', + ) + for chunk in r.iter_content( + chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + break + except (Exception) as e: # no matter what happen, we will retry. + retry = retry.increment('GET', url, error=e) + retry.sleep() + + logger.info('storing %s in cache at %s', url, local_dir) + downloaded_length = os.path.getsize(temp_file.name) + if total != downloaded_length: + os.remove(temp_file.name) + msg = 'File %s download incomplete, content_length: %s but the \ + file downloaded length: %s, please download again' % ( + file_name, total, downloaded_length) + logger.error(msg) + raise FileDownloadError(msg) + os.replace(temp_file.name, os.path.join(local_dir, file_name)) diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py new file mode 100644 index 0000000..d6e5eb4 --- /dev/null +++ b/modelscope/hub/git.py @@ -0,0 +1,260 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
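`model_file_download` in `modelscope/hub/file_download.py` above resolves a single repo file, verifies its hash when available, and returns the cached local path. A minimal sketch, assuming a public model that actually contains the requested file (the model id is a hypothetical placeholder):

from modelscope.hub.file_download import (get_file_download_url,
                                          model_file_download)

# Download (or reuse from cache) one file and get its local path.
local_path = model_file_download(
    model_id='your-namespace/your-model',   # hypothetical model id
    file_path='configuration.json')
print(local_path)

# With local_files_only=True the cached copy is returned without any networking;
# a ValueError is raised if the file was never downloaded.
cached = model_file_download(
    model_id='your-namespace/your-model',
    file_path='configuration.json',
    local_files_only=True)

# The raw download URL can also be formatted directly.
url = get_file_download_url('your-namespace/your-model', 'configuration.json', 'master')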
+ +import os +import subprocess +from typing import List, Optional + +from modelscope.utils.logger import get_logger + +from ..utils.constant import MASTER_MODEL_BRANCH +from .errors import GitError + +logger = get_logger() + + +class Singleton(type): + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, + cls).__call__(*args, **kwargs) + return cls._instances[cls] + + +class GitCommandWrapper(metaclass=Singleton): + """Some git operation wrapper + """ + default_git_path = 'git' # The default git command line + + def __init__(self, path: str = None): + self.git_path = path or self.default_git_path + + def _run_git_command(self, *args) -> subprocess.CompletedProcess: + """Run git command, if command return 0, return subprocess.response + otherwise raise GitError, message is stdout and stderr. + + Args: + args: List of command args. + + Raises: + GitError: Exception with stdout and stderr. + + Returns: + subprocess.CompletedProcess: the command response + """ + logger.debug(' '.join(args)) + git_env = os.environ.copy() + git_env['GIT_TERMINAL_PROMPT'] = '0' + response = subprocess.run( + [self.git_path, *args], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=git_env, + ) # compatible for python3.6 + try: + response.check_returncode() + return response + except subprocess.CalledProcessError as error: + if response.returncode == 1: + logger.info('Nothing to commit.') + return response + else: + logger.error( + 'There are error run git command, you may need to login first.' + ) + raise GitError('stdout: %s, stderr: %s' % + (response.stdout.decode('utf8'), + error.stderr.decode('utf8'))) + + def config_auth_token(self, repo_dir, auth_token): + url = self.get_repo_remote_url(repo_dir) + if '//oauth2' not in url: + auth_url = self._add_token(auth_token, url) + cmd_args = '-C %s remote set-url origin %s' % (repo_dir, auth_url) + cmd_args = cmd_args.split(' ') + rsp = self._run_git_command(*cmd_args) + logger.debug(rsp.stdout.decode('utf8')) + + def _add_token(self, token: str, url: str): + if token: + if '//oauth2' not in url: + url = url.replace('//', '//oauth2:%s@' % token) + return url + + def remove_token_from_url(self, url: str): + if url and '//oauth2' in url: + start_index = url.find('oauth2') + end_index = url.find('@') + url = url[:start_index] + url[end_index + 1:] + return url + + def is_lfs_installed(self): + cmd = ['lfs', 'env'] + try: + self._run_git_command(*cmd) + return True + except GitError: + return False + + def git_lfs_install(self, repo_dir): + cmd = ['-C', repo_dir, 'lfs', 'install'] + try: + self._run_git_command(*cmd) + return True + except GitError: + return False + + def clone(self, + repo_base_dir: str, + token: str, + url: str, + repo_name: str, + branch: Optional[str] = None): + """ git clone command wrapper. + For public project, token can None, private repo, there must token. + + Args: + repo_base_dir (str): The local base dir, the repository will be clone to local_dir/repo_name + token (str): The git token, must be provided for private project. + url (str): The remote url + repo_name (str): The local repository path name. + branch (str, optional): _description_. Defaults to None. + + Returns: + The popen response. 
+ """ + url = self._add_token(token, url) + if branch: + clone_args = '-C %s clone %s %s --branch %s' % (repo_base_dir, url, + repo_name, branch) + else: + clone_args = '-C %s clone %s' % (repo_base_dir, url) + logger.debug(clone_args) + clone_args = clone_args.split(' ') + response = self._run_git_command(*clone_args) + logger.debug(response.stdout.decode('utf8')) + return response + + def add_user_info(self, repo_base_dir, repo_name): + from modelscope.hub.api import ModelScopeConfig + user_name, user_email = ModelScopeConfig.get_user_info() + if user_name and user_email: + # config user.name and user.email if exist + config_user_name_args = '-C %s/%s config user.name %s' % ( + repo_base_dir, repo_name, user_name) + response = self._run_git_command(*config_user_name_args.split(' ')) + logger.debug(response.stdout.decode('utf8')) + config_user_email_args = '-C %s/%s config user.email %s' % ( + repo_base_dir, repo_name, user_email) + response = self._run_git_command( + *config_user_email_args.split(' ')) + logger.debug(response.stdout.decode('utf8')) + + def add(self, + repo_dir: str, + files: List[str] = list(), + all_files: bool = False): + if all_files: + add_args = '-C %s add -A' % repo_dir + elif len(files) > 0: + files_str = ' '.join(files) + add_args = '-C %s add %s' % (repo_dir, files_str) + add_args = add_args.split(' ') + rsp = self._run_git_command(*add_args) + logger.debug(rsp.stdout.decode('utf8')) + return rsp + + def commit(self, repo_dir: str, message: str): + """Run git commit command + + Args: + repo_dir (str): the repository directory. + message (str): commit message. + + Returns: + The command popen response. + """ + commit_args = ['-C', '%s' % repo_dir, 'commit', '-m', "'%s'" % message] + rsp = self._run_git_command(*commit_args) + logger.info(rsp.stdout.decode('utf8')) + return rsp + + def checkout(self, repo_dir: str, revision: str): + cmds = ['-C', '%s' % repo_dir, 'checkout', '%s' % revision] + return self._run_git_command(*cmds) + + def new_branch(self, repo_dir: str, revision: str): + cmds = ['-C', '%s' % repo_dir, 'checkout', '-b', revision] + return self._run_git_command(*cmds) + + def get_remote_branches(self, repo_dir: str): + cmds = ['-C', '%s' % repo_dir, 'branch', '-r'] + rsp = self._run_git_command(*cmds) + info = [ + line.strip() + for line in rsp.stdout.decode('utf8').strip().split(os.linesep) + ] + if len(info) == 1: + return ['/'.join(info[0].split('/')[1:])] + else: + return ['/'.join(line.split('/')[1:]) for line in info[1:]] + + def pull(self, repo_dir: str): + cmds = ['-C', repo_dir, 'pull'] + return self._run_git_command(*cmds) + + def push(self, + repo_dir: str, + token: str, + url: str, + local_branch: str, + remote_branch: str, + force: bool = False): + url = self._add_token(token, url) + + push_args = '-C %s push %s %s:%s' % (repo_dir, url, local_branch, + remote_branch) + if force: + push_args += ' -f' + push_args = push_args.split(' ') + rsp = self._run_git_command(*push_args) + logger.debug(rsp.stdout.decode('utf8')) + return rsp + + def get_repo_remote_url(self, repo_dir: str): + cmd_args = '-C %s config --get remote.origin.url' % repo_dir + cmd_args = cmd_args.split(' ') + rsp = self._run_git_command(*cmd_args) + url = rsp.stdout.decode('utf8') + return url.strip() + + def list_lfs_files(self, repo_dir: str): + cmd_args = '-C %s lfs ls-files' % repo_dir + cmd_args = cmd_args.split(' ') + rsp = self._run_git_command(*cmd_args) + out = rsp.stdout.decode('utf8').strip() + files = [] + for line in out.split(os.linesep): + 
files.append(line.split(' ')[-1]) + + return files + + def tag(self, + repo_dir: str, + tag_name: str, + message: str, + ref: str = MASTER_MODEL_BRANCH): + cmd_args = [ + '-C', repo_dir, 'tag', tag_name, '-m', + '"%s"' % message, ref + ] + rsp = self._run_git_command(*cmd_args) + logger.debug(rsp.stdout.decode('utf8')) + return rsp + + def push_tag(self, repo_dir: str, tag_name): + cmd_args = ['-C', repo_dir, 'push', 'origin', tag_name] + rsp = self._run_git_command(*cmd_args) + logger.debug(rsp.stdout.decode('utf8')) + return rsp diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py new file mode 100644 index 0000000..40d00fc --- /dev/null +++ b/modelscope/hub/repository.py @@ -0,0 +1,290 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from typing import Optional + +from modelscope.hub.errors import GitError, InvalidParameter, NotLoginException +from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, + DEFAULT_REPOSITORY_REVISION, + MASTER_MODEL_BRANCH) +from modelscope.utils.logger import get_logger + +from .git import GitCommandWrapper +from .utils.utils import get_endpoint + +logger = get_logger() + + +class Repository: + """A local representation of the model git repository. + """ + def __init__(self, + model_dir: str, + clone_from: str, + revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, + auth_token: Optional[str] = None, + git_path: Optional[str] = None): + """Instantiate a Repository object by cloning the remote ModelScopeHub repo + + Args: + model_dir (str): The model root directory. + clone_from (str): model id in ModelScope-hub from which git clone + revision (str, optional): revision of the model you want to clone from. + Can be any of a branch, tag or commit hash + auth_token (str, optional): token obtained when calling `HubApi.login()`. + Usually you can safely ignore the parameter as the token is already + saved when you login the first time, if None, we will use saved token. + git_path (str, optional): The git command line path, if None, we use 'git' + + Raises: + InvalidParameter: revision is None. + """ + self.model_dir = model_dir + self.model_base_dir = os.path.dirname(model_dir) + self.model_repo_name = os.path.basename(model_dir) + + if not revision: + err_msg = 'a non-default value of revision cannot be empty.' + raise InvalidParameter(err_msg) + + from modelscope.hub.api import ModelScopeConfig + if auth_token: + self.auth_token = auth_token + else: + self.auth_token = ModelScopeConfig.get_token() + + git_wrapper = GitCommandWrapper() + if not git_wrapper.is_lfs_installed(): + logger.error('git lfs is not installed, please install.') + + self.git_wrapper = GitCommandWrapper(git_path) + os.makedirs(self.model_dir, exist_ok=True) + url = self._get_model_id_url(clone_from) + if os.listdir(self.model_dir): # directory not empty. 
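+            # The directory already has content: if it is a clone pointing at the
+            # same remote url (with any embedded oauth2 token stripped), reuse it
+            # and skip cloning again.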
+ remote_url = self._get_remote_url() + remote_url = self.git_wrapper.remove_token_from_url(remote_url) + if remote_url and remote_url == url: # need not clone again + return + self.git_wrapper.clone(self.model_base_dir, self.auth_token, url, + self.model_repo_name, revision) + + if git_wrapper.is_lfs_installed(): + git_wrapper.git_lfs_install(self.model_dir) # init repo lfs + + # add user info if login + self.git_wrapper.add_user_info(self.model_base_dir, + self.model_repo_name) + if self.auth_token: # config remote with auth token + self.git_wrapper.config_auth_token(self.model_dir, self.auth_token) + + def _get_model_id_url(self, model_id): + url = f'{get_endpoint()}/{model_id}.git' + return url + + def _get_remote_url(self): + try: + remote = self.git_wrapper.get_repo_remote_url(self.model_dir) + except GitError: + remote = None + return remote + + def push(self, + commit_message: str, + local_branch: Optional[str] = DEFAULT_REPOSITORY_REVISION, + remote_branch: Optional[str] = DEFAULT_REPOSITORY_REVISION, + force: Optional[bool] = False): + """Push local files to remote, this method will do. + Execute git pull, git add, git commit, git push in order. + + Args: + commit_message (str): commit message + local_branch(str, optional): The local branch, default master. + remote_branch (str, optional): The remote branch to push, default master. + force (bool, optional): whether to use forced-push. + + Raises: + InvalidParameter: no commit message. + NotLoginException: no auth token. + """ + if commit_message is None or not isinstance(commit_message, str): + msg = 'commit_message must be provided!' + raise InvalidParameter(msg) + if not isinstance(force, bool): + raise InvalidParameter('force must be bool') + + if not self.auth_token: + raise NotLoginException('Must login to push, please login first.') + + self.git_wrapper.config_auth_token(self.model_dir, self.auth_token) + self.git_wrapper.add_user_info(self.model_base_dir, + self.model_repo_name) + + url = self.git_wrapper.get_repo_remote_url(self.model_dir) + self.git_wrapper.pull(self.model_dir) + + self.git_wrapper.add(self.model_dir, all_files=True) + self.git_wrapper.commit(self.model_dir, commit_message) + self.git_wrapper.push(repo_dir=self.model_dir, + token=self.auth_token, + url=url, + local_branch=local_branch, + remote_branch=remote_branch) + + def tag(self, + tag_name: str, + message: str, + ref: Optional[str] = MASTER_MODEL_BRANCH): + """Create a new tag. + + Args: + tag_name (str): The name of the tag + message (str): The tag message. + ref (str, optional): The tag reference, can be commit id or branch. + + Raises: + InvalidParameter: no commit message. + """ + if tag_name is None or tag_name == '': + msg = 'We use tag-based revision, therefore tag_name cannot be None or empty.' + raise InvalidParameter(msg) + if message is None or message == '': + msg = 'We use annotated tag, therefore message cannot None or empty.' + raise InvalidParameter(msg) + self.git_wrapper.tag(repo_dir=self.model_dir, + tag_name=tag_name, + message=message, + ref=ref) + + def tag_and_push(self, + tag_name: str, + message: str, + ref: Optional[str] = MASTER_MODEL_BRANCH): + """Create tag and push to remote + + Args: + tag_name (str): The name of the tag + message (str): The tag message. + ref (str, optional): The tag ref, can be commit id or branch. Defaults to MASTER_MODEL_BRANCH. 
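+
+        Examples (illustrative; the model id, tag name and message are hypothetical):
+            >>> repo = Repository('/tmp/models/cv_demo', clone_from='damo/cv_demo')
+            >>> repo.tag_and_push('v1.0.0', 'release v1.0.0')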
+ """ + self.tag(tag_name, message, ref) + + self.git_wrapper.push_tag(repo_dir=self.model_dir, tag_name=tag_name) + + +class DatasetRepository: + """A local representation of the dataset (metadata) git repository. + """ + def __init__(self, + repo_work_dir: str, + dataset_id: str, + revision: Optional[str] = DEFAULT_DATASET_REVISION, + auth_token: Optional[str] = None, + git_path: Optional[str] = None): + """ + Instantiate a Dataset Repository object by cloning the remote ModelScope dataset repo + + Args: + repo_work_dir (str): The dataset repo root directory. + dataset_id (str): dataset id in ModelScope from which git clone + revision (str, optional): revision of the dataset you want to clone from. + Can be any of a branch, tag or commit hash + auth_token (str, optional): token obtained when calling `HubApi.login()`. + Usually you can safely ignore the parameter as the token is + already saved when you login the first time, if None, we will use saved token. + git_path (str, optional): The git command line path, if None, we use 'git' + + Raises: + InvalidParameter: parameter invalid. + """ + self.dataset_id = dataset_id + if not repo_work_dir or not isinstance(repo_work_dir, str): + err_msg = 'dataset_work_dir must be provided!' + raise InvalidParameter(err_msg) + self.repo_work_dir = repo_work_dir.rstrip('/') + if not self.repo_work_dir: + err_msg = 'dataset_work_dir can not be root dir!' + raise InvalidParameter(err_msg) + self.repo_base_dir = os.path.dirname(self.repo_work_dir) + self.repo_name = os.path.basename(self.repo_work_dir) + + if not revision: + err_msg = 'a non-default value of revision cannot be empty.' + raise InvalidParameter(err_msg) + self.revision = revision + from modelscope.hub.api import ModelScopeConfig + if auth_token: + self.auth_token = auth_token + else: + self.auth_token = ModelScopeConfig.get_token() + + self.git_wrapper = GitCommandWrapper(git_path) + os.makedirs(self.repo_work_dir, exist_ok=True) + self.repo_url = self._get_repo_url(dataset_id=dataset_id) + + def clone(self) -> str: + # check local repo dir, directory not empty. + if os.listdir(self.repo_work_dir): + remote_url = self._get_remote_url() + remote_url = self.git_wrapper.remove_token_from_url(remote_url) + # no need clone again + if remote_url and remote_url == self.repo_url: + return '' + + logger.info('Cloning repo from {} '.format(self.repo_url)) + self.git_wrapper.clone(self.repo_base_dir, self.auth_token, + self.repo_url, self.repo_name, self.revision) + return self.repo_work_dir + + def push(self, + commit_message: str, + branch: Optional[str] = DEFAULT_DATASET_REVISION, + force: Optional[bool] = False): + """Push local files to remote, this method will do. + git pull + git add + git commit + git push + + Args: + commit_message (str): commit message + branch (str, optional): which branch to push. + force (bool, optional): whether to use forced-push. + + Raises: + InvalidParameter: no commit message. + NotLoginException: no access token. + """ + if commit_message is None or not isinstance(commit_message, str): + msg = 'commit_message must be provided!' 
+ raise InvalidParameter(msg) + + if not isinstance(force, bool): + raise InvalidParameter('force must be bool') + + if not self.auth_token: + raise NotLoginException('Must login to push, please login first.') + + self.git_wrapper.config_auth_token(self.repo_work_dir, self.auth_token) + self.git_wrapper.add_user_info(self.repo_base_dir, self.repo_name) + + remote_url = self._get_remote_url() + remote_url = self.git_wrapper.remove_token_from_url(remote_url) + + self.git_wrapper.pull(self.repo_work_dir) + self.git_wrapper.add(self.repo_work_dir, all_files=True) + self.git_wrapper.commit(self.repo_work_dir, commit_message) + self.git_wrapper.push(repo_dir=self.repo_work_dir, + token=self.auth_token, + url=remote_url, + local_branch=branch, + remote_branch=branch) + + def _get_repo_url(self, dataset_id): + return f'{get_endpoint()}/datasets/{dataset_id}.git' + + def _get_remote_url(self): + try: + remote = self.git_wrapper.get_repo_remote_url(self.repo_work_dir) + except GitError: + remote = None + return remote diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py new file mode 100644 index 0000000..3bcdc53 --- /dev/null +++ b/modelscope/hub/snapshot_download.py @@ -0,0 +1,151 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import re +import tempfile +from http.cookiejar import CookieJar +from pathlib import Path +from typing import Dict, List, Optional, Union + +from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.utils.constant import DEFAULT_MODEL_REVISION +from modelscope.utils.logger import get_logger + +from .constants import FILE_HASH +from .file_download import get_file_download_url, http_get_file +from .utils.caching import ModelFileSystemCache +from .utils.utils import (file_integrity_validation, get_cache_dir, + model_id_to_group_owner_name) + +logger = get_logger() + + +def snapshot_download(model_id: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + cache_dir: Union[str, Path, None] = None, + user_agent: Optional[Union[Dict, str]] = None, + local_files_only: Optional[bool] = False, + cookies: Optional[CookieJar] = None, + ignore_file_pattern: List = None) -> str: + """Download all files of a repo. + Downloads a whole snapshot of a repo's files at the specified revision. This + is useful when you want all files from a repo, because you don't know which + ones you will need a priori. All files are nested inside a folder in order + to keep their actual filename relative to that folder. + + An alternative would be to just clone a repo but this would require that the + user always has git and git-lfs installed, and properly configured. + + Args: + model_id (str): A user or an organization name and a repo name separated by a `/`. + revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a + commit hash. NOTE: currently only branch and tag name is supported + cache_dir (str, Path, optional): Path to the folder where cached files are stored. + user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string. + local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + cookies (CookieJar, optional): The cookie of the request, default None. + ignore_file_pattern (`str` or `List`, *optional*, default to `None`): + Any file pattern to be ignored in downloading, like exact file names or file extensions. + Raises: + ValueError: the value details. 
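+
+    Examples (illustrative; the model id and revision are hypothetical):
+        >>> from modelscope.hub.snapshot_download import snapshot_download
+        >>> model_dir = snapshot_download('damo/cv_demo', revision='v1.0.0')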
+ + Returns: + str: Local folder path (string) of repo snapshot + + Note: + Raises the following errors: + - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + if `use_auth_token=True` and the token cannot be found. + - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if + ETag cannot be determined. + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + """ + + if cache_dir is None: + cache_dir = get_cache_dir() + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + temporary_cache_dir = os.path.join(cache_dir, 'temp') + os.makedirs(temporary_cache_dir, exist_ok=True) + + group_or_owner, name = model_id_to_group_owner_name(model_id) + + cache = ModelFileSystemCache(cache_dir, group_or_owner, name) + if local_files_only: + if len(cache.cached_files) == 0: + raise ValueError( + 'Cannot find the requested files in the cached path and outgoing' + ' traffic has been disabled. To enable model look-ups and downloads' + " online, set 'local_files_only' to False.") + logger.warning( + 'We can not confirm the cached file is for revision: %s' % + revision) + return cache.get_root_location( + ) # we can not confirm the cached file is for snapshot 'revision' + else: + # make headers + headers = { + 'user-agent': + ModelScopeConfig.get_user_agent(user_agent=user_agent, ) + } + _api = HubApi() + if cookies is None: + cookies = ModelScopeConfig.get_cookies() + revision = _api.get_valid_revision(model_id, + revision=revision, + cookies=cookies) + + snapshot_header = headers if 'CI_TEST' in os.environ else { + **headers, + **{ + 'Snapshot': 'True' + } + } + model_files = _api.get_model_files( + model_id=model_id, + revision=revision, + recursive=True, + use_cookies=False if cookies is None else cookies, + headers=snapshot_header, + ) + + if ignore_file_pattern is None: + ignore_file_pattern = [] + if isinstance(ignore_file_pattern, str): + ignore_file_pattern = [ignore_file_pattern] + + with tempfile.TemporaryDirectory( + dir=temporary_cache_dir) as temp_cache_dir: + for model_file in model_files: + if model_file['Type'] == 'tree' or \ + any([re.search(pattern, model_file['Name']) is not None for pattern in ignore_file_pattern]): + continue + # check model_file is exist in cache, if existed, skip download, otherwise download + if cache.exists(model_file): + file_name = os.path.basename(model_file['Name']) + logger.info( + f'File {file_name} already in cache, skip downloading!' + ) + continue + + # get download url + url = get_file_download_url(model_id=model_id, + file_path=model_file['Path'], + revision=revision) + + # First download to /tmp + http_get_file(url=url, + local_dir=temp_cache_dir, + file_name=model_file['Name'], + headers=headers, + cookies=cookies) + # check file integrity + temp_file = os.path.join(temp_cache_dir, model_file['Name']) + if FILE_HASH in model_file: + file_integrity_validation(temp_file, model_file[FILE_HASH]) + # put file to cache + cache.put_file(model_file, temp_file) + + return os.path.join(cache.get_root_location()) diff --git a/modelscope/hub/utils/__init__.py b/modelscope/hub/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py new file mode 100644 index 0000000..4d10c9f --- /dev/null +++ b/modelscope/hub/utils/caching.py @@ -0,0 +1,290 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
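+# Rough sketch of the on-disk layout maintained below (the cache root and model id are
+# assumptions; only the '.msc' index name comes from FileSystemCache.KEY_FILE_NAME):
+#   <cache_root>/<owner>/<model_name>/.msc         pickled list of cached file keys
+#   <cache_root>/<owner>/<model_name>/<file path>  the cached model files themselves
+#   plus a small pickled meta file recording the model id (MODEL_META_FILE_NAME).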
+ +import hashlib +import os +import pickle +import tempfile +from shutil import move, rmtree + +from modelscope.hub.constants import MODEL_META_FILE_NAME, MODEL_META_MODEL_ID +from modelscope.utils.logger import get_logger + +logger = get_logger() +"""Implements caching functionality, used internally only +""" + + +class FileSystemCache(object): + KEY_FILE_NAME = '.msc' + """Local file cache. + """ + def __init__( + self, + cache_root_location: str, + **kwargs, + ): + """Base file system cache interface. + + Args: + cache_root_location (str): The root location to store files. + kwargs(dict): The keyword arguments. + """ + os.makedirs(cache_root_location, exist_ok=True) + self.cache_root_location = cache_root_location + self.load_cache() + + def get_root_location(self): + return self.cache_root_location + + def load_cache(self): + self.cached_files = [] + cache_keys_file_path = os.path.join(self.cache_root_location, + FileSystemCache.KEY_FILE_NAME) + if os.path.exists(cache_keys_file_path): + with open(cache_keys_file_path, 'rb') as f: + self.cached_files = pickle.load(f) + + def save_cached_files(self): + """Save cache metadata.""" + # save new meta to tmp and move to KEY_FILE_NAME + cache_keys_file_path = os.path.join(self.cache_root_location, + FileSystemCache.KEY_FILE_NAME) + # TODO: Sync file write + fd, fn = tempfile.mkstemp() + with open(fd, 'wb') as f: + pickle.dump(self.cached_files, f) + move(fn, cache_keys_file_path) + + def get_file(self, key): + """Check the key is in the cache, if exist, return the file, otherwise return None. + + Args: + key(str): The cache key. + + Raises: + None + """ + pass + + def put_file(self, key, location): + """Put file to the cache. + + Args: + key (str): The cache key + location (str): Location of the file, we will move the file to cache. + + Raises: + None + """ + pass + + def remove_key(self, key): + """Remove cache key in index, The file is removed manually + + Args: + key (dict): The cache key. + """ + if key in self.cached_files: + self.cached_files.remove(key) + self.save_cached_files() + + def exists(self, key): + for cache_file in self.cached_files: + if cache_file == key: + return True + + return False + + def clear_cache(self): + """Remove all files and metadata from the cache + In the case of multiple cache locations, this clears only the last one, + which is assumed to be the read/write one. + """ + rmtree(self.cache_root_location) + self.load_cache() + + def hash_name(self, key): + return hashlib.sha256(key.encode()).hexdigest() + + +class ModelFileSystemCache(FileSystemCache): + """Local cache file layout + cache_root/owner/model_name/individual cached files and cache index file '.mcs' + Save only one version for each file. + """ + def __init__(self, cache_root, owner=None, name=None): + """Put file to the cache + Args: + cache_root(`str`): The modelscope local cache root(default: ~/.cache/modelscope/) + owner(`str`): The model owner. 
+ name('str'): The name of the model + Returns: + Raises: + None + + model_id = {owner}/{name} + + """ + if owner is None or name is None: + # get model meta from + super().__init__(os.path.join(cache_root)) + self.load_model_meta() + else: + super().__init__(os.path.join(cache_root, owner, name)) + self.model_meta = {MODEL_META_MODEL_ID: '%s/%s' % (owner, name)} + self.save_model_meta() + + def load_model_meta(self): + meta_file_path = os.path.join(self.cache_root_location, + MODEL_META_FILE_NAME) + if os.path.exists(meta_file_path): + with open(meta_file_path, 'rb') as f: + self.model_meta = pickle.load(f) + else: + self.model_meta = {MODEL_META_MODEL_ID: 'unknown'} + + def get_model_id(self): + return self.model_meta[MODEL_META_MODEL_ID] + + def save_model_meta(self): + meta_file_path = os.path.join(self.cache_root_location, + MODEL_META_FILE_NAME) + with open(meta_file_path, 'wb') as f: + pickle.dump(self.model_meta, f) + + def get_file_by_path(self, file_path): + """Retrieve the cache if there is file match the path. + + Args: + file_path (str): The file path in the model. + + Returns: + path: the full path of the file. + """ + for cached_file in self.cached_files: + if file_path == cached_file['Path']: + cached_file_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(cached_file_path): + return cached_file_path + else: + self.remove_key(cached_file) + + return None + + def get_file_by_path_and_commit_id(self, file_path, commit_id): + """Retrieve the cache if there is file match the path. + + Args: + file_path (str): The file path in the model. + commit_id (str): The commit id of the file + + Returns: + path: the full path of the file. + """ + for cached_file in self.cached_files: + if file_path == cached_file['Path'] and \ + (cached_file['Revision'].startswith(commit_id) or commit_id.startswith(cached_file['Revision'])): + cached_file_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(cached_file_path): + return cached_file_path + else: + self.remove_key(cached_file) + + return None + + def get_file_by_info(self, model_file_info): + """Check if exist cache file. + + Args: + model_file_info (ModelFileInfo): The file information of the file. + + Returns: + str: The file path. + """ + cache_key = self.__get_cache_key(model_file_info) + for cached_file in self.cached_files: + if cached_file == cache_key: + orig_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(orig_path): + return orig_path + else: + self.remove_key(cached_file) + break + + return None + + def __get_cache_key(self, model_file_info): + cache_key = { + 'Path': model_file_info['Path'], + 'Revision': model_file_info['Revision'], # commit id + } + return cache_key + + def exists(self, model_file_info): + """Check the file is cached or not. 
+ + Args: + model_file_info (CachedFileInfo): The cached file info + + Returns: + bool: If exists return True otherwise False + """ + key = self.__get_cache_key(model_file_info) + is_exists = False + for cached_key in self.cached_files: + if cached_key['Path'] == key['Path'] and ( + cached_key['Revision'].startswith(key['Revision']) + or key['Revision'].startswith(cached_key['Revision'])): + is_exists = True + break + file_path = os.path.join(self.cache_root_location, + model_file_info['Path']) + if is_exists: + if os.path.exists(file_path): + return True + else: + self.remove_key( + model_file_info) # someone may manual delete the file + return False + + def remove_if_exists(self, model_file_info): + """We in cache, remove it. + + Args: + model_file_info (ModelFileInfo): The model file information from server. + """ + for cached_file in self.cached_files: + if cached_file['Path'] == model_file_info['Path']: + self.remove_key(cached_file) + file_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(file_path): + os.remove(file_path) + break + + def put_file(self, model_file_info, model_file_location): + """Put model on model_file_location to cache, the model first download to /tmp, and move to cache. + + Args: + model_file_info (str): The file description returned by get_model_files. + model_file_location (str): The location of the temporary file. + + Returns: + str: The location of the cached file. + """ + self.remove_if_exists(model_file_info) # backup old revision + cache_key = self.__get_cache_key(model_file_info) + cache_full_path = os.path.join( + self.cache_root_location, + cache_key['Path']) # Branch and Tag do not have same name. + cache_file_dir = os.path.dirname(cache_full_path) + if not os.path.exists(cache_file_dir): + os.makedirs(cache_file_dir, exist_ok=True) + # We can't make operation transaction + move(model_file_location, cache_full_path) + self.cached_files.append(cache_key) + self.save_cached_files() + return cache_full_path diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py new file mode 100644 index 0000000..31e6e72 --- /dev/null +++ b/modelscope/hub/utils/utils.py @@ -0,0 +1,94 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import hashlib +import os +from datetime import datetime +from typing import Optional + +import requests + +from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, + DEFAULT_MODELSCOPE_GROUP, + MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, + MODELSCOPE_URL_SCHEME) +from modelscope.hub.errors import FileIntegrityError +from modelscope.utils.file_utils import get_default_cache_dir +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def model_id_to_group_owner_name(model_id): + if MODEL_ID_SEPARATOR in model_id: + group_or_owner = model_id.split(MODEL_ID_SEPARATOR)[0] + name = model_id.split(MODEL_ID_SEPARATOR)[1] + else: + group_or_owner = DEFAULT_MODELSCOPE_GROUP + name = model_id + return group_or_owner, name + + +def get_cache_dir(model_id: Optional[str] = None): + """cache dir precedence: + function parameter > environment > ~/.cache/modelscope/hub + + Args: + model_id (str, optional): The model id. + + Returns: + str: the model_id dir if model_id not None, otherwise cache root dir. 
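+
+    Examples (illustrative; the model id is hypothetical and the actual root
+        depends on the MODELSCOPE_CACHE environment variable):
+        >>> get_cache_dir()                # e.g. ~/.cache/modelscope/hub
+        >>> get_cache_dir('damo/cv_demo')  # e.g. ~/.cache/modelscope/hub/damo/cv_demo/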
+ """ + default_cache_dir = get_default_cache_dir() + base_path = os.getenv('MODELSCOPE_CACHE', + os.path.join(default_cache_dir, 'hub')) + return base_path if model_id is None else os.path.join( + base_path, model_id + '/') + + +def get_release_datetime(): + if MODELSCOPE_SDK_DEBUG in os.environ: + rt = int(round(datetime.now().timestamp())) + else: + from modelscope import version + rt = int( + round( + datetime.strptime(version.__release_datetime__, + '%Y-%m-%d %H:%M:%S').timestamp())) + return rt + + +def get_endpoint(): + modelscope_domain = os.getenv('MODELSCOPE_DOMAIN', + DEFAULT_MODELSCOPE_DOMAIN) + return MODELSCOPE_URL_SCHEME + modelscope_domain + + +def compute_hash(file_path): + BUFFER_SIZE = 1024 * 64 # 64k buffer size + sha256_hash = hashlib.sha256() + with open(file_path, 'rb') as f: + while True: + data = f.read(BUFFER_SIZE) + if not data: + break + sha256_hash.update(data) + return sha256_hash.hexdigest() + + +def file_integrity_validation(file_path, expected_sha256): + """Validate the file hash is expected, if not, delete the file + + Args: + file_path (str): The file to validate + expected_sha256 (str): The expected sha256 hash + + Raises: + FileIntegrityError: If file_path hash is not expected. + + """ + file_sha256 = compute_hash(file_path) + if not file_sha256 == expected_sha256: + os.remove(file_path) + msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path + logger.error(msg) + raise FileIntegrityError(msg) diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py new file mode 100644 index 0000000..dccbf77 --- /dev/null +++ b/modelscope/metainfo.py @@ -0,0 +1,299 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.utils.constant import Fields, Tasks + + +class Models(object): + """ Names for different models. + + Holds the standard model name to use for identifying different model. + This should be used to register models. + + Model name should only contain model information but not task information. + """ + tinynas_damoyolo = 'tinynas-damoyolo' + # face models + scrfd = 'scrfd' + face_2d_keypoints = 'face-2d-keypoints' + fer = 'fer' + fairface = 'fairface' + retinaface = 'retinaface' + mogface = 'mogface' + mtcnn = 'mtcnn' + ulfd = 'ulfd' + rts = 'rts' + flir = 'flir' + arcface = 'arcface' + facemask = 'facemask' + flc = 'flc' + tinymog = 'tinymog' + damofd = 'damofd' + + +class TaskModels(object): + pass +class Heads(object): + pass + +class Pipelines(object): + """ Names for different pipelines. + + Holds the standard pipline name to use for identifying different pipeline. + This should be used to register pipelines. + + For pipeline which support different models and implements the common function, we + should use task name for this pipeline. + For pipeline which suuport only one model, we should use ${Model}-${Task} as its name. 
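+
+    e.g. the single-model retinaface detector below is registered under the name
+    'resnet50-face-detection-retinaface'.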
+ """ + # vision tasks + face_2d_keypoints = 'mobilenet_face-2d-keypoints_alignment' + salient_detection = 'u2net-salient-detection' + salient_boudary_detection = 'res2net-salient-detection' + camouflaged_detection = 'res2net-camouflaged-detection' + image_demoire = 'uhdm-image-demoireing' + image_classification = 'image-classification' + face_detection = 'resnet-face-detection-scrfd10gkps' + face_liveness_ir = 'manual-face-liveness-flir' + face_liveness_rgb = 'manual-face-liveness-flir' + face_liveness_xc = 'manual-face-liveness-flxc' + card_detection = 'resnet-card-detection-scrfd34gkps' + ulfd_face_detection = 'manual-face-detection-ulfd' + tinymog_face_detection = 'manual-face-detection-tinymog' + facial_expression_recognition = 'vgg19-facial-expression-recognition-fer' + facial_landmark_confidence = 'manual-facial-landmark-confidence-flcm' + face_attribute_recognition = 'resnet34-face-attribute-recognition-fairface' + retina_face_detection = 'resnet50-face-detection-retinaface' + mog_face_detection = 'resnet101-face-detection-cvpr22papermogface' + mtcnn_face_detection = 'manual-face-detection-mtcnn' + face_recognition = 'ir101-face-recognition-cfglint' + face_recognition_ood = 'ir-face-recognition-ood-rts' + face_quality_assessment = 'manual-face-quality-assessment-fqa' + face_recognition_ood = 'ir-face-recognition-rts' + face_recognition_onnx_ir = 'manual-face-recognition-frir' + face_recognition_onnx_fm = 'manual-face-recognition-frfm' + arc_face_recognition = 'ir50-face-recognition-arcface' + mask_face_recognition = 'resnet-face-recognition-facemask' + + +DEFAULT_MODEL_FOR_PIPELINE = { + # TaskName: (pipeline_module_name, model_repo) + Tasks.face_detection: + (Pipelines.mog_face_detection, + 'damo/cv_resnet101_face-detection_cvpr22papermogface'), + Tasks.face_liveness: (Pipelines.face_liveness_ir, + 'damo/cv_manual_face-liveness_flir'), + Tasks.face_recognition: (Pipelines.face_recognition, + 'damo/cv_ir101_facerecognition_cfglint'), + Tasks.facial_expression_recognition: + (Pipelines.facial_expression_recognition, + 'damo/cv_vgg19_facial-expression-recognition_fer'), + Tasks.face_attribute_recognition: + (Pipelines.face_attribute_recognition, + 'damo/cv_resnet34_face-attribute-recognition_fairface'), + Tasks.face_2d_keypoints: (Pipelines.face_2d_keypoints, + 'damo/cv_mobilenet_face-2d-keypoints_alignment'), + Tasks.face_quality_assessment: + (Pipelines.face_quality_assessment, + 'damo/cv_manual_face-quality-assessment_fqa'), +} +class CVTrainers(object): + face_detection_scrfd = 'face-detection-scrfd' + + +class Trainers(CVTrainers): + """ Names for different trainer. + + Holds the standard trainer name to use for identifying different trainer. + This should be used to register trainers. + + For a general Trainer, you can use EpochBasedTrainer. + For a model specific Trainer, you can use ${ModelName}-${Task}-trainer. 
+ """ + + default = 'trainer' + easycv = 'easycv' + tinynas_damoyolo = 'tinynas-damoyolo' + + @staticmethod + def get_trainer_domain(attribute_or_value): + if attribute_or_value in vars( + CVTrainers) or attribute_or_value in vars(CVTrainers).values(): + return Fields.cv + elif attribute_or_value in vars( + NLPTrainers) or attribute_or_value in vars( + NLPTrainers).values(): + return Fields.nlp + elif attribute_or_value in vars( + AudioTrainers) or attribute_or_value in vars( + AudioTrainers).values(): + return Fields.audio + elif attribute_or_value in vars( + MultiModalTrainers) or attribute_or_value in vars( + MultiModalTrainers).values(): + return Fields.multi_modal + elif attribute_or_value == Trainers.default: + return Trainers.default + elif attribute_or_value == Trainers.easycv: + return Trainers.easycv + else: + return 'unknown' + + +class Preprocessors(object): + """ Names for different preprocessor. + + Holds the standard preprocessor name to use for identifying different preprocessor. + This should be used to register preprocessors. + + For a general preprocessor, just use the function name as preprocessor name such as + resize-image, random-crop + For a model-specific preprocessor, use ${modelname}-${fuction} + """ + + # cv preprocessor + load_image = 'load-image' + image_denoise_preprocessor = 'image-denoise-preprocessor' + image_deblur_preprocessor = 'image-deblur-preprocessor' + object_detection_tinynas_preprocessor = 'object-detection-tinynas-preprocessor' + image_classification_mmcv_preprocessor = 'image-classification-mmcv-preprocessor' + image_color_enhance_preprocessor = 'image-color-enhance-preprocessor' + image_instance_segmentation_preprocessor = 'image-instance-segmentation-preprocessor' + image_driving_perception_preprocessor = 'image-driving-perception-preprocessor' + image_portrait_enhancement_preprocessor = 'image-portrait-enhancement-preprocessor' + image_quality_assessment_mos_preprocessor = 'image-quality_assessment-mos-preprocessor' + video_summarization_preprocessor = 'video-summarization-preprocessor' + movie_scene_segmentation_preprocessor = 'movie-scene-segmentation-preprocessor' + image_classification_bypass_preprocessor = 'image-classification-bypass-preprocessor' + object_detection_scrfd = 'object-detection-scrfd' + + + +class Metrics(object): + """ Names for different metrics. 
+ """ + + # accuracy + accuracy = 'accuracy' + + multi_average_precision = 'mAP' + audio_noise_metric = 'audio-noise-metric' + PPL = 'ppl' + + # text gen + BLEU = 'bleu' + + # metrics for image denoise task + image_denoise_metric = 'image-denoise-metric' + # metrics for video frame-interpolation task + video_frame_interpolation_metric = 'video-frame-interpolation-metric' + # metrics for real-world video super-resolution task + video_super_resolution_metric = 'video-super-resolution-metric' + + # metric for image instance segmentation task + image_ins_seg_coco_metric = 'image-ins-seg-coco-metric' + # metrics for sequence classification task + seq_cls_metric = 'seq-cls-metric' + # loss metric + loss_metric = 'loss-metric' + # metrics for token-classification task + token_cls_metric = 'token-cls-metric' + # metrics for text-generation task + text_gen_metric = 'text-gen-metric' + # file saving wrapper + prediction_saving_wrapper = 'prediction-saving-wrapper' + # metrics for image-color-enhance task + image_color_enhance_metric = 'image-color-enhance-metric' + # metrics for image-portrait-enhancement task + image_portrait_enhancement_metric = 'image-portrait-enhancement-metric' + video_summarization_metric = 'video-summarization-metric' + # metric for movie-scene-segmentation task + movie_scene_segmentation_metric = 'movie-scene-segmentation-metric' + # metric for inpainting task + image_inpainting_metric = 'image-inpainting-metric' + # metric for ocr + NED = 'ned' + # metric for cross-modal retrieval + inbatch_recall = 'inbatch_recall' + # metric for referring-video-object-segmentation task + referring_video_object_segmentation_metric = 'referring-video-object-segmentation-metric' + # metric for video stabilization task + video_stabilization_metric = 'video-stabilization-metric' + # metirc for image-quality-assessment-mos task + image_quality_assessment_mos_metric = 'image-quality-assessment-mos-metric' + # metirc for image-quality-assessment-degradation task + image_quality_assessment_degradation_metric = 'image-quality-assessment-degradation-metric' + # metric for text-ranking task + text_ranking_metric = 'text-ranking-metric' + + +class Optimizers(object): + """ Names for different OPTIMIZER. + + Holds the standard optimizer name to use for identifying different optimizer. + This should be used to register optimizer. + """ + + default = 'optimizer' + + SGD = 'SGD' + + +class Hooks(object): + """ Names for different hooks. 
+ + All kinds of hooks are defined here + """ + # lr + LrSchedulerHook = 'LrSchedulerHook' + PlateauLrSchedulerHook = 'PlateauLrSchedulerHook' + NoneLrSchedulerHook = 'NoneLrSchedulerHook' + + # optimizer + OptimizerHook = 'OptimizerHook' + TorchAMPOptimizerHook = 'TorchAMPOptimizerHook' + ApexAMPOptimizerHook = 'ApexAMPOptimizerHook' + NoneOptimizerHook = 'NoneOptimizerHook' + + # checkpoint + CheckpointHook = 'CheckpointHook' + BestCkptSaverHook = 'BestCkptSaverHook' + LoadCheckpointHook = 'LoadCheckpointHook' + + # logger + TextLoggerHook = 'TextLoggerHook' + TensorboardHook = 'TensorboardHook' + + IterTimerHook = 'IterTimerHook' + EvaluationHook = 'EvaluationHook' + + # Compression + SparsityHook = 'SparsityHook' + + # CLIP logit_scale clamp + ClipClampLogitScaleHook = 'ClipClampLogitScaleHook' + + # train + EarlyStopHook = 'EarlyStopHook' + DeepspeedHook = 'DeepspeedHook' + + +class LR_Schedulers(object): + """learning rate scheduler is defined here + + """ + LinearWarmup = 'LinearWarmup' + ConstantWarmup = 'ConstantWarmup' + ExponentialWarmup = 'ExponentialWarmup' + + +class Datasets(object): + """ Names for different datasets. + """ + ClsDataset = 'ClsDataset' + Face2dKeypointsDataset = 'FaceKeypointDataset' + HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset' + HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset' + SegDataset = 'SegDataset' + DetDataset = 'DetDataset' + DetImagesMixDataset = 'DetImagesMixDataset' + PanopticDataset = 'PanopticDataset' + PairedDataset = 'PairedDataset' diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py new file mode 100644 index 0000000..aa321f3 --- /dev/null +++ b/modelscope/models/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.error import (AUDIO_IMPORT_ERROR, + TENSORFLOW_IMPORT_WARNING) +from modelscope.utils.import_utils import is_torch_available + +from . import cv +from .base import Head, Model +from .builder import BACKBONES, HEADS, MODELS, build_model + +if is_torch_available(): + from .base.base_torch_model import TorchModel + from .base.base_torch_head import TorchHead diff --git a/modelscope/models/base/__init__.py b/modelscope/models/base/__init__.py new file mode 100644 index 0000000..33e45be --- /dev/null +++ b/modelscope/models/base/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from modelscope.utils.import_utils import is_torch_available + +from .base_head import * # noqa F403 +from .base_model import * # noqa F403 + +if is_torch_available(): + from .base_torch_model import TorchModel + from .base_torch_head import TorchHead diff --git a/modelscope/models/base/base_head.py b/modelscope/models/base/base_head.py new file mode 100644 index 0000000..a56e051 --- /dev/null +++ b/modelscope/models/base/base_head.py @@ -0,0 +1,39 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from abc import ABC, abstractmethod +from typing import Any, Dict, Union + +from modelscope.models.base.base_model import Model +from modelscope.utils.config import ConfigDict +from modelscope.utils.logger import get_logger + +logger = get_logger() + +Tensor = Union['torch.Tensor', 'tf.Tensor'] +Input = Union[Dict[str, Tensor], Model] + + +class Head(ABC): + """The head base class is for the tasks head method definition + + """ + def __init__(self, **kwargs): + self.config = ConfigDict(kwargs) + + @abstractmethod + def forward(self, *args, **kwargs) -> Dict[str, Any]: + """ + This method will use the output from backbone model to do any + downstream tasks. Receive The output from backbone model. + + Returns (Dict[str, Any]): The output from downstream task. + """ + pass + + @abstractmethod + def compute_loss(self, *args, **kwargs) -> Dict[str, Any]: + """ + compute loss for head during the finetuning. + + Returns (Dict[str, Any]): The loss dict + """ + pass diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py new file mode 100644 index 0000000..bb1e021 --- /dev/null +++ b/modelscope/models/base/base_model.py @@ -0,0 +1,167 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import os.path as osp +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from modelscope.hub.check_model import check_local_model_is_latest +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models.builder import build_model +from modelscope.utils.config import Config +from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile +from modelscope.utils.device import verify_device +from modelscope.utils.logger import get_logger + +logger = get_logger() + +Tensor = Union['torch.Tensor', 'tf.Tensor'] + + +class Model(ABC): + """Base model interface. + """ + def __init__(self, model_dir, *args, **kwargs): + self.model_dir = model_dir + device_name = kwargs.get('device', 'gpu') + verify_device(device_name) + self._device_name = device_name + + def __call__(self, *args, **kwargs) -> Dict[str, Any]: + return self.postprocess(self.forward(*args, **kwargs)) + + @abstractmethod + def forward(self, *args, **kwargs) -> Dict[str, Any]: + """ + Run the forward pass for a model. + + Returns: + Dict[str, Any]: output from the model forward pass + """ + pass + + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """ Model specific postprocess and convert model output to + standard model outputs. + + Args: + inputs: input data + + Return: + dict of results: a dict containing outputs of model, each + output should have the standard output name. + """ + return inputs + + @classmethod + def _instantiate(cls, **kwargs): + """ Define the instantiation method of a model,default method is by + calling the constructor. Note that in the case of no loading model + process in constructor of a task model, a load_model method is + added, and thus this method is overloaded + """ + return cls(**kwargs) + + @classmethod + def from_pretrained(cls, + model_name_or_path: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + cfg_dict: Config = None, + device: str = None, + **kwargs): + """Instantiate a model from local directory or remote model repo. Note + that when loading from remote, the model revision can be specified. 
+ + Args: + model_name_or_path(str): A model dir or a model id to be loaded + revision(str, `optional`): The revision used when the model_name_or_path is + a model id of the remote hub. default `master`. + cfg_dict(Config, `optional`): An optional model config. If provided, it will replace + the config read out of the `model_name_or_path` + device(str, `optional`): The device to load the model. + **kwargs: + task(str, `optional`): The `Tasks` enumeration value to replace the task value + read out of config in the `model_name_or_path`. This is useful when the model to be loaded is not + equal to the model saved. + For example, load a `backbone` into a `text-classification` model. + Other kwargs will be directly fed into the `model` key, to replace the default configs. + Returns: + A model instance. + + Examples: + >>> from modelscope.models import Model + >>> Model.from_pretrained('damo/nlp_structbert_backbone_base_std', task='text-classification') + """ + prefetched = kwargs.get('model_prefetched') + if prefetched is not None: + kwargs.pop('model_prefetched') + invoked_by = kwargs.get(Invoke.KEY) + if invoked_by is not None: + kwargs.pop(Invoke.KEY) + else: + invoked_by = Invoke.PRETRAINED + + if osp.exists(model_name_or_path): + local_model_dir = model_name_or_path + else: + if prefetched is True: + raise RuntimeError( + 'Expecting model is pre-fetched locally, but is not found.' + ) + + invoked_by = '%s/%s' % (Invoke.KEY, invoked_by) + local_model_dir = snapshot_download(model_name_or_path, + revision, + user_agent=invoked_by) + logger.info(f'initialize model from {local_model_dir}') + if cfg_dict is not None: + cfg = cfg_dict + else: + cfg = Config.from_file( + osp.join(local_model_dir, ModelFile.CONFIGURATION)) + task_name = cfg.task + if 'task' in kwargs: + task_name = kwargs.pop('task') + model_cfg = cfg.model + if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): + model_cfg.type = model_cfg.model_type + model_cfg.model_dir = local_model_dir + for k, v in kwargs.items(): + model_cfg[k] = v + if device is not None: + model_cfg.device = device + model = build_model(model_cfg, task_name=task_name) + else: + model = build_model(model_cfg, task_name=task_name) + + # dynamically add pipeline info to model for pipeline inference + if hasattr(cfg, 'pipeline'): + model.pipeline = cfg.pipeline + + if not hasattr(model, 'cfg'): + model.cfg = cfg + + model_cfg.pop('model_dir', None) + model.name = model_name_or_path + model.model_dir = local_model_dir + return model + + def save_pretrained(self, + target_folder: Union[str, os.PathLike], + save_checkpoint_names: Union[str, List[str]] = None, + config: Optional[dict] = None, + **kwargs): + """save the pretrained model, its configuration and other related files to a directory, + so that it can be re-loaded + + Args: + target_folder (Union[str, os.PathLike]): + Directory to which to save. Will be created if it doesn't exist. + + save_checkpoint_names (Union[str, List[str]]): + The checkpoint names to be saved in the target_folder + + config (Optional[dict], optional): + The config for the configuration.json, might not be identical with model.config + """ + raise NotImplementedError( + 'save_pretrained method need to be implemented by the subclass.') diff --git a/modelscope/models/base/base_torch_head.py b/modelscope/models/base/base_torch_head.py new file mode 100644 index 0000000..f8fb94f --- /dev/null +++ b/modelscope/models/base/base_torch_head.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
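+# Minimal sketch (class, field and tensor names are hypothetical) of a concrete head
+# built on the TorchHead base defined below; it only illustrates the expected
+# dict-in / dict-out contract of `forward` and `compute_loss`:
+#
+#   class LinearClsHead(TorchHead):
+#       def __init__(self, in_features=768, num_classes=2, **kwargs):
+#           super().__init__(in_features=in_features, num_classes=num_classes, **kwargs)
+#           self.fc = torch.nn.Linear(in_features, num_classes)
+#
+#       def forward(self, features):
+#           return {'logits': self.fc(features)}
+#
+#       def compute_loss(self, outputs, labels):
+#           return {'loss': torch.nn.functional.cross_entropy(outputs['logits'], labels)}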
+from typing import Any, Dict + +import torch + +from modelscope.models.base.base_head import Head +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class TorchHead(Head, torch.nn.Module): + """ Base head interface for pytorch + + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + torch.nn.Module.__init__(self) + + def forward(self, *args, **kwargs) -> Dict[str, Any]: + raise NotImplementedError + + def compute_loss(self, *args, **kwargs) -> Dict[str, Any]: + raise NotImplementedError diff --git a/modelscope/models/base/base_torch_model.py b/modelscope/models/base/base_torch_model.py new file mode 100644 index 0000000..48c6de8 --- /dev/null +++ b/modelscope/models/base/base_torch_model.py @@ -0,0 +1,128 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from copy import deepcopy +from typing import Any, Callable, Dict, List, Optional, Union + +import torch +from torch import nn +from torch.nn.parallel import DataParallel, DistributedDataParallel + +from modelscope.utils.checkpoint import (save_checkpoint, save_configuration, + save_pretrained) +from modelscope.utils.file_utils import func_receive_dict_inputs +from modelscope.utils.logger import get_logger + +from .base_model import Model + +logger = get_logger() + + +class TorchModel(Model, torch.nn.Module): + """ Base model interface for pytorch + + """ + def __init__(self, model_dir=None, *args, **kwargs): + super().__init__(model_dir, *args, **kwargs) + torch.nn.Module.__init__(self) + + def __call__(self, *args, **kwargs) -> Dict[str, Any]: + # Adapting a model with only one dict arg, and the arg name must be input or inputs + if func_receive_dict_inputs(self.forward): + return self.postprocess(self.forward(args[0], **kwargs)) + else: + return self.postprocess(self.forward(*args, **kwargs)) + + def _load_pretrained(self, + net, + load_path, + strict=True, + param_key='params'): + if isinstance(net, (DataParallel, DistributedDataParallel)): + net = net.module + load_net = torch.load(load_path, + map_location=lambda storage, loc: storage) + if param_key is not None: + if param_key not in load_net and 'params' in load_net: + param_key = 'params' + logger.info( + f'Loading: {param_key} does not exist, use params.') + if param_key in load_net: + load_net = load_net[param_key] + logger.info( + f'Loading {net.__class__.__name__} model from {load_path}, with param key: [{param_key}].' + ) + # remove unnecessary 'module.' + for k, v in deepcopy(load_net).items(): + if k.startswith('module.'): + load_net[k[7:]] = v + load_net.pop(k) + net.load_state_dict(load_net, strict=strict) + logger.info('load model done.') + return net + + def forward(self, *args, **kwargs) -> Dict[str, Any]: + raise NotImplementedError + + def post_init(self): + """ + A method executed at the end of each model initialization, to execute code that needs the model's + modules properly initialized (such as weight initialization). 
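+
+        A subclass would typically call `self.post_init()` at the end of its own
+        `__init__` (an illustrative convention, not enforced by this base class).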
+ """ + self.init_weights() + + def init_weights(self): + # Initialize weights + self.apply(self._init_weights) + + def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, nn.Linear): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=0.02) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Embedding): + module.weight.data.normal_(mean=0.0, std=0.02) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + def save_pretrained(self, + target_folder: Union[str, os.PathLike], + save_checkpoint_names: Union[str, List[str]] = None, + save_function: Callable = save_checkpoint, + config: Optional[dict] = None, + save_config_function: Callable = save_configuration, + **kwargs): + """save the pretrained model, its configuration and other related files to a directory, + so that it can be re-loaded + + Args: + target_folder (Union[str, os.PathLike]): + Directory to which to save. Will be created if it doesn't exist. + + save_checkpoint_names (Union[str, List[str]]): + The checkpoint names to be saved in the target_folder + + save_function (Callable, optional): + The function to use to save the state dictionary. + + config (Optional[dict], optional): + The config for the configuration.json, might not be identical with model.config + + save_config_function (Callble, optional): + The function to use to save the configuration. + + """ + if config is None and hasattr(self, 'cfg'): + config = self.cfg + + save_pretrained(self, target_folder, save_checkpoint_names, + save_function, **kwargs) + + if config is not None: + save_config_function(target_folder, config) diff --git a/modelscope/models/builder.py b/modelscope/models/builder.py new file mode 100644 index 0000000..da49472 --- /dev/null +++ b/modelscope/models/builder.py @@ -0,0 +1,98 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import Models +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import Tasks +from modelscope.utils.import_utils import INDEX_KEY, LazyImportModule +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import Registry, build_from_cfg +from modelscope.utils.task_utils import get_task_by_subtask_name + +logger = get_logger() + +MODELS = Registry('models') +BACKBONES = MODELS +HEADS = Registry('heads') + +modules = LazyImportModule.AST_INDEX[INDEX_KEY] +for module_index in list(modules.keys()): + if module_index[1] == Tasks.backbone and module_index[0] == 'BACKBONES': + modules[(MODELS.name.upper(), module_index[1], + module_index[2])] = modules[module_index] + + +def build_model(cfg: ConfigDict, + task_name: str = None, + default_args: dict = None): + """ build model given model config dict + + Args: + cfg (:obj:`ConfigDict`): config dict for model object. + task_name (str, optional): task name, refer to + :obj:`Tasks` for more details + default_args (dict, optional): Default initialization arguments. 
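+
+    Examples (illustrative; the model type and directory are hypothetical and
+        assume a matching class is already registered in MODELS):
+        >>> from modelscope.utils.config import ConfigDict
+        >>> from modelscope.utils.constant import Tasks
+        >>> cfg = ConfigDict({'type': 'scrfd', 'model_dir': '/path/to/model'})
+        >>> model = build_model(cfg, task_name=Tasks.face_detection)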
+    """
+    try:
+        model = build_from_cfg(cfg,
+                               MODELS,
+                               group_key=task_name,
+                               default_args=default_args)
+    except KeyError as e:
+        # Handle a subtask whose model type has not been registered directly.
+        # Every subtask with a parent task should have a task model, otherwise it
+        # is not a valid subtask.
+        parent_task, task_model_type = get_task_by_subtask_name(task_name)
+        if task_model_type is None:
+            raise KeyError(e)
+        cfg['type'] = task_model_type
+        model = build_from_cfg(cfg,
+                               MODELS,
+                               group_key=parent_task,
+                               default_args=default_args)
+    return model
+
+
+def build_backbone(cfg: ConfigDict, default_args: dict = None):
+    """ build backbone given backbone config dict
+
+    Args:
+        cfg (:obj:`ConfigDict`): config dict for backbone object.
+        default_args (dict, optional): Default initialization arguments.
+    """
+    try:
+        model_dir = cfg.pop('model_dir', None)
+        model = build_from_cfg(cfg,
+                               BACKBONES,
+                               group_key=Tasks.backbone,
+                               default_args=default_args)
+    except KeyError:
+        # Handle a backbone that is not in the registry group by falling back to
+        # the transformers AutoModel. AutoModel is mostly used in NLP and parts of
+        # multi-modal, while the number of backbones in CV, audio and MM is limited,
+        # so they can be added and registered in ModelScope directly.
+        logger.warning(
+            f'The backbone {cfg.type} is not registered in modelscope, trying to import the backbone from hf transformers.'
+        )
+        cfg['type'] = Models.transformers
+        if model_dir is not None:
+            cfg['model_dir'] = model_dir
+        model = build_from_cfg(cfg,
+                               BACKBONES,
+                               group_key=Tasks.backbone,
+                               default_args=default_args)
+    return model
+
+
+def build_head(cfg: ConfigDict,
+               task_name: str = None,
+               default_args: dict = None):
+    """ build head given config dict
+
+    Args:
+        cfg (:obj:`ConfigDict`): config dict for head object.
+        task_name (str, optional): task name, refer to
+            :obj:`Tasks` for more details
+        default_args (dict, optional): Default initialization arguments.
+    """
+    return build_from_cfg(cfg,
+                          HEADS,
+                          group_key=task_name,
+                          default_args=default_args)
diff --git a/modelscope/models/cv/face_2d_keypoints/__init__.py b/modelscope/models/cv/face_2d_keypoints/__init__.py
new file mode 100644
index 0000000..636ba0f
--- /dev/null
+++ b/modelscope/models/cv/face_2d_keypoints/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .face_2d_keypoints_align import Face2DKeypoints
+
+else:
+    _import_structure = {'face_2d_keypoints_align': ['Face2DKeypoints']}
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )
diff --git a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py b/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py
new file mode 100644
index 0000000..49f0c15
--- /dev/null
+++ b/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py
@@ -0,0 +1,15 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from easycv.models.face.face_keypoint import FaceKeypoint + +from modelscope.metainfo import Models +from modelscope.models.builder import MODELS +from modelscope.models.cv.easycv_base import EasyCVBaseModel +from modelscope.utils.constant import Tasks + + +@MODELS.register_module(group_key=Tasks.face_2d_keypoints, + module_name=Models.face_2d_keypoints) +class Face2DKeypoints(EasyCVBaseModel, FaceKeypoint): + def __init__(self, model_dir=None, *args, **kwargs): + EasyCVBaseModel.__init__(self, model_dir, args, kwargs) + FaceKeypoint.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/face_attribute_recognition/__init__.py b/modelscope/models/cv/face_attribute_recognition/__init__.py new file mode 100644 index 0000000..8d1592e --- /dev/null +++ b/modelscope/models/cv/face_attribute_recognition/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .fair_face import FaceAttributeRecognition + +else: + _import_structure = {'fair_face': ['FaceAttributeRecognition']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/face_attribute_recognition/fair_face/__init__.py b/modelscope/models/cv/face_attribute_recognition/fair_face/__init__.py new file mode 100644 index 0000000..a5f2be3 --- /dev/null +++ b/modelscope/models/cv/face_attribute_recognition/fair_face/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .face_attribute_recognition import FaceAttributeRecognition diff --git a/modelscope/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py b/modelscope/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py new file mode 100644 index 0000000..a4bf147 --- /dev/null +++ b/modelscope/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py @@ -0,0 +1,78 @@ +# The implementation is based on FairFace, available at +# https://github.com/dchen236/FairFace +import os + +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.nn as nn +import torch.nn.functional as F +import torchvision +from PIL import Image +from torch.autograd import Variable +from torchvision import datasets, models, transforms + +from modelscope.metainfo import Models +from modelscope.models.base import Tensor, TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.constant import ModelFile, Tasks + + +@MODELS.register_module(Tasks.face_attribute_recognition, + module_name=Models.fairface) +class FaceAttributeRecognition(TorchModel): + def __init__(self, model_path, device='cuda'): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.device = device + self.cfg_path = model_path.replace(ModelFile.TORCH_MODEL_FILE, + ModelFile.CONFIGURATION) + fair_face = torchvision.models.resnet34(pretrained=False) + fair_face.fc = nn.Linear(fair_face.fc.in_features, 18) + self.net = fair_face + self.load_model() + self.net = self.net.to(device) + self.trans = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + ]) + + def load_model(self, load_to_cpu=False): + pretrained_dict = 
torch.load(self.model_path, + map_location=torch.device('cpu')) + self.net.load_state_dict(pretrained_dict, strict=True) + self.net.eval() + + def forward(self, img): + """ FariFace model forward process. + + Args: + img: [h, w, c] + + Return: + list of attribute result: [gender_score, age_score] + """ + img = cv2.cvtColor(img.cpu().numpy(), cv2.COLOR_BGR2RGB) + img = img.astype(np.uint8) + + inputs = self.trans(img) + + c, h, w = inputs.shape + + inputs = inputs.view(-1, c, h, w) + inputs = inputs.to(self.device) + inputs = Variable(inputs, volatile=True) + outputs = self.net(inputs)[0] + + gender_outputs = outputs[7:9] + age_outputs = outputs[9:18] + + gender_score = F.softmax(gender_outputs).detach().cpu().tolist() + age_score = F.softmax(age_outputs).detach().cpu().tolist() + + return [gender_score, age_score] diff --git a/modelscope/models/cv/face_detection/__init__.py b/modelscope/models/cv/face_detection/__init__.py new file mode 100644 index 0000000..02e417c --- /dev/null +++ b/modelscope/models/cv/face_detection/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .mogface import MogFaceDetector + from .mtcnn import MtcnnFaceDetector + from .retinaface import RetinaFaceDetection + from .ulfd_slim import UlfdFaceDetector + from .scrfd import ScrfdDetect + from .scrfd import TinyMogDetect + from .scrfd import SCRFDPreprocessor + from .scrfd import DamoFdDetect +else: + _import_structure = { + 'ulfd_slim': ['UlfdFaceDetector'], + 'retinaface': ['RetinaFaceDetection'], + 'mtcnn': ['MtcnnFaceDetector'], + 'mogface': ['MogFaceDetector'], + 'scrfd': ['TinyMogDetect', 'ScrfdDetect', 'SCRFDPreprocessor', 'DamoFdDetect'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/face_detection/mogface/__init__.py b/modelscope/models/cv/face_detection/mogface/__init__.py new file mode 100644 index 0000000..a58268d --- /dev/null +++ b/modelscope/models/cv/face_detection/mogface/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
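+# Re-exports the MogFace detector wrapper from models/detectors.py, which is
+# registered for Tasks.face_detection under the module name Models.mogface.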
+from .models.detectors import MogFaceDetector diff --git a/modelscope/models/cv/face_detection/mogface/models/__init__.py b/modelscope/models/cv/face_detection/mogface/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/mogface/models/detectors.py b/modelscope/models/cv/face_detection/mogface/models/detectors.py new file mode 100644 index 0000000..960a846 --- /dev/null +++ b/modelscope/models/cv/face_detection/mogface/models/detectors.py @@ -0,0 +1,98 @@ +# The implementation is based on MogFace, available at +# https://github.com/damo-cv/MogFace +import os + +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.constant import Tasks + +from .mogface import MogFace +from .utils import MogPriorBox, mogdecode, py_cpu_nms + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.mogface) +class MogFaceDetector(TorchModel): + def __init__(self, model_path, device='cuda', **kwargs): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.device = device + self.net = MogFace() + self.load_model() + self.net = self.net.to(device) + self.conf_th = kwargs.get('conf_th', -1.82) + self.nms_th = kwargs.get('nms_th', 0.4) + + self.mean = np.array([[104, 117, 123]]) + + def load_model(self, load_to_cpu=False): + pretrained_dict = torch.load(self.model_path, + map_location=torch.device('cpu')) + self.net.load_state_dict(pretrained_dict, strict=False) + self.net.eval() + + def forward(self, input): + + img_raw = input['img'] + img = np.array(img_raw.cpu().detach()) + img = img[:, :, ::-1] + + im_height, im_width = img.shape[:2] + ss = 1.0 + # tricky + if max(im_height, im_width) > 1500: + ss = 1000.0 / max(im_height, im_width) + img = cv2.resize(img, (0, 0), fx=ss, fy=ss) + im_height, im_width = img.shape[:2] + + scale = torch.Tensor( + [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) + img -= np.array([[103.53, 116.28, 123.675]]) + img /= np.array([[57.375, 57.120003, 58.395]]) + img /= 255 + img = img[:, :, ::-1].copy() + img = img.transpose(2, 0, 1) + img = torch.from_numpy(img).unsqueeze(0) + img = img.to(self.device) + scale = scale.to(self.device) + + conf, loc = self.net(img) # forward pass + + top_k = 5000 + keep_top_k = 750 + + priorbox = MogPriorBox(scale_list=[0.68]) + priors = priorbox(im_height, im_width) + priors = torch.tensor(priors).to(self.device) + prior_data = priors.data + + boxes = mogdecode(loc.data.squeeze(0), prior_data) + boxes = boxes.cpu().numpy() + scores = conf.squeeze(0).data.cpu().numpy()[:, 0] + + # ignore low scores + inds = np.where(scores > self.conf_th)[0] + boxes = boxes[inds] + scores = scores[inds] + + # keep top-K before NMS + order = scores.argsort()[::-1][:top_k] + boxes = boxes[order] + scores = scores[order] + + # do NMS + dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, + copy=False) + keep = py_cpu_nms(dets, self.nms_th) + dets = dets[keep, :] + + # keep top-K faster NMS + dets = dets[:keep_top_k, :] + + return dets / ss diff --git a/modelscope/models/cv/face_detection/mogface/models/mogface.py b/modelscope/models/cv/face_detection/mogface/models/mogface.py new file mode 100644 index 0000000..8a0b3f3 --- /dev/null +++ b/modelscope/models/cv/face_detection/mogface/models/mogface.py @@ -0,0 +1,132 @@ +# 
-------------------------------------------------------- +# The implementation is also open-sourced by the authors as Yang Liu, and is available publicly on +# https://github.com/damo-cv/MogFace +# -------------------------------------------------------- +import torch.nn as nn +import torch.nn.functional as F + +from .mogprednet import MogPredNet +from .resnet import ResNet + + +class MogFace(nn.Module): + def __init__(self): + super(MogFace, self).__init__() + self.backbone = ResNet(depth=101) + self.fpn = LFPN() + self.pred_net = MogPredNet() + + def forward(self, x): + feature_list = self.backbone(x) + fpn_list = self.fpn(feature_list) + pyramid_feature_list = fpn_list[0] + conf, loc = self.pred_net(pyramid_feature_list) + return conf, loc + + +class FeatureFusion(nn.Module): + def __init__(self, lat_ch=256, **channels): + super(FeatureFusion, self).__init__() + self.main_conv = nn.Conv2d(channels['main'], lat_ch, kernel_size=1) + + def forward(self, up, main): + main = self.main_conv(main) + _, _, H, W = main.size() + res = F.upsample(up, scale_factor=2, mode='bilinear') + if res.size(2) != main.size(2) or res.size(3) != main.size(3): + res = res[:, :, 0:H, 0:W] + res = res + main + return res + + +class LFPN(nn.Module): + def __init__(self, + c2_out_ch=256, + c3_out_ch=512, + c4_out_ch=1024, + c5_out_ch=2048, + c6_mid_ch=512, + c6_out_ch=512, + c7_mid_ch=128, + c7_out_ch=256, + out_dsfd_ft=True): + super(LFPN, self).__init__() + self.out_dsfd_ft = out_dsfd_ft + if self.out_dsfd_ft: + dsfd_module = [] + dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) + dsfd_module.append(nn.Conv2d(512, 256, kernel_size=3, padding=1)) + dsfd_module.append(nn.Conv2d(1024, 256, kernel_size=3, padding=1)) + dsfd_module.append(nn.Conv2d(2048, 256, kernel_size=3, padding=1)) + dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) + dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) + self.dsfd_modules = nn.ModuleList(dsfd_module) + + c6_input_ch = c5_out_ch + self.c6 = nn.Sequential(*[ + nn.Conv2d( + c6_input_ch, + c6_mid_ch, + kernel_size=1, + ), + nn.BatchNorm2d(c6_mid_ch), + nn.ReLU(inplace=True), + nn.Conv2d(c6_mid_ch, c6_out_ch, kernel_size=3, padding=1, + stride=2), + nn.BatchNorm2d(c6_out_ch), + nn.ReLU(inplace=True) + ]) + self.c7 = nn.Sequential(*[ + nn.Conv2d( + c6_out_ch, + c7_mid_ch, + kernel_size=1, + ), + nn.BatchNorm2d(c7_mid_ch), + nn.ReLU(inplace=True), + nn.Conv2d(c7_mid_ch, c7_out_ch, kernel_size=3, padding=1, + stride=2), + nn.BatchNorm2d(c7_out_ch), + nn.ReLU(inplace=True) + ]) + + self.p2_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) + self.p3_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) + self.p4_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) + + self.c5_lat = nn.Conv2d(c6_input_ch, 256, kernel_size=3, padding=1) + self.c6_lat = nn.Conv2d(c6_out_ch, 256, kernel_size=3, padding=1) + self.c7_lat = nn.Conv2d(c7_out_ch, 256, kernel_size=3, padding=1) + + self.ff_c5_c4 = FeatureFusion(main=c4_out_ch) + self.ff_c4_c3 = FeatureFusion(main=c3_out_ch) + self.ff_c3_c2 = FeatureFusion(main=c2_out_ch) + + def forward(self, feature_list): + c2, c3, c4, c5 = feature_list + c6 = self.c6(c5) + c7 = self.c7(c6) + + c5 = self.c5_lat(c5) + c6 = self.c6_lat(c6) + c7 = self.c7_lat(c7) + + if self.out_dsfd_ft: + dsfd_fts = [] + dsfd_fts.append(self.dsfd_modules[0](c2)) + dsfd_fts.append(self.dsfd_modules[1](c3)) + dsfd_fts.append(self.dsfd_modules[2](c4)) + dsfd_fts.append(self.dsfd_modules[3](feature_list[-1])) + 
dsfd_fts.append(self.dsfd_modules[4](c6)) + dsfd_fts.append(self.dsfd_modules[5](c7)) + + p4 = self.ff_c5_c4(c5, c4) + p3 = self.ff_c4_c3(p4, c3) + p2 = self.ff_c3_c2(p3, c2) + + p2 = self.p2_lat(p2) + p3 = self.p3_lat(p3) + p4 = self.p4_lat(p4) + + if self.out_dsfd_ft: + return ([p2, p3, p4, c5, c6, c7], dsfd_fts) diff --git a/modelscope/models/cv/face_detection/mogface/models/mogprednet.py b/modelscope/models/cv/face_detection/mogface/models/mogprednet.py new file mode 100644 index 0000000..5da62d9 --- /dev/null +++ b/modelscope/models/cv/face_detection/mogface/models/mogprednet.py @@ -0,0 +1,168 @@ +# -------------------------------------------------------- +# The implementation is also open-sourced by the authors as Yang Liu, and is available publicly on +# https://github.com/damo-cv/MogFace +# -------------------------------------------------------- +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class conv_bn(nn.Module): + """docstring for conv""" + def __init__(self, in_plane, out_plane, kernel_size, stride, padding): + super(conv_bn, self).__init__() + self.conv1 = nn.Conv2d(in_plane, + out_plane, + kernel_size=kernel_size, + stride=stride, + padding=padding) + self.bn1 = nn.BatchNorm2d(out_plane) + + def forward(self, x): + x = self.conv1(x) + return self.bn1(x) + + +class SSHContext(nn.Module): + def __init__(self, channels, Xchannels=256): + super(SSHContext, self).__init__() + + self.conv1 = nn.Conv2d(channels, + Xchannels, + kernel_size=3, + stride=1, + padding=1) + self.conv2 = nn.Conv2d(channels, + Xchannels // 2, + kernel_size=3, + dilation=2, + stride=1, + padding=2) + self.conv2_1 = nn.Conv2d(Xchannels // 2, + Xchannels // 2, + kernel_size=3, + stride=1, + padding=1) + self.conv2_2 = nn.Conv2d(Xchannels // 2, + Xchannels // 2, + kernel_size=3, + dilation=2, + stride=1, + padding=2) + self.conv2_2_1 = nn.Conv2d(Xchannels // 2, + Xchannels // 2, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + x1 = F.relu(self.conv1(x), inplace=True) + x2 = F.relu(self.conv2(x), inplace=True) + x2_1 = F.relu(self.conv2_1(x2), inplace=True) + x2_2 = F.relu(self.conv2_2(x2), inplace=True) + x2_2 = F.relu(self.conv2_2_1(x2_2), inplace=True) + + return torch.cat([x1, x2_1, x2_2], 1) + + +class DeepHead(nn.Module): + def __init__(self, + in_channel=256, + out_channel=256, + use_gn=False, + num_conv=4): + super(DeepHead, self).__init__() + self.use_gn = use_gn + self.num_conv = num_conv + self.conv1 = nn.Conv2d(in_channel, out_channel, 3, 1, 1) + self.conv2 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) + self.conv3 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) + self.conv4 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) + if self.use_gn: + self.gn1 = nn.GroupNorm(16, out_channel) + self.gn2 = nn.GroupNorm(16, out_channel) + self.gn3 = nn.GroupNorm(16, out_channel) + self.gn4 = nn.GroupNorm(16, out_channel) + + def forward(self, x): + if self.use_gn: + x1 = F.relu(self.gn1(self.conv1(x)), inplace=True) + x2 = F.relu(self.gn2(self.conv1(x1)), inplace=True) + x3 = F.relu(self.gn3(self.conv1(x2)), inplace=True) + x4 = F.relu(self.gn4(self.conv1(x3)), inplace=True) + else: + x1 = F.relu(self.conv1(x), inplace=True) + x2 = F.relu(self.conv1(x1), inplace=True) + if self.num_conv == 2: + return x2 + x3 = F.relu(self.conv1(x2), inplace=True) + x4 = F.relu(self.conv1(x3), inplace=True) + + return x4 + + +class MogPredNet(nn.Module): + def __init__(self, + num_anchor_per_pixel=1, + num_classes=1, + input_ch_list=[256, 256, 256, 256, 256, 256], 
+ use_deep_head=True, + deep_head_with_gn=True, + use_ssh=True, + deep_head_ch=512): + super(MogPredNet, self).__init__() + self.num_classes = num_classes + self.use_deep_head = use_deep_head + self.deep_head_with_gn = deep_head_with_gn + + self.use_ssh = use_ssh + + self.deep_head_ch = deep_head_ch + + if self.use_ssh: + self.conv_SSH = SSHContext(input_ch_list[0], + self.deep_head_ch // 2) + + if self.use_deep_head: + if self.deep_head_with_gn: + self.deep_loc_head = DeepHead(self.deep_head_ch, + self.deep_head_ch, + use_gn=True) + self.deep_cls_head = DeepHead(self.deep_head_ch, + self.deep_head_ch, + use_gn=True) + + self.pred_cls = nn.Conv2d(self.deep_head_ch, + 1 * num_anchor_per_pixel, 3, 1, 1) + self.pred_loc = nn.Conv2d(self.deep_head_ch, + 4 * num_anchor_per_pixel, 3, 1, 1) + + self.sigmoid = nn.Sigmoid() + + def forward(self, pyramid_feature_list, dsfd_ft_list=None): + loc = [] + conf = [] + + if self.use_deep_head: + for x in pyramid_feature_list: + if self.use_ssh: + x = self.conv_SSH(x) + x_cls = self.deep_cls_head(x) + x_loc = self.deep_loc_head(x) + + conf.append( + self.pred_cls(x_cls).permute(0, 2, 3, 1).contiguous()) + loc.append( + self.pred_loc(x_loc).permute(0, 2, 3, 1).contiguous()) + + loc = torch.cat([o.view(o.size(0), -1, 4) for o in loc], 1) + conf = torch.cat( + [o.view(o.size(0), -1, self.num_classes) for o in conf], 1) + output = ( + self.sigmoid(conf.view(conf.size(0), -1, self.num_classes)), + loc.view(loc.size(0), -1, 4), + ) + + return output diff --git a/modelscope/models/cv/face_detection/mogface/models/resnet.py b/modelscope/models/cv/face_detection/mogface/models/resnet.py new file mode 100644 index 0000000..fc7ef27 --- /dev/null +++ b/modelscope/models/cv/face_detection/mogface/models/resnet.py @@ -0,0 +1,194 @@ +# The implementation is modified from original resent implementaiton, which is +# also open-sourced by the authors as Yang Liu, +# and is available publicly on https://github.com/damo-cv/MogFace + +import torch.nn as nn + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + bias=False, + dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=1, + stride=stride, + bias=False) + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1, + norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + 
return out + + +class ResNet(nn.Module): + def __init__(self, + depth=50, + groups=1, + width_per_group=64, + replace_stride_with_dilation=None, + norm_layer=None, + inplanes=64, + shrink_ch_ratio=1): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + if depth == 50: + block = Bottleneck + layers = [3, 4, 6, 3] + elif depth == 101: + block = Bottleneck + layers = [3, 4, 23, 3] + elif depth == 152: + block = Bottleneck + layers = [3, 4, 36, 3] + elif depth == 18: + block = BasicBlock + layers = [2, 2, 2, 2] + else: + raise ValueError('only support depth in [18, 50, 101, 152]') + + shrink_input_ch = int(inplanes * shrink_ch_ratio) + self.inplanes = int(inplanes * shrink_ch_ratio) + if shrink_ch_ratio == 0.125: + layers = [2, 3, 3, 3] + + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError('replace_stride_with_dilation should be None ' + 'or a 3-element tuple, got {}'.format( + replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, + self.inplanes, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, shrink_input_ch, layers[0]) + self.layer2 = self._make_layer(block, + shrink_input_ch * 2, + layers[1], + stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, + shrink_input_ch * 4, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, + shrink_input_ch * 8, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2]) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + four_conv_layer = [] + x = self.layer1(x) + four_conv_layer.append(x) + x = self.layer2(x) + four_conv_layer.append(x) + x = self.layer3(x) + four_conv_layer.append(x) + x = self.layer4(x) + four_conv_layer.append(x) + + return four_conv_layer diff --git a/modelscope/models/cv/face_detection/mogface/models/utils.py b/modelscope/models/cv/face_detection/mogface/models/utils.py new file mode 100755 index 0000000..12fac6d --- /dev/null +++ b/modelscope/models/cv/face_detection/mogface/models/utils.py @@ -0,0 +1,210 @@ +# Modified from https://github.com/biubug6/Pytorch_Retinaface + +import math +from itertools import product as 
product +from math import ceil + +import numpy as np +import torch + + +def transform_anchor(anchors): + """ + from [x0, x1, y0, y1] to [c_x, cy, w, h] + x1 = x0 + w - 1 + c_x = (x0 + x1) / 2 = (2x0 + w - 1) / 2 = x0 + (w - 1) / 2 + """ + return np.concatenate(((anchors[:, :2] + anchors[:, 2:]) / 2, + anchors[:, 2:] - anchors[:, :2] + 1), + axis=1) + + +def normalize_anchor(anchors): + """ + from [c_x, cy, w, h] to [x0, x1, y0, y1] + """ + item_1 = anchors[:, :2] - (anchors[:, 2:] - 1) / 2 + item_2 = anchors[:, :2] + (anchors[:, 2:] - 1) / 2 + return np.concatenate((item_1, item_2), axis=1) + + +class MogPriorBox(object): + """ + both for fpn and single layer, single layer need to test + return (np.array) [num_anchros, 4] [x0, y0, x1, y1] + """ + def __init__(self, + scale_list=[1.], + aspect_ratio_list=[1.0], + stride_list=[4, 8, 16, 32, 64, 128], + anchor_size_list=[16, 32, 64, 128, 256, 512]): + self.scale_list = scale_list + self.aspect_ratio_list = aspect_ratio_list + self.stride_list = stride_list + self.anchor_size_list = anchor_size_list + + def __call__(self, img_height, img_width): + final_anchor_list = [] + + for idx, stride in enumerate(self.stride_list): + anchor_list = [] + cur_img_height = img_height + cur_img_width = img_width + tmp_stride = stride + + while tmp_stride != 1: + tmp_stride = tmp_stride // 2 + cur_img_height = (cur_img_height + 1) // 2 + cur_img_width = (cur_img_width + 1) // 2 + + for i in range(cur_img_height): + for j in range(cur_img_width): + for scale in self.scale_list: + cx = (j + 0.5) * stride + cy = (i + 0.5) * stride + side_x = self.anchor_size_list[idx] * scale + side_y = self.anchor_size_list[idx] * scale + for ratio in self.aspect_ratio_list: + anchor_list.append([ + cx, cy, side_x / math.sqrt(ratio), + side_y * math.sqrt(ratio) + ]) + + final_anchor_list.append(anchor_list) + final_anchor_arr = np.concatenate(final_anchor_list, axis=0) + normalized_anchor_arr = normalize_anchor(final_anchor_arr).astype( + 'float32') + transformed_anchor = transform_anchor(normalized_anchor_arr) + + return transformed_anchor + + +class PriorBox(object): + def __init__(self, cfg, image_size=None, phase='train'): + super(PriorBox, self).__init__() + self.min_sizes = cfg['min_sizes'] + self.steps = cfg['steps'] + self.clip = cfg['clip'] + self.image_size = image_size + self.feature_maps = [[ + ceil(self.image_size[0] / step), + ceil(self.image_size[1] / step) + ] for step in self.steps] + self.name = 's' + + def forward(self): + anchors = [] + for k, f in enumerate(self.feature_maps): + min_sizes = self.min_sizes[k] + for i, j in product(range(f[0]), range(f[1])): + for min_size in min_sizes: + s_kx = min_size / self.image_size[1] + s_ky = min_size / self.image_size[0] + dense_cx = [ + x * self.steps[k] / self.image_size[1] + for x in [j + 0.5] + ] + dense_cy = [ + y * self.steps[k] / self.image_size[0] + for y in [i + 0.5] + ] + for cy, cx in product(dense_cy, dense_cx): + anchors += [cx, cy, s_kx, s_ky] + + # back to torch land + output = torch.Tensor(anchors).view(-1, 4) + if self.clip: + output.clamp_(max=1, min=0) + return output + + +def py_cpu_nms(dets, thresh): + """Pure Python NMS baseline.""" + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = 
np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +def mogdecode(loc, anchors): + """ + loc: torch.Tensor + anchors: 2-d, torch.Tensor (cx, cy, w, h) + boxes: 2-d, torch.Tensor (x0, y0, x1, y1) + """ + + boxes = torch.cat((anchors[:, :2] + loc[:, :2] * anchors[:, 2:], + anchors[:, 2:] * torch.exp(loc[:, 2:])), 1) + + boxes[:, 0] -= (boxes[:, 2] - 1) / 2 + boxes[:, 1] -= (boxes[:, 3] - 1) / 2 + boxes[:, 2] += boxes[:, 0] - 1 + boxes[:, 3] += boxes[:, 1] - 1 + + return boxes + + +# Adapted from https://github.com/Hakuyume/chainer-ssd +def decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat( + (priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + return boxes + + +def decode_landm(pre, priors, variances): + """Decode landm from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + pre (tensor): landm predictions for loc layers, + Shape: [num_priors,10] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded landm predictions + """ + a = priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:] + b = priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:] + c = priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:] + d = priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:] + e = priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:] + landms = torch.cat((a, b, c, d, e), dim=1) + return landms diff --git a/modelscope/models/cv/face_detection/mtcnn/__init__.py b/modelscope/models/cv/face_detection/mtcnn/__init__.py new file mode 100644 index 0000000..9fddab9 --- /dev/null +++ b/modelscope/models/cv/face_detection/mtcnn/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .models.detector import MtcnnFaceDetector diff --git a/modelscope/models/cv/face_detection/mtcnn/models/__init__.py b/modelscope/models/cv/face_detection/mtcnn/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/mtcnn/models/box_utils.py b/modelscope/models/cv/face_detection/mtcnn/models/box_utils.py new file mode 100644 index 0000000..f6a27b0 --- /dev/null +++ b/modelscope/models/cv/face_detection/mtcnn/models/box_utils.py @@ -0,0 +1,240 @@ +# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch +import numpy as np +from PIL import Image + + +def nms(boxes, overlap_threshold=0.5, mode='union'): + """Non-maximum suppression. + + Arguments: + boxes: a float numpy array of shape [n, 5], + where each row is (xmin, ymin, xmax, ymax, score). + overlap_threshold: a float number. + mode: 'union' or 'min'. 
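+            In 'min' mode the intersection area is divided by the smaller of
+            the two box areas instead of by their union.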
+ + Returns: + list with indices of the selected boxes + """ + + # if there are no boxes, return the empty list + if len(boxes) == 0: + return [] + + # list of picked indices + pick = [] + + # grab the coordinates of the bounding boxes + x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] + + area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0) + ids = np.argsort(score) # in increasing order + + while len(ids) > 0: + + # grab index of the largest value + last = len(ids) - 1 + i = ids[last] + pick.append(i) + + # compute intersections + # of the box with the largest score + # with the rest of boxes + + # left top corner of intersection boxes + ix1 = np.maximum(x1[i], x1[ids[:last]]) + iy1 = np.maximum(y1[i], y1[ids[:last]]) + + # right bottom corner of intersection boxes + ix2 = np.minimum(x2[i], x2[ids[:last]]) + iy2 = np.minimum(y2[i], y2[ids[:last]]) + + # width and height of intersection boxes + w = np.maximum(0.0, ix2 - ix1 + 1.0) + h = np.maximum(0.0, iy2 - iy1 + 1.0) + + # intersections' areas + inter = w * h + if mode == 'min': + overlap = inter / np.minimum(area[i], area[ids[:last]]) + elif mode == 'union': + # intersection over union (IoU) + overlap = inter / (area[i] + area[ids[:last]] - inter) + + # delete all boxes where overlap is too big + ids = np.delete( + ids, + np.concatenate([[last], + np.where(overlap > overlap_threshold)[0]])) + + return pick + + +def convert_to_square(bboxes): + """Convert bounding boxes to a square form. + + Arguments: + bboxes: a float numpy array of shape [n, 5]. + + Returns: + a float numpy array of shape [n, 5], + squared bounding boxes. + """ + + square_bboxes = np.zeros_like(bboxes) + x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] + h = y2 - y1 + 1.0 + w = x2 - x1 + 1.0 + max_side = np.maximum(h, w) + square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5 + square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5 + square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 + square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 + return square_bboxes + + +def calibrate_box(bboxes, offsets): + """Transform bounding boxes to be more like true bounding boxes. + 'offsets' is one of the outputs of the nets. + + Arguments: + bboxes: a float numpy array of shape [n, 5]. + offsets: a float numpy array of shape [n, 4]. + + Returns: + a float numpy array of shape [n, 5]. + """ + x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] + w = x2 - x1 + 1.0 + h = y2 - y1 + 1.0 + w = np.expand_dims(w, 1) + h = np.expand_dims(h, 1) + + # this is what happening here: + # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] + # x1_true = x1 + tx1*w + # y1_true = y1 + ty1*h + # x2_true = x2 + tx2*w + # y2_true = y2 + ty2*h + # below is just more compact form of this + + # are offsets always such that + # x1 < x2 and y1 < y2 ? + + translation = np.hstack([w, h, w, h]) * offsets + bboxes[:, 0:4] = bboxes[:, 0:4] + translation + return bboxes + + +def get_image_boxes(bounding_boxes, img, size=24): + """Cut out boxes from the image. + + Arguments: + bounding_boxes: a float numpy array of shape [n, 5]. + img: an instance of PIL.Image. + size: an integer, size of cutouts. + + Returns: + a float numpy array of shape [n, 3, size, size]. 
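+
+    Note:
+        Boxes that extend beyond the image are clipped by correct_bboxes and
+        the out-of-image region of each cutout is left zero-padded before the
+        crop is resized to (size, size) and normalized by _preprocess.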
+ """ + + num_boxes = len(bounding_boxes) + width, height = img.size + + [dy, edy, dx, edx, y, ey, x, ex, w, + h] = correct_bboxes(bounding_boxes, width, height) + img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') + + for i in range(num_boxes): + img_box = np.zeros((h[i], w[i], 3), 'uint8') + + img_array = np.asarray(img, 'uint8') + img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ + img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] + + # resize + img_box = Image.fromarray(img_box) + img_box = img_box.resize((size, size), Image.BILINEAR) + img_box = np.asarray(img_box, 'float32') + + img_boxes[i, :, :, :] = _preprocess(img_box) + + return img_boxes + + +def correct_bboxes(bboxes, width, height): + """Crop boxes that are too big and get coordinates + with respect to cutouts. + + Arguments: + bboxes: a float numpy array of shape [n, 5], + where each row is (xmin, ymin, xmax, ymax, score). + width: a float number. + height: a float number. + + Returns: + dy, dx, edy, edx: a int numpy arrays of shape [n], + coordinates of the boxes with respect to the cutouts. + y, x, ey, ex: a int numpy arrays of shape [n], + corrected ymin, xmin, ymax, xmax. + h, w: a int numpy arrays of shape [n], + just heights and widths of boxes. + + in the following order: + [dy, edy, dx, edx, y, ey, x, ex, w, h]. + """ + + x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] + w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 + num_boxes = bboxes.shape[0] + + # 'e' stands for end + # (x, y) -> (ex, ey) + x, y, ex, ey = x1, y1, x2, y2 + + # we need to cut out a box from the image. + # (x, y, ex, ey) are corrected coordinates of the box + # in the image. + # (dx, dy, edx, edy) are coordinates of the box in the cutout + # from the image. + dx, dy = np.zeros((num_boxes, )), np.zeros((num_boxes, )) + edx, edy = w.copy() - 1.0, h.copy() - 1.0 + + # if box's bottom right corner is too far right + ind = np.where(ex > width - 1.0)[0] + edx[ind] = w[ind] + width - 2.0 - ex[ind] + ex[ind] = width - 1.0 + + # if box's bottom right corner is too low + ind = np.where(ey > height - 1.0)[0] + edy[ind] = h[ind] + height - 2.0 - ey[ind] + ey[ind] = height - 1.0 + + # if box's top left corner is too far left + ind = np.where(x < 0.0)[0] + dx[ind] = 0.0 - x[ind] + x[ind] = 0.0 + + # if box's top left corner is too high + ind = np.where(y < 0.0)[0] + dy[ind] = 0.0 - y[ind] + y[ind] = 0.0 + + return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] + return_list = [i.astype('int32') for i in return_list] + + return return_list + + +def _preprocess(img): + """Preprocessing step before feeding the network. + + Arguments: + img: a float numpy array of shape [h, w, c]. + + Returns: + a float numpy array of shape [1, c, h, w]. 
+ """ + img = img.transpose((2, 0, 1)) + img = np.expand_dims(img, 0) + img = (img - 127.5) * 0.0078125 + return img diff --git a/modelscope/models/cv/face_detection/mtcnn/models/detector.py b/modelscope/models/cv/face_detection/mtcnn/models/detector.py new file mode 100644 index 0000000..9fc0cf5 --- /dev/null +++ b/modelscope/models/cv/face_detection/mtcnn/models/detector.py @@ -0,0 +1,153 @@ +# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch +import os + +import numpy as np +import torch +import torch.backends.cudnn as cudnn +from PIL import Image +from torch.autograd import Variable + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.constant import Tasks + +from .box_utils import calibrate_box, convert_to_square, get_image_boxes, nms +from .first_stage import run_first_stage +from .get_nets import ONet, PNet, RNet + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.mtcnn) +class MtcnnFaceDetector(TorchModel): + def __init__(self, model_path, device='cuda', **kwargs): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.device = device + + self.pnet = PNet(model_path=os.path.join(self.model_path, 'pnet.npy')) + self.rnet = RNet(model_path=os.path.join(self.model_path, 'rnet.npy')) + self.onet = ONet(model_path=os.path.join(self.model_path, 'onet.npy')) + + self.pnet = self.pnet.to(device) + self.rnet = self.rnet.to(device) + self.onet = self.onet.to(device) + + conf_th = kwargs.get('conf_th') + if conf_th is not None: + self.threshods = [conf_th] * 3 + else: + self.threshods = [0.7, 0.8, 0.9] + + def forward(self, input): + image = Image.fromarray(np.uint8(input['img'].cpu().numpy())) + pnet = self.pnet + rnet = self.rnet + onet = self.onet + onet.eval() + + min_face_size = 20.0 + thresholds = self.threshods + nms_thresholds = [0.7, 0.7, 0.7] + + # BUILD AN IMAGE PYRAMID + width, height = image.size + min_length = min(height, width) + + min_detection_size = 12 + factor = 0.707 # sqrt(0.5) + + # scales for scaling the image + scales = [] + + m = min_detection_size / min_face_size + min_length *= m + + factor_count = 0 + while min_length > min_detection_size: + scales.append(m * factor**factor_count) + min_length *= factor + factor_count += 1 + + # STAGE 1 + + # it will be returned + bounding_boxes = [] + + # run P-Net on different scales + for s in scales: + boxes = run_first_stage(image, + pnet, + scale=s, + threshold=thresholds[0], + device=self.device) + bounding_boxes.append(boxes) + + # collect boxes (and offsets, and scores) from different scales + bounding_boxes = [i for i in bounding_boxes if i is not None] + bounding_boxes = np.vstack(bounding_boxes) + + keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) + bounding_boxes = bounding_boxes[keep] + + # use offsets predicted by pnet to transform bounding boxes + bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], + bounding_boxes[:, 5:]) + # shape [n_boxes, 5] + + bounding_boxes = convert_to_square(bounding_boxes) + bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) + + # STAGE 2 + + img_boxes = get_image_boxes(bounding_boxes, image, size=24) + img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True) + output = rnet(img_boxes.to(self.device)) + offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4] + probs = output[1].cpu().data.numpy() # shape [n_boxes, 2] + + keep = np.where(probs[:, 1] > 
thresholds[1])[0] + bounding_boxes = bounding_boxes[keep] + bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) + offsets = offsets[keep] + + keep = nms(bounding_boxes, nms_thresholds[1]) + bounding_boxes = bounding_boxes[keep] + bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) + bounding_boxes = convert_to_square(bounding_boxes) + bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) + + # STAGE 3 + + img_boxes = get_image_boxes(bounding_boxes, image, size=48) + if len(img_boxes) == 0: + return [], [] + img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True) + output = onet(img_boxes.to(self.device)) + landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10] + offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4] + probs = output[2].cpu().data.numpy() # shape [n_boxes, 2] + + keep = np.where(probs[:, 1] > thresholds[2])[0] + bounding_boxes = bounding_boxes[keep] + bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) + offsets = offsets[keep] + landmarks = landmarks[keep] + + # compute landmark points + width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 + height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 + xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] + landmarks[:, 0:5] = np.expand_dims( + xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5] + landmarks[:, 5:10] = np.expand_dims( + ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10] + + bounding_boxes = calibrate_box(bounding_boxes, offsets) + keep = nms(bounding_boxes, nms_thresholds[2], mode='min') + bounding_boxes = bounding_boxes[keep] + landmarks = landmarks[keep] + landmarks = landmarks.reshape(-1, 2, 5).transpose( + (0, 2, 1)).reshape(-1, 10) + + return bounding_boxes, landmarks diff --git a/modelscope/models/cv/face_detection/mtcnn/models/first_stage.py b/modelscope/models/cv/face_detection/mtcnn/models/first_stage.py new file mode 100644 index 0000000..0b5328e --- /dev/null +++ b/modelscope/models/cv/face_detection/mtcnn/models/first_stage.py @@ -0,0 +1,100 @@ +# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch +import math + +import numpy as np +import torch +from PIL import Image +from torch.autograd import Variable + +from .box_utils import _preprocess, nms + + +def run_first_stage(image, net, scale, threshold, device='cuda'): + """Run P-Net, generate bounding boxes, and do NMS. + + Arguments: + image: an instance of PIL.Image. + net: an instance of pytorch's nn.Module, P-Net. + scale: a float number, + scale width and height of the image by this number. + threshold: a float number, + threshold on the probability of a face when generating + bounding boxes from predictions of the net. + + Returns: + a float numpy array of shape [n_boxes, 9], + bounding boxes with scores and offsets (4 + 1 + 4). 
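+
+    Note:
+        The image is resized by `scale`, P-Net is run on the whole resized
+        image, candidate boxes are generated wherever the face probability
+        exceeds `threshold`, and NMS with an overlap threshold of 0.5 is
+        applied. Returns None if no box passes the threshold.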
+ """ + + # scale the image and convert it to a float array + width, height = image.size + sw, sh = math.ceil(width * scale), math.ceil(height * scale) + img = image.resize((sw, sh), Image.BILINEAR) + img = np.asarray(img, 'float32') + + img = Variable(torch.FloatTensor(_preprocess(img)), + volatile=True).to(device) + output = net(img) + probs = output[1].cpu().data.numpy()[0, 1, :, :] + offsets = output[0].cpu().data.numpy() + # probs: probability of a face at each sliding window + # offsets: transformations to true bounding boxes + + boxes = _generate_bboxes(probs, offsets, scale, threshold) + if len(boxes) == 0: + return None + + keep = nms(boxes[:, 0:5], overlap_threshold=0.5) + return boxes[keep] + + +def _generate_bboxes(probs, offsets, scale, threshold): + """Generate bounding boxes at places + where there is probably a face. + + Arguments: + probs: a float numpy array of shape [n, m]. + offsets: a float numpy array of shape [1, 4, n, m]. + scale: a float number, + width and height of the image were scaled by this number. + threshold: a float number. + + Returns: + a float numpy array of shape [n_boxes, 9] + """ + + # applying P-Net is equivalent, in some sense, to + # moving 12x12 window with stride 2 + stride = 2 + cell_size = 12 + + # indices of boxes where there is probably a face + inds = np.where(probs > threshold) + + if inds[0].size == 0: + return np.array([]) + + # transformations of bounding boxes + tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] + # they are defined as: + # w = x2 - x1 + 1 + # h = y2 - y1 + 1 + # x1_true = x1 + tx1*w + # x2_true = x2 + tx2*w + # y1_true = y1 + ty1*h + # y2_true = y2 + ty2*h + + offsets = np.array([tx1, ty1, tx2, ty2]) + score = probs[inds[0], inds[1]] + + # P-Net is applied to scaled images + # so we need to rescale bounding boxes back + bounding_boxes = np.vstack([ + np.round((stride * inds[1] + 1.0) / scale), + np.round((stride * inds[0] + 1.0) / scale), + np.round((stride * inds[1] + 1.0 + cell_size) / scale), + np.round((stride * inds[0] + 1.0 + cell_size) / scale), score, offsets + ]) + # why one is added? + + return bounding_boxes.T diff --git a/modelscope/models/cv/face_detection/mtcnn/models/get_nets.py b/modelscope/models/cv/face_detection/mtcnn/models/get_nets.py new file mode 100644 index 0000000..f6443c2 --- /dev/null +++ b/modelscope/models/cv/face_detection/mtcnn/models/get_nets.py @@ -0,0 +1,156 @@ +# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch +from collections import OrderedDict + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Flatten(nn.Module): + def __init__(self): + super(Flatten, self).__init__() + + def forward(self, x): + """ + Arguments: + x: a float tensor with shape [batch_size, c, h, w]. + Returns: + a float tensor with shape [batch_size, c*h*w]. 
+ """ + + # without this pretrained model isn't working + x = x.transpose(3, 2).contiguous() + + return x.view(x.size(0), -1) + + +class PNet(nn.Module): + def __init__(self, model_path=None): + + super(PNet, self).__init__() + + # suppose we have input with size HxW, then + # after first layer: H - 2, + # after pool: ceil((H - 2)/2), + # after second conv: ceil((H - 2)/2) - 2, + # after last conv: ceil((H - 2)/2) - 4, + # and the same for W + + self.features = nn.Sequential( + OrderedDict([('conv1', nn.Conv2d(3, 10, 3, 1)), + ('prelu1', nn.PReLU(10)), + ('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)), + ('conv2', nn.Conv2d(10, 16, 3, 1)), + ('prelu2', nn.PReLU(16)), + ('conv3', nn.Conv2d(16, 32, 3, 1)), + ('prelu3', nn.PReLU(32))])) + + self.conv4_1 = nn.Conv2d(32, 2, 1, 1) + self.conv4_2 = nn.Conv2d(32, 4, 1, 1) + + weights = np.load(model_path, allow_pickle=True)[()] + for n, p in self.named_parameters(): + p.data = torch.FloatTensor(weights[n]) + + def forward(self, x): + """ + Arguments: + x: a float tensor with shape [batch_size, 3, h, w]. + Returns: + b: a float tensor with shape [batch_size, 4, h', w']. + a: a float tensor with shape [batch_size, 2, h', w']. + """ + x = self.features(x) + a = self.conv4_1(x) + b = self.conv4_2(x) + a = F.softmax(a) + return b, a + + +class RNet(nn.Module): + def __init__(self, model_path=None): + + super(RNet, self).__init__() + + self.features = nn.Sequential( + OrderedDict([('conv1', nn.Conv2d(3, 28, 3, 1)), + ('prelu1', nn.PReLU(28)), + ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), + ('conv2', nn.Conv2d(28, 48, 3, 1)), + ('prelu2', nn.PReLU(48)), + ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), + ('conv3', nn.Conv2d(48, 64, 2, 1)), + ('prelu3', nn.PReLU(64)), ('flatten', Flatten()), + ('conv4', nn.Linear(576, 128)), + ('prelu4', nn.PReLU(128))])) + + self.conv5_1 = nn.Linear(128, 2) + self.conv5_2 = nn.Linear(128, 4) + + weights = np.load(model_path, allow_pickle=True)[()] + for n, p in self.named_parameters(): + p.data = torch.FloatTensor(weights[n]) + + def forward(self, x): + """ + Arguments: + x: a float tensor with shape [batch_size, 3, h, w]. + Returns: + b: a float tensor with shape [batch_size, 4]. + a: a float tensor with shape [batch_size, 2]. + """ + x = self.features(x) + a = self.conv5_1(x) + b = self.conv5_2(x) + a = F.softmax(a) + return b, a + + +class ONet(nn.Module): + def __init__(self, model_path=None): + + super(ONet, self).__init__() + + self.features = nn.Sequential( + OrderedDict([ + ('conv1', nn.Conv2d(3, 32, 3, 1)), + ('prelu1', nn.PReLU(32)), + ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), + ('conv2', nn.Conv2d(32, 64, 3, 1)), + ('prelu2', nn.PReLU(64)), + ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), + ('conv3', nn.Conv2d(64, 64, 3, 1)), + ('prelu3', nn.PReLU(64)), + ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)), + ('conv4', nn.Conv2d(64, 128, 2, 1)), + ('prelu4', nn.PReLU(128)), + ('flatten', Flatten()), + ('conv5', nn.Linear(1152, 256)), + ('drop5', nn.Dropout(0.25)), + ('prelu5', nn.PReLU(256)), + ])) + + self.conv6_1 = nn.Linear(256, 2) + self.conv6_2 = nn.Linear(256, 4) + self.conv6_3 = nn.Linear(256, 10) + + weights = np.load(model_path, allow_pickle=True)[()] + for n, p in self.named_parameters(): + p.data = torch.FloatTensor(weights[n]) + + def forward(self, x): + """ + Arguments: + x: a float tensor with shape [batch_size, 3, h, w]. + Returns: + c: a float tensor with shape [batch_size, 10]. + b: a float tensor with shape [batch_size, 4]. + a: a float tensor with shape [batch_size, 2]. 
+ """ + x = self.features(x) + a = self.conv6_1(x) + b = self.conv6_2(x) + c = self.conv6_3(x) + a = F.softmax(a) + return c, b, a diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/LK/__init__.py b/modelscope/models/cv/face_detection/peppa_pig_face/LK/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/LK/lk.py b/modelscope/models/cv/face_detection/peppa_pig_face/LK/lk.py new file mode 100644 index 0000000..6eebd5e --- /dev/null +++ b/modelscope/models/cv/face_detection/peppa_pig_face/LK/lk.py @@ -0,0 +1,96 @@ +# The implementation here is modified based on InsightFace_Pytorch, originally Apache License and publicly available +# at https://github.com/610265158/Peppa_Pig_Face_Engine +import numpy as np + + +class GroupTrack(): + def __init__(self): + self.old_frame = None + self.previous_landmarks_set = None + self.with_landmark = True + self.thres = 1 + self.alpha = 0.95 + self.iou_thres = 0.5 + + def calculate(self, img, current_landmarks_set): + if self.previous_landmarks_set is None: + self.previous_landmarks_set = current_landmarks_set + result = current_landmarks_set + else: + previous_lm_num = self.previous_landmarks_set.shape[0] + if previous_lm_num == 0: + self.previous_landmarks_set = current_landmarks_set + result = current_landmarks_set + return result + else: + result = [] + for i in range(current_landmarks_set.shape[0]): + not_in_flag = True + for j in range(previous_lm_num): + if self.iou(current_landmarks_set[i], + self.previous_landmarks_set[j] + ) > self.iou_thres: + result.append( + self.smooth(current_landmarks_set[i], + self.previous_landmarks_set[j])) + not_in_flag = False + break + if not_in_flag: + result.append(current_landmarks_set[i]) + + result = np.array(result) + self.previous_landmarks_set = result + + return result + + def iou(self, p_set0, p_set1): + rec1 = [ + np.min(p_set0[:, 0]), + np.min(p_set0[:, 1]), + np.max(p_set0[:, 0]), + np.max(p_set0[:, 1]) + ] + rec2 = [ + np.min(p_set1[:, 0]), + np.min(p_set1[:, 1]), + np.max(p_set1[:, 0]), + np.max(p_set1[:, 1]) + ] + + # computing area of each rectangles + S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1]) + S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1]) + + # computing the sum_area + sum_area = S_rec1 + S_rec2 + + # find the each edge of intersect rectangle + x1 = max(rec1[0], rec2[0]) + y1 = max(rec1[1], rec2[1]) + x2 = min(rec1[2], rec2[2]) + y2 = min(rec1[3], rec2[3]) + + # judge if there is an intersect + intersect = max(0, x2 - x1) * max(0, y2 - y1) + + iou = intersect / (sum_area - intersect) + return iou + + def smooth(self, now_landmarks, previous_landmarks): + result = [] + for i in range(now_landmarks.shape[0]): + x = now_landmarks[i][0] - previous_landmarks[i][0] + y = now_landmarks[i][1] - previous_landmarks[i][1] + dis = np.sqrt(np.square(x) + np.square(y)) + if dis < self.thres: + result.append(previous_landmarks[i]) + else: + result.append( + self.do_moving_average(now_landmarks[i], + previous_landmarks[i])) + + return np.array(result) + + def do_moving_average(self, p_now, p_previous): + p = self.alpha * p_now + (1 - self.alpha) * p_previous + return p diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/__init__.py b/modelscope/models/cv/face_detection/peppa_pig_face/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/face_detector.py b/modelscope/models/cv/face_detection/peppa_pig_face/face_detector.py new file mode 100644 index 
0000000..0e52f16 --- /dev/null +++ b/modelscope/models/cv/face_detection/peppa_pig_face/face_detector.py @@ -0,0 +1,113 @@ +# The implementation here is modified based on InsightFace_Pytorch, originally Apache License and publicly available +# at https://github.com/610265158/Peppa_Pig_Face_Engine +import cv2 +import numpy as np + +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class FaceDetector: + def __init__(self, dir): + + self.model_path = dir + '/detector.pb' + self.thres = 0.8 + self.input_shape = (512, 512, 3) + self.pixel_means = np.array([123., 116., 103.]) + + self._graph = tf.Graph() + + with self._graph.as_default(): + self._graph, self._sess = self.init_model(self.model_path) + + self.input_image = tf.get_default_graph().get_tensor_by_name( + 'tower_0/images:0') + self.training = tf.get_default_graph().get_tensor_by_name( + 'training_flag:0') + self.output_ops = [ + tf.get_default_graph().get_tensor_by_name('tower_0/boxes:0'), + tf.get_default_graph().get_tensor_by_name('tower_0/scores:0'), + tf.get_default_graph().get_tensor_by_name( + 'tower_0/num_detections:0'), + ] + + def __call__(self, image): + + image, scale_x, scale_y = self.preprocess( + image, + target_width=self.input_shape[1], + target_height=self.input_shape[0]) + + image = np.expand_dims(image, 0) + + boxes, scores, num_boxes = self._sess.run(self.output_ops, + feed_dict={ + self.input_image: image, + self.training: False + }) + + num_boxes = num_boxes[0] + boxes = boxes[0][:num_boxes] + + scores = scores[0][:num_boxes] + + to_keep = scores > self.thres + boxes = boxes[to_keep] + scores = scores[to_keep] + + y1 = self.input_shape[0] / scale_y + x1 = self.input_shape[1] / scale_x + y2 = self.input_shape[0] / scale_y + x2 = self.input_shape[1] / scale_x + scaler = np.array([y1, x1, y2, x2], dtype='float32') + boxes = boxes * scaler + + scores = np.expand_dims(scores, 0).reshape([-1, 1]) + + for i in range(boxes.shape[0]): + boxes[i] = np.array( + [boxes[i][1], boxes[i][0], boxes[i][3], boxes[i][2]]) + return np.concatenate([boxes, scores], axis=1) + + def preprocess(self, image, target_height, target_width, label=None): + + h, w, c = image.shape + + bimage = np.zeros(shape=[target_height, target_width, c], + dtype=image.dtype) + np.array(self.pixel_means, + dtype=image.dtype) + long_side = max(h, w) + + scale_x = scale_y = target_height / long_side + + image = cv2.resize(image, None, fx=scale_x, fy=scale_y) + + h_, w_, _ = image.shape + bimage[:h_, :w_, :] = image + + return bimage, scale_x, scale_y + + def init_model(self, *args): + pb_path = args[0] + + def init_pb(model_path): + config = tf.ConfigProto() + config.gpu_options.per_process_gpu_memory_fraction = 0.2 + compute_graph = tf.Graph() + compute_graph.as_default() + sess = tf.Session(config=config) + with tf.gfile.GFile(model_path, 'rb') as fid: + graph_def = tf.GraphDef() + graph_def.ParseFromString(fid.read()) + tf.import_graph_def(graph_def, name='') + + return (compute_graph, sess) + + model = init_pb(pb_path) + + graph = model[0] + sess = model[1] + + return graph, sess diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py new file mode 100644 index 0000000..f66b4ef --- /dev/null +++ b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py @@ -0,0 +1,153 @@ +# The implementation here is modified based on InsightFace_Pytorch, originally Apache License and publicly available +# at 
https://github.com/610265158/Peppa_Pig_Face_Engine +import cv2 +import numpy as np + +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class FaceLandmark: + def __init__(self, dir): + self.model_path = dir + '/keypoints.pb' + self.min_face = 60 + self.keypoint_num = 136 + self.pixel_means = np.array([123., 116., 103.]) + self.kp_extend_range = [0.2, 0.3] + self.kp_shape = (160, 160, 3) + + self._graph = tf.Graph() + + with self._graph.as_default(): + + self._graph, self._sess = self.init_model(self.model_path) + self.img_input = tf.get_default_graph().get_tensor_by_name( + 'tower_0/images:0') + self.embeddings = tf.get_default_graph().get_tensor_by_name( + 'tower_0/prediction:0') + self.training = tf.get_default_graph().get_tensor_by_name( + 'training_flag:0') + + self.landmark = self.embeddings[:, :self.keypoint_num] + self.headpose = self.embeddings[:, -7:-4] * 90. + self.state = tf.nn.sigmoid(self.embeddings[:, -4:]) + + def __call__(self, img, bboxes): + landmark_result = [] + state_result = [] + for i, bbox in enumerate(bboxes): + landmark, state = self._one_shot_run(img, bbox, i) + if landmark is not None: + landmark_result.append(landmark) + state_result.append(state) + return np.array(landmark_result), np.array(state_result) + + def simple_run(self, cropped_img): + with self._graph.as_default(): + + cropped_img = np.expand_dims(cropped_img, axis=0) + landmark, p, states = self._sess.run( + [self.landmark, self.headpose, self.state], + feed_dict={ + self.img_input: cropped_img, + self.training: False + }) + + return landmark, states + + def _one_shot_run(self, image, bbox, i): + + bbox_width = bbox[2] - bbox[0] + bbox_height = bbox[3] - bbox[1] + if (bbox_width <= self.min_face and bbox_height <= self.min_face): + return None, None + add = int(max(bbox_width, bbox_height)) + bimg = cv2.copyMakeBorder(image, + add, + add, + add, + add, + borderType=cv2.BORDER_CONSTANT, + value=self.pixel_means) + bbox += add + + one_edge = (1 + 2 * self.kp_extend_range[0]) * bbox_width + center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2] + + bbox[0] = center[0] - one_edge // 2 + bbox[1] = center[1] - one_edge // 2 + bbox[2] = center[0] + one_edge // 2 + bbox[3] = center[1] + one_edge // 2 + + bbox = bbox.astype(np.int) + crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :] + h, w, _ = crop_image.shape + crop_image = cv2.resize(crop_image, + (self.kp_shape[1], self.kp_shape[0])) + crop_image = crop_image.astype(np.float32) + + keypoints, state = self.simple_run(crop_image) + + res = keypoints[0][:self.keypoint_num].reshape((-1, 2)) + res[:, 0] = res[:, 0] * w / self.kp_shape[1] + res[:, 1] = res[:, 1] * h / self.kp_shape[0] + + landmark = [] + for _index in range(res.shape[0]): + x_y = res[_index] + landmark.append([ + int(x_y[0] * self.kp_shape[0] + bbox[0] - add), + int(x_y[1] * self.kp_shape[1] + bbox[1] - add) + ]) + + landmark = np.array(landmark, np.float32) + + return landmark, state + + def init_model(self, *args): + + if len(args) == 1: + use_pb = True + pb_path = args[0] + else: + use_pb = False + meta_path = args[0] + restore_model_path = args[1] + + def ini_ckpt(): + graph = tf.Graph() + graph.as_default() + configProto = tf.ConfigProto() + configProto.gpu_options.allow_growth = True + sess = tf.Session(config=configProto) + # load_model(model_path, sess) + saver = tf.train.import_meta_graph(meta_path) + saver.restore(sess, restore_model_path) + + print('Model restored!') + return (graph, sess) + + def init_pb(model_path): + config = 
tf.ConfigProto() + config.gpu_options.per_process_gpu_memory_fraction = 0.2 + compute_graph = tf.Graph() + compute_graph.as_default() + sess = tf.Session(config=config) + with tf.gfile.GFile(model_path, 'rb') as fid: + graph_def = tf.GraphDef() + graph_def.ParseFromString(fid.read()) + tf.import_graph_def(graph_def, name='') + + return (compute_graph, sess) + + if use_pb: + model = init_pb(pb_path) + else: + model = ini_ckpt() + + graph = model[0] + sess = model[1] + + return graph, sess diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/facer.py b/modelscope/models/cv/face_detection/peppa_pig_face/facer.py new file mode 100644 index 0000000..75b0bc5 --- /dev/null +++ b/modelscope/models/cv/face_detection/peppa_pig_face/facer.py @@ -0,0 +1,136 @@ +# The implementation here is modified based on InsightFace_Pytorch, originally Apache License and publicly available +# at https://github.com/610265158/Peppa_Pig_Face_Engine +import cv2 +import numpy as np + +from .face_detector import FaceDetector +from .face_landmark import FaceLandmark +from .LK.lk import GroupTrack + + +class FaceAna(): + def __init__(self, model_dir): + self.face_detector = FaceDetector(model_dir) + self.face_landmark = FaceLandmark(model_dir) + self.trace = GroupTrack() + + self.track_box = None + self.previous_image = None + self.previous_box = None + + self.diff_thres = 5 + self.top_k = 10 + self.iou_thres = 0.5 + self.alpha = 0.3 + + def run(self, image): + + boxes = self.face_detector(image) + + if boxes.shape[0] > self.top_k: + boxes = self.sort(boxes) + + boxes_return = np.array(boxes) + landmarks, states = self.face_landmark(image, boxes) + + if 1: + track = [] + for i in range(landmarks.shape[0]): + track.append([ + np.min(landmarks[i][:, 0]), + np.min(landmarks[i][:, 1]), + np.max(landmarks[i][:, 0]), + np.max(landmarks[i][:, 1]) + ]) + tmp_box = np.array(track) + + self.track_box = self.judge_boxs(boxes_return, tmp_box) + + self.track_box, landmarks = self.sort_res(self.track_box, landmarks) + return self.track_box, landmarks, states + + def sort_res(self, bboxes, points): + area = [] + for bbox in bboxes: + bbox_width = bbox[2] - bbox[0] + bbox_height = bbox[3] - bbox[1] + area.append(bbox_height * bbox_width) + + area = np.array(area) + picked = area.argsort()[::-1] + sorted_bboxes = [bboxes[x] for x in picked] + sorted_points = [points[x] for x in picked] + return np.array(sorted_bboxes), np.array(sorted_points) + + def diff_frames(self, previous_frame, image): + if previous_frame is None: + return True + else: + _diff = cv2.absdiff(previous_frame, image) + diff = np.sum( + _diff) / previous_frame.shape[0] / previous_frame.shape[1] / 3. 
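+            # diff is the mean absolute per-pixel difference between the two
+            # frames, averaged over the three color channels; the frames are
+            # treated as different only when it exceeds self.diff_thres.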
+ return diff > self.diff_thres + + def sort(self, bboxes): + if self.top_k > 100: + return bboxes + area = [] + for bbox in bboxes: + + bbox_width = bbox[2] - bbox[0] + bbox_height = bbox[3] - bbox[1] + area.append(bbox_height * bbox_width) + + area = np.array(area) + + picked = area.argsort()[-self.top_k:][::-1] + sorted_bboxes = [bboxes[x] for x in picked] + return np.array(sorted_bboxes) + + def judge_boxs(self, previuous_bboxs, now_bboxs): + def iou(rec1, rec2): + + S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1]) + S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1]) + + sum_area = S_rec1 + S_rec2 + + x1 = max(rec1[0], rec2[0]) + y1 = max(rec1[1], rec2[1]) + x2 = min(rec1[2], rec2[2]) + y2 = min(rec1[3], rec2[3]) + + intersect = max(0, x2 - x1) * max(0, y2 - y1) + + return intersect / (sum_area - intersect) + + if previuous_bboxs is None: + return now_bboxs + + result = [] + + for i in range(now_bboxs.shape[0]): + contain = False + for j in range(previuous_bboxs.shape[0]): + if iou(now_bboxs[i], previuous_bboxs[j]) > self.iou_thres: + result.append(self.smooth(now_bboxs[i], + previuous_bboxs[j])) + contain = True + break + if not contain: + result.append(now_bboxs[i]) + + return np.array(result) + + def smooth(self, now_box, previous_box): + + return self.do_moving_average(now_box[:4], previous_box[:4]) + + def do_moving_average(self, p_now, p_previous): + p = self.alpha * p_now + (1 - self.alpha) * p_previous + return p + + def reset(self): + self.track_box = None + self.previous_image = None + self.previous_box = None diff --git a/modelscope/models/cv/face_detection/retinaface/__init__.py b/modelscope/models/cv/face_detection/retinaface/__init__.py new file mode 100644 index 0000000..e7b589a --- /dev/null +++ b/modelscope/models/cv/face_detection/retinaface/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from .detection import RetinaFaceDetection diff --git a/modelscope/models/cv/face_detection/retinaface/detection.py b/modelscope/models/cv/face_detection/retinaface/detection.py new file mode 100755 index 0000000..87b3ea5 --- /dev/null +++ b/modelscope/models/cv/face_detection/retinaface/detection.py @@ -0,0 +1,136 @@ +# The implementation is based on resnet, available at https://github.com/biubug6/Pytorch_Retinaface +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn + +from modelscope.metainfo import Models +from modelscope.models.base import Tensor, TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks + +from .models.retinaface import RetinaFace +from .utils import PriorBox, decode, decode_landm, py_cpu_nms + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.retinaface) +class RetinaFaceDetection(TorchModel): + def __init__(self, model_path, device='cuda', **kwargs): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.cfg = Config.from_file( + model_path.replace(ModelFile.TORCH_MODEL_FILE, + ModelFile.CONFIGURATION))['models'] + self.net = RetinaFace(cfg=self.cfg) + self.load_model() + self.device = device + self.net = self.net.to(self.device) + + self.conf_th = kwargs.get('conf_th', 0.82) + self.nms_th = kwargs.get('nms_th', 0.4) + self.mean = torch.tensor([[[[104]], [[117]], [[123]]]]).to(device) + + def check_keys(self, pretrained_state_dict): + ckpt_keys = set(pretrained_state_dict.keys()) + model_keys = set(self.net.state_dict().keys()) + used_pretrained_keys = model_keys & ckpt_keys + assert len( + used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' + return True + + def remove_prefix(self, state_dict, prefix): + new_state_dict = dict() + for k, v in state_dict.items(): + if k.startswith(prefix): + new_state_dict[k[len(prefix):]] = v + else: + new_state_dict[k] = v + return new_state_dict + + def load_model(self, load_to_cpu=False): + pretrained_dict = torch.load(self.model_path, + map_location=torch.device('cpu')) + if 'state_dict' in pretrained_dict.keys(): + pretrained_dict = self.remove_prefix(pretrained_dict['state_dict'], + 'module.') + else: + pretrained_dict = self.remove_prefix(pretrained_dict, 'module.') + self.check_keys(pretrained_dict) + self.net.load_state_dict(pretrained_dict, strict=False) + self.net.eval() + + def forward(self, input): + img_raw = input['img'].cpu().numpy() + img = np.float32(img_raw) + + im_height, im_width = img.shape[:2] + ss = 1.0 + # tricky + if max(im_height, im_width) > 1500: + ss = 1000.0 / max(im_height, im_width) + img = cv2.resize(img, (0, 0), fx=ss, fy=ss) + im_height, im_width = img.shape[:2] + + scale = torch.Tensor( + [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) + img -= (104, 117, 123) + img = img.transpose(2, 0, 1) + img = torch.from_numpy(img).unsqueeze(0) + img = img.to(self.device) + scale = scale.to(self.device) + + loc, conf, landms = self.net(img) # forward pass + del img + + top_k = 5000 + keep_top_k = 750 + + priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) + priors = priorbox.forward() + priors = priors.to(self.device) + prior_data = priors.data + boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) + boxes = boxes * scale + boxes = boxes.cpu().numpy() + scores = conf.squeeze(0).data.cpu().numpy()[:, 1] + landms = decode_landm(landms.data.squeeze(0), prior_data, + 
self.cfg['variance']) + scale1 = torch.Tensor([ + im_width, im_height, im_width, im_height, im_width, im_height, + im_width, im_height, im_width, im_height + ]) + scale1 = scale1.to(self.device) + landms = landms * scale1 + landms = landms.cpu().numpy() + + # ignore low scores + inds = np.where(scores > self.conf_th)[0] + boxes = boxes[inds] + landms = landms[inds] + scores = scores[inds] + + # keep top-K before NMS + order = scores.argsort()[::-1][:top_k] + boxes = boxes[order] + landms = landms[order] + scores = scores[order] + + # do NMS + dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, + copy=False) + keep = py_cpu_nms(dets, self.nms_th) + dets = dets[keep, :] + landms = landms[keep] + + # keep top-K faster NMS + dets = dets[:keep_top_k, :] + landms = landms[:keep_top_k, :] + + landms = landms.reshape((-1, 5, 2)) + landms = landms.reshape( + -1, + 10, + ) + return dets / ss, landms / ss diff --git a/modelscope/models/cv/face_detection/retinaface/models/__init__.py b/modelscope/models/cv/face_detection/retinaface/models/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/retinaface/models/net.py b/modelscope/models/cv/face_detection/retinaface/models/net.py new file mode 100755 index 0000000..74f3bdb --- /dev/null +++ b/modelscope/models/cv/face_detection/retinaface/models/net.py @@ -0,0 +1,162 @@ +# The implementation is based on resnet, available at https://github.com/biubug6/Pytorch_Retinaface +import time + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision.models as models +import torchvision.models._utils as _utils +from torch.autograd import Variable + + +def conv_bn(inp, oup, stride=1, leaky=0): + return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.LeakyReLU(negative_slope=leaky, inplace=True)) + + +def conv_bn_no_relu(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + ) + + +def conv_bn1X1(inp, oup, stride, leaky=0): + return nn.Sequential(nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), + nn.BatchNorm2d(oup), + nn.LeakyReLU(negative_slope=leaky, inplace=True)) + + +def conv_dw(inp, oup, stride, leaky=0.1): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.LeakyReLU(negative_slope=leaky, inplace=True), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.LeakyReLU(negative_slope=leaky, inplace=True), + ) + + +class SSH(nn.Module): + def __init__(self, in_channel, out_channel): + super(SSH, self).__init__() + assert out_channel % 4 == 0 + leaky = 0 + if (out_channel <= 64): + leaky = 0.1 + self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1) + + self.conv5X5_1 = conv_bn(in_channel, + out_channel // 4, + stride=1, + leaky=leaky) + self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, + out_channel // 4, + stride=1) + + self.conv7X7_2 = conv_bn(out_channel // 4, + out_channel // 4, + stride=1, + leaky=leaky) + self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, + out_channel // 4, + stride=1) + + def forward(self, input): + conv3X3 = self.conv3X3(input) + + conv5X5_1 = self.conv5X5_1(input) + conv5X5 = self.conv5X5_2(conv5X5_1) + + conv7X7_2 = self.conv7X7_2(conv5X5_1) + conv7X7 = self.conv7x7_3(conv7X7_2) + + out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) + out = F.relu(out) + return out + + +class FPN(nn.Module): + def __init__(self, in_channels_list, 
out_channels): + super(FPN, self).__init__() + leaky = 0 + if (out_channels <= 64): + leaky = 0.1 + self.output1 = conv_bn1X1(in_channels_list[0], + out_channels, + stride=1, + leaky=leaky) + self.output2 = conv_bn1X1(in_channels_list[1], + out_channels, + stride=1, + leaky=leaky) + self.output3 = conv_bn1X1(in_channels_list[2], + out_channels, + stride=1, + leaky=leaky) + + self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky) + self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky) + + def forward(self, input): + # names = list(input.keys()) + input = list(input.values()) + + output1 = self.output1(input[0]) + output2 = self.output2(input[1]) + output3 = self.output3(input[2]) + + up3 = F.interpolate(output3, + size=[output2.size(2), + output2.size(3)], + mode='nearest') + output2 = output2 + up3 + output2 = self.merge2(output2) + + up2 = F.interpolate(output2, + size=[output1.size(2), + output1.size(3)], + mode='nearest') + output1 = output1 + up2 + output1 = self.merge1(output1) + + out = [output1, output2, output3] + return out + + +class MobileNetV1(nn.Module): + def __init__(self): + super(MobileNetV1, self).__init__() + self.stage1 = nn.Sequential( + conv_bn(3, 8, 2, leaky=0.1), # 3 + conv_dw(8, 16, 1), # 7 + conv_dw(16, 32, 2), # 11 + conv_dw(32, 32, 1), # 19 + conv_dw(32, 64, 2), # 27 + conv_dw(64, 64, 1), # 43 + ) + self.stage2 = nn.Sequential( + conv_dw(64, 128, 2), # 43 + 16 = 59 + conv_dw(128, 128, 1), # 59 + 32 = 91 + conv_dw(128, 128, 1), # 91 + 32 = 123 + conv_dw(128, 128, 1), # 123 + 32 = 155 + conv_dw(128, 128, 1), # 155 + 32 = 187 + conv_dw(128, 128, 1), # 187 + 32 = 219 + ) + self.stage3 = nn.Sequential( + conv_dw(128, 256, 2), # 219 +3 2 = 241 + conv_dw(256, 256, 1), # 241 + 64 = 301 + ) + self.avg = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(256, 1000) + + def forward(self, x): + x = self.stage1(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.avg(x) + x = x.view(-1, 256) + x = self.fc(x) + return x diff --git a/modelscope/models/cv/face_detection/retinaface/models/retinaface.py b/modelscope/models/cv/face_detection/retinaface/models/retinaface.py new file mode 100755 index 0000000..75e320c --- /dev/null +++ b/modelscope/models/cv/face_detection/retinaface/models/retinaface.py @@ -0,0 +1,138 @@ +# The implementation is based on resnet, available at https://github.com/biubug6/Pytorch_Retinaface +from collections import OrderedDict + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision.models as models +import torchvision.models._utils as _utils +import torchvision.models.detection.backbone_utils as backbone_utils + +from .net import FPN, SSH, MobileNetV1 + + +class ClassHead(nn.Module): + def __init__(self, inchannels=512, num_anchors=3): + super(ClassHead, self).__init__() + self.num_anchors = num_anchors + self.conv1x1 = nn.Conv2d(inchannels, + self.num_anchors * 2, + kernel_size=(1, 1), + stride=1, + padding=0) + + def forward(self, x): + out = self.conv1x1(x) + out = out.permute(0, 2, 3, 1).contiguous() + + return out.view(out.shape[0], -1, 2) + + +class BboxHead(nn.Module): + def __init__(self, inchannels=512, num_anchors=3): + super(BboxHead, self).__init__() + self.conv1x1 = nn.Conv2d(inchannels, + num_anchors * 4, + kernel_size=(1, 1), + stride=1, + padding=0) + + def forward(self, x): + out = self.conv1x1(x) + out = out.permute(0, 2, 3, 1).contiguous() + + return out.view(out.shape[0], -1, 4) + + +class LandmarkHead(nn.Module): + def __init__(self, inchannels=512, num_anchors=3): + 
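+        # Each anchor regresses 5 facial landmarks as (x, y) pairs, so the
+        # 1x1 conv below outputs num_anchors * 10 channels per location.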
super(LandmarkHead, self).__init__() + self.conv1x1 = nn.Conv2d(inchannels, + num_anchors * 10, + kernel_size=(1, 1), + stride=1, + padding=0) + + def forward(self, x): + out = self.conv1x1(x) + out = out.permute(0, 2, 3, 1).contiguous() + + return out.view(out.shape[0], -1, 10) + + +class RetinaFace(nn.Module): + def __init__(self, cfg=None): + """ + :param cfg: Network related settings. + """ + super(RetinaFace, self).__init__() + backbone = None + if cfg['name'] == 'Resnet50': + backbone = models.resnet50(pretrained=cfg['pretrain']) + else: + raise Exception('Invalid name') + + self.body = _utils.IntermediateLayerGetter(backbone, + cfg['return_layers']) + in_channels_stage2 = cfg['in_channel'] + in_channels_list = [ + in_channels_stage2 * 2, + in_channels_stage2 * 4, + in_channels_stage2 * 8, + ] + out_channels = cfg['out_channel'] + self.fpn = FPN(in_channels_list, out_channels) + self.ssh1 = SSH(out_channels, out_channels) + self.ssh2 = SSH(out_channels, out_channels) + self.ssh3 = SSH(out_channels, out_channels) + + self.ClassHead = self._make_class_head(fpn_num=3, + inchannels=cfg['out_channel']) + self.BboxHead = self._make_bbox_head(fpn_num=3, + inchannels=cfg['out_channel']) + self.LandmarkHead = self._make_landmark_head( + fpn_num=3, inchannels=cfg['out_channel']) + + def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2): + classhead = nn.ModuleList() + for i in range(fpn_num): + classhead.append(ClassHead(inchannels, anchor_num)) + return classhead + + def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2): + bboxhead = nn.ModuleList() + for i in range(fpn_num): + bboxhead.append(BboxHead(inchannels, anchor_num)) + return bboxhead + + def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2): + landmarkhead = nn.ModuleList() + for i in range(fpn_num): + landmarkhead.append(LandmarkHead(inchannels, anchor_num)) + return landmarkhead + + def forward(self, inputs): + out = self.body(inputs) + + # FPN + fpn = self.fpn(out) + + # SSH + feature1 = self.ssh1(fpn[0]) + feature2 = self.ssh2(fpn[1]) + feature3 = self.ssh3(fpn[2]) + features = [feature1, feature2, feature3] + + bbox_regressions = torch.cat( + [self.BboxHead[i](feature) for i, feature in enumerate(features)], + dim=1) + classifications = torch.cat( + [self.ClassHead[i](feature) for i, feature in enumerate(features)], + dim=1) + ldm_regressions = torch.cat( + [self.LandmarkHead[i](feat) for i, feat in enumerate(features)], + dim=1) + + output = (bbox_regressions, F.softmax(classifications, + dim=-1), ldm_regressions) + return output diff --git a/modelscope/models/cv/face_detection/retinaface/utils.py b/modelscope/models/cv/face_detection/retinaface/utils.py new file mode 100755 index 0000000..9e38b60 --- /dev/null +++ b/modelscope/models/cv/face_detection/retinaface/utils.py @@ -0,0 +1,122 @@ +# -------------------------------------------------------- +# Modified from https://github.com/biubug6/Pytorch_Retinaface +# -------------------------------------------------------- + +from itertools import product as product +from math import ceil + +import numpy as np +import torch + + +class PriorBox(object): + def __init__(self, cfg, image_size=None, phase='train'): + super(PriorBox, self).__init__() + self.min_sizes = cfg['min_sizes'] + self.steps = cfg['steps'] + self.clip = cfg['clip'] + self.image_size = image_size + self.feature_maps = [[ + ceil(self.image_size[0] / step), + ceil(self.image_size[1] / step) + ] for step in self.steps] + self.name = 's' + + def forward(self): + anchors = [] + 
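+        # Walk every feature-map cell and emit one prior per configured
+        # min_size, in normalized center-size form (cx, cy, s_kx, s_ky).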
for k, f in enumerate(self.feature_maps): + min_sizes = self.min_sizes[k] + for i, j in product(range(f[0]), range(f[1])): + for min_size in min_sizes: + s_kx = min_size / self.image_size[1] + s_ky = min_size / self.image_size[0] + dense_cx = [ + x * self.steps[k] / self.image_size[1] + for x in [j + 0.5] + ] + dense_cy = [ + y * self.steps[k] / self.image_size[0] + for y in [i + 0.5] + ] + for cy, cx in product(dense_cy, dense_cx): + anchors += [cx, cy, s_kx, s_ky] + + # back to torch land + output = torch.Tensor(anchors).view(-1, 4) + if self.clip: + output.clamp_(max=1, min=0) + return output + + +def py_cpu_nms(dets, thresh): + """Pure Python NMS baseline.""" + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +# Adapted from https://github.com/Hakuyume/chainer-ssd +def decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat( + (priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + return boxes + + +def decode_landm(pre, priors, variances): + """Decode landm from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + pre (tensor): landm predictions for loc layers, + Shape: [num_priors,10] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded landm predictions + """ + a = priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:] + b = priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:] + c = priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:] + d = priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:] + e = priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:] + landms = torch.cat((a, b, c, d, e), dim=1) + return landms diff --git a/modelscope/models/cv/face_detection/scrfd/__init__.py b/modelscope/models/cv/face_detection/scrfd/__init__.py new file mode 100644 index 0000000..d86fa37 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from .damofd_detect import DamoFdDetect +from .preprocessor import SCRFDPreprocessor +from .scrfd_detect import ScrfdDetect +from .tinymog_detect import TinyMogDetect diff --git a/modelscope/models/cv/face_detection/scrfd/damofd_detect.py b/modelscope/models/cv/face_detection/scrfd/damofd_detect.py new file mode 100644 index 0000000..30cf91a --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/damofd_detect.py @@ -0,0 +1,31 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from copy import deepcopy +from typing import Any, Dict + +import torch + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from .scrfd_detect import ScrfdDetect + +logger = get_logger() + +__all__ = ['DamoFdDetect'] + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.damofd) +class DamoFdDetect(ScrfdDetect): + def __init__(self, model_dir, *args, **kwargs): + """ + initialize the tinymog face detection model from the `model_dir` path. + """ + config_file = 'DamoFD_lms.py' + kwargs['config_file'] = config_file + kwargs['model_file'] = ModelFile.TORCH_MODEL_FILE + super().__init__(model_dir, **kwargs) diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py new file mode 100755 index 0000000..14fc235 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py @@ -0,0 +1,4 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet +""" diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py new file mode 100644 index 0000000..e369df3 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py @@ -0,0 +1,7 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox +""" +from .transforms import bbox2result, distance2kps, kps2distance + +__all__ = ['bbox2result', 'distance2kps', 'kps2distance'] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py new file mode 100755 index 0000000..7d0f307 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py @@ -0,0 +1,87 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py +""" +import numpy as np +import torch + + +def bbox2result(bboxes, labels, num_classes, kps=None, num_kps=5): + """Convert detection results to a list of numpy arrays. 
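+
+    When ``kps`` is not None, the per-class keypoint arrays (``num_kps * 2``
+    extra columns) are horizontally stacked onto the matching bbox arrays.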
+ + Args: + bboxes (torch.Tensor | np.ndarray): shape (n, 5) + labels (torch.Tensor | np.ndarray): shape (n, ) + num_classes (int): class number, including background class + + Returns: + list(ndarray): bbox results of each class + """ + bbox_len = 5 if kps is None else 5 + num_kps * 2 # if has kps, add num_kps*2 into bbox + if bboxes.shape[0] == 0: + return [ + np.zeros((0, bbox_len), dtype=np.float32) + for i in range(num_classes) + ] + else: + if isinstance(bboxes, torch.Tensor): + bboxes = bboxes.detach().cpu().numpy() + labels = labels.detach().cpu().numpy() + if kps is None: + return [bboxes[labels == i, :] for i in range(num_classes)] + else: # with kps + if isinstance(kps, torch.Tensor): + kps = kps.detach().cpu().numpy() + return [ + np.hstack([bboxes[labels == i, :], kps[labels == i, :]]) + for i in range(num_classes) + ] + + +def distance2kps(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded kps. + """ + preds = [] + for i in range(0, distance.shape[1], 2): + px = points[:, i % 2] + distance[:, i] + py = points[:, i % 2 + 1] + distance[:, i + 1] + if max_shape is not None: + px = px.clamp(min=0, max=max_shape[1]) + py = py.clamp(min=0, max=max_shape[0]) + preds.append(px) + preds.append(py) + return torch.stack(preds, -1) + + +def kps2distance(points, kps, max_dis=None, eps=0.1): + """Decode bounding box based on distances. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + kps (Tensor): Shape (n, K), "xyxy" format + max_dis (float): Upper bound of the distance. + eps (float): a small value to ensure target < max_dis, instead <= + + Returns: + Tensor: Decoded distances. 
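+
+    Example (illustrative):
+        >>> points = torch.tensor([[10., 20.]])
+        >>> kps = torch.tensor([[13., 24., 7., 18.]])
+        >>> kps2distance(points, kps)  # -> tensor([[ 3., 4., -3., -2.]]), shape (1, 4)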
+ """ + + preds = [] + for i in range(0, kps.shape[1], 2): + px = kps[:, i] - points[:, i % 2] + py = kps[:, i + 1] - points[:, i % 2 + 1] + if max_dis is not None: + px = px.clamp(min=0, max=max_dis - eps) + py = py.clamp(min=0, max=max_dis - eps) + preds.append(px) + preds.append(py) + return torch.stack(preds, -1) diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py new file mode 100755 index 0000000..7e0e85a --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py @@ -0,0 +1,7 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py +""" +from .bbox_nms import multiclass_nms + +__all__ = ['multiclass_nms'] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py new file mode 100644 index 0000000..d1d09fc --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py @@ -0,0 +1,89 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py +""" +import torch + + +def multiclass_nms(multi_bboxes, + multi_scores, + score_thr, + nms_cfg, + max_num=-1, + score_factors=None, + return_inds=False, + multi_kps=None): + """NMS for multi-class bboxes. + + Args: + multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) + multi_kps (Tensor): shape (n, #class*num_kps*2) or (n, num_kps*2) + multi_scores (Tensor): shape (n, #class), where the last column + contains scores of the background class, but this will be ignored. + score_thr (float): bbox threshold, bboxes with scores lower than it + will not be considered. + nms_thr (float): NMS IoU threshold + max_num (int, optional): if there are more than max_num bboxes after + NMS, only top max_num will be kept. Default to -1. + score_factors (Tensor, optional): The factors multiplied to scores + before applying NMS. Default to None. + return_inds (bool, optional): Whether return the indices of kept + bboxes. Default to False. + + Returns: + tuple: (bboxes, labels, indices (optional)), tensors of shape (k, 5), + (k), and (k). Labels are 0-based. 
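+            When ``multi_kps`` is given, the kept keypoints of shape
+            (k, num_kps * 2) are returned after ``labels`` as well.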
+ """ + num_classes = multi_scores.size(1) - 1 + # exclude background category + kps = None + if multi_kps is not None: + num_kps = int((multi_kps.shape[1] / num_classes) / 2) + if multi_bboxes.shape[1] > 4: + bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4) + if multi_kps is not None: + kps = multi_kps.view(multi_scores.size(0), -1, num_kps * 2) + else: + bboxes = multi_bboxes[:, None].expand(multi_scores.size(0), + num_classes, 4) + if multi_kps is not None: + kps = multi_kps[:, None].expand(multi_scores.size(0), num_classes, + num_kps * 2) + + scores = multi_scores[:, :-1] + if score_factors is not None: + scores = scores * score_factors[:, None] + + labels = torch.arange(num_classes, dtype=torch.long) + labels = labels.view(1, -1).expand_as(scores) + + bboxes = bboxes.reshape(-1, 4) + if kps is not None: + kps = kps.reshape(-1, num_kps * 2) + scores = scores.reshape(-1) + labels = labels.reshape(-1) + + # remove low scoring boxes + valid_mask = scores > score_thr + inds = valid_mask.nonzero(as_tuple=False).squeeze(1) + bboxes, scores, labels = bboxes[inds], scores[inds], labels[inds] + if kps is not None: + kps = kps[inds] + if inds.numel() == 0: + if torch.onnx.is_in_onnx_export(): + raise RuntimeError('[ONNX Error] Can not record NMS ' + 'as it has not been executed this time') + return bboxes, labels, kps + + # TODO: add size check before feed into batched_nms + from mmcv.ops.nms import batched_nms + dets, keep = batched_nms(bboxes, scores, labels, nms_cfg) + + if max_num > 0: + dets = dets[:max_num] + keep = keep[:max_num] + + if return_inds: + return dets, labels[keep], kps[keep], keep + else: + return dets, labels[keep], kps[keep] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py new file mode 100644 index 0000000..b31cb54 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py @@ -0,0 +1,7 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets +""" +from .retinaface import RetinaFaceDataset + +__all__ = ['RetinaFaceDataset'] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py new file mode 100755 index 0000000..639e29d --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py @@ -0,0 +1,13 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines +""" +from .auto_augment import RotateV2 +from .formating import DefaultFormatBundleV2 +from .loading import LoadAnnotationsV2 +from .transforms import RandomSquareCrop + +__all__ = [ + 'RandomSquareCrop', 'LoadAnnotationsV2', 'RotateV2', + 'DefaultFormatBundleV2' +] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py new file mode 100644 index 0000000..59cec5f --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py @@ -0,0 +1,280 @@ +""" +The implementation here is modified based on insightface, originally MIT 
license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/auto_augment.py +""" +import copy + +import cv2 +import mmcv +import numpy as np +from mmdet.datasets.builder import PIPELINES + +_MAX_LEVEL = 10 + + +def level_to_value(level, max_value): + """Map from level to values based on max_value.""" + return (level / _MAX_LEVEL) * max_value + + +def random_negative(value, random_negative_prob): + """Randomly negate value based on random_negative_prob.""" + return -value if np.random.rand() < random_negative_prob else value + + +def bbox2fields(): + """The key correspondence from bboxes to labels, masks and + segmentations.""" + bbox2label = { + 'gt_bboxes': 'gt_labels', + 'gt_bboxes_ignore': 'gt_labels_ignore' + } + bbox2mask = { + 'gt_bboxes': 'gt_masks', + 'gt_bboxes_ignore': 'gt_masks_ignore' + } + bbox2seg = { + 'gt_bboxes': 'gt_semantic_seg', + } + return bbox2label, bbox2mask, bbox2seg + + +@PIPELINES.register_module() +class RotateV2(object): + """Apply Rotate Transformation to image (and its corresponding bbox, mask, + segmentation). + + Args: + level (int | float): The level should be in range (0,_MAX_LEVEL]. + scale (int | float): Isotropic scale factor. Same in + ``mmcv.imrotate``. + center (int | float | tuple[float]): Center point (w, h) of the + rotation in the source image. If None, the center of the + image will be used. Same in ``mmcv.imrotate``. + img_fill_val (int | float | tuple): The fill value for image border. + If float, the same value will be used for all the three + channels of image. If tuple, the should be 3 elements (e.g. + equals the number of channels for image). + seg_ignore_label (int): The fill value used for segmentation map. + Note this value must equals ``ignore_label`` in ``semantic_head`` + of the corresponding config. Default 255. + prob (float): The probability for perform transformation and + should be in range 0 to 1. + max_rotate_angle (int | float): The maximum angles for rotate + transformation. + random_negative_prob (float): The probability that turns the + offset negative. + """ + def __init__(self, + level, + scale=1, + center=None, + img_fill_val=128, + seg_ignore_label=255, + prob=0.5, + max_rotate_angle=30, + random_negative_prob=0.5): + assert isinstance(level, (int, float)), \ + f'The level must be type int or float. got {type(level)}.' + assert 0 <= level <= _MAX_LEVEL, \ + f'The level should be in range (0,{_MAX_LEVEL}]. got {level}.' + assert isinstance(scale, (int, float)), \ + f'The scale must be type int or float. got type {type(scale)}.' + if isinstance(center, (int, float)): + center = (center, center) + elif isinstance(center, tuple): + assert len(center) == 2, 'center with type tuple must have '\ + f'2 elements. got {len(center)} elements.' + else: + assert center is None, 'center must be None or type int, '\ + f'float or tuple, got type {type(center)}.' + if isinstance(img_fill_val, (float, int)): + img_fill_val = tuple([float(img_fill_val)] * 3) + elif isinstance(img_fill_val, tuple): + assert len(img_fill_val) == 3, 'img_fill_val as tuple must '\ + f'have 3 elements. got {len(img_fill_val)}.' + img_fill_val = tuple([float(val) for val in img_fill_val]) + else: + raise ValueError( + 'img_fill_val must be float or tuple with 3 elements.') + assert np.all([0 <= val <= 255 for val in img_fill_val]), \ + 'all elements of img_fill_val should between range [0,255]. '\ + f'got {img_fill_val}.' 
+ assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. '\ + f'got {prob}.' + assert isinstance(max_rotate_angle, (int, float)), 'max_rotate_angle '\ + f'should be type int or float. got type {type(max_rotate_angle)}.' + self.level = level + self.scale = scale + # Rotation angle in degrees. Positive values mean + # clockwise rotation. + self.angle = level_to_value(level, max_rotate_angle) + self.center = center + self.img_fill_val = img_fill_val + self.seg_ignore_label = seg_ignore_label + self.prob = prob + self.max_rotate_angle = max_rotate_angle + self.random_negative_prob = random_negative_prob + + def _rotate_img(self, results, angle, center=None, scale=1.0): + """Rotate the image. + + Args: + results (dict): Result dict from loading pipeline. + angle (float): Rotation angle in degrees, positive values + mean clockwise rotation. Same in ``mmcv.imrotate``. + center (tuple[float], optional): Center point (w, h) of the + rotation. Same in ``mmcv.imrotate``. + scale (int | float): Isotropic scale factor. Same in + ``mmcv.imrotate``. + """ + for key in results.get('img_fields', ['img']): + img = results[key].copy() + img_rotated = mmcv.imrotate(img, + angle, + center, + scale, + border_value=self.img_fill_val) + results[key] = img_rotated.astype(img.dtype) + results['img_shape'] = results[key].shape + + def _rotate_bboxes(self, results, rotate_matrix): + """Rotate the bboxes.""" + h, w, c = results['img_shape'] + for key in results.get('bbox_fields', []): + min_x, min_y, max_x, max_y = np.split(results[key], + results[key].shape[-1], + axis=-1) + coordinates = np.stack([[min_x, min_y], [max_x, min_y], + [min_x, max_y], + [max_x, max_y]]) # [4, 2, nb_bbox, 1] + # pad 1 to convert from format [x, y] to homogeneous + # coordinates format [x, y, 1] + coordinates = np.concatenate( + (coordinates, + np.ones((4, 1, coordinates.shape[2], 1), coordinates.dtype)), + axis=1) # [4, 3, nb_bbox, 1] + coordinates = coordinates.transpose( + (2, 0, 1, 3)) # [nb_bbox, 4, 3, 1] + rotated_coords = np.matmul(rotate_matrix, + coordinates) # [nb_bbox, 4, 2, 1] + rotated_coords = rotated_coords[..., 0] # [nb_bbox, 4, 2] + min_x, min_y = np.min(rotated_coords[:, :, 0], + axis=1), np.min(rotated_coords[:, :, 1], + axis=1) + max_x, max_y = np.max(rotated_coords[:, :, 0], + axis=1), np.max(rotated_coords[:, :, 1], + axis=1) + results[key] = np.stack([min_x, min_y, max_x, max_y], + axis=-1).astype(results[key].dtype) + + def _rotate_keypoints90(self, results, angle): + """Rotate the keypoints, only valid when angle in [-90,90,-180,180]""" + if angle not in [-90, 90, 180, -180 + ] or self.scale != 1 or self.center is not None: + return + for key in results.get('keypoints_fields', []): + k = results[key] + if angle == 90: + w, h, c = results['img'].shape + new = np.stack([h - k[..., 1], k[..., 0], k[..., 2]], axis=-1) + elif angle == -90: + w, h, c = results['img'].shape + new = np.stack([k[..., 1], w - k[..., 0], k[..., 2]], axis=-1) + else: + h, w, c = results['img'].shape + new = np.stack([w - k[..., 0], h - k[..., 1], k[..., 2]], + axis=-1) + # a kps is invalid if thrid value is -1 + kps_invalid = new[..., -1][:, -1] == -1 + new[kps_invalid] = np.zeros(new.shape[1:]) - 1 + results[key] = new + + def _rotate_masks(self, + results, + angle, + center=None, + scale=1.0, + fill_val=0): + """Rotate the masks.""" + h, w, c = results['img_shape'] + for key in results.get('mask_fields', []): + masks = results[key] + results[key] = masks.rotate((h, w), angle, center, scale, fill_val) + + def _rotate_seg(self, + 
results, + angle, + center=None, + scale=1.0, + fill_val=255): + """Rotate the segmentation map.""" + for key in results.get('seg_fields', []): + seg = results[key].copy() + results[key] = mmcv.imrotate(seg, + angle, + center, + scale, + border_value=fill_val).astype( + seg.dtype) + + def _filter_invalid(self, results, min_bbox_size=0): + """Filter bboxes and corresponding masks too small after rotate + augmentation.""" + bbox2label, bbox2mask, _ = bbox2fields() + for key in results.get('bbox_fields', []): + bbox_w = results[key][:, 2] - results[key][:, 0] + bbox_h = results[key][:, 3] - results[key][:, 1] + valid_inds = (bbox_w > min_bbox_size) & (bbox_h > min_bbox_size) + valid_inds = np.nonzero(valid_inds)[0] + results[key] = results[key][valid_inds] + # label fields. e.g. gt_labels and gt_labels_ignore + label_key = bbox2label.get(key) + if label_key in results: + results[label_key] = results[label_key][valid_inds] + # mask fields, e.g. gt_masks and gt_masks_ignore + mask_key = bbox2mask.get(key) + if mask_key in results: + results[mask_key] = results[mask_key][valid_inds] + + def __call__(self, results): + """Call function to rotate images, bounding boxes, masks and semantic + segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + if np.random.rand() > self.prob: + return results + h, w = results['img'].shape[:2] + center = self.center + if center is None: + center = ((w - 1) * 0.5, (h - 1) * 0.5) + angle = random_negative(self.angle, self.random_negative_prob) + self._rotate_img(results, angle, center, self.scale) + rotate_matrix = cv2.getRotationMatrix2D(center, -angle, self.scale) + self._rotate_bboxes(results, rotate_matrix) + self._rotate_keypoints90(results, angle) + self._rotate_masks(results, angle, center, self.scale, fill_val=0) + self._rotate_seg(results, + angle, + center, + self.scale, + fill_val=self.seg_ignore_label) + self._filter_invalid(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(level={self.level}, ' + repr_str += f'scale={self.scale}, ' + repr_str += f'center={self.center}, ' + repr_str += f'img_fill_val={self.img_fill_val}, ' + repr_str += f'seg_ignore_label={self.seg_ignore_label}, ' + repr_str += f'prob={self.prob}, ' + repr_str += f'max_rotate_angle={self.max_rotate_angle}, ' + repr_str += f'random_negative_prob={self.random_negative_prob})' + return repr_str diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py new file mode 100644 index 0000000..dae5c7c --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py @@ -0,0 +1,112 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/formating.py +""" +import numpy as np +import torch +from mmcv.parallel import DataContainer as DC +from mmdet.datasets.builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. 
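+
+    Returns:
+        torch.Tensor: The converted tensor.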
+ """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class DefaultFormatBundleV2(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img", + "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg". + These fields are formatted as follows. + + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - proposals: (1)to tensor, (2)to DataContainer + - gt_bboxes: (1)to tensor, (2)to DataContainer + - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer + - gt_labels: (1)to tensor, (2)to DataContainer + - gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, \ + (3)to DataContainer (stack=True) + """ + def __call__(self, results): + """Call function to transform and format common fields in results. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data that is formatted with \ + default bundle. + """ + + if 'img' in results: + img = results['img'] + # add default meta keys + results = self._add_default_meta_keys(results) + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + for key in [ + 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_keypointss', + 'gt_labels' + ]: + if key not in results: + continue + results[key] = DC(to_tensor(results[key])) + if 'gt_masks' in results: + results['gt_masks'] = DC(results['gt_masks'], cpu_only=True) + if 'gt_semantic_seg' in results: + results['gt_semantic_seg'] = DC(to_tensor( + results['gt_semantic_seg'][None, ...]), + stack=True) + return results + + def _add_default_meta_keys(self, results): + """Add default meta keys. + + We set default meta keys including `pad_shape`, `scale_factor` and + `img_norm_cfg` to avoid the case where no `Resize`, `Normalize` and + `Pad` are implemented during the whole pipeline. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + results (dict): Updated result dict contains the data to convert. 
+ """ + img = results['img'] + results.setdefault('pad_shape', img.shape) + results.setdefault('scale_factor', 1.0) + num_channels = 1 if len(img.shape) < 3 else img.shape[2] + results.setdefault( + 'img_norm_cfg', + dict(mean=np.zeros(num_channels, dtype=np.float32), + std=np.ones(num_channels, dtype=np.float32), + to_rgb=False)) + return results + + def __repr__(self): + return self.__class__.__name__ diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py new file mode 100644 index 0000000..7100b0d --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py @@ -0,0 +1,224 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/loading.py +""" +import os.path as osp + +import numpy as np +import pycocotools.mask as maskUtils +from mmdet.core import BitmapMasks, PolygonMasks +from mmdet.datasets.builder import PIPELINES + + +@PIPELINES.register_module() +class LoadAnnotationsV2(object): + """Load mutiple types of annotations. + + Args: + with_bbox (bool): Whether to parse and load the bbox annotation. + Default: True. + with_label (bool): Whether to parse and load the label annotation. + Default: True. + with_keypoints (bool): Whether to parse and load the keypoints annotation. + Default: False. + with_mask (bool): Whether to parse and load the mask annotation. + Default: False. + with_seg (bool): Whether to parse and load the semantic segmentation + annotation. Default: False. + poly2mask (bool): Whether to convert the instance masks from polygons + to bitmaps. Default: True. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + """ + def __init__(self, + with_bbox=True, + with_label=True, + with_keypoints=False, + with_mask=False, + with_seg=False, + poly2mask=True, + file_client_args=dict(backend='disk')): + self.with_bbox = with_bbox + self.with_label = with_label + self.with_keypoints = with_keypoints + self.with_mask = with_mask + self.with_seg = with_seg + self.poly2mask = poly2mask + self.file_client_args = file_client_args.copy() + self.file_client = None + + def _load_bboxes(self, results): + """Private function to load bounding box annotations. + + Args: + results (dict): Result dict from :obj:`mmdet.CustomDataset`. + + Returns: + dict: The dict contains loaded bounding box annotations. + """ + + ann_info = results['ann_info'] + results['gt_bboxes'] = ann_info['bboxes'].copy() + + gt_bboxes_ignore = ann_info.get('bboxes_ignore', None) + if gt_bboxes_ignore is not None: + results['gt_bboxes_ignore'] = gt_bboxes_ignore.copy() + results['bbox_fields'].append('gt_bboxes_ignore') + results['bbox_fields'].append('gt_bboxes') + return results + + def _load_keypoints(self, results): + """Private function to load bounding box annotations. + + Args: + results (dict): Result dict from :obj:`mmdet.CustomDataset`. + + Returns: + dict: The dict contains loaded bounding box annotations. + """ + + ann_info = results['ann_info'] + results['gt_keypointss'] = ann_info['keypointss'].copy() + + results['keypoints_fields'] = ['gt_keypointss'] + return results + + def _load_labels(self, results): + """Private function to load label annotations. 
+ + Args: + results (dict): Result dict from :obj:`mmdet.CustomDataset`. + + Returns: + dict: The dict contains loaded label annotations. + """ + + results['gt_labels'] = results['ann_info']['labels'].copy() + return results + + def _poly2mask(self, mask_ann, img_h, img_w): + """Private function to convert masks represented with polygon to + bitmaps. + + Args: + mask_ann (list | dict): Polygon mask annotation input. + img_h (int): The height of output mask. + img_w (int): The width of output mask. + + Returns: + numpy.ndarray: The decode bitmap mask of shape (img_h, img_w). + """ + + if isinstance(mask_ann, list): + # polygon -- a single object might consist of multiple parts + # we merge all parts into one mask rle code + rles = maskUtils.frPyObjects(mask_ann, img_h, img_w) + rle = maskUtils.merge(rles) + elif isinstance(mask_ann['counts'], list): + # uncompressed RLE + rle = maskUtils.frPyObjects(mask_ann, img_h, img_w) + else: + # rle + rle = mask_ann + mask = maskUtils.decode(rle) + return mask + + def process_polygons(self, polygons): + """Convert polygons to list of ndarray and filter invalid polygons. + + Args: + polygons (list[list]): Polygons of one instance. + + Returns: + list[numpy.ndarray]: Processed polygons. + """ + + polygons = [np.array(p) for p in polygons] + valid_polygons = [] + for polygon in polygons: + if len(polygon) % 2 == 0 and len(polygon) >= 6: + valid_polygons.append(polygon) + return valid_polygons + + def _load_masks(self, results): + """Private function to load mask annotations. + + Args: + results (dict): Result dict from :obj:`mmdet.CustomDataset`. + + Returns: + dict: The dict contains loaded mask annotations. + If ``self.poly2mask`` is set ``True``, `gt_mask` will contain + :obj:`PolygonMasks`. Otherwise, :obj:`BitmapMasks` is used. + """ + + h, w = results['img_info']['height'], results['img_info']['width'] + gt_masks = results['ann_info']['masks'] + if self.poly2mask: + gt_masks = BitmapMasks( + [self._poly2mask(mask, h, w) for mask in gt_masks], h, w) + else: + gt_masks = PolygonMasks( + [self.process_polygons(polygons) for polygons in gt_masks], h, + w) + results['gt_masks'] = gt_masks + results['mask_fields'].append('gt_masks') + return results + + def _load_semantic_seg(self, results): + """Private function to load semantic segmentation annotations. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. + """ + import mmcv + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + filename = osp.join(results['seg_prefix'], + results['ann_info']['seg_map']) + img_bytes = self.file_client.get(filename) + results['gt_semantic_seg'] = mmcv.imfrombytes( + img_bytes, flag='unchanged').squeeze() + results['seg_fields'].append('gt_semantic_seg') + return results + + def __call__(self, results): + """Call function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:`mmdet.CustomDataset`. + + Returns: + dict: The dict contains loaded bounding box, label, mask and + semantic segmentation annotations. 
+ """ + + if self.with_bbox: + results = self._load_bboxes(results) + if results is None: + return None + if self.with_label: + results = self._load_labels(results) + if self.with_keypoints: + results = self._load_keypoints(results) + if self.with_mask: + results = self._load_masks(results) + if self.with_seg: + results = self._load_semantic_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(with_bbox={self.with_bbox}, ' + repr_str += f'with_label={self.with_label}, ' + repr_str += f'with_keypoints={self.with_keypoints}, ' + repr_str += f'with_mask={self.with_mask}, ' + repr_str += f'with_seg={self.with_seg})' + repr_str += f'poly2mask={self.poly2mask})' + repr_str += f'poly2mask={self.file_client_args})' + return repr_str diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py new file mode 100755 index 0000000..aa0f833 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py @@ -0,0 +1,728 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py +""" +import mmcv +import numpy as np +from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps +from mmdet.datasets.builder import PIPELINES +from numpy import random + + +@PIPELINES.register_module() +class ResizeV2(object): + """Resize images & bbox & mask &kps. + + This transform resizes the input image to some scale. Bboxes and masks are + then resized with the same scale factor. If the input dict contains the key + "scale", then the scale in the input dict is used, otherwise the specified + scale in the init method is used. If the input dict contains the key + "scale_factor" (if MultiScaleFlipAug does not give img_scale but + scale_factor), the actual scale will be computed by image shape and + scale_factor. + + `img_scale` can either be a tuple (single-scale) or a list of tuple + (multi-scale). There are 3 multiscale modes: + + - ``ratio_range is not None``: randomly sample a ratio from the ratio \ + range and multiply it with the image scale. + - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \ + sample a scale from the multiscale range. + - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \ + sample a scale from multiple scales. + + Args: + img_scale (tuple or list[tuple]): Images scales for resizing. + multiscale_mode (str): Either "range" or "value". + ratio_range (tuple[float]): (min_ratio, max_ratio) + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. + bbox_clip_border (bool, optional): Whether clip the objects outside + the border of the image. Defaults to True. + backend (str): Image resize backend, choices are 'cv2' and 'pillow'. + These two backends generates slightly different results. Defaults + to 'cv2'. + override (bool, optional): Whether to override `scale` and + `scale_factor` so as to call resize twice. Default False. If True, + after the first resizing, the existed `scale` and `scale_factor` + will be ignored so the second resizing can be allowed. + This option is a work-around for multiple times of resize in DETR. + Defaults to False. 
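+
+    Example (illustrative, values are arbitrary):
+        Since the class is registered in ``PIPELINES``, it is typically
+        configured from a pipeline entry such as
+        ``dict(type='ResizeV2', img_scale=(640, 640), keep_ratio=True)``.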
+ """ + def __init__(self, + img_scale=None, + multiscale_mode='range', + ratio_range=None, + keep_ratio=True, + bbox_clip_border=True, + backend='cv2', + override=False): + if img_scale is None: + self.img_scale = None + else: + if isinstance(img_scale, list): + self.img_scale = img_scale + else: + self.img_scale = [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + + if ratio_range is not None: + # mode 1: given a scale and a range of image ratio + assert len(self.img_scale) == 1 + else: + # mode 2: given multiple scales or a range of scales + assert multiscale_mode in ['value', 'range'] + + self.backend = backend + self.multiscale_mode = multiscale_mode + self.ratio_range = ratio_range + self.keep_ratio = keep_ratio + # TODO: refactor the override option in Resize + self.override = override + self.bbox_clip_border = bbox_clip_border + + @staticmethod + def random_select(img_scales): + """Randomly select an img_scale from given candidates. + + Args: + img_scales (list[tuple]): Images scales for selection. + + Returns: + (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, \ + where ``img_scale`` is the selected image scale and \ + ``scale_idx`` is the selected index in the given candidates. + """ + + assert mmcv.is_list_of(img_scales, tuple) + scale_idx = np.random.randint(len(img_scales)) + img_scale = img_scales[scale_idx] + return img_scale, scale_idx + + @staticmethod + def random_sample(img_scales): + """Randomly sample an img_scale when ``multiscale_mode=='range'``. + + Args: + img_scales (list[tuple]): Images scale range for sampling. + There must be two tuples in img_scales, which specify the lower + and uper bound of image scales. + + Returns: + (tuple, None): Returns a tuple ``(img_scale, None)``, where \ + ``img_scale`` is sampled scale and None is just a placeholder \ + to be consistent with :func:`random_select`. + """ + + assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint(min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint(min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + return img_scale, None + + @staticmethod + def random_sample_ratio(img_scale, ratio_range): + """Randomly sample an img_scale when ``ratio_range`` is specified. + + A ratio will be randomly sampled from the range specified by + ``ratio_range``. Then it would be multiplied with ``img_scale`` to + generate sampled scale. + + Args: + img_scale (tuple): Images scale base to multiply with ratio. + ratio_range (tuple[float]): The minimum and maximum ratio to scale + the ``img_scale``. + + Returns: + (tuple, None): Returns a tuple ``(scale, None)``, where \ + ``scale`` is sampled ratio multiplied with ``img_scale`` and \ + None is just a placeholder to be consistent with \ + :func:`random_select`. + """ + + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + """Randomly sample an img_scale according to ``ratio_range`` and + ``multiscale_mode``. + + If ``ratio_range`` is specified, a ratio will be sampled and be + multiplied with ``img_scale``. 
+ If multiple scales are specified by ``img_scale``, a scale will be + sampled according to ``multiscale_mode``. + Otherwise, single scale will be used. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: Two new keys 'scale` and 'scale_idx` are added into \ + ``results``, which would be used by subsequent pipelines. + """ + + if self.ratio_range is not None: + scale, scale_idx = self.random_sample_ratio( + self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + """Resize images with ``results['scale']``.""" + for key in results.get('img_fields', ['img']): + if self.keep_ratio: + img, scale_factor = mmcv.imrescale(results[key], + results['scale'], + return_scale=True, + backend=self.backend) + # the w_scale and h_scale has minor difference + # a real fix should be done in the mmcv.imrescale in the future + new_h, new_w = img.shape[:2] + h, w = results[key].shape[:2] + w_scale = new_w / w + h_scale = new_h / h + else: + img, w_scale, h_scale = mmcv.imresize(results[key], + results['scale'], + return_scale=True, + backend=self.backend) + results[key] = img + + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], + dtype=np.float32) + results['img_shape'] = img.shape + # in case that there is no padding + results['pad_shape'] = img.shape + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_bboxes(self, results): + """Resize bounding boxes with ``results['scale_factor']``.""" + for key in results.get('bbox_fields', []): + bboxes = results[key] * results['scale_factor'] + if self.bbox_clip_border: + img_shape = results['img_shape'] + bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1]) + bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0]) + results[key] = bboxes + + def _resize_keypoints(self, results): + """Resize keypoints with ``results['scale_factor']``.""" + for key in results.get('keypoints_fields', []): + keypointss = results[key].copy() + factors = results['scale_factor'] + assert factors[0] == factors[2] + assert factors[1] == factors[3] + keypointss[:, :, 0] *= factors[0] + keypointss[:, :, 1] *= factors[1] + if self.bbox_clip_border: + img_shape = results['img_shape'] + keypointss[:, :, 0] = np.clip(keypointss[:, :, 0], 0, + img_shape[1]) + keypointss[:, :, 1] = np.clip(keypointss[:, :, 1], 0, + img_shape[0]) + results[key] = keypointss + + def _resize_masks(self, results): + """Resize masks with ``results['scale']``""" + for key in results.get('mask_fields', []): + if results[key] is None: + continue + if self.keep_ratio: + results[key] = results[key].rescale(results['scale']) + else: + results[key] = results[key].resize(results['img_shape'][:2]) + + def _resize_seg(self, results): + """Resize semantic segmentation map with ``results['scale']``.""" + for key in results.get('seg_fields', []): + if self.keep_ratio: + gt_seg = mmcv.imrescale(results[key], + results['scale'], + interpolation='nearest', + backend=self.backend) + else: + gt_seg = mmcv.imresize(results[key], + results['scale'], + interpolation='nearest', + backend=self.backend) + results['gt_semantic_seg'] = gt_seg + + def __call__(self, 
results):
+        """Call function to resize images, bounding boxes, masks, semantic
+        segmentation map.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', \
+                'keep_ratio' keys are added into result dict.
+        """
+
+        if 'scale' not in results:
+            if 'scale_factor' in results:
+                img_shape = results['img'].shape[:2]
+                scale_factor = results['scale_factor']
+                assert isinstance(scale_factor, float)
+                results['scale'] = tuple(
+                    [int(x * scale_factor) for x in img_shape][::-1])
+            else:
+                self._random_scale(results)
+        else:
+            if not self.override:
+                assert 'scale_factor' not in results, (
+                    'scale and scale_factor cannot be both set.')
+            else:
+                results.pop('scale')
+                if 'scale_factor' in results:
+                    results.pop('scale_factor')
+                self._random_scale(results)
+
+        self._resize_img(results)
+        self._resize_bboxes(results)
+        self._resize_keypoints(results)
+        self._resize_masks(results)
+        self._resize_seg(results)
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(img_scale={self.img_scale}, '
+        repr_str += f'multiscale_mode={self.multiscale_mode}, '
+        repr_str += f'ratio_range={self.ratio_range}, '
+        repr_str += f'keep_ratio={self.keep_ratio}, '
+        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
+        return repr_str
+
+
+@PIPELINES.register_module()
+class RandomFlipV2(object):
+    """Flip the image & bbox & mask & kps.
+
+    If the input dict contains the key "flip", then the flag will be used,
+    otherwise it will be randomly decided by a ratio specified in the init
+    method.
+
+    When random flip is enabled, ``flip_ratio``/``direction`` can either be a
+    float/string or tuple of float/string. There are 3 flip modes:
+
+    - ``flip_ratio`` is float, ``direction`` is string: the image will be
+      ``direction``ly flipped with probability of ``flip_ratio``.
+      E.g., ``flip_ratio=0.5``, ``direction='horizontal'``,
+      then image will be horizontally flipped with probability of 0.5.
+    - ``flip_ratio`` is float, ``direction`` is list of string: the image will
+      be ``direction[i]``ly flipped with probability of
+      ``flip_ratio/len(direction)``.
+      E.g., ``flip_ratio=0.5``, ``direction=['horizontal', 'vertical']``,
+      then image will be horizontally flipped with probability of 0.25,
+      vertically with probability of 0.25.
+    - ``flip_ratio`` is list of float, ``direction`` is list of string:
+      given ``len(flip_ratio) == len(direction)``, the image will
+      be ``direction[i]``ly flipped with probability of ``flip_ratio[i]``.
+      E.g., ``flip_ratio=[0.3, 0.5]``, ``direction=['horizontal',
+      'vertical']``, then image will be horizontally flipped with probability
+      of 0.3, vertically with probability of 0.5.
+
+    Args:
+        flip_ratio (float | list[float], optional): The flipping probability.
+            Default: None.
+        direction(str | list[str], optional): The flipping direction. Options
+            are 'horizontal', 'vertical', 'diagonal'. Default: 'horizontal'.
+            If input is a list, the length must equal ``flip_ratio``. Each
+            element in ``flip_ratio`` indicates the flip probability of
+            corresponding direction.
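# A minimal, illustrative sketch (not taken from this patch): ResizeV2 and
# RandomFlipV2 are registered in mmdet's PIPELINES registry, so a training
# config would normally reference them by type name. The sizes and ratios
# below are assumed placeholder values, not settings from this repository.
train_pipeline_fragment = [
    dict(type='ResizeV2',
         img_scale=(640, 640),   # single scale; keep_ratio=False resizes to exactly 640x640
         keep_ratio=False),
    dict(type='RandomFlipV2',
         flip_ratio=0.5,         # horizontal flip with probability 0.5
         direction='horizontal'),
]
# After ResizeV2 runs, the results dict gains 'scale', 'scale_factor'
# (stored as [w_scale, h_scale, w_scale, h_scale]), 'img_shape', 'pad_shape'
# and 'keep_ratio'; RandomFlipV2 then adds 'flip' and 'flip_direction'.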
+ """ + def __init__(self, flip_ratio=None, direction='horizontal'): + if isinstance(flip_ratio, list): + assert mmcv.is_list_of(flip_ratio, float) + assert 0 <= sum(flip_ratio) <= 1 + elif isinstance(flip_ratio, float): + assert 0 <= flip_ratio <= 1 + elif flip_ratio is None: + pass + else: + raise ValueError('flip_ratios must be None, float, ' + 'or list of float') + self.flip_ratio = flip_ratio + + valid_directions = ['horizontal', 'vertical', 'diagonal'] + if isinstance(direction, str): + assert direction in valid_directions + elif isinstance(direction, list): + assert mmcv.is_list_of(direction, str) + assert set(direction).issubset(set(valid_directions)) + else: + raise ValueError('direction must be either str or list of str') + self.direction = direction + + if isinstance(flip_ratio, list): + assert len(self.flip_ratio) == len(self.direction) + self.count = 0 + + def bbox_flip(self, bboxes, img_shape, direction): + """Flip bboxes horizontally. + + Args: + bboxes (numpy.ndarray): Bounding boxes, shape (..., 4*k) + img_shape (tuple[int]): Image shape (height, width) + direction (str): Flip direction. Options are 'horizontal', + 'vertical'. + + Returns: + numpy.ndarray: Flipped bounding boxes. + """ + + assert bboxes.shape[-1] % 4 == 0 + flipped = bboxes.copy() + if direction == 'horizontal': + w = img_shape[1] + flipped[..., 0::4] = w - bboxes[..., 2::4] + flipped[..., 2::4] = w - bboxes[..., 0::4] + elif direction == 'vertical': + h = img_shape[0] + flipped[..., 1::4] = h - bboxes[..., 3::4] + flipped[..., 3::4] = h - bboxes[..., 1::4] + elif direction == 'diagonal': + w = img_shape[1] + h = img_shape[0] + flipped[..., 0::4] = w - bboxes[..., 2::4] + flipped[..., 1::4] = h - bboxes[..., 3::4] + flipped[..., 2::4] = w - bboxes[..., 0::4] + flipped[..., 3::4] = h - bboxes[..., 1::4] + else: + raise ValueError(f"Invalid flipping direction '{direction}'") + return flipped + + def keypoints_flip(self, keypointss, img_shape, direction): + """Flip keypoints horizontally.""" + + assert direction == 'horizontal' + assert keypointss.shape[-1] == 3 + num_kps = keypointss.shape[1] + assert num_kps in [4, 5], f'Only Support num_kps=4 or 5, got:{num_kps}' + assert keypointss.ndim == 3 + flipped = keypointss.copy() + if num_kps == 5: + flip_order = [1, 0, 2, 4, 3] + elif num_kps == 4: + flip_order = [3, 2, 1, 0] + for idx, a in enumerate(flip_order): + flipped[:, idx, :] = keypointss[:, a, :] + w = img_shape[1] + flipped[..., 0] = w - flipped[..., 0] + return flipped + + def __call__(self, results): + """Call function to flip bounding boxes, masks, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Flipped results, 'flip', 'flip_direction' keys are added \ + into result dict. 
+ """ + if 'flip' not in results: + if isinstance(self.direction, list): + # None means non-flip + direction_list = self.direction + [None] + else: + # None means non-flip + direction_list = [self.direction, None] + + if isinstance(self.flip_ratio, list): + non_flip_ratio = 1 - sum(self.flip_ratio) + flip_ratio_list = self.flip_ratio + [non_flip_ratio] + else: + non_flip_ratio = 1 - self.flip_ratio + # exclude non-flip + single_ratio = self.flip_ratio / (len(direction_list) - 1) + flip_ratio_list = [single_ratio] * (len(direction_list) - + 1) + [non_flip_ratio] + + cur_dir = np.random.choice(direction_list, p=flip_ratio_list) + + results['flip'] = cur_dir is not None + if 'flip_direction' not in results: + results['flip_direction'] = cur_dir + if results['flip']: + # flip image + for key in results.get('img_fields', ['img']): + results[key] = mmcv.imflip(results[key], + direction=results['flip_direction']) + # flip bboxes + for key in results.get('bbox_fields', []): + results[key] = self.bbox_flip(results[key], + results['img_shape'], + results['flip_direction']) + # flip kps + for key in results.get('keypoints_fields', []): + results[key] = self.keypoints_flip(results[key], + results['img_shape'], + results['flip_direction']) + # flip masks + for key in results.get('mask_fields', []): + results[key] = results[key].flip(results['flip_direction']) + + # flip segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imflip(results[key], + direction=results['flip_direction']) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(flip_ratio={self.flip_ratio})' + + +@PIPELINES.register_module() +class RandomSquareCrop(object): + """Random crop the image & bboxes, the cropped patches have minimum IoU + requirement with original image & bboxes, the IoU threshold is randomly + selected from min_ious. + + Args: + min_ious (tuple): minimum IoU threshold for all intersections with + bounding boxes + min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w, + where a >= min_crop_size). + + Note: + The keys for bboxes, labels and masks should be paired. That is, \ + `gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and \ + `gt_bboxes_ignore` to `gt_labels_ignore` and `gt_masks_ignore`. + """ + def __init__(self, + crop_ratio_range=None, + crop_choice=None, + bbox_clip_border=True, + big_face_ratio=0, + big_face_crop_choice=None): + + self.crop_ratio_range = crop_ratio_range + self.crop_choice = crop_choice + self.big_face_crop_choice = big_face_crop_choice + self.bbox_clip_border = bbox_clip_border + + assert (self.crop_ratio_range is None) ^ (self.crop_choice is None) + if self.crop_ratio_range is not None: + self.crop_ratio_min, self.crop_ratio_max = self.crop_ratio_range + + self.bbox2label = { + 'gt_bboxes': 'gt_labels', + 'gt_bboxes_ignore': 'gt_labels_ignore' + } + self.bbox2mask = { + 'gt_bboxes': 'gt_masks', + 'gt_bboxes_ignore': 'gt_masks_ignore' + } + assert big_face_ratio >= 0 and big_face_ratio <= 1.0 + self.big_face_ratio = big_face_ratio + + def __call__(self, results): + """Call function to crop images and bounding boxes with minimum IoU + constraint. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with images and bounding boxes cropped, \ + 'img_shape' key is updated. 
+ """ + + if 'img_fields' in results: + assert results['img_fields'] == ['img'], \ + 'Only single img_fields is allowed' + img = results['img'] + assert 'bbox_fields' in results + assert 'gt_bboxes' in results + # try augment big face images + find_bigface = False + if np.random.random() < self.big_face_ratio: + min_size = 100 # h and w + expand_ratio = 0.3 # expand ratio of croped face alongwith both w and h + bbox = results['gt_bboxes'].copy() + lmks = results['gt_keypointss'].copy() + label = results['gt_labels'].copy() + # filter small faces + size_mask = ((bbox[:, 2] - bbox[:, 0]) > min_size) * ( + (bbox[:, 3] - bbox[:, 1]) > min_size) + bbox = bbox[size_mask] + lmks = lmks[size_mask] + label = label[size_mask] + # randomly choose a face that has no overlap with others + if len(bbox) > 0: + overlaps = bbox_overlaps(bbox, bbox) + overlaps -= np.eye(overlaps.shape[0]) + iou_mask = np.sum(overlaps, axis=1) == 0 + bbox = bbox[iou_mask] + lmks = lmks[iou_mask] + label = label[iou_mask] + if len(bbox) > 0: + choice = np.random.randint(len(bbox)) + bbox = bbox[choice] + lmks = lmks[choice] + label = [label[choice]] + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x1 = bbox[0] - w * expand_ratio + x2 = bbox[2] + w * expand_ratio + y1 = bbox[1] - h * expand_ratio + y2 = bbox[3] + h * expand_ratio + x1, x2 = np.clip([x1, x2], 0, img.shape[1]) + y1, y2 = np.clip([y1, y2], 0, img.shape[0]) + bbox -= np.tile([x1, y1], 2) + lmks -= (x1, y1, 0) + + find_bigface = True + img = img[int(y1):int(y2), int(x1):int(x2), :] + results['gt_bboxes'] = np.expand_dims(bbox, axis=0) + results['gt_keypointss'] = np.expand_dims(lmks, axis=0) + results['gt_labels'] = np.array(label) + results['img'] = img + + boxes = results['gt_bboxes'] + h, w, c = img.shape + + if self.crop_ratio_range is not None: + max_scale = self.crop_ratio_max + else: + max_scale = np.amax(self.crop_choice) + scale_retry = 0 + while True: + scale_retry += 1 + if scale_retry == 1 or max_scale > 1.0: + if self.crop_ratio_range is not None: + scale = np.random.uniform(self.crop_ratio_min, + self.crop_ratio_max) + elif self.crop_choice is not None: + scale = np.random.choice(self.crop_choice) + else: + scale = scale * 1.2 + + if find_bigface: + # select a scale from big_face_crop_choice if in big_face mode + scale = np.random.choice(self.big_face_crop_choice) + + for i in range(250): + long_side = max(w, h) + cw = int(scale * long_side) + ch = cw + + # TODO +1 + if w == cw: + left = 0 + elif w > cw: + left = random.randint(0, w - cw) + else: + left = random.randint(w - cw, 0) + if h == ch: + top = 0 + elif h > ch: + top = random.randint(0, h - ch) + else: + top = random.randint(h - ch, 0) + + patch = np.array( + (int(left), int(top), int(left + cw), int(top + ch)), + dtype=np.int32) + + # center of boxes should inside the crop img + # only adjust boxes and instance masks when the gt is not empty + # adjust boxes + def is_center_of_bboxes_in_patch(boxes, patch): + # TODO >= + center = (boxes[:, :2] + boxes[:, 2:]) / 2 + mask = \ + ((center[:, 0] > patch[0]) + * (center[:, 1] > patch[1]) + * (center[:, 0] < patch[2]) + * (center[:, 1] < patch[3])) + return mask + + mask = is_center_of_bboxes_in_patch(boxes, patch) + if not mask.any(): + continue + for key in results.get('bbox_fields', []): + boxes = results[key].copy() + mask = is_center_of_bboxes_in_patch(boxes, patch) + boxes = boxes[mask] + if self.bbox_clip_border: + boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:]) + boxes[:, :2] = boxes[:, :2].clip(min=patch[:2]) + boxes -= np.tile(patch[:2], 
2) + + results[key] = boxes + # labels + label_key = self.bbox2label.get(key) + if label_key in results: + results[label_key] = results[label_key][mask] + + # keypoints field + if key == 'gt_bboxes': + for kps_key in results.get('keypoints_fields', []): + keypointss = results[kps_key].copy() + keypointss = keypointss[mask, :, :] + if self.bbox_clip_border: + keypointss[:, :, : + 2] = keypointss[:, :, :2].clip( + max=patch[2:]) + keypointss[:, :, : + 2] = keypointss[:, :, :2].clip( + min=patch[:2]) + keypointss[:, :, 0] -= patch[0] + keypointss[:, :, 1] -= patch[1] + results[kps_key] = keypointss + + # mask fields + mask_key = self.bbox2mask.get(key) + if mask_key in results: + results[mask_key] = results[mask_key][mask.nonzero() + [0]].crop(patch) + + # adjust the img no matter whether the gt is empty before crop + rimg = np.ones((ch, cw, 3), dtype=img.dtype) * 128 + patch_from = patch.copy() + patch_from[0] = max(0, patch_from[0]) + patch_from[1] = max(0, patch_from[1]) + patch_from[2] = min(img.shape[1], patch_from[2]) + patch_from[3] = min(img.shape[0], patch_from[3]) + patch_to = patch.copy() + patch_to[0] = max(0, patch_to[0] * -1) + patch_to[1] = max(0, patch_to[1] * -1) + patch_to[2] = patch_to[0] + (patch_from[2] - patch_from[0]) + patch_to[3] = patch_to[1] + (patch_from[3] - patch_from[1]) + rimg[patch_to[1]:patch_to[3], + patch_to[0]:patch_to[2], :] = img[ + patch_from[1]:patch_from[3], + patch_from[0]:patch_from[2], :] + img = rimg + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_ious={self.min_iou}, ' + repr_str += f'crop_size={self.crop_size})' + return repr_str diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py new file mode 100755 index 0000000..739382a --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py @@ -0,0 +1,151 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py +""" +import numpy as np +from mmdet.datasets.builder import DATASETS +from mmdet.datasets.custom import CustomDataset + + +@DATASETS.register_module() +class RetinaFaceDataset(CustomDataset): + + CLASSES = ('FG', ) + + def __init__(self, min_size=None, **kwargs): + self.NK = kwargs.pop('num_kps', 5) + self.cat2label = {cat: i for i, cat in enumerate(self.CLASSES)} + self.min_size = min_size + self.gt_path = kwargs.get('gt_path') + super(RetinaFaceDataset, self).__init__(**kwargs) + + def _parse_ann_line(self, line): + values = [float(x) for x in line.strip().split()] + bbox = np.array(values[0:4], dtype=np.float32) + kps = np.zeros((self.NK, 3), dtype=np.float32) + ignore = False + if self.min_size is not None: + assert not self.test_mode + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + if w < self.min_size or h < self.min_size: + ignore = True + if len(values) > 4: + if len(values) > 5: + kps = np.array(values[4:4 + self.NK * 3], + dtype=np.float32).reshape((self.NK, 3)) + for li in range(kps.shape[0]): + if (kps[li, :] == -1).all(): + kps[li][2] = 0.0 # weight = 0, ignore + else: + assert kps[li][2] >= 0 + kps[li][2] = 1.0 # weight + else: # len(values)==5 + if not ignore: + ignore = (values[4] == 1) + else: + assert self.test_mode + + return dict(bbox=bbox, kps=kps, 
ignore=ignore, cat='FG') + + def load_annotations(self, ann_file): + """Load annotation from COCO style annotation file. + + Args: + ann_file (str): Path of annotation file. + 20220711@tyx: ann_file is list of img paths is supported + + Returns: + list[dict]: Annotation info from COCO api. + """ + if isinstance(ann_file, list): + data_infos = [] + for line in ann_file: + name = line + objs = [0, 0, 0, 0] + data_infos.append( + dict(filename=name, width=0, height=0, objs=objs)) + else: + name = None + bbox_map = {} + for line in open(ann_file, 'r'): + line = line.strip() + if line.startswith('#'): + value = line[1:].strip().split() + name = value[0] + width = int(value[1]) + height = int(value[2]) + + bbox_map[name] = dict(width=width, height=height, objs=[]) + continue + assert name is not None + assert name in bbox_map + bbox_map[name]['objs'].append(line) + print('origin image size', len(bbox_map)) + data_infos = [] + for name in bbox_map: + item = bbox_map[name] + width = item['width'] + height = item['height'] + vals = item['objs'] + objs = [] + for line in vals: + data = self._parse_ann_line(line) + if data is None: + continue + objs.append(data) # data is (bbox, kps, cat) + if len(objs) == 0 and not self.test_mode: + continue + data_infos.append( + dict(filename=name, width=width, height=height, objs=objs)) + return data_infos + + def get_ann_info(self, idx): + """Get COCO annotation by index. + + Args: + idx (int): Index of data. + + Returns: + dict: Annotation info of specified index. + """ + data_info = self.data_infos[idx] + + bboxes = [] + keypointss = [] + labels = [] + bboxes_ignore = [] + labels_ignore = [] + for obj in data_info['objs']: + label = self.cat2label[obj['cat']] + bbox = obj['bbox'] + keypoints = obj['kps'] + ignore = obj['ignore'] + if ignore: + bboxes_ignore.append(bbox) + labels_ignore.append(label) + else: + bboxes.append(bbox) + labels.append(label) + keypointss.append(keypoints) + if not bboxes: + bboxes = np.zeros((0, 4)) + labels = np.zeros((0, )) + keypointss = np.zeros((0, self.NK, 3)) + else: + # bboxes = np.array(bboxes, ndmin=2) - 1 + bboxes = np.array(bboxes, ndmin=2) + labels = np.array(labels) + keypointss = np.array(keypointss, ndmin=3) + if not bboxes_ignore: + bboxes_ignore = np.zeros((0, 4)) + labels_ignore = np.zeros((0, )) + else: + bboxes_ignore = np.array(bboxes_ignore, ndmin=2) + labels_ignore = np.array(labels_ignore) + ann = dict(bboxes=bboxes.astype(np.float32), + labels=labels.astype(np.int64), + keypointss=keypointss.astype(np.float32), + bboxes_ignore=bboxes_ignore.astype(np.float32), + labels_ignore=labels_ignore.astype(np.int64)) + return ann diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py new file mode 100755 index 0000000..d1c86ab --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py @@ -0,0 +1,6 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models +""" +from .dense_heads import * # noqa: F401,F403 +from .detectors import * # noqa: F401,F403 diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResIDWEXKX.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResIDWEXKX.py new file mode 100644 index 0000000..81de4ec --- /dev/null +++ 
b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResIDWEXKX.py @@ -0,0 +1,273 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + + +import os, sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import uuid + +import PlainNet +from PlainNet import _get_right_parentheses_index_ +from PlainNet.super_blocks import PlainNetSuperBlockClass +from torch import nn +import global_utils + + +class SuperResIDWEXKX(PlainNetSuperBlockClass): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, + kernel_size=None, expension=None, + no_create=False, no_reslink=False, no_BN=False, use_se=False, **kwargs): + super(SuperResIDWEXKX, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.bottleneck_channels = bottleneck_channels + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.expension = expension + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + + self.use_se = use_se + if self.use_se: + print('---debug use_se in ' + str(self)) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + inner_str = '' + # first DW + dw_channels = global_utils.smart_round(self.bottleneck_channels * self.expension, base=8) + inner_str += 'ConvKX({},{},{},{})'.format(last_channels, dw_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + + inner_str += 'ConvDW({},{},{})'.format(dw_channels, self.kernel_size, current_stride) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + if self.use_se: + inner_str += 'SE({})'.format(dw_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(dw_channels, bottleneck_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(bottleneck_channels) + # inner_str += 'RELU({})'.format(bottleneck_channels) + + if not self.no_reslink: + if i == 0: + res_str = 'ResBlockProj({})RELU({})'.format(inner_str, self.out_channels) + else: + res_str = 'ResBlock({})RELU({})'.format(inner_str, self.out_channels) + + else: + res_str = '{}RELU({})'.format(inner_str, self.out_channels) + + full_str += res_str + + # second DW + inner_str = '' + dw_channels = global_utils.smart_round(self.out_channels * self.expension, base=8) + inner_str += 'ConvKX({},{},{},{})'.format(bottleneck_channels, dw_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + + inner_str += 'ConvDW({},{},{})'.format(dw_channels, self.kernel_size, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + if self.use_se: + inner_str += 'SE({})'.format(dw_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(dw_channels, self.out_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + res_str = 'ResBlock({})RELU({})'.format(inner_str, self.out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, self.out_channels) + + full_str += res_str + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str(full_str, no_create=no_create, no_reslink=no_reslink, no_BN=no_BN, **kwargs) + if not no_create: + self.module_list = 
nn.ModuleList(self.block_list) + else: + self.module_list = None + + def __str__(self): + return type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.bottleneck_channels, self.sub_layers) + + def __repr__(self): + return type(self).__name__ + '({}|in={},out={},stride={},btl_channels={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, self.bottleneck_channels, self.sub_layers, self.kernel_size + ) + + def encode_structure(self): + return [self.out_channels, self.sub_layers, self.bottleneck_channels] + + def split(self, split_layer_threshold): + if self.sub_layers >= split_layer_threshold: + new_sublayers_1 = split_layer_threshold // 2 + new_sublayers_2 = self.sub_layers - new_sublayers_1 + new_block_str1 = type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.bottleneck_channels, new_sublayers_1) + new_block_str2 = type(self).__name__ + '({},{},{},{},{})'.format(self.out_channels, self.out_channels, + 1, self.bottleneck_channels, + new_sublayers_2) + return new_block_str1 + new_block_str2 + else: + return str(self) + + def structure_scale(self, scale=1.0, channel_scale=None, sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * channel_scale) + new_bottleneck_channels = global_utils.smart_round(self.bottleneck_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, new_out_channels, + self.stride, new_bottleneck_channels, new_sub_layers) + + + @classmethod + def create_from_str(cls, s, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + bottleneck_channels = int(param_str_split[3]) + sub_layers = int(param_str_split[4]) + return cls(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + block_name=tmp_block_name, **kwargs),s[idx + 1:] + + +class SuperResIDWE1K3(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE1K3, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=3, expension=1.0, + no_create=no_create, **kwargs) + +class SuperResIDWE2K3(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE2K3, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=3, expension=2.0, + no_create=no_create, **kwargs) + +class SuperResIDWE4K3(SuperResIDWEXKX): + def __init__(self, 
in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE4K3, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=3, expension=4.0, + no_create=no_create, **kwargs) + +class SuperResIDWE6K3(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE6K3, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=3, expension=6.0, + no_create=no_create, **kwargs) + + +class SuperResIDWE1K5(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE1K5, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=5, expension=1.0, + no_create=no_create, **kwargs) + +class SuperResIDWE2K5(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE2K5, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=5, expension=2.0, + no_create=no_create, **kwargs) + +class SuperResIDWE4K5(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE4K5, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=5, expension=4.0, + no_create=no_create, **kwargs) + +class SuperResIDWE6K5(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE6K5, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=5, expension=6.0, + no_create=no_create, **kwargs) + +class SuperResIDWE1K7(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE1K7, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=7, expension=1.0, + no_create=no_create, **kwargs) + +class SuperResIDWE2K7(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE2K7, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=7, expension=2.0, + no_create=no_create, **kwargs) + +class SuperResIDWE4K7(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE4K7, 
self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=7, expension=4.0, + no_create=no_create, **kwargs) + +class SuperResIDWE6K7(SuperResIDWEXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResIDWE6K7, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=7, expension=6.0, + no_create=no_create, **kwargs) + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperResIDWE1K3': SuperResIDWE1K3, + 'SuperResIDWE2K3': SuperResIDWE2K3, + 'SuperResIDWE4K3': SuperResIDWE4K3, + 'SuperResIDWE6K3': SuperResIDWE6K3, + 'SuperResIDWE1K5': SuperResIDWE1K5, + 'SuperResIDWE2K5': SuperResIDWE2K5, + 'SuperResIDWE4K5': SuperResIDWE4K5, + 'SuperResIDWE6K5': SuperResIDWE6K5, + 'SuperResIDWE1K7': SuperResIDWE1K7, + 'SuperResIDWE2K7': SuperResIDWE2K7, + 'SuperResIDWE4K7': SuperResIDWE4K7, + 'SuperResIDWE6K7': SuperResIDWE6K7, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResK1KXK1.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResK1KXK1.py new file mode 100644 index 0000000..bef8c63 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResK1KXK1.py @@ -0,0 +1,199 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + + +import os, sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import uuid + +import PlainNet +from PlainNet import _get_right_parentheses_index_ +from PlainNet.super_blocks import PlainNetSuperBlockClass +from torch import nn +import global_utils + + +class SuperResK1KXK1(PlainNetSuperBlockClass): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None ,sub_layers=None, kernel_size=None, + no_create=False, no_reslink=False, no_BN=False, use_se=False, **kwargs): + super(SuperResK1KXK1, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.bottleneck_channels = bottleneck_channels + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + self.use_se = use_se + if self.use_se: + print('---debug use_se in ' + str(self)) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + inner_str = '' + + # first bl-block with reslink + inner_str += 'ConvKX({},{},{},{})'.format(last_channels, self.bottleneck_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, self.bottleneck_channels, + self.kernel_size, current_stride) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + if self.use_se: + inner_str += 'SE({})'.format(bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, self.out_channels, 1, 1) + if not self.no_BN: + inner_str += 
'BN({})'.format(self.out_channels) + + if not self.no_reslink: + if i == 0: + res_str = 'ResBlockProj({})RELU({})'.format(inner_str, out_channels) + else: + res_str = 'ResBlock({})RELU({})'.format(inner_str, out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, out_channels) + + full_str += res_str + + # second bl-block with reslink + inner_str = '' + inner_str += 'ConvKX({},{},{},{})'.format(self.out_channels, self.bottleneck_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, self.bottleneck_channels, + self.kernel_size, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + if self.use_se: + inner_str += 'SE({})'.format(bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, self.out_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + res_str = 'ResBlock({})RELU({})'.format(inner_str, out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, out_channels) + + full_str += res_str + + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str(full_str, no_create=no_create, no_reslink=no_reslink, no_BN=no_BN, **kwargs) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def __str__(self): + return type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.bottleneck_channels, self.sub_layers) + + def __repr__(self): + return type(self).__name__ + '({}|in={},out={},stride={},btl_channels={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, self.bottleneck_channels, self.sub_layers, self.kernel_size + ) + + def encode_structure(self): + return [self.out_channels, self.sub_layers, self.bottleneck_channels] + + def split(self, split_layer_threshold): + if self.sub_layers >= split_layer_threshold: + new_sublayers_1 = split_layer_threshold // 2 + new_sublayers_2 = self.sub_layers - new_sublayers_1 + new_block_str1 = type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.bottleneck_channels, new_sublayers_1) + new_block_str2 = type(self).__name__ + '({},{},{},{},{})'.format(self.out_channels, self.out_channels, + 1, self.bottleneck_channels, + new_sublayers_2) + return new_block_str1 + new_block_str2 + else: + return str(self) + + def structure_scale(self, scale=1.0, channel_scale=None, sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * channel_scale) + new_bottleneck_channels = global_utils.smart_round(self.bottleneck_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, new_out_channels, + self.stride, new_bottleneck_channels, new_sub_layers) + + + @classmethod + def create_from_str(cls, s, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = 
param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + bottleneck_channels = int(param_str_split[3]) + sub_layers = int(param_str_split[4]) + return cls(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + block_name=tmp_block_name, **kwargs),s[idx + 1:] + + +class SuperResK1K3K1(SuperResK1KXK1): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResK1K3K1, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=3, + no_create=no_create, **kwargs) + +class SuperResK1K5K1(SuperResK1KXK1): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResK1K5K1, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=5, + no_create=no_create, **kwargs) + + +class SuperResK1K7K1(SuperResK1KXK1): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResK1K7K1, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=7, + no_create=no_create, **kwargs) + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperResK1K3K1': SuperResK1K3K1, + 'SuperResK1K5K1': SuperResK1K5K1, + 'SuperResK1K7K1': SuperResK1K7K1, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResKXKX.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResKXKX.py new file mode 100644 index 0000000..c2ba60f --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/SuperResKXKX.py @@ -0,0 +1,174 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. 
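# A minimal, illustrative sketch (not taken from this patch): every SuperRes*
# block serialises itself as
#   ClassName(in_channels,out_channels,stride,bottleneck_channels,sub_layers)
# optionally with a leading "block_name|" inside the parentheses, and
# create_from_str() reverses that. The dependency-free parser below mirrors
# the same split (unlike _get_right_parentheses_index_, it ignores nesting):
def parse_super_block(s):
    name, rest = s.split('(', 1)
    param_str = rest[:rest.index(')')]
    block_name = None
    if '|' in param_str:
        block_name, param_str = param_str.split('|', 1)
    in_c, out_c, stride, btl_c, sub_layers = (int(v) for v in param_str.split(','))
    return dict(cls=name, block_name=block_name, in_channels=in_c,
                out_channels=out_c, stride=stride,
                bottleneck_channels=btl_c, sub_layers=sub_layers)

print(parse_super_block('SuperResK1K3K1(stage1|32,64,2,32,2)'))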
+''' + + +import os, sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import uuid + +import PlainNet +from PlainNet import _get_right_parentheses_index_ +from PlainNet.super_blocks import PlainNetSuperBlockClass +from torch import nn +import global_utils + + +class SuperResKXKX(PlainNetSuperBlockClass): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, kernel_size=None, + no_create=False, no_reslink=False, no_BN=False, use_se=False, **kwargs): + super(SuperResKXKX, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.bottleneck_channels = bottleneck_channels + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + self.use_se = use_se + if self.use_se: + print('---debug use_se in ' + str(self)) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + inner_str = '' + + inner_str += 'ConvKX({},{},{},{})'.format(last_channels, self.bottleneck_channels, self.kernel_size, current_stride) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + if self.use_se: + inner_str += 'SE({})'.format(bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, self.out_channels, self.kernel_size, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + if i == 0: + res_str = 'ResBlockProj({})RELU({})'.format(inner_str, out_channels) + else: + res_str = 'ResBlock({})RELU({})'.format(inner_str, out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, out_channels) + + full_str += res_str + + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str(full_str, no_create=no_create, no_reslink=no_reslink, no_BN=no_BN, **kwargs) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def forward_pre_relu(self, x): + output = x + for block in self.block_list[0:-1]: + output = block(output) + return output + + + def __str__(self): + return type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.bottleneck_channels, self.sub_layers) + + def __repr__(self): + return type(self).__name__ + '({}|in={},out={},stride={},btl_channels={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, self.bottleneck_channels, self.sub_layers, self.kernel_size + ) + + def encode_structure(self): + return [self.out_channels, self.sub_layers, self.bottleneck_channels] + + def split(self, split_layer_threshold): + if self.sub_layers >= split_layer_threshold: + new_sublayers_1 = split_layer_threshold // 2 + new_sublayers_2 = self.sub_layers - new_sublayers_1 + new_block_str1 = type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.bottleneck_channels, new_sublayers_1) + new_block_str2 = type(self).__name__ + '({},{},{},{},{})'.format(self.out_channels, self.out_channels, + 1, self.bottleneck_channels, + new_sublayers_2) + return new_block_str1 + new_block_str2 + else: + return str(self) + + def structure_scale(self, scale=1.0, channel_scale=None, sub_layer_scale=None): + if channel_scale is None: + 
channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * channel_scale) + new_bottleneck_channels = global_utils.smart_round(self.bottleneck_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{},{})'.format(self.in_channels, new_out_channels, + self.stride, new_bottleneck_channels, new_sub_layers) + + + @classmethod + def create_from_str(cls, s, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + bottleneck_channels = int(param_str_split[3]) + sub_layers = int(param_str_split[4]) + return cls(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + block_name=tmp_block_name, **kwargs), s[idx + 1:] + + +class SuperResK3K3(SuperResKXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResK3K3, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=3, + no_create=no_create, **kwargs) + +class SuperResK5K5(SuperResKXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResK5K5, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=5, + no_create=no_create, **kwargs) + +class SuperResK7K7(SuperResKXKX): + def __init__(self, in_channels=None, out_channels=None, stride=None, bottleneck_channels=None, sub_layers=None, no_create=False, **kwargs): + super(SuperResK7K7, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + bottleneck_channels=bottleneck_channels, sub_layers=sub_layers, + kernel_size=7, + no_create=no_create, **kwargs) + + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperResK3K3': SuperResK3K3, + 'SuperResK5K5': SuperResK5K5, + 'SuperResK7K7': SuperResK7K7, + + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/__init__.py new file mode 100644 index 0000000..2bf4d6f --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/__init__.py @@ -0,0 +1,289 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. 
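# A minimal, illustrative sketch (not taken from this patch): structure_scale()
# above rescales channel counts through global_utils.smart_round(), which is
# not shown in this diff; the stand-in below simply ASSUMES "round to the
# nearest multiple of `base`". It only illustrates the arithmetic that turns
# one stage string into a narrower/shallower one.
def smart_round_stub(x, base=8):                 # assumed behaviour of smart_round
    return max(base, int(round(x / base)) * base)

def scale_kxkx_stage(in_c, out_c, stride, btl_c, sub_layers, scale=0.75):
    new_out = smart_round_stub(out_c * scale)
    new_btl = smart_round_stub(btl_c * scale)
    new_layers = max(1, round(sub_layers * scale))
    return 'SuperResK3K3({},{},{},{},{})'.format(in_c, new_out, stride,
                                                 new_btl, new_layers)

print(scale_kxkx_stage(64, 128, 2, 64, 3))       # -> SuperResK3K3(64,96,2,48,2)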
+''' + + +import os, sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import torch, argparse +from torch import nn + +_all_netblocks_dict_ = {} + +def parse_cmd_options(argv, opt=None): + parser = argparse.ArgumentParser() + parser.add_argument('--plainnet_struct', type=str, default=None, help='PlainNet structure string') + parser.add_argument('--plainnet_struct_txt', type=str, default=None, help='PlainNet structure file name') + parser.add_argument('--num_classes', type=int, default=None, help='how to prune') + module_opt, _ = parser.parse_known_args(argv) + + return module_opt + +def _get_right_parentheses_index_(s): + # assert s[0] == '(' + left_paren_count = 0 + for index, x in enumerate(s): + + if x == '(': + left_paren_count += 1 + elif x == ')': + left_paren_count -= 1 + if left_paren_count == 0: + return index + else: + pass + return None + +def pretty_format(plainnet_str, indent=2): + the_formated_str = '' + indent_str = '' + if indent >= 1: + indent_str = ''.join([' '] * indent) + + # print(indent_str, end='') + the_formated_str += indent_str + + s = plainnet_str + while len(s) > 0: + if s[0] == ';': + # print(';\n' + indent_str, end='') + the_formated_str += ';\n' + indent_str + s = s[1:] + + left_par_idx = s.find('(') + assert left_par_idx is not None + right_par_idx = _get_right_parentheses_index_(s) + the_block_class_name = s[0:left_par_idx] + + if the_block_class_name in ['MultiSumBlock', 'MultiCatBlock','MultiGroupBlock']: + # print('\n' + indent_str + the_block_class_name + '(') + sub_str = s[left_par_idx + 1:right_par_idx] + + # find block_name + tmp_idx = sub_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'no_name' + else: + tmp_block_name = sub_str[0:tmp_idx] + sub_str = sub_str[tmp_idx+1:] + + if len(tmp_block_name) > 8: + tmp_block_name = tmp_block_name[0:4] + tmp_block_name[-4:] + + the_formated_str += '\n' + indent_str + the_block_class_name + '({}|\n'.format(tmp_block_name) + + the_formated_str += pretty_format(sub_str, indent + 1) + # print('\n' + indent_str + ')') + # print(indent_str, end='') + the_formated_str += '\n' + indent_str + ')\n' + indent_str + elif the_block_class_name in ['ResBlock']: + # print('\n' + indent_str + the_block_class_name + '(') + in_channels = None + the_stride = None + sub_str = s[left_par_idx + 1:right_par_idx] + # find block_name + tmp_idx = sub_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'no_name' + else: + tmp_block_name = sub_str[0:tmp_idx] + sub_str = sub_str[tmp_idx + 1:] + + first_comma_index = sub_str.find(',') + if first_comma_index < 0 or not sub_str[0:first_comma_index].isdigit(): + in_channels = None + else: + in_channels = int(sub_str[0:first_comma_index]) + sub_str = sub_str[first_comma_index+1:] + second_comma_index = sub_str.find(',') + if second_comma_index < 0 or not sub_str[0:second_comma_index].isdigit(): + the_stride = None + else: + the_stride = int(sub_str[0:second_comma_index]) + sub_str = sub_str[second_comma_index + 1:] + pass + pass + + if len(tmp_block_name) > 8: + tmp_block_name = tmp_block_name[0:4] + tmp_block_name[-4:] + + the_formated_str += '\n' + indent_str + the_block_class_name + '({}|'.format(tmp_block_name) + if in_channels is not None: + the_formated_str += '{},'.format(in_channels) + else: + the_formated_str += ',' + + if the_stride is not None: + the_formated_str += '{},'.format(the_stride) + else: + the_formated_str += ',' + + the_formated_str += '\n' + + the_formated_str += pretty_format(sub_str, indent + 1) + # print('\n' + indent_str + ')') + 
# print(indent_str, end='') + the_formated_str += '\n' + indent_str + ')\n' + indent_str + else: + # print(s[0:right_par_idx+1], end='') + sub_str = s[left_par_idx + 1:right_par_idx] + # find block_name + tmp_idx = sub_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'no_name' + else: + tmp_block_name = sub_str[0:tmp_idx] + sub_str = sub_str[tmp_idx + 1:] + + if len(tmp_block_name) > 8: + tmp_block_name = tmp_block_name[0:4] + tmp_block_name[-4:] + + the_formated_str += the_block_class_name + '({}|'.format(tmp_block_name) + sub_str + ')' + + s = s[right_par_idx+1:] + pass # end while + + return the_formated_str + +def _create_netblock_list_from_str_(s, no_create=False, **kwargs): + block_list = [] + while len(s) > 0: + is_found_block_class = False + for the_block_class_name in _all_netblocks_dict_.keys(): + tmp_idx = s.find('(') + if tmp_idx > 0 and s[0:tmp_idx] == the_block_class_name: + is_found_block_class = True + the_block_class = _all_netblocks_dict_[the_block_class_name] + the_block, remaining_s = the_block_class.create_from_str(s, no_create=no_create, **kwargs) + if the_block is not None: + block_list.append(the_block) + s = remaining_s + if len(s) > 0 and s[0] == ';': + return block_list, s[1:] + break + pass # end if + pass # end for + assert is_found_block_class + pass # end while + return block_list, '' + +def create_netblock_list_from_str(s, no_create=False, **kwargs): + the_list, remaining_s = _create_netblock_list_from_str_(s, no_create=no_create, **kwargs) + assert len(remaining_s) == 0 + return the_list + +def add_SE_block(structure_str: str): + new_str = '' + RELU = 'RELU' + offset = 4 + + idx = structure_str.find(RELU) + while idx >= 0: + new_str += structure_str[0: idx] + structure_str = structure_str[idx:] + r_idx = _get_right_parentheses_index_(structure_str[offset:]) + offset + channels = structure_str[offset + 1:r_idx] + new_str += 'RELU({})SE({})'.format(channels, channels) + structure_str = structure_str[r_idx + 1:] + idx = structure_str.find(RELU) + pass + + new_str += structure_str + return new_str + +class PlainNet(nn.Module): + def __init__(self, argv=None, opt=None, num_classes=None, plainnet_struct=None, no_create=False, + **kwargs): + super(PlainNet, self).__init__() + self.argv = argv + self.opt = opt + self.num_classes = num_classes + self.plainnet_struct = plainnet_struct + + self.module_opt = parse_cmd_options(self.argv) + + if self.num_classes is None: + self.num_classes = self.module_opt.num_classes + + if self.plainnet_struct is None and self.module_opt.plainnet_struct is not None: + self.plainnet_struct = self.module_opt.plainnet_struct + + if self.plainnet_struct is None: + # load structure from text file + if hasattr(opt, 'plainnet_struct_txt') and opt.plainnet_struct_txt is not None: + plainnet_struct_txt = opt.plainnet_struct_txt + else: + plainnet_struct_txt = self.module_opt.plainnet_struct_txt + + if plainnet_struct_txt is not None: + with open(plainnet_struct_txt, 'r') as fid: + the_line = fid.readlines()[0].strip() + self.plainnet_struct = the_line + pass + + if self.plainnet_struct is None: + return + + the_s = self.plainnet_struct # type: str + + block_list, remaining_s = _create_netblock_list_from_str_(the_s, no_create=no_create, **kwargs) + assert len(remaining_s) == 0 + + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) # register + + def forward(self, x): + output = x + for the_block in self.block_list: + output = the_block(output) + return output + + def __str__(self): + s = '' + for 
the_block in self.block_list: + s += str(the_block) + return s + + def __repr__(self): + return str(self) + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def replace_block(self, block_id, new_block): + self.block_list[block_id] = new_block + if block_id < len(self.block_list): + self.block_list[block_id + 1].set_in_channels(new_block.out_channels) + + self.module_list = nn.Module(self.block_list) + + + +from PlainNet import basic_blocks +_all_netblocks_dict_ = basic_blocks.register_netblocks_dict(_all_netblocks_dict_) + +from PlainNet import super_blocks +_all_netblocks_dict_ = super_blocks.register_netblocks_dict(_all_netblocks_dict_) +from PlainNet import SuperResKXKX +_all_netblocks_dict_ = SuperResKXKX.register_netblocks_dict(_all_netblocks_dict_) + +from PlainNet import SuperResK1KXK1 +_all_netblocks_dict_ = SuperResK1KXK1.register_netblocks_dict(_all_netblocks_dict_) + +from PlainNet import SuperResIDWEXKX +_all_netblocks_dict_ = SuperResIDWEXKX.register_netblocks_dict(_all_netblocks_dict_) \ No newline at end of file diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/basic_blocks.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/basic_blocks.py new file mode 100644 index 0000000..0de8ade --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/basic_blocks.py @@ -0,0 +1,1503 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. 
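+
+Basic PlainNet building blocks (Conv/BN/ReLU/pooling, residual and SE blocks).
+Each block class can be parsed from a structure string via its create_from_str() method.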
+''' + + +import os, sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import torch +from torch import nn +import torch.nn.functional as F +import numpy as np +import uuid + +from PlainNet import _get_right_parentheses_index_, _create_netblock_list_from_str_ + +class PlainNetBasicBlockClass(nn.Module): + def __init__(self, in_channels=None, out_channels=None, stride=1, no_create=False, block_name=None, **kwargs): + super(PlainNetBasicBlockClass, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.no_create = no_create + self.block_name = block_name + if self.block_name is None: + self.block_name = 'uuid{}'.format(uuid.uuid4().hex) + + def forward(self, x): + raise RuntimeError('Not implemented') + + def __str__(self): + return type(self).__name__ + '({},{},{})'.format(self.in_channels, self.out_channels, self.stride) + + def __repr__(self): + return type(self).__name__ + '({}|{},{},{})'.format(self.block_name, self.in_channels, self.out_channels, self.stride) + + def get_output_resolution(self, input_resolution): + raise RuntimeError('Not implemented') + + def get_FLOPs(self, input_resolution): + raise RuntimeError('Not implemented') + + def get_model_size(self): + raise RuntimeError('Not implemented') + + def set_in_channels(self, c): + raise RuntimeError('Not implemented') + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert PlainNetBasicBlockClass.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + return cls(in_channels=in_channels, out_channels=out_channels, stride=stride, + block_name=tmp_block_name, no_create=no_create), s[idx + 1:] + + @classmethod + def is_instance_from_str(cls, s): + if s.startswith(cls.__name__ + '(') and s[-1] == ')': + return True + else: + return False + + +class AdaptiveAvgPool(PlainNetBasicBlockClass): + def __init__(self, out_channels, output_size, no_create=False, **kwargs): + super(AdaptiveAvgPool, self).__init__(**kwargs) + self.in_channels = out_channels + self.out_channels = out_channels + self.output_size = output_size + self.no_create = no_create + if not no_create: + self.netblock = nn.AdaptiveAvgPool2d(output_size=(self.output_size, self.output_size)) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return type(self).__name__ + '({},{})'.format(self.out_channels // self.output_size**2, self.output_size) + + def __repr__(self): + return type(self).__name__ + '({}|{},{})'.format(self.block_name, + self.out_channels // self.output_size ** 2, self.output_size) + + def get_output_resolution(self, input_resolution): + return self.output_size + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert AdaptiveAvgPool.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('AdaptiveAvgPool('):idx] 
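+        # Illustrative example: for s = 'AdaptiveAvgPool(2048,1)...', param_str is '2048,1'
+        # (optionally prefixed with 'block_name|'), giving out_channels=2048 and output_size=1.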
+ + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + out_channels = int(param_str_split[0]) + output_size = int(param_str_split[1]) + return AdaptiveAvgPool(out_channels=out_channels, output_size=output_size, + block_name=tmp_block_name, no_create=no_create), s[idx + 1:] + + +class BN(PlainNetBasicBlockClass): + def __init__(self, out_channels=None, copy_from=None, no_create=False, **kwargs): + super(BN, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.BatchNorm2d) + self.in_channels = copy_from.weight.shape[0] + self.out_channels = copy_from.weight.shape[0] + assert out_channels is None or out_channels == self.out_channels + self.netblock = copy_from + + else: + self.in_channels = out_channels + self.out_channels = out_channels + if no_create: + return + else: + self.netblock = nn.BatchNorm2d(num_features=self.out_channels) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'BN({})'.format(self.out_channels) + + def __repr__(self): + return 'BN({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return input_resolution ** 2 * self.out_channels + + def get_model_size(self): + return self.out_channels + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + if not self.no_create: + self.netblock = nn.BatchNorm2d(num_features=self.out_channels) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert BN.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('BN('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + out_channels = int(param_str) + return BN(out_channels=out_channels, block_name=tmp_block_name, no_create=no_create), s[idx + 1:] + + +class ConvKX(PlainNetBasicBlockClass): + def __init__(self, in_channels=None, out_channels=None, kernel_size=None, stride=None, groups=1, copy_from=None, + no_create=False, **kwargs): + super(ConvKX, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.Conv2d) + self.in_channels = copy_from.in_channels + self.out_channels = copy_from.out_channels + self.kernel_size = copy_from.kernel_size[0] + self.stride = copy_from.stride[0] + self.groups = copy_from.groups + assert in_channels is None or in_channels == self.in_channels + assert out_channels is None or out_channels == self.out_channels + assert kernel_size is None or kernel_size == self.kernel_size + assert stride is None or stride == self.stride + self.netblock = copy_from + else: + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.groups = groups + self.kernel_size = kernel_size + self.padding = (self.kernel_size - 1) // 2 + if no_create or self.in_channels == 0 or self.out_channels == 0 or self.kernel_size == 0 \ + or self.stride == 0: + return + else: + self.netblock = nn.Conv2d(in_channels=self.in_channels, 
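+                                      # groups=self.groups (passed below) enables grouped convolution;
+                                      # the ConvKXG2/4/8/16/32 subclasses set groups=2/4/8/16/32.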
out_channels=self.out_channels, + kernel_size=self.kernel_size, stride=self.stride, + padding=self.padding, bias=False, groups=self.groups) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return type(self).__name__ + '({},{},{},{})'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride) + + def __repr__(self): + return type(self).__name__ + '({}|{},{},{},{})'.format(self.block_name, self.in_channels, self.out_channels, self.kernel_size, self.stride) + + def get_output_resolution(self, input_resolution): + return input_resolution // self.stride + + def get_FLOPs(self, input_resolution): + return self.in_channels * self.out_channels * self.kernel_size ** 2 * input_resolution ** 2 // self.stride ** 2 // self.groups + + def get_model_size(self): + return self.in_channels * self.out_channels * self.kernel_size ** 2 // self.groups + + def set_in_channels(self, c): + self.in_channels = c + if not self.no_create: + self.netblock = nn.Conv2d(in_channels=self.in_channels, out_channels=self.out_channels, + kernel_size=self.kernel_size, stride=self.stride, + padding=self.padding, bias=False) + self.netblock.train() + self.netblock.requires_grad_(True) + + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + split_str = param_str.split(',') + in_channels = int(split_str[0]) + out_channels = int(split_str[1]) + kernel_size = int(split_str[2]) + stride = int(split_str[3]) + return cls(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, no_create=no_create, block_name=tmp_block_name), s[idx + 1:] + + +class ConvDW(PlainNetBasicBlockClass): + def __init__(self, out_channels=None, kernel_size=None, stride=None, copy_from=None, + no_create=False, **kwargs): + super(ConvDW, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.Conv2d) + self.in_channels = copy_from.in_channels + self.out_channels = copy_from.out_channels + self.kernel_size = copy_from.kernel_size[0] + self.stride = copy_from.stride[0] + assert self.in_channels == self.out_channels + assert out_channels is None or out_channels == self.out_channels + assert kernel_size is None or kernel_size == self.kernel_size + assert stride is None or stride == self.stride + + self.netblock = copy_from + else: + + self.in_channels = out_channels + self.out_channels = out_channels + self.stride = stride + self.kernel_size = kernel_size + + self.padding = (self.kernel_size - 1) // 2 + if no_create or self.in_channels == 0 or self.out_channels == 0 or self.kernel_size == 0 \ + or self.stride == 0: + return + else: + self.netblock = nn.Conv2d(in_channels=self.in_channels, out_channels=self.out_channels, + kernel_size=self.kernel_size, stride=self.stride, + padding=self.padding, bias=False, groups=self.in_channels) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'ConvDW({},{},{})'.format(self.out_channels, self.kernel_size, self.stride) + + def __repr__(self): + return 'ConvDW({}|{},{},{})'.format(self.block_name, self.out_channels, self.kernel_size, self.stride) + + def get_output_resolution(self, 
input_resolution): + return input_resolution // self.stride + + def get_FLOPs(self, input_resolution): + return self.out_channels * self.kernel_size ** 2 * input_resolution ** 2 // self.stride ** 2 + + def get_model_size(self): + return self.out_channels * self.kernel_size ** 2 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels=self.in_channels + if not self.no_create: + self.netblock = nn.Conv2d(in_channels=self.in_channels, out_channels=self.out_channels, + kernel_size=self.kernel_size, stride=self.stride, + padding=self.padding, bias=False, groups=self.in_channels) + self.netblock.train() + self.netblock.requires_grad_(True) + + + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert ConvDW.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('ConvDW('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + split_str = param_str.split(',') + out_channels = int(split_str[0]) + kernel_size = int(split_str[1]) + stride = int(split_str[2]) + return ConvDW(out_channels=out_channels, + kernel_size=kernel_size, stride=stride, no_create=no_create, block_name=tmp_block_name), s[idx + 1:] + +class ConvKXG2(ConvKX): + def __init__(self, in_channels=None, out_channels=None, kernel_size=None, stride=None, copy_from=None, + no_create=False, **kwargs): + super(ConvKXG2, self).__init__(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, copy_from=copy_from, no_create=no_create, + groups=2, **kwargs) + +class ConvKXG4(ConvKX): + def __init__(self, in_channels=None, out_channels=None, kernel_size=None, stride=None, copy_from=None, + no_create=False, **kwargs): + super(ConvKXG4, self).__init__(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, copy_from=copy_from, no_create=no_create, + groups=4, **kwargs) + + +class ConvKXG8(ConvKX): + def __init__(self, in_channels=None, out_channels=None, kernel_size=None, stride=None, copy_from=None, + no_create=False, **kwargs): + super(ConvKXG8, self).__init__(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, copy_from=copy_from, no_create=no_create, + groups=8, **kwargs) + +class ConvKXG16(ConvKX): + def __init__(self, in_channels=None, out_channels=None, kernel_size=None, stride=None, copy_from=None, + no_create=False, **kwargs): + super(ConvKXG16, self).__init__(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, copy_from=copy_from, no_create=no_create, + groups=16, **kwargs) + +class ConvKXG32(ConvKX): + def __init__(self, in_channels=None, out_channels=None, kernel_size=None, stride=None, copy_from=None, + no_create=False, **kwargs): + super(ConvKXG32, self).__init__(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, copy_from=copy_from, no_create=no_create, + groups=32, **kwargs) + + +class Flatten(PlainNetBasicBlockClass): + def __init__(self, out_channels, no_create=False, **kwargs): + super(Flatten, self).__init__(**kwargs) + self.in_channels = out_channels + self.out_channels = out_channels + self.no_create = no_create + + def forward(self, x): + return torch.flatten(x, 1) + + def __str__(self): + return 'Flatten({})'.format(self.out_channels) + + def __repr__(self): + return 
'Flatten({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return 1 + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Flatten.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('Flatten('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return Flatten(out_channels=out_channels, no_create=no_create, block_name=tmp_block_name), s[idx + 1:] + + + +class Linear(PlainNetBasicBlockClass): + def __init__(self, in_channels=None, out_channels=None, bias=True, copy_from=None, + no_create=False, **kwargs): + super(Linear, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.Linear) + self.in_channels = copy_from.weight.shape[1] + self.out_channels = copy_from.weight.shape[0] + self.use_bias = copy_from.bias is not None + assert in_channels is None or in_channels == self.in_channels + assert out_channels is None or out_channels == self.out_channels + + self.netblock = copy_from + else: + + self.in_channels = in_channels + self.out_channels = out_channels + self.use_bias = bias + if not no_create: + self.netblock = nn.Linear(self.in_channels, self.out_channels, + bias=self.use_bias) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'Linear({},{},{})'.format(self.in_channels, self.out_channels, int(self.use_bias)) + + def __repr__(self): + return 'Linear({}|{},{},{})'.format(self.block_name, self.in_channels, self.out_channels, int(self.use_bias)) + + def get_output_resolution(self, input_resolution): + assert input_resolution == 1 + return 1 + + def get_FLOPs(self, input_resolution): + return self.in_channels * self.out_channels + + def get_model_size(self): + return self.in_channels * self.out_channels + int(self.use_bias) + + def set_in_channels(self, c): + self.in_channels = c + if not self.no_create: + self.netblock = nn.Linear(self.in_channels, self.out_channels, + bias=self.use_bias) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Linear.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('Linear('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + split_str = param_str.split(',') + in_channels = int(split_str[0]) + out_channels = int(split_str[1]) + use_bias = int(split_str[2]) + + return Linear(in_channels=in_channels, out_channels=out_channels, bias=use_bias == 1, + block_name=tmp_block_name, no_create=no_create), s[idx+1 :] + + + +class MaxPool(PlainNetBasicBlockClass): + def __init__(self, out_channels, kernel_size, stride, no_create=False, **kwargs): + super(MaxPool, self).__init__(**kwargs) + self.in_channels = out_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = (kernel_size - 1) // 2 + 
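+        # For odd kernel sizes, (kernel_size - 1) // 2 is 'same'-style padding before the
+        # stride is applied, matching the padding convention of ConvKX/ConvDW above.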
self.no_create = no_create + if not no_create: + self.netblock = nn.MaxPool2d(kernel_size=self.kernel_size, stride=self.stride, padding=self.padding) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'MaxPool({},{},{})'.format(self.out_channels, self.kernel_size, self.stride) + + def __repr__(self): + return 'MaxPool({}|{},{},{})'.format(self.block_name, self.out_channels, self.kernel_size, self.stride) + + def get_output_resolution(self, input_resolution): + return input_resolution // self.stride + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + if not self.no_create: + self.netblock = nn.MaxPool2d(kernel_size=self.kernel_size, stride=self.stride, padding=self.padding) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert MaxPool.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('MaxPool('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + out_channels = int(param_str_split[0]) + kernel_size = int(param_str_split[1]) + stride = int(param_str_split[2]) + return MaxPool(out_channels=out_channels, kernel_size=kernel_size, stride=stride, no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class Sequential(PlainNetBasicBlockClass): + def __init__(self, block_list, no_create=False, **kwargs): + super(Sequential, self).__init__(**kwargs) + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) + self.in_channels = block_list[0].in_channels + self.out_channels = block_list[-1].out_channels + self.no_create = no_create + res = 1024 + for block in self.block_list: + res = block.get_output_resolution(res) + self.stride = 1024 // res + + def forward(self, x): + output = x + for inner_block in self.block_list: + output = inner_block(output) + return output + + def __str__(self): + s = 'Sequential(' + for inner_block in self.block_list: + s += str(inner_block) + s += ')' + return s + + def __repr__(self): + return str(self) + + def get_output_resolution(self, input_resolution): + the_res = input_resolution + for the_block in self.block_list: + the_res = the_block.get_output_resolution(the_res) + return the_res + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and isinstance(self.block_list[1], BN): + self.block_list[1].set_in_channels(last_channels) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Sequential.is_instance_from_str(s) + the_right_paraen_idx = _get_right_parentheses_index_(s) + param_str = s[len('Sequential(')+1:the_right_paraen_idx] + # find block_name + tmp_idx = 
param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + assert len(remaining_s) == 0 + if the_block_list is None or len(the_block_list) == 0: + return None, '' + return Sequential(block_list=the_block_list, no_create=no_create, block_name=tmp_block_name), '' + + +class MultiSumBlock(PlainNetBasicBlockClass): + def __init__(self, block_list, no_create=False, **kwargs): + super(MultiSumBlock, self).__init__(**kwargs) + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) + self.in_channels = np.max([x.in_channels for x in block_list]) + self.out_channels = np.max([x.out_channels for x in block_list]) + self.no_create = no_create + + res = 1024 + res = self.block_list[0].get_output_resolution(res) + self.stride = 1024 // res + + def forward(self, x): + output = self.block_list[0](x) + for inner_block in self.block_list[1:]: + output2 = inner_block(x) + output = output + output2 + return output + + def __str__(self): + s = 'MultiSumBlock({}|'.format(self.block_name) + for inner_block in self.block_list: + s += str(inner_block) + ';' + s = s[:-1] + s += ')' + return s + + def __repr__(self): + return str(self) + + + def get_output_resolution(self, input_resolution): + the_res = self.block_list[0].get_output_resolution(input_resolution) + for the_block in self.block_list: + assert the_res == the_block.get_output_resolution(input_resolution) + + return the_res + + def get_FLOPs(self, input_resolution): + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(input_resolution) + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + for the_block in self.block_list: + the_block.set_in_channels(c) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert MultiSumBlock.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('MultiSumBlock('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + the_s = param_str + + the_block_list = [] + while len(the_s) > 0: + tmp_block_list, remaining_s = _create_netblock_list_from_str_(the_s, no_create=no_create) + the_s = remaining_s + if tmp_block_list is None: + pass + elif len(tmp_block_list) == 1: + the_block_list.append(tmp_block_list[0]) + else: + the_block_list.append(Sequential(block_list=tmp_block_list, no_create=no_create)) + pass # end while + + if len(the_block_list) == 0: + return None, s[idx+1:] + + return MultiSumBlock(block_list=the_block_list, block_name=tmp_block_name, no_create=no_create), s[idx+1:] + + +class MultiCatBlock(PlainNetBasicBlockClass): + def __init__(self, block_list, no_create=False, **kwargs): + super(MultiCatBlock, self).__init__(**kwargs) + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) + self.in_channels = np.max([x.in_channels for x in block_list]) + self.out_channels = np.sum([x.out_channels for x in block_list]) + self.no_create = no_create + + res = 1024 + res = 
self.block_list[0].get_output_resolution(res) + self.stride = 1024 // res + + def forward(self, x): + output_list = [] + for inner_block in self.block_list: + output = inner_block(x) + output_list.append(output) + + return torch.cat(output_list, dim=1) + + def __str__(self): + s = 'MultiCatBlock({}|'.format(self.block_name) + for inner_block in self.block_list: + s += str(inner_block) + ';' + + s = s[:-1] + s += ')' + return s + + def __repr__(self): + return str(self) + + def get_output_resolution(self, input_resolution): + the_res = self.block_list[0].get_output_resolution(input_resolution) + for the_block in self.block_list: + assert the_res == the_block.get_output_resolution(input_resolution) + + return the_res + + def get_FLOPs(self, input_resolution): + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(input_resolution) + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + for the_block in self.block_list: + the_block.set_in_channels(c) + self.out_channels = np.sum([x.out_channels for x in self.block_list]) + + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert MultiCatBlock.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('MultiCatBlock('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + the_s = param_str + + the_block_list = [] + while len(the_s) > 0: + tmp_block_list, remaining_s = _create_netblock_list_from_str_(the_s, no_create=no_create) + the_s = remaining_s + if tmp_block_list is None: + pass + elif len(tmp_block_list) == 1: + the_block_list.append(tmp_block_list[0]) + else: + the_block_list.append(Sequential(block_list=tmp_block_list, no_create=no_create)) + pass # end if + pass # end while + + if len(the_block_list) == 0: + return None, s[idx+1:] + + return MultiCatBlock(block_list=the_block_list, block_name=tmp_block_name, + no_create=no_create), s[idx + 1:] + + +class RELU(PlainNetBasicBlockClass): + def __init__(self, out_channels, no_create=False, **kwargs): + super(RELU, self).__init__(**kwargs) + self.in_channels = out_channels + self.out_channels = out_channels + self.no_create = no_create + + def forward(self, x): + return F.relu(x) + + def __str__(self): + return 'RELU({})'.format(self.out_channels) + + def __repr__(self): + return 'RELU({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert RELU.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('RELU('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return RELU(out_channels=out_channels, no_create=no_create, block_name=tmp_block_name), s[idx+1:] + + + +class 
ResBlock(PlainNetBasicBlockClass): + ''' + ResBlock(in_channles, inner_blocks_str). If in_channels is missing, use block_list[0].in_channels as in_channels + ''' + def __init__(self, block_list, in_channels=None, stride=None, no_create=False, **kwargs): + super(ResBlock, self).__init__(**kwargs) + self.block_list = block_list + self.stride = stride + self.no_create = no_create + if not no_create: + self.module_list = nn.ModuleList(block_list) + + if in_channels is None: + self.in_channels = block_list[0].in_channels + else: + self.in_channels = in_channels + self.out_channels = block_list[-1].out_channels + + if self.stride is None: + tmp_input_res = 1024 + tmp_output_res = self.get_output_resolution(tmp_input_res) + self.stride = tmp_input_res // tmp_output_res + + self.proj = None + if self.stride > 1 or self.in_channels != self.out_channels: + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, self.stride), + nn.BatchNorm2d(self.out_channels), + ) + + def forward(self, x): + if len(self.block_list) == 0: + return x + + output = x + for inner_block in self.block_list: + output = inner_block(output) + + if self.proj is not None: + output = output + self.proj(x) + else: + output = output + x + + return output + + def __str__(self): + s = 'ResBlock({},{},'.format(self.in_channels, self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def __repr__(self): + s = 'ResBlock({}|{},{},'.format(self.block_name, self.in_channels, self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def get_output_resolution(self, input_resolution): + the_res = input_resolution + for the_block in self.block_list: + the_res = the_block.get_output_resolution(the_res) + + return the_res + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + + if self.proj is not None: + the_flops += self.in_channels * self.out_channels * (the_res / self.stride) ** 2 + \ + (the_res / self.stride) ** 2 * self.out_channels + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + if self.proj is not None: + the_size += self.in_channels * self.out_channels + self.out_channels + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and \ + ( isinstance(self.block_list[0], ConvKX) or isinstance(self.block_list[0], ConvDW)) and \ + isinstance(self.block_list[1], BN): + self.block_list[1].set_in_channels(last_channels) + + self.proj = None + if not self.no_create: + if self.stride > 1 or self.in_channels != self.out_channels: + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, self.stride), + nn.BatchNorm2d(self.out_channels), + ) + self.proj.train() + self.proj.requires_grad_(True) + + + + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert ResBlock.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + the_stride = None + param_str = s[len('ResBlock('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 
'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + first_comma_index = param_str.find(',') + if first_comma_index < 0 or not param_str[0:first_comma_index].isdigit(): # cannot parse in_channels, missing, use default + in_channels = None + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + else: + in_channels = int(param_str[0:first_comma_index]) + param_str = param_str[first_comma_index+1:] + second_comma_index = param_str.find(',') + if second_comma_index < 0 or not param_str[0:second_comma_index].isdigit(): + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + else: + the_stride = int(param_str[0:second_comma_index]) + param_str = param_str[second_comma_index + 1:] + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + pass + pass + + assert len(remaining_s) == 0 + if the_block_list is None or len(the_block_list) == 0: + return None, s[idx+1:] + return ResBlock(block_list=the_block_list, in_channels=in_channels, + stride=the_stride, no_create=no_create, block_name=tmp_block_name), s[idx+1:] + + +class ResBlockProj(PlainNetBasicBlockClass): + ''' + ResBlockProj(in_channles, inner_blocks_str). If in_channels is missing, use block_list[0].in_channels as in_channels + ''' + def __init__(self, block_list, in_channels=None, stride=None, no_create=False, **kwargs): + super(ResBlockProj, self).__init__(**kwargs) + self.block_list = block_list + self.stride = stride + self.no_create = no_create + if not no_create: + self.module_list = nn.ModuleList(block_list) + + if in_channels is None: + self.in_channels = block_list[0].in_channels + else: + self.in_channels = in_channels + self.out_channels = block_list[-1].out_channels + + if self.stride is None: + tmp_input_res = 1024 + tmp_output_res = self.get_output_resolution(tmp_input_res) + self.stride = tmp_input_res // tmp_output_res + + + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, self.stride), + nn.BatchNorm2d(self.out_channels), + ) + + def forward(self, x): + if len(self.block_list) == 0: + return x + + output = x + for inner_block in self.block_list: + output = inner_block(output) + output = output + self.proj(x) + return output + + def __str__(self): + s = 'ResBlockProj({},{},'.format(self.in_channels, self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def __repr__(self): + s = 'ResBlockProj({}|{},{},'.format(self.block_name, self.in_channels, self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def get_output_resolution(self, input_resolution): + the_res = input_resolution + for the_block in self.block_list: + the_res = the_block.get_output_resolution(the_res) + + return the_res + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + + if self.proj is not None: + the_flops += self.in_channels * self.out_channels * (the_res / self.stride) ** 2 + \ + (the_res / self.stride) ** 2 * self.out_channels + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + if self.proj is not None: + the_size += self.in_channels * self.out_channels + self.out_channels + + 
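+        # This extra term approximates the 1x1 projection shortcut's parameters
+        # (its conv weights plus a per-output-channel term).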
return the_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and \ + ( isinstance(self.block_list[0], ConvKX) or isinstance(self.block_list[0], ConvDW)) and \ + isinstance(self.block_list[1], BN): + self.block_list[1].set_in_channels(last_channels) + + self.proj = None + if not self.no_create: + if self.stride > 1 or self.in_channels != self.out_channels: + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, self.stride), + nn.BatchNorm2d(self.out_channels), + ) + self.proj.train() + self.proj.requires_grad_(True) + + + + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert ResBlockProj.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + the_stride = None + param_str = s[len('ResBlockProj('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + first_comma_index = param_str.find(',') + if first_comma_index < 0 or not param_str[0:first_comma_index].isdigit(): # cannot parse in_channels, missing, use default + in_channels = None + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + else: + in_channels = int(param_str[0:first_comma_index]) + param_str = param_str[first_comma_index+1:] + second_comma_index = param_str.find(',') + if second_comma_index < 0 or not param_str[0:second_comma_index].isdigit(): + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + else: + the_stride = int(param_str[0:second_comma_index]) + param_str = param_str[second_comma_index + 1:] + the_block_list, remaining_s = _create_netblock_list_from_str_(param_str, no_create=no_create) + pass + pass + + assert len(remaining_s) == 0 + if the_block_list is None or len(the_block_list) == 0: + return None, s[idx+1:] + return ResBlockProj(block_list=the_block_list, in_channels=in_channels, + stride=the_stride, no_create=no_create, block_name=tmp_block_name), s[idx+1:] + +class SE(PlainNetBasicBlockClass): + def __init__(self, out_channels=None, copy_from=None, + no_create=False, **kwargs): + super(SE, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + raise RuntimeError('Not implemented') + else: + self.in_channels = out_channels + self.out_channels = out_channels + self.se_ratio = 0.25 + self.se_channels = max(1, int(round(self.out_channels * self.se_ratio))) + if no_create or self.out_channels == 0: + return + else: + self.netblock = nn.Sequential( + nn.AdaptiveAvgPool2d((1,1)), + nn.Conv2d(in_channels=self.out_channels, out_channels=self.se_channels, kernel_size=1, stride=1, + padding=0, bias=False), + nn.BatchNorm2d(self.se_channels), + nn.ReLU(), + nn.Conv2d(in_channels=self.se_channels, out_channels=self.out_channels, kernel_size=1, stride=1, + padding=0, bias=False), + nn.BatchNorm2d(self.out_channels), + nn.Sigmoid() + ) + + def forward(self, x): + se_x = self.netblock(x) + return se_x * x + + def __str__(self): + return 'SE({})'.format(self.out_channels) + + def __repr__(self): + return 'SE({}|{})'.format(self.block_name,self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, 
input_resolution): + return self.in_channels * self.se_channels + self.se_channels * self.out_channels + self.out_channels + \ + self.out_channels * input_resolution ** 2 + + def get_model_size(self): + return self.in_channels * self.se_channels + 2 * self.se_channels + self.se_channels * self.out_channels + \ + 2 * self.out_channels + + def set_in_channels(self, c): + self.in_channels = c + if not self.no_create: + self.netblock = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels=self.out_channels, out_channels=self.se_channels, kernel_size=1, stride=1, + padding=0, bias=False), + nn.BatchNorm2d(self.se_channels), + nn.ReLU(), + nn.Conv2d(in_channels=self.se_channels, out_channels=self.out_channels, kernel_size=1, stride=1, + padding=0, bias=False), + nn.BatchNorm2d(self.out_channels), + nn.Sigmoid() + ) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert SE.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('SE('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return SE(out_channels=out_channels, no_create=no_create, block_name=tmp_block_name), s[idx + 1:] + + + +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_variables[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) + + +class Swish(PlainNetBasicBlockClass): + def __init__(self, out_channels=None, copy_from=None, + no_create=False, **kwargs): + super(Swish, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + raise RuntimeError('Not implemented') + else: + self.in_channels = out_channels + self.out_channels = out_channels + + def forward(self, x): + return SwishImplementation.apply(x) + + def __str__(self): + return 'Swish({})'.format(self.out_channels) + + def __repr__(self): + return 'Swish({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return self.out_channels * input_resolution ** 2 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Swish.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('Swish('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return Swish(out_channels=out_channels, no_create=no_create, block_name=tmp_block_name), s[idx + 1:] + + + +def _add_bn_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, ConvKX) or isinstance(the_block, ConvDW): + out_channels = the_block.out_channels + new_bn_block = BN(out_channels=out_channels, no_create=True) + new_seq_with_bn = Sequential(block_list=[the_block, 
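+                                                 # new_bn_block was built with no_create=True,
+                                                 # so no nn module is instantiated at this point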
new_bn_block], no_create=True) + new_block_list.append(new_seq_with_bn) + elif hasattr(the_block, 'block_list'): + new_block_list = _add_bn_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + + +def _remove_bn_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, BN): + continue + elif hasattr(the_block, 'block_list'): + new_block_list = _remove_bn_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + + +def _add_se_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, RELU): + out_channels = the_block.out_channels + new_se_block = SE(out_channels=out_channels, no_create=True) + new_seq_with_bn = Sequential(block_list=[the_block, new_se_block], no_create=True) + new_block_list.append(new_seq_with_bn) + elif hasattr(the_block, 'block_list'): + new_block_list = _add_se_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + +def _replace_relu_with_swish_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, RELU): + out_channels = the_block.out_channels + new_swish_block = Swish(out_channels=out_channels, no_create=True) + new_block_list.append(new_swish_block) + elif hasattr(the_block, 'block_list'): + new_block_list = _replace_relu_with_swish_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + +def _fuse_convkx_and_bn_(convkx, bn): + the_weight_scale = bn.weight / torch.sqrt(bn.running_var + bn.eps) + convkx.weight[:] = convkx.weight * the_weight_scale.view((-1, 1, 1, 1)) + the_bias_shift = (bn.weight * bn.running_mean) / \ + torch.sqrt(bn.running_var + bn.eps) + bn.weight[:] = 1 + bn.bias[:] = bn.bias - the_bias_shift + bn.running_var[:] = 1.0 - bn.eps + bn.running_mean[:] = 0.0 + + +def _fuse_bn_layer_for_blocks_list_(block_list): + last_block = None # type: ConvKX + with torch.no_grad(): + for the_block in block_list: + if isinstance(the_block, BN): + # assert isinstance(last_block, ConvKX) or isinstance(last_block, ConvDW) + if isinstance(last_block, ConvKX) or isinstance(last_block, ConvDW): + _fuse_convkx_and_bn_(last_block.netblock, the_block.netblock) + else: + print('--- warning! 
Cannot fuse BN={} because last_block={}'.format(the_block, last_block)) + + last_block = None + elif isinstance(the_block, ConvKX) or isinstance(the_block, ConvDW): + last_block = the_block + elif hasattr(the_block, 'block_list') and the_block.block_list is not None and \ + len(the_block.block_list) > 0: + _fuse_bn_layer_for_blocks_list_(the_block.block_list) + else: + pass + pass + pass + pass # end with + + + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'AdaptiveAvgPool': AdaptiveAvgPool, + 'BN': BN, + 'ConvDW': ConvDW, + 'ConvKX': ConvKX, + 'ConvKXG2': ConvKXG2, + 'ConvKXG4': ConvKXG4, + 'ConvKXG8': ConvKXG8, + 'ConvKXG16': ConvKXG16, + 'ConvKXG32': ConvKXG32, + 'Flatten': Flatten, + 'Linear': Linear, + 'MaxPool': MaxPool, + 'MultiSumBlock': MultiSumBlock, + 'MultiCatBlock': MultiCatBlock, + 'PlainNetBasicBlockClass': PlainNetBasicBlockClass, + 'RELU': RELU, + 'ResBlock': ResBlock, + 'ResBlockProj': ResBlockProj, + 'Sequential': Sequential, + 'SE': SE, + 'Swish': Swish, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/super_blocks.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/super_blocks.py new file mode 100644 index 0000000..71fd095 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet/super_blocks.py @@ -0,0 +1,222 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + + +import os, sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import torch +from torch import nn +import torch.nn.functional as F +import numpy as np +import uuid +import global_utils + +import PlainNet +from PlainNet import _get_right_parentheses_index_, basic_blocks + +class PlainNetSuperBlockClass(basic_blocks.PlainNetBasicBlockClass): + def __init__(self, in_channels=None, out_channels=None, stride=None, sub_layers=None, no_create=False, **kwargs): + super(PlainNetSuperBlockClass, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.sub_layers = sub_layers + self.no_create = no_create + self.block_list = None + self.module_list = None + + def forward(self, x): + output = x + for block in self.block_list: + output = block(output) + return output + + def __str__(self): + return type(self).__name__ + '({},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.sub_layers) + + def __repr__(self): + return type(self).__name__ + '({}|{},{},{},{})'.format(self.block_name, self.in_channels, self.out_channels, + self.stride, self.sub_layers) + + def get_output_resolution(self, input_resolution): + resolution = input_resolution + for block in self.block_list: + resolution = block.get_output_resolution(resolution) + return resolution + + def get_FLOPs(self, input_resolution): + resolution = input_resolution + flops = 0.0 + for block in self.block_list: + flops += block.get_FLOPs(resolution) + resolution = block.get_output_resolution(resolution) + return flops + + + def get_model_size(self): + model_size = 0.0 + for block in self.block_list: + model_size += block.get_model_size() + return model_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and \ 
+ (isinstance(self.block_list[0], basic_blocks.ConvKX) or isinstance(self.block_list[0], basic_blocks.ConvDW)) and \ + isinstance(self.block_list[1], basic_blocks.BN): + self.block_list[1].set_in_channels(last_channels) + + def encode_structure(self): + return [self.out_channels, self.sub_layers] + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + sub_layers = int(param_str_split[3]) + return cls(in_channels=in_channels, out_channels=out_channels, stride=stride, + sub_layers=sub_layers, block_name=tmp_block_name, no_create=no_create, + **kwargs),\ + s[idx + 1:] + + +class SuperConvKXBNRELU(PlainNetSuperBlockClass): + def __init__(self, in_channels=None, out_channels=None, stride=None, sub_layers=None, kernel_size=None, + no_create=False, no_reslink=False, no_BN=False, **kwargs): + super(SuperConvKXBNRELU, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + + # if self.no_reslink: + # print('Warning! {} use no_reslink'.format(str(self))) + # if self.no_BN: + # print('Warning! {} use no_BN'.format(str(self))) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + if not self.no_BN: + inner_str = 'ConvKX({},{},{},{})BN({})RELU({})'.format(last_channels, self.out_channels, + self.kernel_size, + current_stride, + self.out_channels, self.out_channels) + else: + inner_str = 'ConvKX({},{},{},{})RELU({})'.format(last_channels, self.out_channels, + self.kernel_size, + current_stride, + self.out_channels) + full_str += inner_str + + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str(full_str, no_create=no_create, + no_reslink=no_reslink, no_BN=no_BN) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def forward_pre_relu(self, x): + output = x + for block in self.block_list[0:-1]: + output = block(output) + return output + + def __str__(self): + return type(self).__name__ + '({},{},{},{})'.format(self.in_channels, self.out_channels, + self.stride, self.sub_layers) + + def __repr__(self): + return type(self).__name__ + '({}|in={},out={},stride={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, self.sub_layers, self.kernel_size) + + def split(self, split_layer_threshold): + return str(self) + + def structure_scale(self, scale=1.0, channel_scale=None, sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{})'.format(self.in_channels, new_out_channels, + 
self.stride, new_sub_layers) + + + +class SuperConvK1BNRELU(SuperConvKXBNRELU): + def __init__(self, in_channels=None, out_channels=None, stride=None, sub_layers=None, no_create=False, **kwargs): + super(SuperConvK1BNRELU, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + sub_layers=sub_layers, + kernel_size=1, + no_create=no_create, **kwargs) + +class SuperConvK3BNRELU(SuperConvKXBNRELU): + def __init__(self, in_channels=None, out_channels=None, stride=None, sub_layers=None, no_create=False, **kwargs): + super(SuperConvK3BNRELU, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + sub_layers=sub_layers, + kernel_size=3, + no_create=no_create, **kwargs) + +class SuperConvK5BNRELU(SuperConvKXBNRELU): + def __init__(self, in_channels=None, out_channels=None, stride=None, sub_layers=None, no_create=False, **kwargs): + super(SuperConvK5BNRELU, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + sub_layers=sub_layers, + kernel_size=5, + no_create=no_create, **kwargs) + + +class SuperConvK7BNRELU(SuperConvKXBNRELU): + def __init__(self, in_channels=None, out_channels=None, stride=None, sub_layers=None, no_create=False, **kwargs): + super(SuperConvK7BNRELU, self).__init__(in_channels=in_channels, out_channels=out_channels, stride=stride, + sub_layers=sub_layers, + kernel_size=7, + no_create=no_create, **kwargs) + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperConvK1BNRELU': SuperConvK1BNRELU, + 'SuperConvK3BNRELU': SuperConvK3BNRELU, + 'SuperConvK5BNRELU': SuperConvK5BNRELU, + 'SuperConvK7BNRELU': SuperConvK7BNRELU, + + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResIDWEXKX.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResIDWEXKX.py new file mode 100755 index 0000000..1860db9 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResIDWEXKX.py @@ -0,0 +1,486 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import os +import sys +import uuid + +from torch import nn + +from . import global_utils +from .. import PlainNet +#from . 
import _get_right_parentheses_index_ +from .super_blocks import PlainNetSuperBlockClass + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +class SuperResIDWEXKX(PlainNetSuperBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + kernel_size=None, + expension=None, + no_create=False, + no_reslink=False, + no_BN=False, + use_se=False, + **kwargs): + super(SuperResIDWEXKX, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.bottleneck_channels = bottleneck_channels + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.expension = expension + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + + self.use_se = use_se + if self.use_se: + print('---debug use_se in ' + str(self)) + + full_str = '' + last_channels = in_channels + current_stride = stride + print('self.no_reslink:', self.no_reslink, self.no_BN, self.use_se) + for i in range(self.sub_layers): + inner_str = '' + # first DW + dw_channels = global_utils.smart_round(self.bottleneck_channels * + self.expension, + base=8) + inner_str += 'ConvKX({},{},{},{})'.format(last_channels, + dw_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + + inner_str += 'ConvDW({},{},{})'.format(dw_channels, + self.kernel_size, + current_stride) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + if self.use_se: + inner_str += 'SE({})'.format(dw_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(dw_channels, + bottleneck_channels, 1, + 1) + if not self.no_BN: + inner_str += 'BN({})'.format(bottleneck_channels) + # inner_str += 'RELU({})'.format(bottleneck_channels) + + if not self.no_reslink: + if i == 0: + res_str = 'ResBlockProj({})RELU({})'.format( + inner_str, self.out_channels) + else: + res_str = 'ResBlock({})RELU({})'.format( + inner_str, self.out_channels) + + else: + res_str = '{}RELU({})'.format(inner_str, self.out_channels) + + full_str += res_str + + # second DW + inner_str = '' + dw_channels = global_utils.smart_round(self.out_channels * + self.expension, + base=8) + inner_str += 'ConvKX({},{},{},{})'.format(bottleneck_channels, + dw_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + + inner_str += 'ConvDW({},{},{})'.format(dw_channels, + self.kernel_size, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(dw_channels) + inner_str += 'RELU({})'.format(dw_channels) + if self.use_se: + inner_str += 'SE({})'.format(dw_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(dw_channels, + self.out_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + res_str = 'ResBlock({})RELU({})'.format( + inner_str, self.out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, self.out_channels) + + full_str += res_str + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str( + full_str, + no_create=no_create, + no_reslink=no_reslink, + no_BN=no_BN, + **kwargs) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def __str__(self): + return type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, 
self.out_channels, self.stride, + self.bottleneck_channels, self.sub_layers) + + def __repr__(self): + return type( + self + ).__name__ + '({}|in={},out={},stride={},btl_channels={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, self.sub_layers, self.kernel_size) + + def encode_structure(self): + return [self.out_channels, self.sub_layers, self.bottleneck_channels] + + def split(self, split_layer_threshold): + if self.sub_layers >= split_layer_threshold: + new_sublayers_1 = split_layer_threshold // 2 + new_sublayers_2 = self.sub_layers - new_sublayers_1 + new_block_str1 = type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, new_sublayers_1) + new_block_str2 = type(self).__name__ + '({},{},{},{},{})'.format( + self.out_channels, self.out_channels, 1, + self.bottleneck_channels, new_sublayers_2) + return new_block_str1 + new_block_str2 + else: + return str(self) + + def structure_scale(self, + scale=1.0, + channel_scale=None, + sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * + channel_scale) + new_bottleneck_channels = global_utils.smart_round( + self.bottleneck_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, new_out_channels, self.stride, + new_bottleneck_channels, new_sub_layers) + + @classmethod + def create_from_str(cls, s, **kwargs): + assert cls.is_instance_from_str(s) + idx = PlainNet._get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + bottleneck_channels = int(param_str_split[3]) + sub_layers = int(param_str_split[4]) + return cls(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + block_name=tmp_block_name, + **kwargs), s[idx + 1:] + + +class SuperResIDWE1K3(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE1K3, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=3, + expension=1.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE2K3(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE2K3, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=3, + expension=2.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE4K3(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + 
bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE4K3, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=3, + expension=4.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE6K3(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE6K3, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=3, + expension=6.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE1K5(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE1K5, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=5, + expension=1.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE2K5(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE2K5, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=5, + expension=2.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE4K5(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE4K5, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=5, + expension=4.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE6K5(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE6K5, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=5, + expension=6.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE1K7(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE1K7, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=7, + expension=1.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE2K7(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE2K7, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=7, + expension=2.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE4K7(SuperResIDWEXKX): + def __init__(self, + 
in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE4K7, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=7, + expension=4.0, + no_create=no_create, + **kwargs) + + +class SuperResIDWE6K7(SuperResIDWEXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResIDWE6K7, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=7, + expension=6.0, + no_create=no_create, + **kwargs) + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperResIDWE1K3': SuperResIDWE1K3, + 'SuperResIDWE2K3': SuperResIDWE2K3, + 'SuperResIDWE4K3': SuperResIDWE4K3, + 'SuperResIDWE6K3': SuperResIDWE6K3, + 'SuperResIDWE1K5': SuperResIDWE1K5, + 'SuperResIDWE2K5': SuperResIDWE2K5, + 'SuperResIDWE4K5': SuperResIDWE4K5, + 'SuperResIDWE6K5': SuperResIDWE6K5, + 'SuperResIDWE1K7': SuperResIDWE1K7, + 'SuperResIDWE2K7': SuperResIDWE2K7, + 'SuperResIDWE4K7': SuperResIDWE4K7, + 'SuperResIDWE6K7': SuperResIDWE6K7, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResK1KXK1.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResK1KXK1.py new file mode 100755 index 0000000..881b570 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResK1KXK1.py @@ -0,0 +1,278 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import os +import sys +import uuid + +from torch import nn + +from . 
import global_utils +import PlainNet +from PlainNet import _get_right_parentheses_index_ +from PlainNet.super_blocks import PlainNetSuperBlockClass + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +class SuperResK1KXK1(PlainNetSuperBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + kernel_size=None, + no_create=False, + no_reslink=False, + no_BN=False, + use_se=False, + **kwargs): + super(SuperResK1KXK1, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.bottleneck_channels = bottleneck_channels + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + self.use_se = use_se + if self.use_se: + print('---debug use_se in ' + str(self)) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + inner_str = '' + + # first bl-block with reslink + inner_str += 'ConvKX({},{},{},{})'.format(last_channels, + self.bottleneck_channels, + 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, + self.bottleneck_channels, + self.kernel_size, + current_stride) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + if self.use_se: + inner_str += 'SE({})'.format(bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, + self.out_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + if i == 0: + res_str = 'ResBlockProj({})RELU({})'.format( + inner_str, out_channels) + else: + res_str = 'ResBlock({})RELU({})'.format( + inner_str, out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, out_channels) + + full_str += res_str + + # second bl-block with reslink + inner_str = '' + inner_str += 'ConvKX({},{},{},{})'.format(self.out_channels, + self.bottleneck_channels, + 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, + self.bottleneck_channels, + self.kernel_size, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + if self.use_se: + inner_str += 'SE({})'.format(bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, + self.out_channels, 1, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + res_str = 'ResBlock({})RELU({})'.format( + inner_str, out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, out_channels) + + full_str += res_str + + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str( + full_str, + no_create=no_create, + no_reslink=no_reslink, + no_BN=no_BN, + **kwargs) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def __str__(self): + return type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, + 
self.bottleneck_channels, self.sub_layers) + + def __repr__(self): + return type( + self + ).__name__ + '({}|in={},out={},stride={},btl_channels={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, self.sub_layers, self.kernel_size) + + def encode_structure(self): + return [self.out_channels, self.sub_layers, self.bottleneck_channels] + + def split(self, split_layer_threshold): + if self.sub_layers >= split_layer_threshold: + new_sublayers_1 = split_layer_threshold // 2 + new_sublayers_2 = self.sub_layers - new_sublayers_1 + new_block_str1 = type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, new_sublayers_1) + new_block_str2 = type(self).__name__ + '({},{},{},{},{})'.format( + self.out_channels, self.out_channels, 1, + self.bottleneck_channels, new_sublayers_2) + return new_block_str1 + new_block_str2 + else: + return str(self) + + def structure_scale(self, + scale=1.0, + channel_scale=None, + sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * + channel_scale) + new_bottleneck_channels = global_utils.smart_round( + self.bottleneck_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, new_out_channels, self.stride, + new_bottleneck_channels, new_sub_layers) + + @classmethod + def create_from_str(cls, s, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + bottleneck_channels = int(param_str_split[3]) + sub_layers = int(param_str_split[4]) + return cls(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + block_name=tmp_block_name, + **kwargs), s[idx + 1:] + + +class SuperResK1K3K1(SuperResK1KXK1): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResK1K3K1, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=3, + no_create=no_create, + **kwargs) + + +class SuperResK1K5K1(SuperResK1KXK1): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResK1K5K1, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=5, + no_create=no_create, + **kwargs) + + +class SuperResK1K7K1(SuperResK1KXK1): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + 
super(SuperResK1K7K1, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=7, + no_create=no_create, + **kwargs) + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperResK1K3K1': SuperResK1K3K1, + 'SuperResK1K5K1': SuperResK1K5K1, + 'SuperResK1K7K1': SuperResK1K7K1, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResKXKX.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResKXKX.py new file mode 100755 index 0000000..d21327e --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/SuperResKXKX.py @@ -0,0 +1,246 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import os +import sys +import uuid + +from torch import nn + +from . import global_utils +import PlainNet +from PlainNet import _get_right_parentheses_index_ +from PlainNet.super_blocks import PlainNetSuperBlockClass + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +class SuperResKXKX(PlainNetSuperBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + kernel_size=None, + no_create=False, + no_reslink=False, + no_BN=False, + use_se=False, + **kwargs): + super(SuperResKXKX, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.bottleneck_channels = bottleneck_channels + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + self.use_se = use_se + if self.use_se: + print('---debug use_se in ' + str(self)) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + inner_str = '' + + inner_str += 'ConvKX({},{},{},{})'.format(last_channels, + self.bottleneck_channels, + self.kernel_size, + current_stride) + if not self.no_BN: + inner_str += 'BN({})'.format(self.bottleneck_channels) + inner_str += 'RELU({})'.format(self.bottleneck_channels) + if self.use_se: + inner_str += 'SE({})'.format(bottleneck_channels) + + inner_str += 'ConvKX({},{},{},{})'.format(self.bottleneck_channels, + self.out_channels, + self.kernel_size, 1) + if not self.no_BN: + inner_str += 'BN({})'.format(self.out_channels) + + if not self.no_reslink: + if i == 0: + res_str = 'ResBlockProj({})RELU({})'.format( + inner_str, out_channels) + else: + res_str = 'ResBlock({})RELU({})'.format( + inner_str, out_channels) + else: + res_str = '{}RELU({})'.format(inner_str, out_channels) + + full_str += res_str + + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = PlainNet.create_netblock_list_from_str( + full_str, + no_create=no_create, + no_reslink=no_reslink, + no_BN=no_BN, + **kwargs) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def forward_pre_relu(self, x): + output = x + for block in self.block_list[0:-1]: + output = block(output) + return output + + def __str__(self): + return type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, self.sub_layers) + + def __repr__(self): + return type( + 
self + ).__name__ + '({}|in={},out={},stride={},btl_channels={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, self.sub_layers, self.kernel_size) + + def encode_structure(self): + return [self.out_channels, self.sub_layers, self.bottleneck_channels] + + def split(self, split_layer_threshold): + if self.sub_layers >= split_layer_threshold: + new_sublayers_1 = split_layer_threshold // 2 + new_sublayers_2 = self.sub_layers - new_sublayers_1 + new_block_str1 = type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, + self.bottleneck_channels, new_sublayers_1) + new_block_str2 = type(self).__name__ + '({},{},{},{},{})'.format( + self.out_channels, self.out_channels, 1, + self.bottleneck_channels, new_sublayers_2) + return new_block_str1 + new_block_str2 + else: + return str(self) + + def structure_scale(self, + scale=1.0, + channel_scale=None, + sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * + channel_scale) + new_bottleneck_channels = global_utils.smart_round( + self.bottleneck_channels * channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{},{})'.format( + self.in_channels, new_out_channels, self.stride, + new_bottleneck_channels, new_sub_layers) + + @classmethod + def create_from_str(cls, s, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + bottleneck_channels = int(param_str_split[3]) + sub_layers = int(param_str_split[4]) + return cls(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + block_name=tmp_block_name, + **kwargs), s[idx + 1:] + + +class SuperResK3K3(SuperResKXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResK3K3, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=3, + no_create=no_create, + **kwargs) + + +class SuperResK5K5(SuperResKXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResK5K5, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=5, + no_create=no_create, + **kwargs) + + +class SuperResK7K7(SuperResKXKX): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + bottleneck_channels=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperResK7K7, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + 
bottleneck_channels=bottleneck_channels, + sub_layers=sub_layers, + kernel_size=7, + no_create=no_create, + **kwargs) + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperResK3K3': SuperResK3K3, + 'SuperResK5K5': SuperResK5K5, + 'SuperResK7K7': SuperResK7K7, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/__init__.py new file mode 100755 index 0000000..a9acf4d --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/__init__.py @@ -0,0 +1,327 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import argparse +import os +import sys + +import torch +from torch import nn + +from . import (SuperResIDWEXKX, SuperResK1KXK1, SuperResKXKX, + basic_blocks, super_blocks) + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +_all_netblocks_dict_ = {} + + +def parse_cmd_options(argv, opt=None): + parser = argparse.ArgumentParser() + parser.add_argument('--plainnet_struct', + type=str, + default=None, + help='PlainNet structure string') + parser.add_argument('--plainnet_struct_txt', + type=str, + default=None, + help='PlainNet structure file name') + parser.add_argument('--num_classes', + type=int, + default=None, + help='how to prune') + module_opt, _ = parser.parse_known_args(argv) + + return module_opt + + +def _get_right_parentheses_index_(s): + # assert s[0] == '(' + left_paren_count = 0 + for index, x in enumerate(s): + + if x == '(': + left_paren_count += 1 + elif x == ')': + left_paren_count -= 1 + if left_paren_count == 0: + return index + else: + pass + return None + + +def pretty_format(plainnet_str, indent=2): + the_formated_str = '' + indent_str = '' + if indent >= 1: + indent_str = ''.join([' '] * indent) + + # print(indent_str, end='') + the_formated_str += indent_str + + s = plainnet_str + while len(s) > 0: + if s[0] == ';': + # print(';\n' + indent_str, end='') + the_formated_str += ';\n' + indent_str + s = s[1:] + + left_par_idx = s.find('(') + assert left_par_idx is not None + right_par_idx = _get_right_parentheses_index_(s) + the_block_class_name = s[0:left_par_idx] + + if the_block_class_name in [ + 'MultiSumBlock', 'MultiCatBlock', 'MultiGroupBlock' + ]: + # print('\n' + indent_str + the_block_class_name + '(') + sub_str = s[left_par_idx + 1:right_par_idx] + + # find block_name + tmp_idx = sub_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'no_name' + else: + tmp_block_name = sub_str[0:tmp_idx] + sub_str = sub_str[tmp_idx + 1:] + + if len(tmp_block_name) > 8: + tmp_block_name = tmp_block_name[0:4] + tmp_block_name[-4:] + + the_formated_str += '\n' + indent_str + the_block_class_name + '({}|\n'.format( + tmp_block_name) + + the_formated_str += pretty_format(sub_str, indent + 1) + # print('\n' + indent_str + ')') + # print(indent_str, end='') + the_formated_str += '\n' + indent_str + ')\n' + indent_str + elif the_block_class_name in ['ResBlock']: + # print('\n' + indent_str + the_block_class_name + '(') + in_channels = None + the_stride = None + sub_str = s[left_par_idx + 1:right_par_idx] + # find block_name + tmp_idx = sub_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'no_name' + else: + tmp_block_name = sub_str[0:tmp_idx] + sub_str = sub_str[tmp_idx + 1:] + + first_comma_index = sub_str.find(',') + if 
first_comma_index < 0 or not sub_str[ + 0:first_comma_index].isdigit(): + in_channels = None + else: + in_channels = int(sub_str[0:first_comma_index]) + sub_str = sub_str[first_comma_index + 1:] + second_comma_index = sub_str.find(',') + if second_comma_index < 0 or not sub_str[ + 0:second_comma_index].isdigit(): + the_stride = None + else: + the_stride = int(sub_str[0:second_comma_index]) + sub_str = sub_str[second_comma_index + 1:] + pass + pass + + if len(tmp_block_name) > 8: + tmp_block_name = tmp_block_name[0:4] + tmp_block_name[-4:] + + the_formated_str += '\n' + indent_str + the_block_class_name + '({}|'.format( + tmp_block_name) + if in_channels is not None: + the_formated_str += '{},'.format(in_channels) + else: + the_formated_str += ',' + + if the_stride is not None: + the_formated_str += '{},'.format(the_stride) + else: + the_formated_str += ',' + + the_formated_str += '\n' + + the_formated_str += pretty_format(sub_str, indent + 1) + # print('\n' + indent_str + ')') + # print(indent_str, end='') + the_formated_str += '\n' + indent_str + ')\n' + indent_str + else: + # print(s[0:right_par_idx+1], end='') + sub_str = s[left_par_idx + 1:right_par_idx] + # find block_name + tmp_idx = sub_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'no_name' + else: + tmp_block_name = sub_str[0:tmp_idx] + sub_str = sub_str[tmp_idx + 1:] + + if len(tmp_block_name) > 8: + tmp_block_name = tmp_block_name[0:4] + tmp_block_name[-4:] + + the_formated_str += the_block_class_name + '({}|'.format( + tmp_block_name) + sub_str + ')' + + s = s[right_par_idx + 1:] + pass # end while + + return the_formated_str + + +def _create_netblock_list_from_str_(s, no_create=False, **kwargs): + block_list = [] + while len(s) > 0: + is_found_block_class = False + for the_block_class_name in _all_netblocks_dict_.keys(): + tmp_idx = s.find('(') + if tmp_idx > 0 and s[0:tmp_idx] == the_block_class_name: + is_found_block_class = True + the_block_class = _all_netblocks_dict_[the_block_class_name] + the_block, remaining_s = the_block_class.create_from_str( + s, no_create=no_create, **kwargs) + if the_block is not None: + block_list.append(the_block) + s = remaining_s + if len(s) > 0 and s[0] == ';': + return block_list, s[1:] + break + pass # end if + pass # end for + assert is_found_block_class + pass # end while + return block_list, '' + + +def create_netblock_list_from_str(s, no_create=False, **kwargs): + the_list, remaining_s = _create_netblock_list_from_str_( + s, no_create=no_create, **kwargs) + assert len(remaining_s) == 0 + return the_list + + +def add_SE_block(structure_str: str): + new_str = '' + RELU = 'RELU' + offset = 4 + + idx = structure_str.find(RELU) + while idx >= 0: + new_str += structure_str[0:idx] + structure_str = structure_str[idx:] + r_idx = _get_right_parentheses_index_(structure_str[offset:]) + offset + channels = structure_str[offset + 1:r_idx] + new_str += 'RELU({})SE({})'.format(channels, channels) + structure_str = structure_str[r_idx + 1:] + idx = structure_str.find(RELU) + pass + + new_str += structure_str + return new_str + + +class PlainNet(nn.Module): + def __init__(self, + argv=None, + opt=None, + num_classes=None, + plainnet_struct=None, + no_create=False, + **kwargs): + super(PlainNet, self).__init__() + self.argv = argv + self.opt = opt + self.num_classes = num_classes + self.plainnet_struct = plainnet_struct + + self.module_opt = parse_cmd_options(self.argv) + + if self.num_classes is None: + self.num_classes = self.module_opt.num_classes + + if self.plainnet_struct is None and 
self.module_opt.plainnet_struct is not None:
+            self.plainnet_struct = self.module_opt.plainnet_struct
+
+        if self.plainnet_struct is None:
+            # load structure from text file
+            if hasattr(opt, 'plainnet_struct_txt'
+                       ) and opt.plainnet_struct_txt is not None:
+                plainnet_struct_txt = opt.plainnet_struct_txt
+            else:
+                plainnet_struct_txt = self.module_opt.plainnet_struct_txt
+
+            if plainnet_struct_txt is not None:
+                with open(plainnet_struct_txt, 'r') as fid:
+                    the_line = fid.readlines()[0].strip()
+                    self.plainnet_struct = the_line
+                pass
+
+        if self.plainnet_struct is None:
+            return
+
+        the_s = self.plainnet_struct  # type: str
+
+        block_list, remaining_s = _create_netblock_list_from_str_(
+            the_s, no_create=no_create, **kwargs)
+        assert len(remaining_s) == 0
+
+        self.block_list = block_list
+        if not no_create:
+            self.module_list = nn.ModuleList(block_list)  # register
+
+    def forward(self, x):
+        output = x
+        for the_block in self.block_list:
+            output = the_block(output)
+        return output
+
+    def __str__(self):
+        s = ''
+        for the_block in self.block_list:
+            s += str(the_block)
+        return s
+
+    def __repr__(self):
+        return str(self)
+
+    def get_FLOPs(self, input_resolution):
+        the_res = input_resolution
+        the_flops = 0
+        for the_block in self.block_list:
+            the_flops += the_block.get_FLOPs(the_res)
+            the_res = the_block.get_output_resolution(the_res)
+
+        return the_flops
+
+    def get_model_size(self):
+        the_size = 0
+        for the_block in self.block_list:
+            the_size += the_block.get_model_size()
+
+        return the_size
+
+    def replace_block(self, block_id, new_block):
+        self.block_list[block_id] = new_block
+        if block_id + 1 < len(self.block_list):
+            self.block_list[block_id + 1].set_in_channels(
+                new_block.out_channels)
+
+        self.module_list = nn.ModuleList(self.block_list)
+
+
+_all_netblocks_dict_ = basic_blocks.register_netblocks_dict(
+    _all_netblocks_dict_)
+
+_all_netblocks_dict_ = super_blocks.register_netblocks_dict(
+    _all_netblocks_dict_)
+_all_netblocks_dict_ = SuperResKXKX.register_netblocks_dict(
+    _all_netblocks_dict_)
+
+_all_netblocks_dict_ = SuperResK1KXK1.register_netblocks_dict(
+    _all_netblocks_dict_)
+
+_all_netblocks_dict_ = SuperResIDWEXKX.register_netblocks_dict(
+    _all_netblocks_dict_)
diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/basic_blocks.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/basic_blocks.py
new file mode 100755
index 0000000..c69546e
--- /dev/null
+++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/basic_blocks.py
@@ -0,0 +1,1711 @@
+'''
+Copyright (C) 2010-2021 Alibaba Group Holding Limited.
+'''
+
+import os
+import sys
+import uuid
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from PlainNet import _create_netblock_list_from_str_
+from PlainNet import _get_right_parentheses_index_
+
+
+class PlainNetBasicBlockClass(nn.Module):
+    def __init__(self,
+                 in_channels=None,
+                 out_channels=None,
+                 stride=1,
+                 no_create=False,
+                 block_name=None,
+                 **kwargs):
+        super(PlainNetBasicBlockClass, self).__init__(**kwargs)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.stride = stride
+        self.no_create = no_create
+        self.block_name = block_name
+        if self.block_name is None:
+            self.block_name = 'uuid{}'.format(uuid.uuid4().hex)
+
+    def forward(self, x):
+        raise RuntimeError('Not implemented')
+
+    def __str__(self):
+        return type(self).__name__ + '({},{},{})'.format(
+            self.in_channels, self.out_channels, self.stride)
+
+    def __repr__(self):
+        return type(self).__name__ + '({}|{},{},{})'.format(
+            self.block_name, self.in_channels, self.out_channels, self.stride)
+
+    def get_output_resolution(self, input_resolution):
+        raise RuntimeError('Not implemented')
+
+    def get_FLOPs(self, input_resolution):
+        raise RuntimeError('Not implemented')
+
+    def get_model_size(self):
+        raise RuntimeError('Not implemented')
+
+    def set_in_channels(self, c):
+        raise RuntimeError('Not implemented')
+
+    @classmethod
+    def create_from_str(cls, s, no_create=False, **kwargs):
+        assert PlainNetBasicBlockClass.is_instance_from_str(s)
+        idx = _get_right_parentheses_index_(s)
+        assert idx is not None
+        param_str = s[len(cls.__name__ + '('):idx]
+
+        # find block_name
+        tmp_idx = param_str.find('|')
+        if tmp_idx < 0:
+            tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex)
+        else:
+            tmp_block_name = param_str[0:tmp_idx]
+            param_str = param_str[tmp_idx + 1:]
+
+        param_str_split = param_str.split(',')
+        in_channels = int(param_str_split[0])
+        out_channels = int(param_str_split[1])
+        stride = int(param_str_split[2])
+        return cls(in_channels=in_channels,
+                   out_channels=out_channels,
+                   stride=stride,
+                   block_name=tmp_block_name,
+                   no_create=no_create), s[idx + 1:]
+
+    @classmethod
+    def is_instance_from_str(cls, s):
+        if s.startswith(cls.__name__ + '(') and s[-1] == ')':
+            return True
+        else:
+            return False
+
+
+class AdaptiveAvgPool(PlainNetBasicBlockClass):
+    def __init__(self, out_channels, output_size, no_create=False, **kwargs):
+        super(AdaptiveAvgPool, self).__init__(**kwargs)
+        self.in_channels = out_channels
+        self.out_channels = out_channels
+        self.output_size = output_size
+        self.no_create = no_create
+        if not no_create:
+            self.netblock = nn.AdaptiveAvgPool2d(
+                output_size=(self.output_size, self.output_size))
+
+    def forward(self, x):
+        return self.netblock(x)
+
+    def __str__(self):
+        return type(self).__name__ + '({},{})'.format(
+            self.out_channels // self.output_size**2, self.output_size)
+
+    def __repr__(self):
+        return type(self).__name__ + '({}|{},{})'.format(
+            self.block_name, self.out_channels // self.output_size**2,
+            self.output_size)
+
+    def get_output_resolution(self, input_resolution):
+        return self.output_size
+
+    def get_FLOPs(self, input_resolution):
+        return 0
+
+    def get_model_size(self):
+        return 0
+
+    def set_in_channels(self, c):
+        self.in_channels = c
+        self.out_channels = c
+
+    @classmethod
+    def create_from_str(cls, s, no_create=False, **kwargs):
+        assert AdaptiveAvgPool.is_instance_from_str(s)
+        idx = _get_right_parentheses_index_(s)
+        assert idx is not None
param_str = s[len('AdaptiveAvgPool('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + out_channels = int(param_str_split[0]) + output_size = int(param_str_split[1]) + return AdaptiveAvgPool(out_channels=out_channels, + output_size=output_size, + block_name=tmp_block_name, + no_create=no_create), s[idx + 1:] + + +class BN(PlainNetBasicBlockClass): + def __init__(self, + out_channels=None, + copy_from=None, + no_create=False, + **kwargs): + super(BN, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.BatchNorm2d) + self.in_channels = copy_from.weight.shape[0] + self.out_channels = copy_from.weight.shape[0] + assert out_channels is None or out_channels == self.out_channels + self.netblock = copy_from + + else: + self.in_channels = out_channels + self.out_channels = out_channels + if no_create: + return + else: + self.netblock = nn.BatchNorm2d(num_features=self.out_channels) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'BN({})'.format(self.out_channels) + + def __repr__(self): + return 'BN({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return input_resolution**2 * self.out_channels + + def get_model_size(self): + return self.out_channels + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + if not self.no_create: + self.netblock = nn.BatchNorm2d(num_features=self.out_channels) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert BN.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('BN('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + out_channels = int(param_str) + return BN(out_channels=out_channels, + block_name=tmp_block_name, + no_create=no_create), s[idx + 1:] + + +class ConvKX(PlainNetBasicBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + kernel_size=None, + stride=None, + groups=1, + copy_from=None, + no_create=False, + **kwargs): + super(ConvKX, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.Conv2d) + self.in_channels = copy_from.in_channels + self.out_channels = copy_from.out_channels + self.kernel_size = copy_from.kernel_size[0] + self.stride = copy_from.stride[0] + self.groups = copy_from.groups + assert in_channels is None or in_channels == self.in_channels + assert out_channels is None or out_channels == self.out_channels + assert kernel_size is None or kernel_size == self.kernel_size + assert stride is None or stride == self.stride + self.netblock = copy_from + else: + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.groups = groups + self.kernel_size = kernel_size + self.padding = (self.kernel_size - 1) // 2 + if no_create or self.in_channels == 0 or self.out_channels == 0 or self.kernel_size == 0 \ + or self.stride == 0: + return + else: + 
self.netblock = nn.Conv2d(in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + bias=False, + groups=self.groups) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return type(self).__name__ + '({},{},{},{})'.format( + self.in_channels, self.out_channels, self.kernel_size, self.stride) + + def __repr__(self): + return type(self).__name__ + '({}|{},{},{},{})'.format( + self.block_name, self.in_channels, self.out_channels, + self.kernel_size, self.stride) + + def get_output_resolution(self, input_resolution): + return input_resolution // self.stride + + def get_FLOPs(self, input_resolution): + return self.in_channels * self.out_channels * self.kernel_size**2 * input_resolution**2 // self.stride**2 // self.groups + + def get_model_size(self): + return self.in_channels * self.out_channels * self.kernel_size**2 // self.groups + + def set_in_channels(self, c): + self.in_channels = c + if not self.no_create: + self.netblock = nn.Conv2d(in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + bias=False) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + split_str = param_str.split(',') + in_channels = int(split_str[0]) + out_channels = int(split_str[1]) + kernel_size = int(split_str[2]) + stride = int(split_str[3]) + return cls(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class ConvDW(PlainNetBasicBlockClass): + def __init__(self, + out_channels=None, + kernel_size=None, + stride=None, + copy_from=None, + no_create=False, + **kwargs): + super(ConvDW, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.Conv2d) + self.in_channels = copy_from.in_channels + self.out_channels = copy_from.out_channels + self.kernel_size = copy_from.kernel_size[0] + self.stride = copy_from.stride[0] + assert self.in_channels == self.out_channels + assert out_channels is None or out_channels == self.out_channels + assert kernel_size is None or kernel_size == self.kernel_size + assert stride is None or stride == self.stride + + self.netblock = copy_from + else: + + self.in_channels = out_channels + self.out_channels = out_channels + self.stride = stride + self.kernel_size = kernel_size + + self.padding = (self.kernel_size - 1) // 2 + if no_create or self.in_channels == 0 or self.out_channels == 0 or self.kernel_size == 0 \ + or self.stride == 0: + return + else: + self.netblock = nn.Conv2d(in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + bias=False, + groups=self.in_channels) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'ConvDW({},{},{})'.format(self.out_channels, self.kernel_size, + self.stride) + + def __repr__(self): + return 
'ConvDW({}|{},{},{})'.format(self.block_name, self.out_channels, + self.kernel_size, self.stride) + + def get_output_resolution(self, input_resolution): + return input_resolution // self.stride + + def get_FLOPs(self, input_resolution): + return self.out_channels * self.kernel_size**2 * input_resolution**2 // self.stride**2 + + def get_model_size(self): + return self.out_channels * self.kernel_size**2 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = self.in_channels + if not self.no_create: + self.netblock = nn.Conv2d(in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + bias=False, + groups=self.in_channels) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert ConvDW.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('ConvDW('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + split_str = param_str.split(',') + out_channels = int(split_str[0]) + kernel_size = int(split_str[1]) + stride = int(split_str[2]) + return ConvDW(out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class ConvKXG2(ConvKX): + def __init__(self, + in_channels=None, + out_channels=None, + kernel_size=None, + stride=None, + copy_from=None, + no_create=False, + **kwargs): + super(ConvKXG2, self).__init__(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + copy_from=copy_from, + no_create=no_create, + groups=2, + **kwargs) + + +class ConvKXG4(ConvKX): + def __init__(self, + in_channels=None, + out_channels=None, + kernel_size=None, + stride=None, + copy_from=None, + no_create=False, + **kwargs): + super(ConvKXG4, self).__init__(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + copy_from=copy_from, + no_create=no_create, + groups=4, + **kwargs) + + +class ConvKXG8(ConvKX): + def __init__(self, + in_channels=None, + out_channels=None, + kernel_size=None, + stride=None, + copy_from=None, + no_create=False, + **kwargs): + super(ConvKXG8, self).__init__(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + copy_from=copy_from, + no_create=no_create, + groups=8, + **kwargs) + + +class ConvKXG16(ConvKX): + def __init__(self, + in_channels=None, + out_channels=None, + kernel_size=None, + stride=None, + copy_from=None, + no_create=False, + **kwargs): + super(ConvKXG16, self).__init__(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + copy_from=copy_from, + no_create=no_create, + groups=16, + **kwargs) + + +class ConvKXG32(ConvKX): + def __init__(self, + in_channels=None, + out_channels=None, + kernel_size=None, + stride=None, + copy_from=None, + no_create=False, + **kwargs): + super(ConvKXG32, self).__init__(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + copy_from=copy_from, + no_create=no_create, + groups=32, + **kwargs) + + +class Flatten(PlainNetBasicBlockClass): + def __init__(self, out_channels, no_create=False, **kwargs): + super(Flatten, self).__init__(**kwargs) + 
self.in_channels = out_channels + self.out_channels = out_channels + self.no_create = no_create + + def forward(self, x): + return torch.flatten(x, 1) + + def __str__(self): + return 'Flatten({})'.format(self.out_channels) + + def __repr__(self): + return 'Flatten({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return 1 + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Flatten.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('Flatten('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return Flatten(out_channels=out_channels, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class Linear(PlainNetBasicBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + bias=True, + copy_from=None, + no_create=False, + **kwargs): + super(Linear, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + assert isinstance(copy_from, nn.Linear) + self.in_channels = copy_from.weight.shape[1] + self.out_channels = copy_from.weight.shape[0] + self.use_bias = copy_from.bias is not None + assert in_channels is None or in_channels == self.in_channels + assert out_channels is None or out_channels == self.out_channels + + self.netblock = copy_from + else: + + self.in_channels = in_channels + self.out_channels = out_channels + self.use_bias = bias + if not no_create: + self.netblock = nn.Linear(self.in_channels, + self.out_channels, + bias=self.use_bias) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'Linear({},{},{})'.format(self.in_channels, self.out_channels, + int(self.use_bias)) + + def __repr__(self): + return 'Linear({}|{},{},{})'.format(self.block_name, self.in_channels, + self.out_channels, + int(self.use_bias)) + + def get_output_resolution(self, input_resolution): + assert input_resolution == 1 + return 1 + + def get_FLOPs(self, input_resolution): + return self.in_channels * self.out_channels + + def get_model_size(self): + return self.in_channels * self.out_channels + int(self.use_bias) + + def set_in_channels(self, c): + self.in_channels = c + if not self.no_create: + self.netblock = nn.Linear(self.in_channels, + self.out_channels, + bias=self.use_bias) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Linear.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('Linear('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + split_str = param_str.split(',') + in_channels = int(split_str[0]) + out_channels = int(split_str[1]) + use_bias = int(split_str[2]) + + return Linear(in_channels=in_channels, + out_channels=out_channels, + bias=use_bias == 1, + block_name=tmp_block_name, + no_create=no_create), s[idx + 1:] + + +class MaxPool(PlainNetBasicBlockClass): + def 
__init__(self, + out_channels, + kernel_size, + stride, + no_create=False, + **kwargs): + super(MaxPool, self).__init__(**kwargs) + self.in_channels = out_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = (kernel_size - 1) // 2 + self.no_create = no_create + if not no_create: + self.netblock = nn.MaxPool2d(kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding) + + def forward(self, x): + return self.netblock(x) + + def __str__(self): + return 'MaxPool({},{},{})'.format(self.out_channels, self.kernel_size, + self.stride) + + def __repr__(self): + return 'MaxPool({}|{},{},{})'.format(self.block_name, + self.out_channels, + self.kernel_size, self.stride) + + def get_output_resolution(self, input_resolution): + return input_resolution // self.stride + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + if not self.no_create: + self.netblock = nn.MaxPool2d(kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert MaxPool.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('MaxPool('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + out_channels = int(param_str_split[0]) + kernel_size = int(param_str_split[1]) + stride = int(param_str_split[2]) + return MaxPool(out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class Sequential(PlainNetBasicBlockClass): + def __init__(self, block_list, no_create=False, **kwargs): + super(Sequential, self).__init__(**kwargs) + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) + self.in_channels = block_list[0].in_channels + self.out_channels = block_list[-1].out_channels + self.no_create = no_create + res = 1024 + for block in self.block_list: + res = block.get_output_resolution(res) + self.stride = 1024 // res + + def forward(self, x): + output = x + for inner_block in self.block_list: + output = inner_block(output) + return output + + def __str__(self): + s = 'Sequential(' + for inner_block in self.block_list: + s += str(inner_block) + s += ')' + return s + + def __repr__(self): + return str(self) + + def get_output_resolution(self, input_resolution): + the_res = input_resolution + for the_block in self.block_list: + the_res = the_block.get_output_resolution(the_res) + return the_res + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and isinstance(self.block_list[1], BN): + 
self.block_list[1].set_in_channels(last_channels) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Sequential.is_instance_from_str(s) + the_right_paraen_idx = _get_right_parentheses_index_(s) + param_str = s[len('Sequential(') + 1:the_right_paraen_idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + assert len(remaining_s) == 0 + if the_block_list is None or len(the_block_list) == 0: + return None, '' + return Sequential(block_list=the_block_list, + no_create=no_create, + block_name=tmp_block_name), '' + + +class MultiSumBlock(PlainNetBasicBlockClass): + def __init__(self, block_list, no_create=False, **kwargs): + super(MultiSumBlock, self).__init__(**kwargs) + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) + self.in_channels = np.max([x.in_channels for x in block_list]) + self.out_channels = np.max([x.out_channels for x in block_list]) + self.no_create = no_create + + res = 1024 + res = self.block_list[0].get_output_resolution(res) + self.stride = 1024 // res + + def forward(self, x): + output = self.block_list[0](x) + for inner_block in self.block_list[1:]: + output2 = inner_block(x) + output = output + output2 + return output + + def __str__(self): + s = 'MultiSumBlock({}|'.format(self.block_name) + for inner_block in self.block_list: + s += str(inner_block) + ';' + s = s[:-1] + s += ')' + return s + + def __repr__(self): + return str(self) + + def get_output_resolution(self, input_resolution): + the_res = self.block_list[0].get_output_resolution(input_resolution) + for the_block in self.block_list: + assert the_res == the_block.get_output_resolution(input_resolution) + + return the_res + + def get_FLOPs(self, input_resolution): + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(input_resolution) + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + for the_block in self.block_list: + the_block.set_in_channels(c) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert MultiSumBlock.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('MultiSumBlock('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + the_s = param_str + + the_block_list = [] + while len(the_s) > 0: + tmp_block_list, remaining_s = _create_netblock_list_from_str_( + the_s, no_create=no_create) + the_s = remaining_s + if tmp_block_list is None: + pass + elif len(tmp_block_list) == 1: + the_block_list.append(tmp_block_list[0]) + else: + the_block_list.append( + Sequential(block_list=tmp_block_list, no_create=no_create)) + pass # end while + + if len(the_block_list) == 0: + return None, s[idx + 1:] + + return MultiSumBlock(block_list=the_block_list, + block_name=tmp_block_name, + no_create=no_create), s[idx + 1:] + + +class MultiCatBlock(PlainNetBasicBlockClass): + def __init__(self, block_list, no_create=False, **kwargs): + 
super(MultiCatBlock, self).__init__(**kwargs) + self.block_list = block_list + if not no_create: + self.module_list = nn.ModuleList(block_list) + self.in_channels = np.max([x.in_channels for x in block_list]) + self.out_channels = np.sum([x.out_channels for x in block_list]) + self.no_create = no_create + + res = 1024 + res = self.block_list[0].get_output_resolution(res) + self.stride = 1024 // res + + def forward(self, x): + output_list = [] + for inner_block in self.block_list: + output = inner_block(x) + output_list.append(output) + + return torch.cat(output_list, dim=1) + + def __str__(self): + s = 'MultiCatBlock({}|'.format(self.block_name) + for inner_block in self.block_list: + s += str(inner_block) + ';' + + s = s[:-1] + s += ')' + return s + + def __repr__(self): + return str(self) + + def get_output_resolution(self, input_resolution): + the_res = self.block_list[0].get_output_resolution(input_resolution) + for the_block in self.block_list: + assert the_res == the_block.get_output_resolution(input_resolution) + + return the_res + + def get_FLOPs(self, input_resolution): + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(input_resolution) + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + for the_block in self.block_list: + the_block.set_in_channels(c) + self.out_channels = np.sum([x.out_channels for x in self.block_list]) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert MultiCatBlock.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('MultiCatBlock('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + the_s = param_str + + the_block_list = [] + while len(the_s) > 0: + tmp_block_list, remaining_s = _create_netblock_list_from_str_( + the_s, no_create=no_create) + the_s = remaining_s + if tmp_block_list is None: + pass + elif len(tmp_block_list) == 1: + the_block_list.append(tmp_block_list[0]) + else: + the_block_list.append( + Sequential(block_list=tmp_block_list, no_create=no_create)) + pass # end if + pass # end while + + if len(the_block_list) == 0: + return None, s[idx + 1:] + + return MultiCatBlock(block_list=the_block_list, + block_name=tmp_block_name, + no_create=no_create), s[idx + 1:] + + +class RELU(PlainNetBasicBlockClass): + def __init__(self, out_channels, no_create=False, **kwargs): + super(RELU, self).__init__(**kwargs) + self.in_channels = out_channels + self.out_channels = out_channels + self.no_create = no_create + + def forward(self, x): + return F.relu(x) + + def __str__(self): + return 'RELU({})'.format(self.out_channels) + + def __repr__(self): + return 'RELU({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return 0 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert RELU.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('RELU('):idx] + # find block_name + tmp_idx = 
param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return RELU(out_channels=out_channels, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class ResBlock(PlainNetBasicBlockClass): + ''' + ResBlock(in_channles, inner_blocks_str). If in_channels is missing, use block_list[0].in_channels as in_channels + ''' + def __init__(self, + block_list, + in_channels=None, + stride=None, + no_create=False, + **kwargs): + super(ResBlock, self).__init__(**kwargs) + self.block_list = block_list + self.stride = stride + self.no_create = no_create + if not no_create: + self.module_list = nn.ModuleList(block_list) + + if in_channels is None: + self.in_channels = block_list[0].in_channels + else: + self.in_channels = in_channels + self.out_channels = block_list[-1].out_channels + + if self.stride is None: + tmp_input_res = 1024 + tmp_output_res = self.get_output_resolution(tmp_input_res) + self.stride = tmp_input_res // tmp_output_res + + self.proj = None + if self.stride > 1 or self.in_channels != self.out_channels: + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, self.stride), + nn.BatchNorm2d(self.out_channels), + ) + + def forward(self, x): + if len(self.block_list) == 0: + return x + + output = x + for inner_block in self.block_list: + output = inner_block(output) + + if self.proj is not None: + output = output + self.proj(x) + else: + output = output + x + + return output + + def __str__(self): + s = 'ResBlock({},{},'.format(self.in_channels, self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def __repr__(self): + s = 'ResBlock({}|{},{},'.format(self.block_name, self.in_channels, + self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def get_output_resolution(self, input_resolution): + the_res = input_resolution + for the_block in self.block_list: + the_res = the_block.get_output_resolution(the_res) + + return the_res + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + + if self.proj is not None: + the_flops += self.in_channels * self.out_channels * (the_res / self.stride) ** 2 + \ + (the_res / self.stride) ** 2 * self.out_channels + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + if self.proj is not None: + the_size += self.in_channels * self.out_channels + self.out_channels + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and \ + ( isinstance(self.block_list[0], ConvKX) or isinstance(self.block_list[0], ConvDW)) and \ + isinstance(self.block_list[1], BN): + self.block_list[1].set_in_channels(last_channels) + + self.proj = None + if not self.no_create: + if self.stride > 1 or self.in_channels != self.out_channels: + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, + self.stride), + nn.BatchNorm2d(self.out_channels), + ) + self.proj.train() + self.proj.requires_grad_(True) + + @classmethod 
+ def create_from_str(cls, s, no_create=False, **kwargs): + assert ResBlock.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + the_stride = None + param_str = s[len('ResBlock('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + first_comma_index = param_str.find(',') + if first_comma_index < 0 or not param_str[0:first_comma_index].isdigit( + ): # cannot parse in_channels, missing, use default + in_channels = None + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + else: + in_channels = int(param_str[0:first_comma_index]) + param_str = param_str[first_comma_index + 1:] + second_comma_index = param_str.find(',') + if second_comma_index < 0 or not param_str[ + 0:second_comma_index].isdigit(): + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + else: + the_stride = int(param_str[0:second_comma_index]) + param_str = param_str[second_comma_index + 1:] + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + pass + pass + + assert len(remaining_s) == 0 + if the_block_list is None or len(the_block_list) == 0: + return None, s[idx + 1:] + return ResBlock(block_list=the_block_list, + in_channels=in_channels, + stride=the_stride, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class ResBlockProj(PlainNetBasicBlockClass): + ''' + ResBlockProj(in_channles, inner_blocks_str). If in_channels is missing, use block_list[0].in_channels as in_channels + ''' + def __init__(self, + block_list, + in_channels=None, + stride=None, + no_create=False, + **kwargs): + super(ResBlockProj, self).__init__(**kwargs) + self.block_list = block_list + self.stride = stride + self.no_create = no_create + if not no_create: + self.module_list = nn.ModuleList(block_list) + + if in_channels is None: + self.in_channels = block_list[0].in_channels + else: + self.in_channels = in_channels + self.out_channels = block_list[-1].out_channels + + if self.stride is None: + tmp_input_res = 1024 + tmp_output_res = self.get_output_resolution(tmp_input_res) + self.stride = tmp_input_res // tmp_output_res + + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, self.stride), + nn.BatchNorm2d(self.out_channels), + ) + + def forward(self, x): + if len(self.block_list) == 0: + return x + + output = x + for inner_block in self.block_list: + output = inner_block(output) + output = output + self.proj(x) + return output + + def __str__(self): + s = 'ResBlockProj({},{},'.format(self.in_channels, self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def __repr__(self): + s = 'ResBlockProj({}|{},{},'.format(self.block_name, self.in_channels, + self.stride) + for inner_block in self.block_list: + s += str(inner_block) + + s += ')' + return s + + def get_output_resolution(self, input_resolution): + the_res = input_resolution + for the_block in self.block_list: + the_res = the_block.get_output_resolution(the_res) + + return the_res + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + + if self.proj is not None: + the_flops += self.in_channels * 
self.out_channels * (the_res / self.stride) ** 2 + \ + (the_res / self.stride) ** 2 * self.out_channels + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += the_block.get_model_size() + + if self.proj is not None: + the_size += self.in_channels * self.out_channels + self.out_channels + + return the_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and \ + ( isinstance(self.block_list[0], ConvKX) or isinstance(self.block_list[0], ConvDW)) and \ + isinstance(self.block_list[1], BN): + self.block_list[1].set_in_channels(last_channels) + + self.proj = None + if not self.no_create: + if self.stride > 1 or self.in_channels != self.out_channels: + self.proj = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, 1, + self.stride), + nn.BatchNorm2d(self.out_channels), + ) + self.proj.train() + self.proj.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert ResBlockProj.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + the_stride = None + param_str = s[len('ResBlockProj('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + first_comma_index = param_str.find(',') + if first_comma_index < 0 or not param_str[0:first_comma_index].isdigit( + ): # cannot parse in_channels, missing, use default + in_channels = None + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + else: + in_channels = int(param_str[0:first_comma_index]) + param_str = param_str[first_comma_index + 1:] + second_comma_index = param_str.find(',') + if second_comma_index < 0 or not param_str[ + 0:second_comma_index].isdigit(): + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + else: + the_stride = int(param_str[0:second_comma_index]) + param_str = param_str[second_comma_index + 1:] + the_block_list, remaining_s = _create_netblock_list_from_str_( + param_str, no_create=no_create) + pass + pass + + assert len(remaining_s) == 0 + if the_block_list is None or len(the_block_list) == 0: + return None, s[idx + 1:] + return ResBlockProj(block_list=the_block_list, + in_channels=in_channels, + stride=the_stride, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class SE(PlainNetBasicBlockClass): + def __init__(self, + out_channels=None, + copy_from=None, + no_create=False, + **kwargs): + super(SE, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + raise RuntimeError('Not implemented') + else: + self.in_channels = out_channels + self.out_channels = out_channels + self.se_ratio = 0.25 + self.se_channels = max( + 1, int(round(self.out_channels * self.se_ratio))) + if no_create or self.out_channels == 0: + return + else: + self.netblock = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels=self.out_channels, + out_channels=self.se_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False), nn.BatchNorm2d(self.se_channels), + nn.ReLU(), + nn.Conv2d(in_channels=self.se_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + 
bias=False), nn.BatchNorm2d(self.out_channels), + nn.Sigmoid()) + + def forward(self, x): + se_x = self.netblock(x) + return se_x * x + + def __str__(self): + return 'SE({})'.format(self.out_channels) + + def __repr__(self): + return 'SE({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return self.in_channels * self.se_channels + self.se_channels * self.out_channels + self.out_channels + \ + self.out_channels * input_resolution ** 2 + + def get_model_size(self): + return self.in_channels * self.se_channels + 2 * self.se_channels + self.se_channels * self.out_channels + \ + 2 * self.out_channels + + def set_in_channels(self, c): + self.in_channels = c + if not self.no_create: + self.netblock = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels=self.out_channels, + out_channels=self.se_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False), nn.BatchNorm2d(self.se_channels), + nn.ReLU(), + nn.Conv2d(in_channels=self.se_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False), nn.BatchNorm2d(self.out_channels), + nn.Sigmoid()) + self.netblock.train() + self.netblock.requires_grad_(True) + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert SE.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('SE('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return SE(out_channels=out_channels, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_variables[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) + + +class Swish(PlainNetBasicBlockClass): + def __init__(self, + out_channels=None, + copy_from=None, + no_create=False, + **kwargs): + super(Swish, self).__init__(**kwargs) + self.no_create = no_create + + if copy_from is not None: + raise RuntimeError('Not implemented') + else: + self.in_channels = out_channels + self.out_channels = out_channels + + def forward(self, x): + return SwishImplementation.apply(x) + + def __str__(self): + return 'Swish({})'.format(self.out_channels) + + def __repr__(self): + return 'Swish({}|{})'.format(self.block_name, self.out_channels) + + def get_output_resolution(self, input_resolution): + return input_resolution + + def get_FLOPs(self, input_resolution): + return self.out_channels * input_resolution**2 + + def get_model_size(self): + return 0 + + def set_in_channels(self, c): + self.in_channels = c + self.out_channels = c + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert Swish.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len('Swish('):idx] + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + out_channels = int(param_str) + return 
Swish(out_channels=out_channels, + no_create=no_create, + block_name=tmp_block_name), s[idx + 1:] + + +def _add_bn_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, ConvKX) or isinstance(the_block, ConvDW): + out_channels = the_block.out_channels + new_bn_block = BN(out_channels=out_channels, no_create=True) + new_seq_with_bn = Sequential(block_list=[the_block, new_bn_block], + no_create=True) + new_block_list.append(new_seq_with_bn) + elif hasattr(the_block, 'block_list'): + new_block_list = _add_bn_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + + +def _remove_bn_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, BN): + continue + elif hasattr(the_block, 'block_list'): + new_block_list = _remove_bn_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + + +def _add_se_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, RELU): + out_channels = the_block.out_channels + new_se_block = SE(out_channels=out_channels, no_create=True) + new_seq_with_bn = Sequential(block_list=[the_block, new_se_block], + no_create=True) + new_block_list.append(new_seq_with_bn) + elif hasattr(the_block, 'block_list'): + new_block_list = _add_se_layer_(the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + + +def _replace_relu_with_swish_layer_(block_list): + new_block_list = [] + for the_block in block_list: + if isinstance(the_block, RELU): + out_channels = the_block.out_channels + new_swish_block = Swish(out_channels=out_channels, no_create=True) + new_block_list.append(new_swish_block) + elif hasattr(the_block, 'block_list'): + new_block_list = _replace_relu_with_swish_layer_( + the_block.block_list) + the_block.module_list = nn.ModuleList(new_block_list) + the_block.block_list = new_block_list + new_block_list.append(the_block) + else: + new_block_list.append(the_block) + pass + pass + + return new_block_list + + +def _fuse_convkx_and_bn_(convkx, bn): + the_weight_scale = bn.weight / torch.sqrt(bn.running_var + bn.eps) + convkx.weight[:] = convkx.weight * the_weight_scale.view((-1, 1, 1, 1)) + the_bias_shift = (bn.weight * bn.running_mean) / \ + torch.sqrt(bn.running_var + bn.eps) + bn.weight[:] = 1 + bn.bias[:] = bn.bias - the_bias_shift + bn.running_var[:] = 1.0 - bn.eps + bn.running_mean[:] = 0.0 + + +def _fuse_bn_layer_for_blocks_list_(block_list): + last_block = None # type: ConvKX + with torch.no_grad(): + for the_block in block_list: + if isinstance(the_block, BN): + # assert isinstance(last_block, ConvKX) or isinstance(last_block, ConvDW) + if isinstance(last_block, ConvKX) or isinstance( + last_block, ConvDW): + _fuse_convkx_and_bn_(last_block.netblock, + the_block.netblock) + else: + print( + '--- warning! Cannot fuse BN={} because last_block={}'. 
+ format(the_block, last_block)) + + last_block = None + elif isinstance(the_block, ConvKX) or isinstance( + the_block, ConvDW): + last_block = the_block + elif hasattr(the_block, 'block_list') and the_block.block_list is not None and \ + len(the_block.block_list) > 0: + _fuse_bn_layer_for_blocks_list_(the_block.block_list) + else: + pass + pass + pass + pass # end with + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'AdaptiveAvgPool': AdaptiveAvgPool, + 'BN': BN, + 'ConvDW': ConvDW, + 'ConvKX': ConvKX, + 'ConvKXG2': ConvKXG2, + 'ConvKXG4': ConvKXG4, + 'ConvKXG8': ConvKXG8, + 'ConvKXG16': ConvKXG16, + 'ConvKXG32': ConvKXG32, + 'Flatten': Flatten, + 'Linear': Linear, + 'MaxPool': MaxPool, + 'MultiSumBlock': MultiSumBlock, + 'MultiCatBlock': MultiCatBlock, + 'PlainNetBasicBlockClass': PlainNetBasicBlockClass, + 'RELU': RELU, + 'ResBlock': ResBlock, + 'ResBlockProj': ResBlockProj, + 'Sequential': Sequential, + 'SE': SE, + 'Swish': Swish, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/global_utils.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/global_utils.py new file mode 100755 index 0000000..9608ab8 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/global_utils.py @@ -0,0 +1,428 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import argparse +import ast +import distutils.dir_util +import logging +import os +import pprint + +import numpy as np +import torch + + +def load_py_module_from_path(module_path, module_name=None): + if module_path.find(':') > 0: + split_path = module_path.split(':') + module_path = split_path[0] + function_name = split_path[1] + else: + function_name = None + + if module_name is None: + module_name = module_path.replace('/', '_').replace('.', '_') + + assert os.path.isfile(module_path) + + import importlib.util + spec = importlib.util.spec_from_file_location(module_name, module_path) + any_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(any_module) + if function_name is None: + return any_module + else: + return getattr(any_module, function_name) + + +def mkfilepath(filename): + distutils.dir_util.mkpath(os.path.dirname(filename)) + + +def mkdir(dirname): + distutils.dir_util.mkpath(dirname) + + +def smart_round(x, base=None): + if base is None: + if x > 32 * 8: + round_base = 32 + elif x > 16 * 8: + round_base = 16 + else: + round_base = 8 + else: + round_base = base + + return max(round_base, round(x / float(round_base)) * round_base) + + +def save_pyobj(filename, pyobj): + mkfilepath(filename) + the_s = pprint.pformat(pyobj, indent=2, width=120, compact=True) + with open(filename, 'w') as fid: + fid.write(the_s) + + +def load_pyobj(filename): + with open(filename, 'r') as fid: + the_s = fid.readlines() + + if isinstance(the_s, list): + the_s = ''.join(the_s) + + the_s = the_s.replace('inf', '1e20') + pyobj = ast.literal_eval(the_s) + return pyobj + + +def parse_cmd_options(argv): + + parser = argparse.ArgumentParser( + description='Default command line parser.') + + parser.add_argument('--evaluate_only', + action='store_true', + help='Only evaluation.') + + # apex support + parser.add_argument('--apex', + action='store_true', + help='Mixed precision training using apex.') + parser.add_argument('--apex_loss_scale', + type=str, + default='dynamic', + 
help='loss scale for apex.') + parser.add_argument('--apex_opt_level', type=str, default='O1') + parser.add_argument('--fp16', action='store_true', help='Using FP16.') + + # distributed training + parser.add_argument( + '--dist_mode', + type=str, + default='cpu', + help='Distribution mode, could be cpu, single, horovod, mpi, auto.') + parser.add_argument( + '--independent_training', + action='store_true', + help='When distributed training, use each gpu separately.') + parser.add_argument('--world-size', + default=1, + type=int, + help='number of nodes for distributed training') + parser.add_argument('--rank', + default=-1, + type=int, + help='node rank for distributed training') + + parser.add_argument( + '--gpu', + default=None, + type=int, + help='GPU id to use. Used by torch.distributed package') + parser.add_argument('--sync_bn', + action='store_true', + help='Use synchronized BN.') + + parser.add_argument('--num_job_splits', + default=None, + type=str, + help='Split jobs into multiple groups.') + parser.add_argument('--job_id', + default=None, + type=int, + help='The id of this job node.') + + # horovod setting + parser.add_argument('--fp16_allreduce', + action='store_true', + help='use fp16 compression during allreduce.') + parser.add_argument('--batches_per_allreduce', + type=int, + default=1, + help='number of batches processed locally before ' + 'executing allreduce across workers; it multiplies ' + 'total batch size.') + + # learning rate setting + parser.add_argument('--lr', + default=None, + type=float, + help='initial learning rate per 256 batch size') + parser.add_argument('--target_lr', + default=None, + type=float, + help='target learning rate') + parser.add_argument('--lr_per_256', + default=0.1, + type=float, + help='initial learning rate per 256 batch size') + parser.add_argument('--target_lr_per_256', + default=0.0, + type=float, + help='target learning rate') + parser.add_argument('--lr_mode', + default=None, + type=str, + help='learning rate decay mode.') + parser.add_argument('--warmup', + default=0, + type=int, + help='epochs for warmup.') + parser.add_argument( + '--epoch_offset', + default=0.0, + type=float, + help= + 'Make the learning rate decaying as epochs + epoch_offset but start from epoch_offset. 
' + ) + + parser.add_argument('--lr_stage_list', + default=None, + type=str, + help='stage-wise learning epoch list.') + parser.add_argument('--lr_stage_decay', + default=None, + type=float, + help='stage-wise learning epoch list.') + + # optimizer + parser.add_argument('--optimizer', + default='sgd', + type=str, + help='sgd optimizer') + parser.add_argument('--momentum', default=0.9, type=float, help='momentum') + parser.add_argument('--adadelta_rho', default=0.9, type=float) + parser.add_argument('--adadelta_eps', default=1e-9, type=float) + + parser.add_argument('--wd', + '--weight_decay', + default=4e-5, + type=float, + help='weight decay (default: 4e-5)', + dest='weight_decay') + + # training settings + + parser.add_argument('--resume', + default=None, + type=str, + help='path to latest checkpoint (default: none)') + parser.add_argument('--auto_resume', + action='store_true', + help='auto resume from latest check point') + parser.add_argument('--load_parameters_from', + default=None, + type=str, + help='Only load parameters from pth file.') + parser.add_argument('--strict_load', + action='store_true', + help='Mixed precision training using apex.') + + parser.add_argument('--start_epoch', + default=0, + type=int, + help='manual epoch number (useful on restarts)') + parser.add_argument('--epochs', + default=90, + type=int, + metavar='N', + help='number of total epochs to run') + parser.add_argument('--save_dir', + default=None, + type=str, + help='where to save models.') + parser.add_argument('--save_freq', + default=10, + type=int, + help='How many epochs to save a model.') + parser.add_argument('--print_freq', + default=100, + type=int, + help='print frequency (default: 100)') + + # training tricks + parser.add_argument('--label_smoothing', action='store_true') + parser.add_argument('--weight_init', + type=str, + default='None', + help='How to initialize parameters') + parser.add_argument('--nesterov', action='store_true') + parser.add_argument('--grad_clip', type=float, default=None) + + # BN layer + parser.add_argument('--bn_momentum', type=float, default=None) + parser.add_argument('--bn_eps', type=float, default=None) + + # data augmentation + parser.add_argument('--mixup', action='store_true') + parser.add_argument('--random_erase', action='store_true') + parser.add_argument('--auto_augment', action='store_true') + parser.add_argument('--no_data_augment', action='store_true') + + # for loading dataset + parser.add_argument('--data_dir', + type=str, + default=None, + help='path to dataset') + parser.add_argument('--dataset', + type=str, + default=None, + help='name of the dataset') + parser.add_argument( + '--workers_per_gpu', + default=6, + type=int, + help='number of data loading workers per gpu. 
default 6.') + parser.add_argument( + '--batch_size', + default=None, + type=int, + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel', + ) + + parser.add_argument('--batch_size_per_gpu', + default=None, + type=int, + help='batch size per GPU.') + parser.add_argument('--auto_batch_size', + action='store_true', + help='allow adjust batch size smartly.') + parser.add_argument('--num_cv_folds', + type=int, + default=None, + help='Number of cross-validation folds.') + parser.add_argument('--cv_id', + type=int, + default=None, + help='Current ID of cross-validation fold.') + parser.add_argument('--input_image_size', + type=int, + default=224, + help='input image size.') + parser.add_argument('--input_image_crop', + type=float, + default=0.875, + help='crop ratio of input image') + + # for loading model + parser.add_argument('--arch', + default=None, + help='model names/module to load') + parser.add_argument('--pretrained', + dest='pretrained', + action='store_true', + help='use pre-trained model') + parser.add_argument('--num_classes', + type=int, + default=None, + help='number of classes.') + + # for testing + parser.add_argument('--dataloader_testing', + action='store_true', + help='Testing data loader.') + + # for teacher-student distillation + parser.add_argument('--teacher_input_image_size', type=int, default=None) + parser.add_argument('--teacher_arch', type=str, default=None) + parser.add_argument('--teacher_pretrained', action='store_true') + parser.add_argument('--ts_proj_no_relu', action='store_true') + parser.add_argument('--ts_proj_no_bn', action='store_true') + parser.add_argument('--teacher_load_parameters_from', + type=str, + default=None) + parser.add_argument('--teacher_feature_weight', type=float, default=None) + parser.add_argument('--teacher_logit_weight', type=float, default=None) + parser.add_argument('--ts_clip', type=float, default=None) + parser.add_argument('--target_downsample_ratio', type=int, default=None) + + opt, _ = parser.parse_known_args(argv) + return opt + + +def create_logging(log_filename=None, level=logging.INFO): + if log_filename is not None: + mkfilepath(log_filename) + logging.basicConfig(level=level, + format='%(message)s', + handlers=[ + logging.FileHandler(log_filename), + logging.StreamHandler() + ]) + else: + logging.basicConfig(level=level, + format='%(message)s', + handlers=[logging.StreamHandler()]) + + +class LearningRateScheduler(): + def __init__( + self, + mode, + lr, + target_lr=None, + num_training_instances=None, + stop_epoch=None, + warmup_epoch=None, + stage_list=None, + stage_decay=None, + ): + self.mode = mode + self.lr = lr + self.target_lr = target_lr if target_lr is not None else 0 + self.num_training_instances = num_training_instances if num_training_instances is not None else 1 + self.stop_epoch = stop_epoch if stop_epoch is not None else np.inf + self.warmup_epoch = warmup_epoch if warmup_epoch is not None else 0 + self.stage_list = stage_list if stage_list is not None else None + self.stage_decay = stage_decay if stage_decay is not None else 0 + + self.num_received_training_instances = 0 + + if self.stage_list is not None: + self.stage_list = [int(x) for x in self.stage_list.split(',')] + + def update_lr(self, batch_size): + self.num_received_training_instances += batch_size + + def get_lr(self, num_received_training_instances=None): + if num_received_training_instances is None: + num_received_training_instances = 
self.num_received_training_instances + + # start_instances = self.num_training_instances * self.start_epoch + stop_instances = self.num_training_instances * self.stop_epoch + warmup_instances = self.num_training_instances * self.warmup_epoch + + assert stop_instances > warmup_instances + + current_epoch = self.num_received_training_instances // self.num_training_instances + + if num_received_training_instances < warmup_instances: + return float(num_received_training_instances + + 1) / float(warmup_instances) * self.lr + + ratio_epoch = float(num_received_training_instances - warmup_instances + 1) / \ + float(stop_instances - warmup_instances) + + if self.mode == 'cosine': + factor = (1 - np.math.cos(np.math.pi * ratio_epoch)) / 2.0 + return self.lr + (self.target_lr - self.lr) * factor + elif self.mode == 'stagedecay': + stage_lr = self.lr + for stage_epoch in self.stage_list: + if current_epoch <= stage_epoch: + return stage_lr + else: + stage_lr *= self.stage_decay + pass # end if + pass # end for + return stage_lr + elif self.mode == 'linear': + factor = ratio_epoch + return self.lr + (self.target_lr - self.lr) * factor + else: + raise RuntimeError('Unknown learning rate mode: ' + self.mode) + pass # end if diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/super_blocks.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/super_blocks.py new file mode 100755 index 0000000..edefac1 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/PlainNet_or/super_blocks.py @@ -0,0 +1,278 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import os +import sys +import uuid + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + +from . 
import global_utils +from ..PlainNet import _get_right_parentheses_index_, basic_blocks, create_netblock_list_from_str + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +class PlainNetSuperBlockClass(basic_blocks.PlainNetBasicBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + sub_layers=None, + no_create=False, + **kwargs): + super(PlainNetSuperBlockClass, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.sub_layers = sub_layers + self.no_create = no_create + self.block_list = None + self.module_list = None + + def forward(self, x): + output = x + for block in self.block_list: + output = block(output) + return output + + def __str__(self): + return type(self).__name__ + '({},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, self.sub_layers) + + def __repr__(self): + return type(self).__name__ + '({}|{},{},{},{})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, + self.sub_layers) + + def get_output_resolution(self, input_resolution): + resolution = input_resolution + for block in self.block_list: + resolution = block.get_output_resolution(resolution) + return resolution + + def get_FLOPs(self, input_resolution): + resolution = input_resolution + flops = 0.0 + for block in self.block_list: + flops += block.get_FLOPs(resolution) + resolution = block.get_output_resolution(resolution) + return flops + + def get_model_size(self): + model_size = 0.0 + for block in self.block_list: + model_size += block.get_model_size() + return model_size + + def set_in_channels(self, c): + self.in_channels = c + if len(self.block_list) == 0: + self.out_channels = c + return + + self.block_list[0].set_in_channels(c) + last_channels = self.block_list[0].out_channels + if len(self.block_list) >= 2 and \ + (isinstance(self.block_list[0], basic_blocks.ConvKX) or isinstance(self.block_list[0], basic_blocks.ConvDW)) and \ + isinstance(self.block_list[1], basic_blocks.BN): + self.block_list[1].set_in_channels(last_channels) + + def encode_structure(self): + return [self.out_channels, self.sub_layers] + + @classmethod + def create_from_str(cls, s, no_create=False, **kwargs): + assert cls.is_instance_from_str(s) + idx = _get_right_parentheses_index_(s) + assert idx is not None + param_str = s[len(cls.__name__ + '('):idx] + + # find block_name + tmp_idx = param_str.find('|') + if tmp_idx < 0: + tmp_block_name = 'uuid{}'.format(uuid.uuid4().hex) + else: + tmp_block_name = param_str[0:tmp_idx] + param_str = param_str[tmp_idx + 1:] + + param_str_split = param_str.split(',') + in_channels = int(param_str_split[0]) + out_channels = int(param_str_split[1]) + stride = int(param_str_split[2]) + sub_layers = int(param_str_split[3]) + return cls(in_channels=in_channels, out_channels=out_channels, stride=stride, + sub_layers=sub_layers, block_name=tmp_block_name, no_create=no_create, + **kwargs),\ + s[idx + 1:] + + +class SuperConvKXBNRELU(PlainNetSuperBlockClass): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + sub_layers=None, + kernel_size=None, + no_create=False, + no_reslink=False, + no_BN=False, + **kwargs): + super(SuperConvKXBNRELU, self).__init__(**kwargs) + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + self.sub_layers = sub_layers + self.kernel_size = kernel_size + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + + # if 
self.no_reslink: + # print('Warning! {} use no_reslink'.format(str(self))) + # if self.no_BN: + # print('Warning! {} use no_BN'.format(str(self))) + + full_str = '' + last_channels = in_channels + current_stride = stride + for i in range(self.sub_layers): + if not self.no_BN: + inner_str = 'ConvKX({},{},{},{})BN({})RELU({})'.format( + last_channels, self.out_channels, self.kernel_size, + current_stride, self.out_channels, self.out_channels) + else: + inner_str = 'ConvKX({},{},{},{})RELU({})'.format( + last_channels, self.out_channels, self.kernel_size, + current_stride, self.out_channels) + full_str += inner_str + + last_channels = out_channels + current_stride = 1 + pass + + self.block_list = create_netblock_list_from_str( + full_str, no_create=no_create, no_reslink=no_reslink, no_BN=no_BN) + if not no_create: + self.module_list = nn.ModuleList(self.block_list) + else: + self.module_list = None + + def forward_pre_relu(self, x): + output = x + for block in self.block_list[0:-1]: + output = block(output) + return output + + def __str__(self): + return type(self).__name__ + '({},{},{},{})'.format( + self.in_channels, self.out_channels, self.stride, self.sub_layers) + + def __repr__(self): + return type( + self + ).__name__ + '({}|in={},out={},stride={},sub_layers={},kernel_size={})'.format( + self.block_name, self.in_channels, self.out_channels, self.stride, + self.sub_layers, self.kernel_size) + + def split(self, split_layer_threshold): + return str(self) + + def structure_scale(self, + scale=1.0, + channel_scale=None, + sub_layer_scale=None): + if channel_scale is None: + channel_scale = scale + if sub_layer_scale is None: + sub_layer_scale = scale + + new_out_channels = global_utils.smart_round(self.out_channels * + channel_scale) + new_sub_layers = max(1, round(self.sub_layers * sub_layer_scale)) + + return type(self).__name__ + '({},{},{},{})'.format( + self.in_channels, new_out_channels, self.stride, new_sub_layers) + + +class SuperConvK1BNRELU(SuperConvKXBNRELU): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperConvK1BNRELU, self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + sub_layers=sub_layers, + kernel_size=1, + no_create=no_create, + **kwargs) + + +class SuperConvK3BNRELU(SuperConvKXBNRELU): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperConvK3BNRELU, self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + sub_layers=sub_layers, + kernel_size=3, + no_create=no_create, + **kwargs) + + +class SuperConvK5BNRELU(SuperConvKXBNRELU): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperConvK5BNRELU, self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + sub_layers=sub_layers, + kernel_size=5, + no_create=no_create, + **kwargs) + + +class SuperConvK7BNRELU(SuperConvKXBNRELU): + def __init__(self, + in_channels=None, + out_channels=None, + stride=None, + sub_layers=None, + no_create=False, + **kwargs): + super(SuperConvK7BNRELU, self).__init__(in_channels=in_channels, + out_channels=out_channels, + stride=stride, + sub_layers=sub_layers, + kernel_size=7, + no_create=no_create, + **kwargs) + + +def register_netblocks_dict(netblocks_dict: dict): + this_py_file_netblocks_dict = { + 'SuperConvK1BNRELU': SuperConvK1BNRELU, 
+ 'SuperConvK3BNRELU': SuperConvK3BNRELU, + 'SuperConvK5BNRELU': SuperConvK5BNRELU, + 'SuperConvK7BNRELU': SuperConvK7BNRELU, + } + netblocks_dict.update(this_py_file_netblocks_dict) + return netblocks_dict diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py new file mode 100755 index 0000000..ca6cf77 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py @@ -0,0 +1,9 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones +""" +from .masternet import MasterNet +from .mobilenet import MobileNetV1 +from .resnet import ResNetV1e + +__all__ = ['ResNetV1e', 'MobileNetV1', 'MasterNet'] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/global_utils.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/global_utils.py new file mode 100755 index 0000000..9608ab8 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/global_utils.py @@ -0,0 +1,428 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import argparse +import ast +import distutils.dir_util +import logging +import os +import pprint + +import numpy as np +import torch + + +def load_py_module_from_path(module_path, module_name=None): + if module_path.find(':') > 0: + split_path = module_path.split(':') + module_path = split_path[0] + function_name = split_path[1] + else: + function_name = None + + if module_name is None: + module_name = module_path.replace('/', '_').replace('.', '_') + + assert os.path.isfile(module_path) + + import importlib.util + spec = importlib.util.spec_from_file_location(module_name, module_path) + any_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(any_module) + if function_name is None: + return any_module + else: + return getattr(any_module, function_name) + + +def mkfilepath(filename): + distutils.dir_util.mkpath(os.path.dirname(filename)) + + +def mkdir(dirname): + distutils.dir_util.mkpath(dirname) + + +def smart_round(x, base=None): + if base is None: + if x > 32 * 8: + round_base = 32 + elif x > 16 * 8: + round_base = 16 + else: + round_base = 8 + else: + round_base = base + + return max(round_base, round(x / float(round_base)) * round_base) + + +def save_pyobj(filename, pyobj): + mkfilepath(filename) + the_s = pprint.pformat(pyobj, indent=2, width=120, compact=True) + with open(filename, 'w') as fid: + fid.write(the_s) + + +def load_pyobj(filename): + with open(filename, 'r') as fid: + the_s = fid.readlines() + + if isinstance(the_s, list): + the_s = ''.join(the_s) + + the_s = the_s.replace('inf', '1e20') + pyobj = ast.literal_eval(the_s) + return pyobj + + +def parse_cmd_options(argv): + + parser = argparse.ArgumentParser( + description='Default command line parser.') + + parser.add_argument('--evaluate_only', + action='store_true', + help='Only evaluation.') + + # apex support + parser.add_argument('--apex', + action='store_true', + help='Mixed precision training using apex.') + parser.add_argument('--apex_loss_scale', + type=str, + default='dynamic', + help='loss scale for apex.') + parser.add_argument('--apex_opt_level', type=str, default='O1') + parser.add_argument('--fp16', action='store_true', help='Using FP16.') + + # distributed training 
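+    # multi-process / multi-node launch options: mode, world size, node rank, per-process GPU id, SyncBN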
+ parser.add_argument( + '--dist_mode', + type=str, + default='cpu', + help='Distribution mode, could be cpu, single, horovod, mpi, auto.') + parser.add_argument( + '--independent_training', + action='store_true', + help='When distributed training, use each gpu separately.') + parser.add_argument('--world-size', + default=1, + type=int, + help='number of nodes for distributed training') + parser.add_argument('--rank', + default=-1, + type=int, + help='node rank for distributed training') + + parser.add_argument( + '--gpu', + default=None, + type=int, + help='GPU id to use. Used by torch.distributed package') + parser.add_argument('--sync_bn', + action='store_true', + help='Use synchronized BN.') + + parser.add_argument('--num_job_splits', + default=None, + type=str, + help='Split jobs into multiple groups.') + parser.add_argument('--job_id', + default=None, + type=int, + help='The id of this job node.') + + # horovod setting + parser.add_argument('--fp16_allreduce', + action='store_true', + help='use fp16 compression during allreduce.') + parser.add_argument('--batches_per_allreduce', + type=int, + default=1, + help='number of batches processed locally before ' + 'executing allreduce across workers; it multiplies ' + 'total batch size.') + + # learning rate setting + parser.add_argument('--lr', + default=None, + type=float, + help='initial learning rate per 256 batch size') + parser.add_argument('--target_lr', + default=None, + type=float, + help='target learning rate') + parser.add_argument('--lr_per_256', + default=0.1, + type=float, + help='initial learning rate per 256 batch size') + parser.add_argument('--target_lr_per_256', + default=0.0, + type=float, + help='target learning rate') + parser.add_argument('--lr_mode', + default=None, + type=str, + help='learning rate decay mode.') + parser.add_argument('--warmup', + default=0, + type=int, + help='epochs for warmup.') + parser.add_argument( + '--epoch_offset', + default=0.0, + type=float, + help= + 'Make the learning rate decaying as epochs + epoch_offset but start from epoch_offset. 
' + ) + + parser.add_argument('--lr_stage_list', + default=None, + type=str, + help='stage-wise learning epoch list.') + parser.add_argument('--lr_stage_decay', + default=None, + type=float, + help='stage-wise learning epoch list.') + + # optimizer + parser.add_argument('--optimizer', + default='sgd', + type=str, + help='sgd optimizer') + parser.add_argument('--momentum', default=0.9, type=float, help='momentum') + parser.add_argument('--adadelta_rho', default=0.9, type=float) + parser.add_argument('--adadelta_eps', default=1e-9, type=float) + + parser.add_argument('--wd', + '--weight_decay', + default=4e-5, + type=float, + help='weight decay (default: 4e-5)', + dest='weight_decay') + + # training settings + + parser.add_argument('--resume', + default=None, + type=str, + help='path to latest checkpoint (default: none)') + parser.add_argument('--auto_resume', + action='store_true', + help='auto resume from latest check point') + parser.add_argument('--load_parameters_from', + default=None, + type=str, + help='Only load parameters from pth file.') + parser.add_argument('--strict_load', + action='store_true', + help='Mixed precision training using apex.') + + parser.add_argument('--start_epoch', + default=0, + type=int, + help='manual epoch number (useful on restarts)') + parser.add_argument('--epochs', + default=90, + type=int, + metavar='N', + help='number of total epochs to run') + parser.add_argument('--save_dir', + default=None, + type=str, + help='where to save models.') + parser.add_argument('--save_freq', + default=10, + type=int, + help='How many epochs to save a model.') + parser.add_argument('--print_freq', + default=100, + type=int, + help='print frequency (default: 100)') + + # training tricks + parser.add_argument('--label_smoothing', action='store_true') + parser.add_argument('--weight_init', + type=str, + default='None', + help='How to initialize parameters') + parser.add_argument('--nesterov', action='store_true') + parser.add_argument('--grad_clip', type=float, default=None) + + # BN layer + parser.add_argument('--bn_momentum', type=float, default=None) + parser.add_argument('--bn_eps', type=float, default=None) + + # data augmentation + parser.add_argument('--mixup', action='store_true') + parser.add_argument('--random_erase', action='store_true') + parser.add_argument('--auto_augment', action='store_true') + parser.add_argument('--no_data_augment', action='store_true') + + # for loading dataset + parser.add_argument('--data_dir', + type=str, + default=None, + help='path to dataset') + parser.add_argument('--dataset', + type=str, + default=None, + help='name of the dataset') + parser.add_argument( + '--workers_per_gpu', + default=6, + type=int, + help='number of data loading workers per gpu. 
default 6.') + parser.add_argument( + '--batch_size', + default=None, + type=int, + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel', + ) + + parser.add_argument('--batch_size_per_gpu', + default=None, + type=int, + help='batch size per GPU.') + parser.add_argument('--auto_batch_size', + action='store_true', + help='allow adjust batch size smartly.') + parser.add_argument('--num_cv_folds', + type=int, + default=None, + help='Number of cross-validation folds.') + parser.add_argument('--cv_id', + type=int, + default=None, + help='Current ID of cross-validation fold.') + parser.add_argument('--input_image_size', + type=int, + default=224, + help='input image size.') + parser.add_argument('--input_image_crop', + type=float, + default=0.875, + help='crop ratio of input image') + + # for loading model + parser.add_argument('--arch', + default=None, + help='model names/module to load') + parser.add_argument('--pretrained', + dest='pretrained', + action='store_true', + help='use pre-trained model') + parser.add_argument('--num_classes', + type=int, + default=None, + help='number of classes.') + + # for testing + parser.add_argument('--dataloader_testing', + action='store_true', + help='Testing data loader.') + + # for teacher-student distillation + parser.add_argument('--teacher_input_image_size', type=int, default=None) + parser.add_argument('--teacher_arch', type=str, default=None) + parser.add_argument('--teacher_pretrained', action='store_true') + parser.add_argument('--ts_proj_no_relu', action='store_true') + parser.add_argument('--ts_proj_no_bn', action='store_true') + parser.add_argument('--teacher_load_parameters_from', + type=str, + default=None) + parser.add_argument('--teacher_feature_weight', type=float, default=None) + parser.add_argument('--teacher_logit_weight', type=float, default=None) + parser.add_argument('--ts_clip', type=float, default=None) + parser.add_argument('--target_downsample_ratio', type=int, default=None) + + opt, _ = parser.parse_known_args(argv) + return opt + + +def create_logging(log_filename=None, level=logging.INFO): + if log_filename is not None: + mkfilepath(log_filename) + logging.basicConfig(level=level, + format='%(message)s', + handlers=[ + logging.FileHandler(log_filename), + logging.StreamHandler() + ]) + else: + logging.basicConfig(level=level, + format='%(message)s', + handlers=[logging.StreamHandler()]) + + +class LearningRateScheduler(): + def __init__( + self, + mode, + lr, + target_lr=None, + num_training_instances=None, + stop_epoch=None, + warmup_epoch=None, + stage_list=None, + stage_decay=None, + ): + self.mode = mode + self.lr = lr + self.target_lr = target_lr if target_lr is not None else 0 + self.num_training_instances = num_training_instances if num_training_instances is not None else 1 + self.stop_epoch = stop_epoch if stop_epoch is not None else np.inf + self.warmup_epoch = warmup_epoch if warmup_epoch is not None else 0 + self.stage_list = stage_list if stage_list is not None else None + self.stage_decay = stage_decay if stage_decay is not None else 0 + + self.num_received_training_instances = 0 + + if self.stage_list is not None: + self.stage_list = [int(x) for x in self.stage_list.split(',')] + + def update_lr(self, batch_size): + self.num_received_training_instances += batch_size + + def get_lr(self, num_received_training_instances=None): + if num_received_training_instances is None: + num_received_training_instances = 
self.num_received_training_instances + + # start_instances = self.num_training_instances * self.start_epoch + stop_instances = self.num_training_instances * self.stop_epoch + warmup_instances = self.num_training_instances * self.warmup_epoch + + assert stop_instances > warmup_instances + + current_epoch = self.num_received_training_instances // self.num_training_instances + + if num_received_training_instances < warmup_instances: + return float(num_received_training_instances + + 1) / float(warmup_instances) * self.lr + + ratio_epoch = float(num_received_training_instances - warmup_instances + 1) / \ + float(stop_instances - warmup_instances) + + if self.mode == 'cosine': + factor = (1 - np.math.cos(np.math.pi * ratio_epoch)) / 2.0 + return self.lr + (self.target_lr - self.lr) * factor + elif self.mode == 'stagedecay': + stage_lr = self.lr + for stage_epoch in self.stage_list: + if current_epoch <= stage_epoch: + return stage_lr + else: + stage_lr *= self.stage_decay + pass # end if + pass # end for + return stage_lr + elif self.mode == 'linear': + factor = ratio_epoch + return self.lr + (self.target_lr - self.lr) * factor + else: + raise RuntimeError('Unknown learning rate mode: ' + self.mode) + pass # end if diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/masternet.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/masternet.py new file mode 100755 index 0000000..e77d12f --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/masternet.py @@ -0,0 +1,220 @@ +''' +Copyright (C) 2010-2021 Alibaba Group Holding Limited. +''' + +import argparse +import os +import sys + +import numpy as np +import torch +import torch.nn.functional as F +from mmdet.models.builder import BACKBONES +from torch import nn + +from . 
import PlainNet +from .PlainNet import (_create_netblock_list_from_str_, basic_blocks, + parse_cmd_options, super_blocks) + +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + + +def parse_cmd_options(argv): + parser = argparse.ArgumentParser() + parser.add_argument('--no_BN', action='store_true') + parser.add_argument('--no_reslink', action='store_true') + parser.add_argument('--use_se', action='store_true') + module_opt, _ = parser.parse_known_args(argv) + return module_opt + + +@BACKBONES.register_module() +class MasterNet(PlainNet.PlainNet): + def __init__(self, + argv=None, + opt=None, + num_classes=None, + plainnet_struct=None, + no_create=False, + no_reslink=None, + no_BN=None, + use_se=None): + + if argv is not None: + module_opt = parse_cmd_options(argv) + else: + module_opt = None + + if no_BN is None: + if module_opt is not None: + no_BN = module_opt.no_BN + else: + no_BN = False + + if no_reslink is None: + if module_opt is not None: + no_reslink = module_opt.no_reslink + else: + no_reslink = False + + if use_se is None: + if module_opt is not None: + use_se = module_opt.use_se + else: + use_se = False + plainnet_struct = plainnet_struct + + self.num_classes = 2048 + self.last_channels = 2048 + super(MasterNet, self).__init__(argv=argv, + opt=opt, + num_classes=num_classes, + plainnet_struct=plainnet_struct, + no_create=no_create, + no_reslink=no_reslink, + no_BN=no_BN, + use_se=use_se) + + self.no_create = no_create + self.no_reslink = no_reslink + self.no_BN = no_BN + self.use_se = use_se + + block_cfg = None + if block_cfg is None: + stage_planes = [ + 16, 40, 64, 96, 224, 2048 + ] #[16, 40, 64, 96, 224, 2048] #0.25 default #[16, 16, 40, 72, 152, 288] + stage_blocks = [1, 5, 5, 1] + else: + stage_planes = block_cfg['stage_planes'] + stage_blocks = block_cfg['stage_blocks'] + + # bn eps + for layer in self.modules(): + if isinstance(layer, nn.BatchNorm2d): + layer.eps = 1e-3 + + #self.stage_layers = extract_stage_features_and_logit() + + def extract_stage_features_and_logit(self, x, target_downsample_ratio=4): + stage_features_list = [] + image_size = x.shape[2] + output = x + block_id = 0 + for block_id, the_block in enumerate(self.block_list): + output = the_block(output) + #import pdb; pdb.set_trace() + dowsample_ratio = round(image_size / output.shape[2]) + if dowsample_ratio == target_downsample_ratio: + stage_features_list.append(output) + target_downsample_ratio *= 2 + pass + pass + + #import pdb; pdb.set_trace() + #output = F.adaptive_avg_pool2d(output, output_size=1) + #output = torch.flatten(output, 1) + #logit = self.fc_linear(output) + #print("stage_features_list:", stage_features_list) + return stage_features_list + + def forward(self, x): + output = self.extract_stage_features_and_logit(x) + + #output = x + #for block_id, the_block in enumerate(self.block_list): + #import pdb; pdb.set_trace() + # output = the_block(output) + + #output = F.adaptive_avg_pool2d(output, output_size=1) + + #output = torch.flatten(output, 1) + #output = self.fc_linear(output) + return tuple(output) + + def forward_pre_GAP(self, x): + output = x + for the_block in self.block_list: + output = the_block(output) + return output + + def get_FLOPs(self, input_resolution): + the_res = input_resolution + the_flops = 0 + for the_block in self.block_list: + the_flops += the_block.get_FLOPs(the_res) + the_res = the_block.get_output_resolution(the_res) + + return the_flops + + def get_model_size(self): + the_size = 0 + for the_block in self.block_list: + the_size += 
the_block.get_model_size() + + return the_size + + def get_num_layers(self): + num_layers = 0 + for block in self.block_list: + assert isinstance(block, super_blocks.PlainNetSuperBlockClass) + num_layers += block.sub_layers + return num_layers + + def replace_block(self, block_id, new_block): + self.block_list[block_id] = new_block + + if block_id < len(self.block_list) - 1: + if self.block_list[block_id + + 1].in_channels != new_block.out_channels: + self.block_list[block_id + 1].set_in_channels( + new_block.out_channels) + else: + assert block_id == len(self.block_list) - 1 + self.last_channels = self.block_list[-1].out_channels + + self.module_list = nn.ModuleList(self.block_list) + + def split(self, split_layer_threshold): + new_str = '' + for block in self.block_list: + new_str += block.split(split_layer_threshold=split_layer_threshold) + return new_str + + def init_weights(self, pretrained=None): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight.data, gain=3.26033) + if hasattr(m, 'bias') and m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_( + m.weight, 0, 3.26033 * + np.sqrt(2 / (m.weight.shape[0] + m.weight.shape[1]))) + if hasattr(m, 'bias') and m.bias is not None: + nn.init.zeros_(m.bias) + else: + pass + + for superblock in self.block_list: + if not isinstance(superblock, + super_blocks.PlainNetSuperBlockClass): + continue + for block in superblock.block_list: + if not (isinstance(block, basic_blocks.ResBlock) + or isinstance(block, basic_blocks.ResBlockProj)): + continue + # print('---debug set bn weight zero in resblock {}:{}'.format(superblock, block)) + last_bn_block = None + for inner_resblock in block.block_list: + if isinstance(inner_resblock, basic_blocks.BN): + last_bn_block = inner_resblock + pass + pass # end for + assert last_bn_block is not None + # print('-------- last_bn_block={}'.format(last_bn_block)) + nn.init.zeros_(last_bn_block.netblock.weight) diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py new file mode 100644 index 0000000..0076850 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py @@ -0,0 +1,97 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/mobilenet.py +""" + +import torch +import torch.nn as nn +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, + constant_init, kaiming_init) +from mmcv.runner import load_checkpoint +from mmdet.models.builder import BACKBONES +from mmdet.utils import get_root_logger +from torch.nn.modules.batchnorm import _BatchNorm + + +@BACKBONES.register_module() +class MobileNetV1(nn.Module): + def __init__(self, + in_channels=3, + block_cfg=None, + num_stages=4, + out_indices=(0, 1, 2, 3)): + super(MobileNetV1, self).__init__() + self.out_indices = out_indices + + def conv_bn(inp, oup, stride): + return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), nn.ReLU(inplace=True)) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + 
nn.ReLU(inplace=True), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + if block_cfg is None: + stage_planes = [8, 16, 32, 64, 128, 256] + stage_blocks = [2, 4, 4, 2] + else: + stage_planes = block_cfg['stage_planes'] + stage_blocks = block_cfg['stage_blocks'] + assert len(stage_planes) == 6 + assert len(stage_blocks) == 4 + self.stem = nn.Sequential( + conv_bn(3, stage_planes[0], 2), + conv_dw(stage_planes[0], stage_planes[1], 1), + ) + self.stage_layers = [] + for i, num_blocks in enumerate(stage_blocks): + _layers = [] + for n in range(num_blocks): + if n == 0: + _layer = conv_dw(stage_planes[i + 1], stage_planes[i + 2], + 2) + else: + _layer = conv_dw(stage_planes[i + 2], stage_planes[i + 2], + 1) + _layers.append(_layer) + + _block = nn.Sequential(*_layers) + layer_name = f'layer{i + 1}' + self.add_module(layer_name, _block) + self.stage_layers.append(layer_name) + + def forward(self, x): + output = [] + x = self.stem(x) + for i, layer_name in enumerate(self.stage_layers): + stage_layer = getattr(self, layer_name) + x = stage_layer(x) + if i in self.out_indices: + output.append(x) + + return tuple(output) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py new file mode 100644 index 0000000..a67bd07 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py @@ -0,0 +1,411 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py +""" +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, + constant_init, kaiming_init) +from mmcv.runner import load_checkpoint +from mmdet.models.backbones.resnet import BasicBlock, Bottleneck +from mmdet.models.builder import BACKBONES +from mmdet.models.utils import ResLayer +from mmdet.utils import get_root_logger +from torch.nn.modules.batchnorm import _BatchNorm + + +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + stem_channels (int | None): Number of stem channels. If not specified, + it will be the same as `base_channels`. Default: None. + base_channels (int): Number of base channels of res layer. Default: 64. + in_channels (int): Number of input image channels. Default: 3. + num_stages (int): Resnet stages. Default: 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. 
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + norm_cfg (dict): Dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + - position (str, required): Position inside block to insert + plugin, options are 'after_conv1', 'after_conv2', 'after_conv3'. + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages'. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Examples: + + >>> from mmdet.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 0: (BasicBlock, (2, 2, 2, 2)), + 18: (BasicBlock, (2, 2, 2, 2)), + 19: (BasicBlock, (2, 4, 4, 1)), + 20: (BasicBlock, (2, 3, 2, 2)), + 22: (BasicBlock, (2, 4, 3, 1)), + 24: (BasicBlock, (2, 4, 4, 1)), + 26: (BasicBlock, (2, 4, 4, 2)), + 28: (BasicBlock, (2, 5, 4, 2)), + 29: (BasicBlock, (2, 6, 3, 2)), + 30: (BasicBlock, (2, 5, 5, 2)), + 32: (BasicBlock, (2, 6, 5, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 35: (BasicBlock, (3, 6, 4, 3)), + 38: (BasicBlock, (3, 8, 4, 3)), + 40: (BasicBlock, (3, 8, 5, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 56: (Bottleneck, (3, 8, 4, 3)), + 68: (Bottleneck, (3, 10, 6, 3)), + 74: (Bottleneck, (3, 12, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=None, + base_channels=64, + num_stages=4, + block_cfg=None, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + no_pool33=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + with_cp=False, + zero_init_residual=True): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + self.depth = depth + if stem_channels is None: + stem_channels = base_channels + self.stem_channels = stem_channels + self.base_channels = base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.no_pool33 = no_pool33 + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not 
None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.zero_init_residual = zero_init_residual + if block_cfg is None: + self.block, stage_blocks = self.arch_settings[depth] + else: + self.block = BasicBlock if block_cfg[ + 'block'] == 'BasicBlock' else Bottleneck + stage_blocks = block_cfg['stage_blocks'] + assert len(stage_blocks) >= num_stages + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + if block_cfg is not None and 'stage_planes' in block_cfg: + stage_planes = block_cfg['stage_planes'] + else: + stage_planes = [base_channels * 2**i for i in range(num_stages)] + + # print('resnet cfg:', stage_blocks, stage_planes) + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + planes = stage_planes[i] + res_layer = self.make_res_layer(block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """Make plugins for ResNet ``stage_idx`` th stage. + + Currently we support to insert ``context_block``, + ``empirical_attention_block``, ``nonlocal_block`` into the backbone + like ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be: + + Examples: + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose ``stage_idx=0``, the structure of blocks in the stage would be: + + .. code-block:: none + + conv1-> conv2->conv3->yyy->zzz1->zzz2 + + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + + .. code-block:: none + + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. 
+ stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer(self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer(self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer(self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer(self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, + stem_channels, + postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + if self.no_pool33: + assert self.deep_stem + self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) + else: + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.dcn is not None: + for m in self.modules(): + if isinstance(m, Bottleneck) and hasattr( + m.conv2, 'conv_offset'): + constant_init(m.conv2.conv_offset, 0) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@BACKBONES.register_module() +class ResNetV1e(ResNet): + r"""ResNetV1d variant described in `Bag of Tricks + `_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + + Compared with ResNetV1d, ResNetV1e change maxpooling from 3x3 to 2x2 pad=1 + """ + def __init__(self, **kwargs): + super(ResNetV1e, self).__init__(deep_stem=True, + avg_down=True, + no_pool33=True, + **kwargs) diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py new file mode 100755 index 0000000..55ddf65 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py @@ -0,0 +1,7 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads +""" +from .scrfd_head import SCRFDHead + +__all__ = ['SCRFDHead'] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py new file mode 100755 index 0000000..bffcbfb --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py @@ -0,0 +1,1074 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py +""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, Scale, + bias_init_with_prob, constant_init, kaiming_init, + normal_init) +from mmcv.runner import force_fp32 +from mmdet.core import (anchor_inside_flags, bbox2distance, bbox_overlaps, + build_assigner, 
build_sampler, distance2bbox, + images_to_levels, multi_apply, reduce_mean, unmap) +from mmdet.models.builder import HEADS, build_loss +from mmdet.models.dense_heads.anchor_head import AnchorHead + +from ....mmdet_patch.core.bbox import distance2kps, kps2distance +from ....mmdet_patch.core.post_processing import multiclass_nms + + +class Integral(nn.Module): + """A fixed layer for calculating integral result from distribution. + + This layer calculates the target location by :math: `sum{P(y_i) * y_i}`, + P(y_i) denotes the softmax vector that represents the discrete distribution + y_i denotes the discrete set, usually {0, 1, 2, ..., reg_max} + + Args: + reg_max (int): The maximal value of the discrete set. Default: 16. You + may want to reset it according to your new dataset or related + settings. + """ + def __init__(self, reg_max=16): + super(Integral, self).__init__() + self.reg_max = reg_max + self.register_buffer('project', + torch.linspace(0, self.reg_max, self.reg_max + 1)) + + def forward(self, x): + """Forward feature from the regression head to get integral result of + bounding box location. + + Args: + x (Tensor): Features of the regression head, shape (N, 4*(n+1)), + n is self.reg_max. + + Returns: + x (Tensor): Integral result of box locations, i.e., distance + offsets from the box center in four directions, shape (N, 4). + """ + x = F.softmax(x.reshape(-1, self.reg_max + 1), dim=1) + x = F.linear(x, self.project.type_as(x)).reshape(-1, 4) + return x + + +@HEADS.register_module() +class SCRFDHead(AnchorHead): + """Generalized Focal Loss: Learning Qualified and Distributed Bounding + Boxes for Dense Object Detection. + + GFL head structure is similar with ATSS, however GFL uses + 1) joint representation for classification and localization quality, and + 2) flexible General distribution for bounding box locations, + which are supervised by + Quality Focal Loss (QFL) and Distribution Focal Loss (DFL), respectively + + https://arxiv.org/abs/2006.04388 + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + stacked_convs (int): Number of conv layers in cls and reg tower. + Default: 4. + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='GN', num_groups=32, requires_grad=True). + loss_qfl (dict): Config of Quality Focal Loss (QFL). + reg_max (int): Max value of integral set :math: `{0, ..., reg_max}` + in QFL setting. Default: 16. 
+ Example: + + >>> self = GFLHead(11, 7) + >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]] + >>> cls_quality_score, bbox_pred = self.forward(feats) + >>> assert len(cls_quality_score) == len(self.scales) + """ + def __init__(self, + num_classes, + in_channels, + stacked_convs=4, + feat_mults=None, + conv_cfg=None, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), + loss_dfl=None, + reg_max=8, + cls_reg_share=False, + strides_share=True, + scale_mode=1, + dw_conv=False, + use_kps=False, + num_kps=5, + loss_kps=dict(type='SmoothL1Loss', + beta=1.0 / 9.0, + loss_weight=0.1), + **kwargs): + self.stacked_convs = stacked_convs + self.feat_mults = feat_mults + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.reg_max = reg_max + self.cls_reg_share = cls_reg_share + self.strides_share = strides_share + self.scale_mode = scale_mode + self.use_dfl = True + self.dw_conv = dw_conv + self.NK = num_kps + self.extra_flops = 0.0 + if loss_dfl is None or not loss_dfl: + self.use_dfl = False + self.use_scale = False + self.use_kps = use_kps + if self.scale_mode > 0 and (self.strides_share + or self.scale_mode == 2): + self.use_scale = True + super(SCRFDHead, self).__init__(num_classes, in_channels, **kwargs) + + self.sampling = False + if self.train_cfg: + self.assigner = build_assigner(self.train_cfg.assigner) + # SSD sampling=False so use PseudoSampler + sampler_cfg = dict(type='PseudoSampler') + self.sampler = build_sampler(sampler_cfg, context=self) + + self.integral = Integral(self.reg_max) + if self.use_dfl: + self.loss_dfl = build_loss(loss_dfl) + self.loss_kps = build_loss(loss_kps) + self.loss_kps_std = 1.0 + self.train_step = 0 + self.pos_count = {} + self.gtgroup_count = {} + for stride in self.anchor_generator.strides: + self.pos_count[stride[0]] = 0 + + def _get_conv_module(self, in_channel, out_channel): + if not self.dw_conv: + conv = ConvModule(in_channel, + out_channel, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg) + else: + conv = DepthwiseSeparableConvModule(in_channel, + out_channel, + 3, + stride=1, + padding=1, + pw_norm_cfg=self.norm_cfg, + dw_norm_cfg=self.norm_cfg) + return conv + + def _init_layers(self): + """Initialize layers of the head.""" + self.relu = nn.ReLU(inplace=True) + conv_strides = [0] if self.strides_share else \ + self.anchor_generator.strides + self.cls_stride_convs = nn.ModuleDict() + self.reg_stride_convs = nn.ModuleDict() + self.stride_cls = nn.ModuleDict() + self.stride_reg = nn.ModuleDict() + if self.use_kps: + self.stride_kps = nn.ModuleDict() + for stride_idx, conv_stride in enumerate(conv_strides): + key = str(conv_stride) + cls_convs = nn.ModuleList() + reg_convs = nn.ModuleList() + stacked_convs = self.stacked_convs[stride_idx] if \ + isinstance(self.stacked_convs, (list, tuple)) else \ + self.stacked_convs + feat_mult = self.feat_mults[stride_idx] if \ + self.feat_mults is not None else 1 + feat_ch = int(self.feat_channels * feat_mult) + last_feat_ch = 0 + for i in range(stacked_convs): + chn = self.in_channels if i == 0 else last_feat_ch + cls_convs.append(self._get_conv_module(chn, feat_ch)) + if not self.cls_reg_share: + reg_convs.append(self._get_conv_module(chn, feat_ch)) + last_feat_ch = feat_ch + self.cls_stride_convs[key] = cls_convs + self.reg_stride_convs[key] = reg_convs + self.stride_cls[key] = nn.Conv2d(feat_ch, + self.cls_out_channels * + self.num_anchors, + 3, + padding=1) + if not self.use_dfl: + self.stride_reg[key] = nn.Conv2d(feat_ch, + 4 * self.num_anchors, + 
3, + padding=1) + else: + self.stride_reg[key] = nn.Conv2d(feat_ch, + 4 * (self.reg_max + 1) * + self.num_anchors, + 3, + padding=1) + if self.use_kps: + self.stride_kps[key] = nn.Conv2d(feat_ch, + self.NK * 2 * + self.num_anchors, + 3, + padding=1) + if self.use_scale: + self.scales = nn.ModuleList( + [Scale(1.0) for _ in self.anchor_generator.strides]) + else: + self.scales = [None for _ in self.anchor_generator.strides] + + def init_weights(self): + """Initialize weights of the head.""" + for stride, cls_convs in self.cls_stride_convs.items(): + for m in cls_convs: + if not self.dw_conv: + try: + normal_init(m.conv, std=0.01) + except Exception: + pass + else: + normal_init(m.depthwise_conv.conv, std=0.01) + normal_init(m.pointwise_conv.conv, std=0.01) + for stride, reg_convs in self.reg_stride_convs.items(): + for m in reg_convs: + if not self.dw_conv: + normal_init(m.conv, std=0.01) + else: + normal_init(m.depthwise_conv.conv, std=0.01) + normal_init(m.pointwise_conv.conv, std=0.01) + bias_cls = -4.595 + for stride, conv in self.stride_cls.items(): + normal_init(conv, std=0.01, bias=bias_cls) + for stride, conv in self.stride_reg.items(): + normal_init(conv, std=0.01) + if self.use_kps: + for stride, conv in self.stride_kps.items(): + normal_init(conv, std=0.01) + + def forward(self, feats): + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + cls_scores (list[Tensor]): Classification and quality (IoU) + joint scores for all scale levels, each is a 4D-tensor, + the channel number is num_classes. + bbox_preds (list[Tensor]): Box distribution logits for all + scale levels, each is a 4D-tensor, the channel number is + 4*(n+1), n is max value of integral set. + """ + return multi_apply(self.forward_single, feats, self.scales, + self.anchor_generator.strides) + + def forward_single(self, x, scale, stride): + """Forward feature of a single scale level. + + Args: + x (Tensor): Features of a single scale level. + scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize + the bbox prediction. + + Returns: + tuple: + cls_score (Tensor): Cls and quality joint scores for a single + scale level the channel number is num_classes. + bbox_pred (Tensor): Box distribution logits for a single scale + level, the channel number is 4*(n+1), n is max value of + integral set. 
+ """ + cls_feat = x + reg_feat = x + cls_convs = self.cls_stride_convs[ + '0'] if self.strides_share else self.cls_stride_convs[str(stride)] + for cls_conv in cls_convs: + cls_feat = cls_conv(cls_feat) + if not self.cls_reg_share: + reg_convs = self.reg_stride_convs[ + '0'] if self.strides_share else self.reg_stride_convs[str( + stride)] + for reg_conv in reg_convs: + reg_feat = reg_conv(reg_feat) + else: + reg_feat = cls_feat + cls_pred_module = self.stride_cls[ + '0'] if self.strides_share else self.stride_cls[str(stride)] + cls_score = cls_pred_module(cls_feat) + reg_pred_module = self.stride_reg[ + '0'] if self.strides_share else self.stride_reg[str(stride)] + _bbox_pred = reg_pred_module(reg_feat) + if self.use_scale: + bbox_pred = scale(_bbox_pred) + else: + bbox_pred = _bbox_pred + if self.use_kps: + kps_pred_module = self.stride_kps[ + '0'] if self.strides_share else self.stride_kps[str(stride)] + kps_pred = kps_pred_module(reg_feat) + else: + kps_pred = bbox_pred.new_zeros( + (bbox_pred.shape[0], self.NK * 2, bbox_pred.shape[2], + bbox_pred.shape[3])) + if torch.onnx.is_in_onnx_export(): + assert not self.use_dfl + print('in-onnx-export', cls_score.shape, bbox_pred.shape) + # Add output batch dim, based on pull request #1593 + batch_size = cls_score.shape[0] + cls_score = cls_score.permute(0, 2, 3, 1).reshape( + batch_size, -1, self.cls_out_channels).sigmoid() + bbox_pred = bbox_pred.permute(0, 2, 3, + 1).reshape(batch_size, -1, 4) + kps_pred = kps_pred.permute(0, 2, 3, + 1).reshape(batch_size, -1, self.NK * 2) + return cls_score, bbox_pred, kps_pred + + def forward_train(self, + x, + img_metas, + gt_bboxes, + gt_labels=None, + gt_keypointss=None, + gt_bboxes_ignore=None, + proposal_cfg=None, + **kwargs): + """ + Args: + x (list[Tensor]): Features from FPN. + img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + gt_bboxes (Tensor): Ground truth bboxes of the image, + shape (num_gts, 4). + gt_labels (Tensor): Ground truth labels of each box, + shape (num_gts,). + gt_bboxes_ignore (Tensor): Ground truth bboxes to be + ignored, shape (num_ignored_gts, 4). + proposal_cfg (mmcv.Config): Test / postprocessing configuration, + if None, test_cfg would be used + + Returns: + tuple: + losses: (dict[str, Tensor]): A dictionary of loss components. + proposal_list (list[Tensor]): Proposals of each image. + """ + outs = self(x) + if gt_labels is None: + loss_inputs = outs + (gt_bboxes, img_metas) + else: + loss_inputs = outs + (gt_bboxes, gt_labels, gt_keypointss, + img_metas) + losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) + if proposal_cfg is None: + return losses + else: + proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) + return losses, proposal_list + + def get_anchors(self, featmap_sizes, img_metas, device='cuda'): + """Get anchors according to feature map sizes. + + Args: + featmap_sizes (list[tuple]): Multi-level feature map sizes. + img_metas (list[dict]): Image meta info. + device (torch.device | str): Device for returned tensors + + Returns: + tuple: + anchor_list (list[Tensor]): Anchors of each image. + valid_flag_list (list[Tensor]): Valid flags of each image. 
+ """ + num_imgs = len(img_metas) + + # since feature map sizes of all images are the same, we only compute + # anchors for one time + multi_level_anchors = self.anchor_generator.grid_anchors( + featmap_sizes, device) + anchor_list = [multi_level_anchors for _ in range(num_imgs)] + + # for each image, we compute valid flags of multi level anchors + valid_flag_list = [] + for img_id, img_meta in enumerate(img_metas): + multi_level_flags = self.anchor_generator.valid_flags( + featmap_sizes, img_meta['pad_shape'], device) + valid_flag_list.append(multi_level_flags) + + return anchor_list, valid_flag_list + + def anchor_center(self, anchors): + """Get anchor centers from anchors. + + Args: + anchors (Tensor): Anchor list with shape (N, 4), "xyxy" format. + + Returns: + Tensor: Anchor centers with shape (N, 2), "xy" format. + """ + anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2 + anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2 + return torch.stack([anchors_cx, anchors_cy], dim=-1) + + def loss_single(self, anchors, cls_score, bbox_pred, kps_pred, labels, + label_weights, bbox_targets, kps_targets, kps_weights, + stride, num_total_samples): + """Compute loss of a single scale level. + + Args: + anchors (Tensor): Box reference for each scale level with shape + (N, num_total_anchors, 4). + cls_score (Tensor): Cls and quality joint scores for each scale + level has shape (N, num_classes, H, W). + bbox_pred (Tensor): Box distribution logits for each scale + level with shape (N, 4*(n+1), H, W), n is max value of integral + set. + labels (Tensor): Labels of each anchors with shape + (N, num_total_anchors). + label_weights (Tensor): Label weights of each anchor with shape + (N, num_total_anchors) + bbox_targets (Tensor): BBox regression targets of each anchor wight + shape (N, num_total_anchors, 4). + stride (tuple): Stride in this scale level. + num_total_samples (int): Number of positive samples that is + reduced over all GPUs. + + Returns: + dict[str, Tensor]: A dictionary of loss components. + """ + assert stride[0] == stride[1], 'h stride is not equal to w stride!' 
+ use_qscore = True + anchors = anchors.reshape(-1, 4) + cls_score = cls_score.permute(0, 2, 3, + 1).reshape(-1, self.cls_out_channels) + if not self.use_dfl: + bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4) + else: + bbox_pred = bbox_pred.permute(0, 2, 3, 1) + bbox_pred = bbox_pred.reshape(-1, 4 * (self.reg_max + 1)) + bbox_targets = bbox_targets.reshape(-1, 4) + labels = labels.reshape(-1) + label_weights = label_weights.reshape(-1) + + if self.use_kps: + kps_pred = kps_pred.permute(0, 2, 3, 1).reshape(-1, self.NK * 2) + kps_targets = kps_targets.reshape((-1, self.NK * 2)) + kps_weights = kps_weights.reshape((-1, self.NK * 2)) + + # FG cat_id: [0, num_classes -1], BG cat_id: num_classes + bg_class_ind = self.num_classes + pos_inds = ((labels >= 0) + & (labels < bg_class_ind)).nonzero().squeeze(1) + score = label_weights.new_zeros(labels.shape) + + if len(pos_inds) > 0: + pos_bbox_targets = bbox_targets[pos_inds] + pos_bbox_pred = bbox_pred[pos_inds] + pos_anchors = anchors[pos_inds] + pos_anchor_centers = self.anchor_center(pos_anchors) / stride[0] + + weight_targets = cls_score.detach().sigmoid() + weight_targets = weight_targets.max(dim=1)[0][pos_inds] + pos_decode_bbox_targets = pos_bbox_targets / stride[0] + + if self.use_dfl: + pos_bbox_pred_corners = self.integral(pos_bbox_pred) + pos_decode_bbox_pred = distance2bbox(pos_anchor_centers, + pos_bbox_pred_corners) + else: + pos_decode_bbox_pred = distance2bbox(pos_anchor_centers, + pos_bbox_pred) + if self.use_kps: + pos_kps_targets = kps_targets[pos_inds] + pos_kps_pred = kps_pred[pos_inds] + pos_kps_weights = kps_weights.max( + dim=1)[0][pos_inds] * weight_targets + pos_kps_weights = pos_kps_weights.reshape((-1, 1)) + pos_decode_kps_targets = kps2distance( + pos_anchor_centers, pos_kps_targets / stride[0]) + pos_decode_kps_pred = pos_kps_pred + if use_qscore: + score[pos_inds] = bbox_overlaps(pos_decode_bbox_pred.detach(), + pos_decode_bbox_targets, + is_aligned=True) + else: + score[pos_inds] = 1.0 + + # regression loss + loss_bbox = self.loss_bbox(pos_decode_bbox_pred, + pos_decode_bbox_targets, + weight=weight_targets, + avg_factor=1.0) + + if self.use_kps: + loss_kps = self.loss_kps( + pos_decode_kps_pred * self.loss_kps_std, + pos_decode_kps_targets * self.loss_kps_std, + weight=pos_kps_weights, + avg_factor=1.0) + else: + loss_kps = kps_pred.sum() * 0 + + # dfl loss + if self.use_dfl: + pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1) + target_corners = bbox2distance(pos_anchor_centers, + pos_decode_bbox_targets, + self.reg_max).reshape(-1) + loss_dfl = self.loss_dfl(pred_corners, + target_corners, + weight=weight_targets[:, None].expand( + -1, 4).reshape(-1), + avg_factor=4.0) + else: + loss_dfl = bbox_pred.sum() * 0 + else: + loss_bbox = bbox_pred.sum() * 0 + loss_dfl = bbox_pred.sum() * 0 + loss_kps = kps_pred.sum() * 0 + weight_targets = torch.tensor(0).cuda() + + loss_cls = self.loss_cls(cls_score, (labels, score), + weight=label_weights, + avg_factor=num_total_samples) + return loss_cls, loss_bbox, loss_dfl, loss_kps, weight_targets.sum() + + @force_fp32(apply_to=('cls_scores', 'bbox_preds')) + def loss(self, + cls_scores, + bbox_preds, + kps_preds, + gt_bboxes, + gt_labels, + gt_keypointss, + img_metas, + gt_bboxes_ignore=None): + """Compute losses of the head. + + Args: + cls_scores (list[Tensor]): Cls and quality scores for each scale + level has shape (N, num_classes, H, W). 
+ bbox_preds (list[Tensor]): Box distribution logits for each scale + level with shape (N, 4*(n+1), H, W), n is max value of integral + set. + gt_bboxes (list[Tensor]): Ground truth bboxes for each image with + shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format. + gt_labels (list[Tensor]): class indices corresponding to each box + img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + gt_bboxes_ignore (list[Tensor] | None): specify which bounding + boxes can be ignored when computing the loss. + + Returns: + dict[str, Tensor]: A dictionary of loss components. + """ + + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.anchor_generator.num_levels + + device = cls_scores[0].device + anchor_list, valid_flag_list = self.get_anchors(featmap_sizes, + img_metas, + device=device) + label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 + + cls_reg_targets = self.get_targets( + anchor_list, + valid_flag_list, + gt_bboxes, + gt_keypointss, + img_metas, + gt_bboxes_ignore_list=gt_bboxes_ignore, + gt_labels_list=gt_labels, + label_channels=label_channels) + if cls_reg_targets is None: + return None + + (anchor_list, labels_list, label_weights_list, bbox_targets_list, + bbox_weights_list, keypoints_targets_list, keypoints_weights_list, + num_total_pos, num_total_neg) = cls_reg_targets + + num_total_samples = reduce_mean( + torch.tensor(num_total_pos, dtype=torch.float, + device=device)).item() + num_total_samples = max(num_total_samples, 1.0) + + losses_cls, losses_bbox, losses_dfl, losses_kps,\ + avg_factor = multi_apply( + self.loss_single, + anchor_list, + cls_scores, + bbox_preds, + kps_preds, + labels_list, + label_weights_list, + bbox_targets_list, + keypoints_targets_list, + keypoints_weights_list, + self.anchor_generator.strides, + num_total_samples=num_total_samples) + + avg_factor = sum(avg_factor) + avg_factor = reduce_mean(avg_factor).item() + losses_bbox = list(map(lambda x: x / avg_factor, losses_bbox)) + losses = dict(loss_cls=losses_cls, loss_bbox=losses_bbox) + if self.use_kps: + losses_kps = list(map(lambda x: x / avg_factor, losses_kps)) + losses['loss_kps'] = losses_kps + if self.use_dfl: + losses_dfl = list(map(lambda x: x / avg_factor, losses_dfl)) + losses['loss_dfl'] = losses_dfl + return losses + + @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'kps_preds')) + def get_bboxes(self, + cls_scores, + bbox_preds, + kps_preds, + img_metas, + cfg=None, + rescale=False, + with_nms=True): + """Transform network output for a batch into bbox predictions. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Box energies / deltas for each scale + level with shape (N, num_anchors * 4, H, W) + img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + cfg (mmcv.Config | None): Test / postprocessing configuration, + if None, test_cfg would be used + rescale (bool): If True, return boxes in original image space. + Default: False. + with_nms (bool): If True, do nms before return boxes. + Default: True. + + Returns: + list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. + The first item is an (n, 5) tensor, where the first 4 columns + are bounding box positions (tl_x, tl_y, br_x, br_y) and the + 5-th column is a score between 0 and 1. 
The second item is a + (n,) tensor where each item is the predicted class labelof the + corresponding box. + + Example: + >>> import mmcv + >>> self = AnchorHead( + >>> num_classes=9, + >>> in_channels=1, + >>> anchor_generator=dict( + >>> type='AnchorGenerator', + >>> scales=[8], + >>> ratios=[0.5, 1.0, 2.0], + >>> strides=[4,])) + >>> img_metas = [{'img_shape': (32, 32, 3), 'scale_factor': 1}] + >>> cfg = mmcv.Config(dict( + >>> score_thr=0.00, + >>> nms=dict(type='nms', iou_thr=1.0), + >>> max_per_img=10)) + >>> feat = torch.rand(1, 1, 3, 3) + >>> cls_score, bbox_pred = self.forward_single(feat) + >>> # note the input lists are over different levels, not images + >>> cls_scores, bbox_preds = [cls_score], [bbox_pred] + >>> result_list = self.get_bboxes(cls_scores, bbox_preds, + >>> img_metas, cfg) + >>> det_bboxes, det_labels = result_list[0] + >>> assert len(result_list) == 1 + >>> assert det_bboxes.shape[1] == 5 + >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img + """ + assert len(cls_scores) == len(bbox_preds) + num_levels = len(cls_scores) + + device = cls_scores[0].device + featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)] + mlvl_anchors = self.anchor_generator.grid_anchors(featmap_sizes, + device=device) + + result_list = [] + # bbox_preds and kps_preds are list of 3 tensor, each tensor is NCHW + # corresponding to a stage, C is 8 for bbox and 20 for kps + for img_id in range(len(img_metas)): + cls_score_list = [ + cls_scores[i][img_id].detach() for i in range(num_levels) + ] + bbox_pred_list = [ + bbox_preds[i][img_id].detach() for i in range(num_levels) + ] + if self.use_kps: + kps_pred_list = [ + kps_preds[i][img_id].detach() for i in range(num_levels) + ] + else: + kps_pred_list = [None for i in range(num_levels)] + img_shape = img_metas[img_id]['img_shape'] + scale_factor = img_metas[img_id]['scale_factor'] + if with_nms: + # some heads don't support with_nms argument + proposals = self._get_bboxes_single(cls_score_list, + bbox_pred_list, + kps_pred_list, + mlvl_anchors, img_shape, + scale_factor, cfg, rescale) + else: + proposals = self._get_bboxes_single(cls_score_list, + bbox_pred_list, + kps_pred_list, + mlvl_anchors, img_shape, + scale_factor, cfg, rescale, + with_nms) + result_list.append(proposals) + return result_list + + def _get_bboxes_single(self, + cls_scores, + bbox_preds, + kps_preds, + mlvl_anchors, + img_shape, + scale_factor, + cfg, + rescale=False, + with_nms=True): + """Transform outputs for a single batch item into labeled boxes. + + Args: + cls_scores (list[Tensor]): Box scores for a single scale level + has shape (num_classes, H, W). + bbox_preds (list[Tensor]): Box distribution logits for a single + scale level with shape (4*(n+1), H, W), n is max value of + integral set. + mlvl_anchors (list[Tensor]): Box reference for a single scale level + with shape (num_total_anchors, 4). + img_shape (tuple[int]): Shape of the input image, + (height, width, 3). + scale_factor (ndarray): Scale factor of the image arange as + (w_scale, h_scale, w_scale, h_scale). + cfg (mmcv.Config | None): Test / postprocessing configuration, + if None, test_cfg would be used. + rescale (bool): If True, return boxes in original image space. + Default: False. + with_nms (bool): If True, do nms before return boxes. + Default: True. + + Returns: + tuple(Tensor): + det_bboxes (Tensor): Bbox predictions in shape (N, 5), where + the first 4 columns are bounding box positions + (tl_x, tl_y, br_x, br_y) and the 5-th column is a score + between 0 and 1. 
+ det_labels (Tensor): A (N,) tensor where each item is the + predicted class label of the corresponding box. + """ + cfg = self.test_cfg if cfg is None else cfg + assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) + mlvl_bboxes = [] + mlvl_scores = [] + mlvl_kps = [] + for cls_score, bbox_pred, kps_pred, stride, anchors in zip( + cls_scores, bbox_preds, kps_preds, + self.anchor_generator.strides, mlvl_anchors): + assert cls_score.size()[-2:] == bbox_pred.size()[-2:] + assert stride[0] == stride[1] + + scores = cls_score.permute(1, 2, 0).reshape( + -1, self.cls_out_channels).sigmoid() + bbox_pred = bbox_pred.permute(1, 2, 0) + if self.use_dfl: + bbox_pred = self.integral(bbox_pred) * stride[0] + else: + bbox_pred = bbox_pred.reshape((-1, 4)) * stride[0] + if kps_pred is not None: + kps_pred = kps_pred.permute(1, 2, 0) + if self.use_dfl: + kps_pred = self.integral(kps_pred) * stride[0] + else: + kps_pred = kps_pred.reshape((-1, self.NK * 2)) * stride[0] + + nms_pre = cfg.get('nms_pre', -1) + if nms_pre > 0 and scores.shape[0] > nms_pre: + max_scores, _ = scores.max(dim=1) + _, topk_inds = max_scores.topk(nms_pre) + anchors = anchors[topk_inds, :] + bbox_pred = bbox_pred[topk_inds, :] + scores = scores[topk_inds, :] + if kps_pred is not None: + kps_pred = kps_pred[topk_inds, :] + + bboxes = distance2bbox(self.anchor_center(anchors), + bbox_pred, + max_shape=img_shape) + mlvl_bboxes.append(bboxes) + mlvl_scores.append(scores) + if kps_pred is not None: + kps = distance2kps(self.anchor_center(anchors), kps_pred) + mlvl_kps.append(kps) + + mlvl_bboxes = torch.cat(mlvl_bboxes) + if mlvl_kps is not None: + mlvl_kps = torch.cat(mlvl_kps) + if rescale: + mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor) + if mlvl_kps is not None: + scale_factor2 = torch.tensor( + [scale_factor[0], scale_factor[1]] * self.NK) + mlvl_kps /= scale_factor2.to(mlvl_kps.device) + + mlvl_scores = torch.cat(mlvl_scores) + # Add a dummy background class to the backend when using sigmoid + # remind that we set FG labels to [0, num_class-1] since mmdet v2.0 + # BG cat_id: num_class + padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1) + mlvl_scores = torch.cat([mlvl_scores, padding], dim=1) + + if with_nms: + det_bboxes, det_labels, det_kps = multiclass_nms( + mlvl_bboxes, + mlvl_scores, + cfg.score_thr, + cfg.nms, + cfg.max_per_img, + multi_kps=mlvl_kps) + if det_kps is not None: + return det_bboxes, det_labels, det_kps + else: + return det_bboxes, det_labels + else: + if mlvl_kps is not None: + return mlvl_bboxes, mlvl_scores, mlvl_kps + else: + return mlvl_bboxes, mlvl_scores + + def get_targets(self, + anchor_list, + valid_flag_list, + gt_bboxes_list, + gt_keypointss_list, + img_metas, + gt_bboxes_ignore_list=None, + gt_labels_list=None, + label_channels=1, + unmap_outputs=True): + """Get targets for GFL head. + + This method is almost the same as `AnchorHead.get_targets()`. Besides + returning the targets as the parent method does, it also returns the + anchors as the first element of the returned tuple. 
+ """ + num_imgs = len(img_metas) + assert len(anchor_list) == len(valid_flag_list) == num_imgs + + # anchor number of multi levels + num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] + num_level_anchors_list = [num_level_anchors] * num_imgs + + # concat all level anchors and flags to a single tensor + for i in range(num_imgs): + assert len(anchor_list[i]) == len(valid_flag_list[i]) + anchor_list[i] = torch.cat(anchor_list[i]) + valid_flag_list[i] = torch.cat(valid_flag_list[i]) + + # compute targets for each image + if gt_bboxes_ignore_list is None: + gt_bboxes_ignore_list = [None for _ in range(num_imgs)] + if gt_labels_list is None: + gt_labels_list = [None for _ in range(num_imgs)] + if gt_keypointss_list is None: + gt_keypointss_list = [None for _ in range(num_imgs)] + (all_anchors, all_labels, all_label_weights, all_bbox_targets, + all_bbox_weights, all_keypoints_targets, all_keypoints_weights, + pos_inds_list, + neg_inds_list) = multi_apply(self._get_target_single, + anchor_list, + valid_flag_list, + num_level_anchors_list, + gt_bboxes_list, + gt_bboxes_ignore_list, + gt_labels_list, + gt_keypointss_list, + img_metas, + label_channels=label_channels, + unmap_outputs=unmap_outputs) + # no valid anchors + if any([labels is None for labels in all_labels]): + return None + # sampled anchors of all images + num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) + num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) + # split targets to a list w.r.t. multiple levels + anchors_list = images_to_levels(all_anchors, num_level_anchors) + labels_list = images_to_levels(all_labels, num_level_anchors) + label_weights_list = images_to_levels(all_label_weights, + num_level_anchors) + bbox_targets_list = images_to_levels(all_bbox_targets, + num_level_anchors) + bbox_weights_list = images_to_levels(all_bbox_weights, + num_level_anchors) + keypoints_targets_list = images_to_levels(all_keypoints_targets, + num_level_anchors) + keypoints_weights_list = images_to_levels(all_keypoints_weights, + num_level_anchors) + return (anchors_list, labels_list, label_weights_list, + bbox_targets_list, bbox_weights_list, keypoints_targets_list, + keypoints_weights_list, num_total_pos, num_total_neg) + + def _get_target_single(self, + flat_anchors, + valid_flags, + num_level_anchors, + gt_bboxes, + gt_bboxes_ignore, + gt_labels, + gt_keypointss, + img_meta, + label_channels=1, + unmap_outputs=True): + """Compute regression, classification targets for anchors in a single + image. + + Args: + flat_anchors (Tensor): Multi-level anchors of the image, which are + concatenated into a single tensor of shape (num_anchors, 4) + valid_flags (Tensor): Multi level valid flags of the image, + which are concatenated into a single tensor of + shape (num_anchors,). + num_level_anchors Tensor): Number of anchors of each scale level. + gt_bboxes (Tensor): Ground truth bboxes of the image, + shape (num_gts, 4). + gt_bboxes_ignore (Tensor): Ground truth bboxes to be + ignored, shape (num_ignored_gts, 4). + gt_labels (Tensor): Ground truth labels of each box, + shape (num_gts,). + img_meta (dict): Meta info of the image. + label_channels (int): Channel of label. + unmap_outputs (bool): Whether to map outputs back to the original + set of anchors. + + Returns: + tuple: N is the number of total anchors in the image. + anchors (Tensor): All anchors in the image with shape (N, 4). + labels (Tensor): Labels of all anchors in the image with shape + (N,). 
+ label_weights (Tensor): Label weights of all anchor in the + image with shape (N,). + bbox_targets (Tensor): BBox targets of all anchors in the + image with shape (N, 4). + bbox_weights (Tensor): BBox weights of all anchors in the + image with shape (N, 4). + pos_inds (Tensor): Indices of postive anchor with shape + (num_pos,). + neg_inds (Tensor): Indices of negative anchor with shape + (num_neg,). + """ + inside_flags = anchor_inside_flags(flat_anchors, valid_flags, + img_meta['img_shape'][:2], + self.train_cfg.allowed_border) + if not inside_flags.any(): + return (None, ) * 7 + # assign gt and sample anchors + anchors = flat_anchors[inside_flags, :] + + num_level_anchors_inside = self.get_num_level_anchors_inside( + num_level_anchors, inside_flags) + if self.assigner.__class__.__name__ == 'ATSSAssigner': + assign_result = self.assigner.assign(anchors, + num_level_anchors_inside, + gt_bboxes, gt_bboxes_ignore, + gt_labels) + else: + assign_result = self.assigner.assign(anchors, gt_bboxes, + gt_bboxes_ignore, gt_labels) + + sampling_result = self.sampler.sample(assign_result, anchors, + gt_bboxes) + + num_valid_anchors = anchors.shape[0] + bbox_targets = torch.zeros_like(anchors) + bbox_weights = torch.zeros_like(anchors) + kps_targets = anchors.new_zeros(size=(anchors.shape[0], self.NK * 2)) + kps_weights = anchors.new_zeros(size=(anchors.shape[0], self.NK * 2)) + labels = anchors.new_full((num_valid_anchors, ), + self.num_classes, + dtype=torch.long) + label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) + + pos_inds = sampling_result.pos_inds + neg_inds = sampling_result.neg_inds + if len(pos_inds) > 0: + pos_bbox_targets = sampling_result.pos_gt_bboxes + bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_weights[pos_inds, :] = 1.0 + if self.use_kps: + pos_assigned_gt_inds = sampling_result.pos_assigned_gt_inds + kps_targets[pos_inds, :] = gt_keypointss[ + pos_assigned_gt_inds, :, :2].reshape((-1, self.NK * 2)) + kps_weights[pos_inds, :] = torch.mean( + gt_keypointss[pos_assigned_gt_inds, :, 2], + dim=1, + keepdims=True) + if gt_labels is None: + # Only rpn gives gt_labels as None + # Foreground is the first class + labels[pos_inds] = 0 + else: + labels[pos_inds] = gt_labels[ + sampling_result.pos_assigned_gt_inds] + if self.train_cfg.pos_weight <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = self.train_cfg.pos_weight + if len(neg_inds) > 0: + label_weights[neg_inds] = 1.0 + + # map up to original set of anchors + if unmap_outputs: + num_total_anchors = flat_anchors.size(0) + anchors = unmap(anchors, num_total_anchors, inside_flags) + labels = unmap(labels, + num_total_anchors, + inside_flags, + fill=self.num_classes) + label_weights = unmap(label_weights, num_total_anchors, + inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) + if self.use_kps: + kps_targets = unmap(kps_targets, num_total_anchors, + inside_flags) + kps_weights = unmap(kps_weights, num_total_anchors, + inside_flags) + + return (anchors, labels, label_weights, bbox_targets, bbox_weights, + kps_targets, kps_weights, pos_inds, neg_inds) + + def get_num_level_anchors_inside(self, num_level_anchors, inside_flags): + split_inside_flags = torch.split(inside_flags, num_level_anchors) + num_level_anchors_inside = [ + int(flags.sum()) for flags in split_inside_flags + ] + return num_level_anchors_inside + + def aug_test(self, feats, img_metas, rescale=False): + """Test function 
with test time augmentation. + + Args: + feats (list[Tensor]): the outer list indicates test-time + augmentations and inner Tensor should have a shape NxCxHxW, + which contains features for all images in the batch. + img_metas (list[list[dict]]): the outer list indicates test-time + augs (multiscale, flip, etc.) and the inner list indicates + images in a batch. each dict has image information. + rescale (bool, optional): Whether to rescale the results. + Defaults to False. + + Returns: + list[ndarray]: bbox results of each class + """ + return self.aug_test_bboxes(feats, img_metas, rescale=rescale) diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py new file mode 100755 index 0000000..a9ee67d --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py @@ -0,0 +1,8 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors +""" +from .scrfd import SCRFD +from .tinymog import TinyMog + +__all__ = ['SCRFD', 'TinyMog'] diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py new file mode 100755 index 0000000..f240980 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py @@ -0,0 +1,149 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py +""" +import torch +from mmdet.models.builder import DETECTORS +from mmdet.models.detectors.single_stage import SingleStageDetector + +from ....mmdet_patch.core.bbox import bbox2result + + +@DETECTORS.register_module() +class SCRFD(SingleStageDetector): + def __init__(self, + backbone, + neck, + bbox_head, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(SCRFD, self).__init__(backbone, neck, bbox_head, train_cfg, + test_cfg, pretrained) + + def forward_train(self, + img, + img_metas, + gt_bboxes, + gt_labels, + gt_keypointss=None, + gt_bboxes_ignore=None): + """ + Args: + img (Tensor): Input images of shape (N, C, H, W). + Typically these should be mean centered and std scaled. + img_metas (list[dict]): A List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + :class:`mmdet.datasets.pipelines.Collect`. + gt_bboxes (list[Tensor]): Each item are the truth boxes for each + image in [tl_x, tl_y, br_x, br_y] format. + gt_labels (list[Tensor]): Class indices corresponding to each box + gt_bboxes_ignore (None | list[Tensor]): Specify which bounding + boxes can be ignored when computing the loss. + + Returns: + dict[str, Tensor]: A dictionary of loss components. + """ + super(SingleStageDetector, self).forward_train(img, img_metas) + x = self.extract_feat(img) + losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, + gt_labels, gt_keypointss, + gt_bboxes_ignore) + return losses + + def simple_test(self, + img, + img_metas, + rescale=False, + repeat_head=1, + output_kps_var=0, + output_results=1): + """Test function without test time augmentation. 
diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py
new file mode 100755
index 0000000..77017f2
--- /dev/null
+++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py
@@ -0,0 +1,147 @@
+"""
+The implementation here is modified based on insightface, originally MIT license and publicly available at
+https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py
+"""
+import numpy as np
+import torch
+from mmdet.models.builder import DETECTORS
+from mmdet.models.detectors.single_stage import SingleStageDetector
+
+from ....mmdet_patch.core.bbox import bbox2result
+
+
+@DETECTORS.register_module()
+class TinyMog(SingleStageDetector):
+    def __init__(self,
+                 backbone,
+                 neck,
+                 bbox_head,
+                 train_cfg=None,
+                 test_cfg=None,
+                 pretrained=None):
+        super(TinyMog, self).__init__(backbone, neck, bbox_head, train_cfg,
+                                      test_cfg, pretrained)
+
+    def forward_train(self,
+                      img,
+                      img_metas,
+                      gt_bboxes,
+                      gt_labels,
+                      gt_keypointss=None,
+                      gt_bboxes_ignore=None):
+        """
+        Args:
+            img 
(Tensor): Input images of shape (N, C, H, W). + Typically these should be mean centered and std scaled. + img_metas (list[dict]): A List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + :class:`mmdet.datasets.pipelines.Collect`. + gt_bboxes (list[Tensor]): Each item are the truth boxes for each + image in [tl_x, tl_y, br_x, br_y] format. + gt_labels (list[Tensor]): Class indices corresponding to each box + gt_bboxes_ignore (None | list[Tensor]): Specify which bounding + boxes can be ignored when computing the loss. + + Returns: + dict[str, Tensor]: A dictionary of loss components. + """ + super(SingleStageDetector, self).forward_train(img, img_metas) + x = self.extract_feat(img) + losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, + gt_labels, gt_keypointss, + gt_bboxes_ignore) + return losses + + def simple_test(self, + img, + img_metas, + rescale=False, + repeat_head=1, + output_kps_var=0, + output_results=1): + """Test function without test time augmentation. + + Args: + imgs (list[torch.Tensor]): List of multiple images + img_metas (list[dict]): List of image information. + rescale (bool, optional): Whether to rescale the results. + Defaults to False. + repeat_head (int): repeat inference times in head + output_kps_var (int): whether output kps var to calculate quality + output_results (int): 0: nothing 1: bbox 2: both bbox and kps + + Returns: + list[list[np.ndarray]]: BBox results of each image and classes. + The outer list corresponds to each image. The inner list + corresponds to each class. + """ + x = self.extract_feat(img) + assert repeat_head >= 1 + kps_out0 = [] + kps_out1 = [] + kps_out2 = [] + for i in range(repeat_head): + outs = self.bbox_head(x) + kps_out0 += [outs[2][0].detach().cpu().numpy()] + kps_out1 += [outs[2][1].detach().cpu().numpy()] + kps_out2 += [outs[2][2].detach().cpu().numpy()] + if output_kps_var: + var0 = np.var(np.vstack(kps_out0), axis=0).mean() + var1 = np.var(np.vstack(kps_out1), axis=0).mean() + var2 = np.var(np.vstack(kps_out2), axis=0).mean() + var = np.mean([var0, var1, var2]) + else: + var = None + + if output_results > 0: + if torch.onnx.is_in_onnx_export(): + cls_score, bbox_pred, kps_pred = outs + for c in cls_score: + print(c.shape) + for c in bbox_pred: + print(c.shape) + if self.bbox_head.use_kps: + for c in kps_pred: + print(c.shape) + return (cls_score, bbox_pred, kps_pred) + else: + return (cls_score, bbox_pred) + bbox_list = self.bbox_head.get_bboxes(*outs, + img_metas, + rescale=rescale) + + # return kps if use_kps + if len(bbox_list[0]) == 2: + bbox_results = [ + bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + for det_bboxes, det_labels in bbox_list + ] + elif len(bbox_list[0]) == 3: + if output_results == 2: + bbox_results = [ + bbox2result(det_bboxes, + det_labels, + self.bbox_head.num_classes, + kps=det_kps, + num_kps=self.bbox_head.NK) + for det_bboxes, det_labels, det_kps in bbox_list + ] + elif output_results == 1: + bbox_results = [ + bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + for det_bboxes, det_labels, _ in bbox_list + ] + else: + bbox_results = None + if var is not None: + return bbox_results, var + else: + return bbox_results + + def feature_test(self, img): + x = self.extract_feat(img) + outs = self.bbox_head(x) + return outs diff --git a/modelscope/models/cv/face_detection/scrfd/preprocessor.py 
b/modelscope/models/cv/face_detection/scrfd/preprocessor.py new file mode 100644 index 0000000..c2917a1 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/preprocessor.py @@ -0,0 +1,90 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Any, Dict, Union + +import numpy as np +from PIL import Image + +from modelscope.metainfo import Preprocessors +from modelscope.preprocessors import Preprocessor +from modelscope.preprocessors.builder import PREPROCESSORS +from modelscope.preprocessors.image import LoadImage +from modelscope.utils.constant import Fields, ModeKeys + + +@PREPROCESSORS.register_module(Fields.cv, + module_name=Preprocessors.object_detection_scrfd + ) +class SCRFDPreprocessor(Preprocessor): + def __init__(self, model_dir: str = None, mode: str = ModeKeys.INFERENCE): + """The base constructor for all the fill-mask preprocessors. + + Args: + model_dir (str): model directory to initialize some resource + mode: The mode for the preprocessor. + """ + super().__init__(mode) + pre_pipeline = [ + dict(type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.0), + dict(type='Normalize', + mean=[127.5, 127.5, 127.5], + std=[128.0, 128.0, 128.0], + to_rgb=False), + dict(type='Pad', size=(640, 640), pad_val=0), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ] + from mmdet.datasets.pipelines import Compose + self.pipeline = Compose(pre_pipeline) + + def __call__(self, data: Union[str, Dict], **kwargs) -> Dict[str, Any]: + """process the raw input data + Args: + data (str or dict): image path or data dict containing following info: + filename, ori_filename, img, img_shape, ori_shape, img_fields + Example: + >>> { + >>> "filename": "xxx.jpg" + >>> "ori_filename": "xxx.jpg", + >>> "img": np.ndarray, + >>> "img_shape": (300, 300, 3) + >>> "ori_shape": (300, 300, 3) + >>> "img_fields": "img" + >>> } + + Returns: + Dict[str, Any]: the preprocessed data + """ + if isinstance(data, str): + img = LoadImage.convert_to_ndarray(data) + img = img.astype(np.float32) + data_dict = {} + data_dict['filename'] = '' + data_dict['ori_filename'] = '' + data_dict['img'] = img + data_dict['img_shape'] = img.shape + data_dict['ori_shape'] = img.shape + data_dict['img_fields'] = ['img'] + elif isinstance(data, (np.ndarray, Image.Image)): + if isinstance(data, Image.Image): + data = LoadImage.convert_to_ndarray(data) + + data = data.astype(np.float32) + data_dict = {} + data_dict['filename'] = '' + data_dict['ori_filename'] = '' + data_dict['img'] = data + data_dict['img_shape'] = data.shape + data_dict['ori_shape'] = data.shape + data_dict['img_fields'] = ['img'] + + elif isinstance(data, dict): + data_dict = data + + return self.pipeline(data_dict) diff --git a/modelscope/models/cv/face_detection/scrfd/scrfd_detect.py b/modelscope/models/cv/face_detection/scrfd/scrfd_detect.py new file mode 100644 index 0000000..eff3149 --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/scrfd_detect.py @@ -0,0 +1,94 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
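The SCRFDPreprocessor pipeline above keeps the aspect ratio while resizing, normalizes the BGR pixels with mean 127.5 and std 128.0 (to_rgb=False), and zero-pads the result to 640x640 before collecting the tensor. A rough numpy equivalent of just the Normalize and Pad steps, assuming the image has already been resized to fit inside 640x640:

import numpy as np

def normalize_and_pad(img: np.ndarray, size: int = 640) -> np.ndarray:
    # img: HxWx3 float32 BGR image with H, W <= size
    out = (img - 127.5) / 128.0
    canvas = np.zeros((size, size, 3), dtype=np.float32)
    canvas[:out.shape[0], :out.shape[1], :] = out
    return canvas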
+import os.path as osp +from copy import deepcopy +from typing import Any, Dict, List, Union + +import numpy as np +import torch + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.outputs.cv_outputs import DetectionOutput +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + +__all__ = ['ScrfdDetect'] + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.scrfd) +@MODELS.register_module(Tasks.card_detection, module_name=Models.scrfd) +class ScrfdDetect(TorchModel): + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the face detection model from the `model_dir` path. + + Args: + model_dir (str): the model path. + """ + super().__init__(model_dir, *args, **kwargs) + from mmcv import Config + from mmcv.parallel import MMDataParallel + from mmcv.runner import load_checkpoint + from mmdet.models import build_detector + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets import RetinaFaceDataset + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines import RandomSquareCrop + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones import ResNetV1e + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads import SCRFDHead + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors import SCRFD + cfg_file = kwargs.get('config_file', 'mmcv_scrfd.py') + cfg = Config.fromfile(osp.join(model_dir, cfg_file)) + model_file = kwargs.get('model_file', ModelFile.TORCH_MODEL_BIN_FILE) + ckpt_path = osp.join(model_dir, model_file) + cfg.model.test_cfg.score_thr = kwargs.get('score_thr', 0.3) + detector = build_detector(cfg.model) + logger.info(f'loading model from {ckpt_path}') + load_checkpoint(detector, ckpt_path, map_location='cpu') + detector = MMDataParallel(detector, device_ids=[0]) + detector.eval() + self.detector = detector + logger.info('load model done') + + def forward( + self, img: Union[torch.Tensor, List[torch.Tensor]], + img_metas: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]] + ) -> DetectionOutput: + """ + Args: + img (`torch.Tensor` or `List[torch.Tensor]`): batched image tensor or list of + batched image tensor, shape of each tensor is [N, h, w, 3]. When input is + a list, each element is a different augmentation image to do multi-view + augmentation test. + img_metas (`List[List[Dict[str, Any]]]`): image meta info. 
+ + Return: + `:obj:DetectionOutput` + """ + if isinstance(img, torch.Tensor): + img = [img] + img_metas = [img_metas] + + result = self.detector(return_loss=False, + rescale=True, + img=img, + img_metas=img_metas, + output_results=2) + assert result is not None + result = result[0][0] + bboxes = result[:, :4] + kpss = result[:, 5:] + scores = result[:, 4] + return DetectionOutput(scores=scores, boxes=bboxes, keypoints=kpss) + + def postprocess(self, detection_out: DetectionOutput, + **kwargs) -> Dict[str, Any]: + scores = detection_out['scores'].tolist() + boxes = detection_out['boxes'].tolist() + kpss = detection_out['keypoints'].tolist() + return { + OutputKeys.SCORES: scores, + OutputKeys.BOXES: boxes, + OutputKeys.KEYPOINTS: kpss + } diff --git a/modelscope/models/cv/face_detection/scrfd/tinymog_detect.py b/modelscope/models/cv/face_detection/scrfd/tinymog_detect.py new file mode 100644 index 0000000..f95d34a --- /dev/null +++ b/modelscope/models/cv/face_detection/scrfd/tinymog_detect.py @@ -0,0 +1,31 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from copy import deepcopy +from typing import Any, Dict + +import torch + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from .scrfd_detect import ScrfdDetect + +logger = get_logger() + +__all__ = ['TinyMogDetect'] + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.tinymog) +class TinyMogDetect(ScrfdDetect): + def __init__(self, model_dir, *args, **kwargs): + """ + initialize the tinymog face detection model from the `model_dir` path. + """ + config_file = 'mmcv_tinymog.py' + kwargs['config_file'] = config_file + kwargs['model_file'] = ModelFile.TORCH_MODEL_FILE + super().__init__(model_dir, **kwargs) diff --git a/modelscope/models/cv/face_detection/ulfd_slim/__init__.py b/modelscope/models/cv/face_detection/ulfd_slim/__init__.py new file mode 100644 index 0000000..af1e7b4 --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
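With output_results=2, every row of the array that ScrfdDetect.forward unpacks above holds the box corners, the confidence and the flattened keypoints, which is why the code slices columns [:4], [4] and [5:]. A small illustration of that layout, assuming the usual five facial keypoints (NK = 5):

import numpy as np

# One hypothetical detection row: x1, y1, x2, y2, score, then five (x, y) keypoints.
det = np.array([10., 20., 110., 140., 0.92,
                40., 60., 80., 60., 60., 80., 45., 110., 75., 110.])
box, score, kps = det[:4], det[4], det[5:].reshape(-1, 2)
print(box.shape, float(score), kps.shape)  # -> (4,) 0.92 (5, 2)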
+from .detection import UlfdFaceDetector diff --git a/modelscope/models/cv/face_detection/ulfd_slim/detection.py b/modelscope/models/cv/face_detection/ulfd_slim/detection.py new file mode 100755 index 0000000..a619eea --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/detection.py @@ -0,0 +1,44 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +import os + +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.nn.functional as F + +from modelscope.metainfo import Models +from modelscope.models.base import Tensor, TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.constant import ModelFile, Tasks + +from .vision.ssd.fd_config import define_img_size +from .vision.ssd.mb_tiny_fd import (create_mb_tiny_fd, + create_mb_tiny_fd_predictor) + +define_img_size(640) + + +@MODELS.register_module(Tasks.face_detection, module_name=Models.ulfd) +class UlfdFaceDetector(TorchModel): + def __init__(self, model_path, device='cuda'): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.device = device + self.net = create_mb_tiny_fd(2, is_test=True, device=device) + self.predictor = create_mb_tiny_fd_predictor(self.net, + candidate_size=1500, + device=device) + self.net.load(model_path) + self.net = self.net.to(device) + + def forward(self, input): + img_raw = input['img'] + img = np.array(img_raw.cpu().detach()) + img = img[:, :, ::-1] + prob_th = 0.85 + keep_top_k = 750 + boxes, labels, probs = self.predictor.predict(img, keep_top_k, prob_th) + return boxes, probs diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/__init__.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/box_utils.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/box_utils.py new file mode 100644 index 0000000..7f095f3 --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/box_utils.py @@ -0,0 +1,126 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +import math + +import torch + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + """ + + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + top_k: keep top_k results. If k <= 0, keep all the results. + candidate_size: only consider the candidates with the highest scores. 
+ Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + _, indexes = scores.sort(descending=True) + indexes = indexes[:candidate_size] + while len(indexes) > 0: + current = indexes[0] + picked.append(current.item()) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[1:] + rest_boxes = boxes[indexes, :] + iou = iou_of( + rest_boxes, + current_box.unsqueeze(0), + ) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +def nms(box_scores, + nms_method=None, + score_threshold=None, + iou_threshold=None, + sigma=0.5, + top_k=-1, + candidate_size=200): + return hard_nms(box_scores, + iou_threshold, + top_k, + candidate_size=candidate_size) + + +def generate_priors(feature_map_list, + shrinkage_list, + image_size, + min_boxes, + clamp=True) -> torch.Tensor: + priors = [] + for index in range(0, len(feature_map_list[0])): + scale_w = image_size[0] / shrinkage_list[0][index] + scale_h = image_size[1] / shrinkage_list[1][index] + for j in range(0, feature_map_list[1][index]): + for i in range(0, feature_map_list[0][index]): + x_center = (i + 0.5) / scale_w + y_center = (j + 0.5) / scale_h + + for min_box in min_boxes[index]: + w = min_box / image_size[0] + h = min_box / image_size[1] + priors.append([x_center, y_center, w, h]) + priors = torch.tensor(priors) + if clamp: + torch.clamp(priors, 0.0, 1.0, out=priors) + return priors + + +def convert_locations_to_boxes(locations, priors, center_variance, + size_variance): + # priors can have one dimension less. + if priors.dim() + 1 == locations.dim(): + priors = priors.unsqueeze(0) + a = locations[..., :2] * center_variance * priors[..., + 2:] + priors[..., :2] + b = torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:] + + return torch.cat([a, b], dim=locations.dim() - 1) + + +def center_form_to_corner_form(locations): + a = locations[..., :2] - locations[..., 2:] / 2 + b = locations[..., :2] + locations[..., 2:] / 2 + return torch.cat([a, b], locations.dim() - 1) + + +def iou_of(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. + Returns: + iou (N): IoU values. + """ + overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def area_of(left_top, right_bottom) -> torch.Tensor: + """Compute the areas of rectangles given two corners. + + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + + Returns: + area (N): return the area. 
+ """ + hw = torch.clamp(right_bottom - left_top, min=0.0) + return hw[..., 0] * hw[..., 1] diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py new file mode 100644 index 0000000..3e7604e --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py @@ -0,0 +1,47 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +import torch.nn as nn +import torch.nn.functional as F + + +class Mb_Tiny(nn.Module): + def __init__(self, num_classes=2): + super(Mb_Tiny, self).__init__() + self.base_channel = 8 * 2 + + def conv_bn(inp, oup, stride): + return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), nn.ReLU(inplace=True)) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(3, self.base_channel, 2), # 160*120 + conv_dw(self.base_channel, self.base_channel * 2, 1), + conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 + conv_dw(self.base_channel * 2, self.base_channel * 2, 1), + conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 + conv_dw(self.base_channel * 4, self.base_channel * 4, 1), + conv_dw(self.base_channel * 4, self.base_channel * 4, 1), + conv_dw(self.base_channel * 4, self.base_channel * 4, 1), + conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 + conv_dw(self.base_channel * 8, self.base_channel * 8, 1), + conv_dw(self.base_channel * 8, self.base_channel * 8, 1), + conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 + conv_dw(self.base_channel * 16, self.base_channel * 16, 1)) + self.fc = nn.Linear(1024, num_classes) + + def forward(self, x): + x = self.model(x) + x = F.avg_pool2d(x, 7) + x = x.view(-1, 1024) + x = self.fc(x) + return x diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/__init__.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py new file mode 100644 index 0000000..04b8297 --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py @@ -0,0 +1,17 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +from ..transforms import Compose, Resize, SubtractMeans, ToTensor + + +class PredictionTransform: + def __init__(self, size, mean=0.0, std=1.0): + self.transform = Compose([ + Resize(size), + SubtractMeans(mean), lambda img, boxes=None, labels=None: + (img / std, boxes, labels), + ToTensor() + ]) + + def __call__(self, image): + image, _, _ = self.transform(image) + return image diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py new file mode 100644 index 0000000..495a2fc --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py @@ -0,0 +1,49 @@ +# The implementation is based on ULFD, available at +# 
https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +import numpy as np + +from ..box_utils import generate_priors + +image_mean_test = image_mean = np.array([127, 127, 127]) +image_std = 128.0 +iou_threshold = 0.3 +center_variance = 0.1 +size_variance = 0.2 + +min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] +shrinkage_list = [] +image_size = [320, 240] # default input size 320*240 +feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, + 4]] # default feature map size +priors = [] + + +def define_img_size(size): + global image_size, feature_map_w_h_list, priors + img_size_dict = { + 128: [128, 96], + 160: [160, 120], + 320: [320, 240], + 480: [480, 360], + 640: [640, 480], + 1280: [1280, 960] + } + image_size = img_size_dict[size] + + feature_map_w_h_list_dict = { + 128: [[16, 8, 4, 2], [12, 6, 3, 2]], + 160: [[20, 10, 5, 3], [15, 8, 4, 2]], + 320: [[40, 20, 10, 5], [30, 15, 8, 4]], + 480: [[60, 30, 15, 8], [45, 23, 12, 6]], + 640: [[80, 40, 20, 10], [60, 30, 15, 8]], + 1280: [[160, 80, 40, 20], [120, 60, 30, 15]] + } + feature_map_w_h_list = feature_map_w_h_list_dict[size] + + for i in range(0, len(image_size)): + item_list = [] + for k in range(0, len(feature_map_w_h_list[i])): + item_list.append(image_size[i] / feature_map_w_h_list[i][k]) + shrinkage_list.append(item_list) + priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, + min_boxes) diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py new file mode 100644 index 0000000..c0c43ff --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py @@ -0,0 +1,112 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +from torch.nn import Conv2d, ModuleList, ReLU, Sequential + +from ..mb_tiny import Mb_Tiny +from . import fd_config as config +from .predictor import Predictor +from .ssd import SSD + + +def SeperableConv2d(in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0): + """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
+ """ + return Sequential( + Conv2d(in_channels=in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + groups=in_channels, + stride=stride, + padding=padding), + ReLU(), + Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=1), + ) + + +def create_mb_tiny_fd(num_classes, is_test=False, device='cuda'): + base_net = Mb_Tiny(2) + base_net_model = base_net.model # disable dropout layer + + source_layer_indexes = [8, 11, 13] + extras = ModuleList([ + Sequential( + Conv2d(in_channels=base_net.base_channel * 16, + out_channels=base_net.base_channel * 4, + kernel_size=1), ReLU(), + SeperableConv2d(in_channels=base_net.base_channel * 4, + out_channels=base_net.base_channel * 16, + kernel_size=3, + stride=2, + padding=1), ReLU()) + ]) + + regression_headers = ModuleList([ + SeperableConv2d(in_channels=base_net.base_channel * 4, + out_channels=3 * 4, + kernel_size=3, + padding=1), + SeperableConv2d(in_channels=base_net.base_channel * 8, + out_channels=2 * 4, + kernel_size=3, + padding=1), + SeperableConv2d(in_channels=base_net.base_channel * 16, + out_channels=2 * 4, + kernel_size=3, + padding=1), + Conv2d(in_channels=base_net.base_channel * 16, + out_channels=3 * 4, + kernel_size=3, + padding=1) + ]) + + classification_headers = ModuleList([ + SeperableConv2d(in_channels=base_net.base_channel * 4, + out_channels=3 * num_classes, + kernel_size=3, + padding=1), + SeperableConv2d(in_channels=base_net.base_channel * 8, + out_channels=2 * num_classes, + kernel_size=3, + padding=1), + SeperableConv2d(in_channels=base_net.base_channel * 16, + out_channels=2 * num_classes, + kernel_size=3, + padding=1), + Conv2d(in_channels=base_net.base_channel * 16, + out_channels=3 * num_classes, + kernel_size=3, + padding=1) + ]) + + return SSD(num_classes, + base_net_model, + source_layer_indexes, + extras, + classification_headers, + regression_headers, + is_test=is_test, + config=config, + device=device) + + +def create_mb_tiny_fd_predictor(net, + candidate_size=200, + nms_method=None, + sigma=0.5, + device=None): + predictor = Predictor(net, + config.image_size, + config.image_mean_test, + config.image_std, + nms_method=nms_method, + iou_threshold=config.iou_threshold, + candidate_size=candidate_size, + sigma=sigma, + device=device) + return predictor diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py new file mode 100644 index 0000000..c1e6757 --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py @@ -0,0 +1,77 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +import torch + +from .. 
import box_utils +from .data_preprocessing import PredictionTransform + + +class Predictor: + def __init__(self, + net, + size, + mean=0.0, + std=1.0, + nms_method=None, + iou_threshold=0.3, + filter_threshold=0.85, + candidate_size=200, + sigma=0.5, + device=None): + self.net = net + self.transform = PredictionTransform(size, mean, std) + self.iou_threshold = iou_threshold + self.filter_threshold = filter_threshold + self.candidate_size = candidate_size + self.nms_method = nms_method + + self.sigma = sigma + if device: + self.device = device + else: + self.device = torch.device( + 'cuda:0' if torch.cuda.is_available() else 'cpu') + + self.net.to(self.device) + self.net.eval() + + def predict(self, image, top_k=-1, prob_threshold=None): + height, width, _ = image.shape + image = self.transform(image) + images = image.unsqueeze(0) + images = images.to(self.device) + with torch.no_grad(): + scores, boxes = self.net.forward(images) + boxes = boxes[0] + scores = scores[0] + if not prob_threshold: + prob_threshold = self.filter_threshold + # this version of nms is slower on GPU, so we move data to CPU. + picked_box_probs = [] + picked_labels = [] + for class_index in range(1, scores.size(1)): + probs = scores[:, class_index] + mask = probs > prob_threshold + probs = probs[mask] + if probs.size(0) == 0: + continue + subset_boxes = boxes[mask, :] + box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) + box_probs = box_utils.nms(box_probs, + self.nms_method, + score_threshold=prob_threshold, + iou_threshold=self.iou_threshold, + sigma=self.sigma, + top_k=top_k, + candidate_size=self.candidate_size) + picked_box_probs.append(box_probs) + picked_labels.extend([class_index] * box_probs.size(0)) + if not picked_box_probs: + return torch.tensor([]), torch.tensor([]), torch.tensor([]) + picked_box_probs = torch.cat(picked_box_probs) + picked_box_probs[:, 0] *= width + picked_box_probs[:, 1] *= height + picked_box_probs[:, 2] *= width + picked_box_probs[:, 3] *= height + return picked_box_probs[:, :4], torch.tensor( + picked_labels), picked_box_probs[:, 4] diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py new file mode 100644 index 0000000..f50b425 --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py @@ -0,0 +1,128 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +from collections import namedtuple +from typing import List, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .. import box_utils + +GraphPath = namedtuple('GraphPath', ['s0', 'name', 's1']) + + +class SSD(nn.Module): + def __init__(self, + num_classes: int, + base_net: nn.ModuleList, + source_layer_indexes: List[int], + extras: nn.ModuleList, + classification_headers: nn.ModuleList, + regression_headers: nn.ModuleList, + is_test=False, + config=None, + device=None): + """Compose a SSD model using the given components. 
+ """ + super(SSD, self).__init__() + + self.num_classes = num_classes + self.base_net = base_net + self.source_layer_indexes = source_layer_indexes + self.extras = extras + self.classification_headers = classification_headers + self.regression_headers = regression_headers + self.is_test = is_test + self.config = config + + # register layers in source_layer_indexes by adding them to a module list + self.source_layer_add_ons = nn.ModuleList([ + t[1] for t in source_layer_indexes + if isinstance(t, tuple) and not isinstance(t, GraphPath) + ]) + if device: + self.device = device + else: + self.device = torch.device( + 'cuda:0' if torch.cuda.is_available() else 'cpu') + if is_test: + self.config = config + self.priors = config.priors.to(self.device) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + confidences = [] + locations = [] + start_layer_index = 0 + header_index = 0 + end_layer_index = 0 + for end_layer_index in self.source_layer_indexes: + if isinstance(end_layer_index, GraphPath): + path = end_layer_index + end_layer_index = end_layer_index.s0 + added_layer = None + elif isinstance(end_layer_index, tuple): + added_layer = end_layer_index[1] + end_layer_index = end_layer_index[0] + path = None + else: + added_layer = None + path = None + for layer in self.base_net[start_layer_index:end_layer_index]: + x = layer(x) + if added_layer: + y = added_layer(x) + else: + y = x + if path: + sub = getattr(self.base_net[end_layer_index], path.name) + for layer in sub[:path.s1]: + x = layer(x) + y = x + for layer in sub[path.s1:]: + x = layer(x) + end_layer_index += 1 + start_layer_index = end_layer_index + confidence, location = self.compute_header(header_index, y) + header_index += 1 + confidences.append(confidence) + locations.append(location) + + for layer in self.base_net[end_layer_index:]: + x = layer(x) + + for layer in self.extras: + x = layer(x) + confidence, location = self.compute_header(header_index, x) + header_index += 1 + confidences.append(confidence) + locations.append(location) + + confidences = torch.cat(confidences, 1) + locations = torch.cat(locations, 1) + + if self.is_test: + confidences = F.softmax(confidences, dim=2) + boxes = box_utils.convert_locations_to_boxes( + locations, self.priors, self.config.center_variance, + self.config.size_variance) + boxes = box_utils.center_form_to_corner_form(boxes) + return confidences, boxes + else: + return confidences, locations + + def compute_header(self, i, x): + confidence = self.classification_headers[i](x) + confidence = confidence.permute(0, 2, 3, 1).contiguous() + confidence = confidence.view(confidence.size(0), -1, self.num_classes) + + location = self.regression_headers[i](x) + location = location.permute(0, 2, 3, 1).contiguous() + location = location.view(location.size(0), -1, 4) + + return confidence, location + + def load(self, model): + self.load_state_dict( + torch.load(model, map_location=lambda storage, loc: storage)) diff --git a/modelscope/models/cv/face_detection/ulfd_slim/vision/transforms.py b/modelscope/models/cv/face_detection/ulfd_slim/vision/transforms.py new file mode 100644 index 0000000..28e747a --- /dev/null +++ b/modelscope/models/cv/face_detection/ulfd_slim/vision/transforms.py @@ -0,0 +1,52 @@ +# The implementation is based on ULFD, available at +# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB +import types + +import cv2 +import numpy as np +import torch +from numpy import random + + +class Compose(object): + """Composes several augmentations 
together. + Args: + transforms (List[Transform]): list of transforms to compose. + Example: + >>> augmentations.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, boxes=None, labels=None): + for t in self.transforms: + img, boxes, labels = t(img, boxes, labels) + return img, boxes, labels + + +class SubtractMeans(object): + def __init__(self, mean): + self.mean = np.array(mean, dtype=np.float32) + + def __call__(self, image, boxes=None, labels=None): + image = image.astype(np.float32) + image -= self.mean + return image.astype(np.float32), boxes, labels + + +class Resize(object): + def __init__(self, size=(300, 300)): + self.size = size + + def __call__(self, image, boxes=None, labels=None): + image = cv2.resize(image, (self.size[0], self.size[1])) + return image, boxes, labels + + +class ToTensor(object): + def __call__(self, cvimage, boxes=None, labels=None): + return torch.from_numpy(cvimage.astype(np.float32)).permute( + 2, 0, 1), boxes, labels diff --git a/modelscope/models/cv/face_recognition/__init__.py b/modelscope/models/cv/face_recognition/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/models/cv/face_recognition/align_face.py b/modelscope/models/cv/face_recognition/align_face.py new file mode 100644 index 0000000..b5fa95b --- /dev/null +++ b/modelscope/models/cv/face_recognition/align_face.py @@ -0,0 +1,54 @@ +""" +The implementation here is modified based on insightface, originally MIT license and publicly available at +https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py +""" +import cv2 +import numpy as np +from skimage import transform as trans + + +def align_face(image, size, lmks): + dst_w = size[1] + dst_h = size[0] + # landmark calculation of dst images + base_w = 96 + base_h = 112 + assert (dst_w >= base_w) + assert (dst_h >= base_h) + base_lmk = [ + 30.2946, 51.6963, 65.5318, 51.5014, 48.0252, 71.7366, 33.5493, 92.3655, + 62.7299, 92.2041 + ] + + dst_lmk = np.array(base_lmk).reshape((5, 2)).astype(np.float32) + if dst_w != base_w: + slide = (dst_w - base_w) / 2 + dst_lmk[:, 0] += slide + + if dst_h != base_h: + slide = (dst_h - base_h) / 2 + dst_lmk[:, 1] += slide + + src_lmk = lmks + # using skimage method + tform = trans.SimilarityTransform() + tform.estimate(src_lmk, dst_lmk) + t = tform.params[0:2, :] + + assert (image.shape[2] == 3) + + dst_image = cv2.warpAffine(image.copy(), t, (dst_w, dst_h)) + dst_pts = GetAffinePoints(src_lmk, t) + return dst_image, dst_pts + + +def GetAffinePoints(pts_in, trans): + pts_out = pts_in.copy() + assert (pts_in.shape[1] == 2) + + for k in range(pts_in.shape[0]): + pts_out[k, 0] = pts_in[k, 0] * trans[0, 0] + pts_in[k, 1] * trans[ + 0, 1] + trans[0, 2] + pts_out[k, 1] = pts_in[k, 0] * trans[1, 0] + pts_in[k, 1] * trans[ + 1, 1] + trans[1, 2] + return pts_out diff --git a/modelscope/models/cv/face_recognition/torchkit/__init__.py b/modelscope/models/cv/face_recognition/torchkit/__init__.py new file mode 100755 index 0000000..ab12c55 --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. 
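align_face above estimates a similarity transform that maps the five detected landmarks onto a canonical 96x112 template (shifted when a larger crop is requested) and warps the image accordingly, which is the usual preprocessing for the recognition backbones that follow. A hypothetical call with made-up landmark coordinates in left-eye, right-eye, nose, mouth-corner order:

import numpy as np

from modelscope.models.cv.face_recognition.align_face import align_face

image = np.zeros((160, 160, 3), dtype=np.uint8)  # placeholder BGR face crop
lmks = np.array([[55., 65.], [105., 65.], [80., 95.],
                 [60., 120.], [100., 120.]], dtype=np.float32)
aligned, warped_pts = align_face(image, (112, 112), lmks)
print(aligned.shape)  # -> (112, 112, 3)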
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .rts_backbone import RTSBackbone +else: + _import_structure = {'rts_backbone': ['RTSBackbone']} + import sys + sys.modules[__name__] = LazyImportModule(__name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__) diff --git a/modelscope/models/cv/face_recognition/torchkit/backbone/__init__.py b/modelscope/models/cv/face_recognition/torchkit/backbone/__init__.py new file mode 100755 index 0000000..afe8996 --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/backbone/__init__.py @@ -0,0 +1,33 @@ +# The implementation is adopted from TFace,made pubicly available under the Apache-2.0 license at +# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone +from .model_irse import (IR_18, IR_34, IR_50, IR_101, IR_152, IR_200, IR_SE_50, + IR_SE_101, IR_SE_152, IR_SE_200) +from .model_resnet import ResNet_50, ResNet_101, ResNet_152 + +_model_dict = { + 'ResNet_50': ResNet_50, + 'ResNet_101': ResNet_101, + 'ResNet_152': ResNet_152, + 'IR_18': IR_18, + 'IR_34': IR_34, + 'IR_50': IR_50, + 'IR_101': IR_101, + 'IR_152': IR_152, + 'IR_200': IR_200, + 'IR_SE_50': IR_SE_50, + 'IR_SE_101': IR_SE_101, + 'IR_SE_152': IR_SE_152, + 'IR_SE_200': IR_SE_200 +} + + +def get_model(key): + """ Get different backbone network by key, + support ResNet50, ResNet_101, ResNet_152 + IR_18, IR_34, IR_50, IR_101, IR_152, IR_200, + IR_SE_50, IR_SE_101, IR_SE_152, IR_SE_200. + """ + if key in _model_dict.keys(): + return _model_dict[key] + else: + raise KeyError('not support model {}'.format(key)) diff --git a/modelscope/models/cv/face_recognition/torchkit/backbone/arcface_backbone.py b/modelscope/models/cv/face_recognition/torchkit/backbone/arcface_backbone.py new file mode 100644 index 0000000..0b92a60 --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/backbone/arcface_backbone.py @@ -0,0 +1,202 @@ +# The implementation is adopted from TFace,made pubicly available under the Apache-2.0 license at +# https://github.com/deepinsight/insightface/blob/master/recognition/arcface_torch/backbones/iresnet.py +import torch +from torch import nn +from torch.utils.checkpoint import checkpoint + +using_ckpt = False + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + bias=False, + dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=1, + stride=stride, + bias=False) + + +class IBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1): + super(IBasicBlock, self).__init__() + if groups != 1 or base_width != 64: + raise ValueError( + 'BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError( + 'Dilation > 1 not supported in BasicBlock') + self.bn1 = nn.BatchNorm2d( + inplanes, + eps=1e-05, + ) + self.conv1 = conv3x3(inplanes, planes) + self.bn2 = nn.BatchNorm2d( + planes, + eps=1e-05, + ) + self.prelu = nn.PReLU(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn3 = nn.BatchNorm2d( + planes, + eps=1e-05, + ) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + out = self.bn1(x) + out = 
self.conv1(out) + out = self.bn2(out) + out = self.prelu(out) + out = self.conv2(out) + out = self.bn3(out) + if self.downsample is not None: + identity = self.downsample(x) + out += identity + return out + + +class IResNet(nn.Module): + fc_scale = 7 * 7 + + def __init__(self, + block, + layers, + dropout=0, + num_features=512, + zero_init_residual=False, + groups=1, + width_per_group=64, + replace_stride_with_dilation=None, + fp16=False): + super(IResNet, self).__init__() + self.extra_gflops = 0.0 + self.fp16 = fp16 + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError('replace_stride_with_dilation should be None ' + 'or a 3-element tuple, got {}'.format( + replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, + self.inplanes, + kernel_size=3, + stride=1, + padding=1, + bias=False) + self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05) + self.prelu = nn.PReLU(self.inplanes) + self.layer1 = self._make_layer(block, 64, layers[0], stride=2) + self.layer2 = self._make_layer(block, + 128, + layers[1], + stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, + 256, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, + 512, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2]) + self.bn2 = nn.BatchNorm2d( + 512 * block.expansion, + eps=1e-05, + ) + self.dropout = nn.Dropout(p=dropout, inplace=True) + self.fc = nn.Linear(512 * block.expansion * self.fc_scale, + num_features) + self.features = nn.BatchNorm1d(num_features, eps=1e-05) + nn.init.constant_(self.features.weight, 1.0) + self.features.weight.requires_grad = False + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.normal_(m.weight, 0, 0.1) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, IBasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d( + planes * block.expansion, + eps=1e-05, + ), + ) + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation)) + + return nn.Sequential(*layers) + + def forward(self, x): + with torch.cuda.amp.autocast(self.fp16): + x = self.conv1(x) + x = self.bn1(x) + x = self.prelu(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.bn2(x) + x = torch.flatten(x, 1) + x = self.dropout(x) + x = self.fc(x.float() if self.fp16 else x) + x = self.features(x) + return x + + +def _iresnet(arch, layers): + model = IResNet(IBasicBlock, layers) + return model diff --git a/modelscope/models/cv/face_recognition/torchkit/backbone/common.py 
b/modelscope/models/cv/face_recognition/torchkit/backbone/common.py new file mode 100755 index 0000000..8c5da7d --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/backbone/common.py @@ -0,0 +1,68 @@ +# The implementation is adopted from TFace,made pubicly available under the Apache-2.0 license at +# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/common.py +import torch +import torch.nn as nn +from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Linear, Module, ReLU, + Sigmoid) + + +def initialize_weights(modules): + """ Weight initilize, conv2d and linear is initialized with kaiming_normal + """ + for m in modules: + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, + mode='fan_out', + nonlinearity='relu') + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + nn.init.kaiming_normal_(m.weight, + mode='fan_out', + nonlinearity='relu') + if m.bias is not None: + m.bias.data.zero_() + + +class Flatten(Module): + """ Flat tensor + """ + def forward(self, input): + return input.view(input.size(0), -1) + + +class SEModule(Module): + """ SE block + """ + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = Conv2d(channels, + channels // reduction, + kernel_size=1, + padding=0, + bias=False) + + nn.init.xavier_uniform_(self.fc1.weight.data) + + self.relu = ReLU(inplace=True) + self.fc2 = Conv2d(channels // reduction, + channels, + kernel_size=1, + padding=0, + bias=False) + + self.sigmoid = Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + + return module_input * x diff --git a/modelscope/models/cv/face_recognition/torchkit/backbone/facemask_backbone.py b/modelscope/models/cv/face_recognition/torchkit/backbone/facemask_backbone.py new file mode 100644 index 0000000..1184d71 --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/backbone/facemask_backbone.py @@ -0,0 +1,206 @@ +# The implementation is adopted from InsightFace, made pubicly available under the Apache-2.0 license at +# https://github.com/TreB1eN/InsightFace_Pytorch/blob/master/model.py + +from collections import namedtuple + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn import (AdaptiveAvgPool2d, AvgPool2d, BatchNorm1d, BatchNorm2d, + Conv2d, Dropout, Dropout2d, Linear, MaxPool2d, Module, + Parameter, PReLU, ReLU, Sequential, Sigmoid) + + +class Flatten(Module): + def forward(self, input): + return input.view(input.size(0), -1) + + +class SEModule(Module): + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = AdaptiveAvgPool2d(1) + self.fc1 = Conv2d(channels, + channels // reduction, + kernel_size=1, + padding=0, + bias=False) + self.relu = ReLU(inplace=True) + self.fc2 = Conv2d(channels // reduction, + channels, + kernel_size=1, + padding=0, + bias=False) + self.sigmoid = Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x + + +class BottleneckIR(Module): + def __init__(self, in_channel, depth, stride): + super(BottleneckIR, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + 
Conv2d(in_channel, depth, (1, 1), stride, bias=False), + BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), + PReLU(depth), Conv2d(depth, depth, (3, 3), stride, 1, bias=False), + BatchNorm2d(depth)) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + return res + shortcut + + +class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): + '''A named tuple describing a ResNet block.''' + + +def get_block(in_channel, depth, num_units, stride=2): + return [Bottleneck(in_channel, depth, stride) + ] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] + + +def get_blocks(num_layers): + if num_layers == 50: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=4), + get_block(in_channel=128, depth=256, num_units=14), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 100: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=13), + get_block(in_channel=128, depth=256, num_units=30), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 152: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=8), + get_block(in_channel=128, depth=256, num_units=36), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 252: + blocks = [ + get_block(in_channel=64, depth=64, num_units=6), + get_block(in_channel=64, depth=128, num_units=21), + get_block(in_channel=128, depth=256, num_units=66), + get_block(in_channel=256, depth=512, num_units=6) + ] + return blocks + + +class IResNet(Module): + def __init__(self, + dropout=0, + num_features=512, + zero_init_residual=False, + groups=1, + width_per_group=64, + replace_stride_with_dilation=None, + fp16=False, + with_wcd=False, + wrs_M=400, + wrs_q=0.9): + super(IResNet, self).__init__() + num_layers = 252 + mode = 'ir' + assert num_layers in [50, 100, 152, + 252], 'num_layers should be 50,100, or 152' + assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' + self.fc_scale = 7 * 7 + num_features = 512 + self.fp16 = fp16 + drop_ratio = 0.0 + self.with_wcd = with_wcd + if self.with_wcd: + self.wrs_M = wrs_M + self.wrs_q = wrs_q + blocks = get_blocks(num_layers) + if mode == 'ir': + unit_module = BottleneckIR + self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), + BatchNorm2d(64), PReLU(64)) + self.bn2 = nn.BatchNorm2d( + 512, + eps=1e-05, + ) + self.dropout = nn.Dropout(p=drop_ratio, inplace=True) + self.fc = nn.Linear(512 * self.fc_scale, num_features) + self.features = nn.BatchNorm1d(num_features, eps=1e-05) + nn.init.constant_(self.features.weight, 1.0) + self.features.weight.requires_grad = False + + modules = [] + for block in blocks: + for bottleneck in block: + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) + self.body = Sequential(*modules) + + def forward(self, x): + with torch.cuda.amp.autocast(self.fp16): + x = self.input_layer(x) + x = self.body(x) + x = self.bn2(x) + if self.with_wcd: + B = x.size()[0] + C = x.size()[1] + x_abs = torch.abs(x) + score = torch.nn.functional.adaptive_avg_pool2d(x_abs, + 1).reshape( + (B, C)) + r = torch.rand((B, C), device=x.device) + key = torch.pow(r, 1. 
/ score) + _, topidx = torch.topk(key, self.wrs_M, dim=1) + mask = torch.zeros_like(key, dtype=torch.float32) + mask.scatter_(1, topidx, 1.) + maskq = torch.rand((B, C), device=x.device) + maskq_ones = torch.ones_like(maskq, dtype=torch.float32) + maskq_zeros = torch.zeros_like(maskq, dtype=torch.float32) + maskq_m = torch.where(maskq < self.wrs_q, maskq_ones, + maskq_zeros) + new_mask = mask * maskq_m + score_sum = torch.sum(score, dim=1, keepdim=True) + selected_score_sum = torch.sum(new_mask * score, + dim=1, + keepdim=True) + alpha = score_sum / (selected_score_sum + 1e-6) + alpha = alpha.reshape((B, 1, 1, 1)) + new_mask = new_mask.reshape((B, C, 1, 1)) + x = x * new_mask * alpha + x = torch.flatten(x, 1) + x = self.dropout(x) + x = self.fc(x.float() if self.fp16 else x) + x = self.features(x) + return x + + +def iresnet286(pretrained=False, progress=True, **kwargs): + model = IResNet(dropout=0, + num_features=512, + zero_init_residual=False, + groups=1, + width_per_group=64, + replace_stride_with_dilation=None, + fp16=False, + with_wcd=False, + wrs_M=400, + wrs_q=0.9) + return model diff --git a/modelscope/models/cv/face_recognition/torchkit/backbone/model_irse.py b/modelscope/models/cv/face_recognition/torchkit/backbone/model_irse.py new file mode 100755 index 0000000..3c48bee --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/backbone/model_irse.py @@ -0,0 +1,274 @@ +# The implementation is adopted from TFace,made pubicly available under the Apache-2.0 license at +# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_irse.py +from collections import namedtuple + +from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, + MaxPool2d, Module, PReLU, Sequential) + +from .common import Flatten, SEModule, initialize_weights + + +class BasicBlockIR(Module): + """ BasicBlock for IRNet + """ + def __init__(self, in_channel, depth, stride): + super(BasicBlockIR, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride, bias=False), + BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), + BatchNorm2d(depth), PReLU(depth), + Conv2d(depth, depth, (3, 3), stride, 1, bias=False), + BatchNorm2d(depth)) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + + return res + shortcut + + +class BottleneckIR(Module): + """ BasicBlock with bottleneck for IRNet + """ + def __init__(self, in_channel, depth, stride): + super(BottleneckIR, self).__init__() + reduction_channel = depth // 4 + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride, bias=False), + BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, + reduction_channel, (1, 1), (1, 1), + 0, + bias=False), BatchNorm2d(reduction_channel), + PReLU(reduction_channel), + Conv2d(reduction_channel, + reduction_channel, (3, 3), (1, 1), + 1, + bias=False), BatchNorm2d(reduction_channel), + PReLU(reduction_channel), + Conv2d(reduction_channel, depth, (1, 1), stride, 0, bias=False), + BatchNorm2d(depth)) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + + return res + shortcut + + +class BasicBlockIRSE(BasicBlockIR): + def __init__(self, in_channel, depth, stride): + 
super(BasicBlockIRSE, self).__init__(in_channel, depth, stride) + self.res_layer.add_module('se_block', SEModule(depth, 16)) + + +class BottleneckIRSE(BottleneckIR): + def __init__(self, in_channel, depth, stride): + super(BottleneckIRSE, self).__init__(in_channel, depth, stride) + self.res_layer.add_module('se_block', SEModule(depth, 16)) + + +class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): + '''A named tuple describing a ResNet block.''' + + +def get_block(in_channel, depth, num_units, stride=2): + + return [Bottleneck(in_channel, depth, stride)] +\ + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] + + +def get_blocks(num_layers): + if num_layers == 18: + blocks = [ + get_block(in_channel=64, depth=64, num_units=2), + get_block(in_channel=64, depth=128, num_units=2), + get_block(in_channel=128, depth=256, num_units=2), + get_block(in_channel=256, depth=512, num_units=2) + ] + elif num_layers == 34: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=4), + get_block(in_channel=128, depth=256, num_units=6), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 50: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=4), + get_block(in_channel=128, depth=256, num_units=14), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 100: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=13), + get_block(in_channel=128, depth=256, num_units=30), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 152: + blocks = [ + get_block(in_channel=64, depth=256, num_units=3), + get_block(in_channel=256, depth=512, num_units=8), + get_block(in_channel=512, depth=1024, num_units=36), + get_block(in_channel=1024, depth=2048, num_units=3) + ] + elif num_layers == 200: + blocks = [ + get_block(in_channel=64, depth=256, num_units=3), + get_block(in_channel=256, depth=512, num_units=24), + get_block(in_channel=512, depth=1024, num_units=36), + get_block(in_channel=1024, depth=2048, num_units=3) + ] + + return blocks + + +class Backbone(Module): + def __init__(self, input_size, num_layers, mode='ir'): + """ Args: + input_size: input_size of backbone + num_layers: num_layers of backbone + mode: support ir or irse + """ + super(Backbone, self).__init__() + assert input_size[0] in [112, 224], \ + 'input_size should be [112, 112] or [224, 224]' + assert num_layers in [18, 34, 50, 100, 152, 200], \ + 'num_layers should be 18, 34, 50, 100 or 152' + assert mode in ['ir', 'ir_se'], \ + 'mode should be ir or ir_se' + self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), + BatchNorm2d(64), PReLU(64)) + blocks = get_blocks(num_layers) + if num_layers <= 100: + if mode == 'ir': + unit_module = BasicBlockIR + elif mode == 'ir_se': + unit_module = BasicBlockIRSE + output_channel = 512 + else: + if mode == 'ir': + unit_module = BottleneckIR + elif mode == 'ir_se': + unit_module = BottleneckIRSE + output_channel = 2048 + + if input_size[0] == 112: + self.output_layer = Sequential(BatchNorm2d(output_channel), + Dropout(0.4), Flatten(), + Linear(output_channel * 7 * 7, 512), + BatchNorm1d(512, affine=False)) + else: + self.output_layer = Sequential( + BatchNorm2d(output_channel), Dropout(0.4), Flatten(), + Linear(output_channel * 14 * 14, 512), + BatchNorm1d(512, affine=False)) + + modules = [] + for block in blocks: + for 
bottleneck in block: + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) + self.body = Sequential(*modules) + + initialize_weights(self.modules()) + + def forward(self, x): + x = self.input_layer(x) + x = self.body(x) + x = self.output_layer(x) + return x + + +def IR_18(input_size): + """ Constructs a ir-18 model. + """ + model = Backbone(input_size, 18, 'ir') + + return model + + +def IR_34(input_size): + """ Constructs a ir-34 model. + """ + model = Backbone(input_size, 34, 'ir') + + return model + + +def IR_50(input_size): + """ Constructs a ir-50 model. + """ + model = Backbone(input_size, 50, 'ir') + + return model + + +def IR_101(input_size): + """ Constructs a ir-101 model. + """ + model = Backbone(input_size, 100, 'ir') + + return model + + +def IR_152(input_size): + """ Constructs a ir-152 model. + """ + model = Backbone(input_size, 152, 'ir') + + return model + + +def IR_200(input_size): + """ Constructs a ir-200 model. + """ + model = Backbone(input_size, 200, 'ir') + + return model + + +def IR_SE_50(input_size): + """ Constructs a ir_se-50 model. + """ + model = Backbone(input_size, 50, 'ir_se') + + return model + + +def IR_SE_101(input_size): + """ Constructs a ir_se-101 model. + """ + model = Backbone(input_size, 100, 'ir_se') + + return model + + +def IR_SE_152(input_size): + """ Constructs a ir_se-152 model. + """ + model = Backbone(input_size, 152, 'ir_se') + + return model + + +def IR_SE_200(input_size): + """ Constructs a ir_se-200 model. + """ + model = Backbone(input_size, 200, 'ir_se') + + return model diff --git a/modelscope/models/cv/face_recognition/torchkit/backbone/model_resnet.py b/modelscope/models/cv/face_recognition/torchkit/backbone/model_resnet.py new file mode 100755 index 0000000..f07b7ad --- /dev/null +++ b/modelscope/models/cv/face_recognition/torchkit/backbone/model_resnet.py @@ -0,0 +1,167 @@ +# The implementation is adopted from TFace,made pubicly available under the Apache-2.0 license at +# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_resnet.py +import torch.nn as nn +from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, + MaxPool2d, Module, ReLU, Sequential) + +from .common import initialize_weights + + +def conv3x3(in_planes, out_planes, stride=1): + """ 3x3 convolution with padding + """ + return Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False) + + +def conv1x1(in_planes, out_planes, stride=1): + """ 1x1 convolution + """ + return Conv2d(in_planes, + out_planes, + kernel_size=1, + stride=stride, + bias=False) + + +class Bottleneck(Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn2 = BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = BatchNorm2d(planes * self.expansion) + self.relu = ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(Module): + """ ResNet backbone + """ + def 
__init__(self, input_size, block, layers, zero_init_residual=True):
+        """ Args:
+            input_size: input_size of backbone
+            block: block function
+            layers: layers in each block
+        """
+        super(ResNet, self).__init__()
+        assert input_size[0] in [112, 224],\
+            'input_size should be [112, 112] or [224, 224]'
+        self.inplanes = 64
+        self.conv1 = Conv2d(3,
+                            64,
+                            kernel_size=7,
+                            stride=2,
+                            padding=3,
+                            bias=False)
+        self.bn1 = BatchNorm2d(64)
+        self.relu = ReLU(inplace=True)
+        self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+
+        self.bn_o1 = BatchNorm2d(2048)
+        self.dropout = Dropout()
+        if input_size[0] == 112:
+            self.fc = Linear(2048 * 4 * 4, 512)
+        else:
+            self.fc = Linear(2048 * 7 * 7, 512)
+        self.bn_o2 = BatchNorm1d(512)
+
+        initialize_weights(self.modules())
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                BatchNorm2d(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes))
+
+        return Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.bn_o1(x)
+        x = self.dropout(x)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+        x = self.bn_o2(x)
+
+        return x
+
+
+def ResNet_50(input_size, **kwargs):
+    """ Constructs a ResNet-50 model.
+    """
+    model = ResNet(input_size, Bottleneck, [3, 4, 6, 3], **kwargs)
+
+    return model
+
+
+def ResNet_101(input_size, **kwargs):
+    """ Constructs a ResNet-101 model.
+    """
+    model = ResNet(input_size, Bottleneck, [3, 4, 23, 3], **kwargs)
+
+    return model
+
+
+def ResNet_152(input_size, **kwargs):
+    """ Constructs a ResNet-152 model.
+    """
+    model = ResNet(input_size, Bottleneck, [3, 8, 36, 3], **kwargs)
+
+    return model
diff --git a/modelscope/models/cv/face_recognition/torchkit/rts_backbone.py b/modelscope/models/cv/face_recognition/torchkit/rts_backbone.py
new file mode 100644
index 0000000..d9b57ba
--- /dev/null
+++ b/modelscope/models/cv/face_recognition/torchkit/rts_backbone.py
@@ -0,0 +1,217 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
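# Editorial note (not part of the patch): a minimal usage sketch of the
# backbone factories defined above in model_irse.py / model_resnet.py.
# It assumes the package layout introduced by this patch is importable and
# that torch is installed; the dummy tensor and shapes are illustrative only.
import torch

from modelscope.models.cv.face_recognition.torchkit.backbone.model_irse import \
    IR_SE_50

backbone = IR_SE_50(input_size=[112, 112])  # ir_se-50 backbone, 512-d embedding head
backbone.eval()  # put BatchNorm/Dropout into inference mode
with torch.no_grad():
    faces = torch.randn(2, 3, 112, 112)  # two aligned 112x112 RGB face crops
    embeddings = backbone(faces)  # expected shape: torch.Size([2, 512])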
+import os +from collections import namedtuple +from math import lgamma + +import torch +import torch.nn as nn +from torch.nn import (AdaptiveAvgPool2d, BatchNorm1d, BatchNorm2d, Conv2d, + Dropout, Linear, MaxPool2d, Module, PReLU, ReLU, + Sequential, Sigmoid) +from torch.nn.modules.flatten import Flatten + +from modelscope.models import MODELS +from modelscope.models.base import TorchModel +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@MODELS.register_module('face-recognition', 'rts-backbone') +class RTSBackbone(TorchModel): + def __init__(self, *args, **kwargs): + super(RTSBackbone, self).__init__() + # model initialization + self.alpha = kwargs.get('alpha') + self.rts_plus = kwargs.get('rts_plus') + resnet = Backbone([112, 112], 64, mode='ir_se') + + self.features = nn.Sequential( + resnet.input_layer, resnet.body, + Sequential( + BatchNorm2d(512), + Dropout(), + Flatten(), + )) + + self.features_backbone = nn.Sequential( + Linear(512 * 7 * 7, 512), + BatchNorm1d(512), + ) + + self.logvar_rts_backbone = nn.Sequential( + Linear(512 * 7 * 7, 1), + BatchNorm1d(1), + ) + + self.logvar_rts_plus_backbone = nn.Sequential( + Linear(512 * 7 * 7, self.alpha), + BatchNorm1d(self.alpha), + ) + + def forward(self, img): + x = self.features(img) + image_features = self.features_backbone(x) + if not self.rts_plus: + logvar = self.logvar_rts_backbone(x) + else: + logvar = self.logvar_rts_plus_backbone(x) + return image_features, logvar + + @classmethod + def _instantiate(cls, **kwargs): + model_file = kwargs.get('am_model_name', ModelFile.TORCH_MODEL_FILE) + ckpt_path = os.path.join(kwargs['model_dir'], model_file) + logger.info(f'loading model from {ckpt_path}') + model_dir = kwargs.pop('model_dir') + model = cls(**kwargs) + ckpt_path = os.path.join(model_dir, model_file) + model.load_state_dict(torch.load(ckpt_path, map_location='cpu')) + return model + + +def l2_norm(input, axis=1): + norm = torch.norm(input, 2, axis, True) + output = torch.div(input, norm) + + return output + + +class SEModule(Module): + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = AdaptiveAvgPool2d(1) + self.fc1 = Conv2d(channels, + channels // reduction, + kernel_size=1, + padding=0, + bias=False) + + nn.init.xavier_uniform_(self.fc1.weight.data) + + self.relu = ReLU(inplace=True) + self.fc2 = Conv2d(channels // reduction, + channels, + kernel_size=1, + padding=0, + bias=False) + + self.sigmoid = Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + + return module_input * x + + +class bottleneck_IR_SE(Module): + def __init__(self, in_channel, depth, stride): + super(bottleneck_IR_SE, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride, bias=False), + BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), + PReLU(depth), Conv2d(depth, depth, (3, 3), stride, 1, bias=False), + BatchNorm2d(depth), SEModule(depth, 16)) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + + return res + shortcut + + +class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): + '''A named tuple describing a ResNet block.''' + + +def get_block(in_channel, depth, 
num_units, stride=2): + + return [Bottleneck(in_channel, depth, stride) + ] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] + + +def get_blocks(num_layers): + if num_layers == 50: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=4), + get_block(in_channel=128, depth=256, num_units=14), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 64: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=8), + get_block(in_channel=128, depth=256, num_units=16), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 100: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=13), + get_block(in_channel=128, depth=256, num_units=30), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 152: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=8), + get_block(in_channel=128, depth=256, num_units=36), + get_block(in_channel=256, depth=512, num_units=3) + ] + + return blocks + + +class Backbone(Module): + def __init__(self, input_size, num_layers, mode='ir'): + super(Backbone, self).__init__() + assert input_size[0] in [ + 112, 224 + ], 'input_size should be [112, 112] or [224, 224]' + assert num_layers in [50, 64, 100, + 152], 'num_layers should be 50, 64, 100 or 152' + assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' + blocks = get_blocks(num_layers) + if mode == 'ir': + unit_module = bottleneck_IR + elif mode == 'ir_se': + unit_module = bottleneck_IR_SE + self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), + BatchNorm2d(64), PReLU(64)) + if input_size[0] == 112: + self.output_layer = Sequential(BatchNorm2d(512), Dropout(), + Flatten(), Linear(512 * 7 * 7, 512), + BatchNorm1d(512)) + else: + self.output_layer = Sequential(BatchNorm2d(512), Dropout(), + Flatten(), + Linear(512 * 14 * 14, 512), + BatchNorm1d(512)) + + modules = [] + for block in blocks: + for bottleneck in block: + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) + self.body = Sequential(*modules) + + def forward(self, x): + x = self.input_layer(x) + x = self.body(x) + x = self.output_layer(x) + + return x diff --git a/modelscope/models/cv/facial_expression_recognition/__init__.py b/modelscope/models/cv/facial_expression_recognition/__init__.py new file mode 100644 index 0000000..35a15d1 --- /dev/null +++ b/modelscope/models/cv/facial_expression_recognition/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .fer import FacialExpressionRecognition + +else: + _import_structure = {'fer': ['FacialExpressionRecognition']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/facial_expression_recognition/fer/__init__.py b/modelscope/models/cv/facial_expression_recognition/fer/__init__.py new file mode 100644 index 0000000..2546035 --- /dev/null +++ b/modelscope/models/cv/facial_expression_recognition/fer/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
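# Editorial note (not part of the patch): a sketch of the two outputs of
# RTSBackbone defined in rts_backbone.py above, an identity embedding plus a
# log-variance (uncertainty) head. The alpha/rts_plus values and the dummy
# input are assumptions for illustration, and the sketch assumes TorchModel
# can be constructed without a model_dir.
import torch

from modelscope.models.cv.face_recognition.torchkit.rts_backbone import \
    RTSBackbone

net = RTSBackbone(alpha=8, rts_plus=False)  # rts_plus=False -> scalar log-variance
net.eval()
with torch.no_grad():
    img = torch.randn(2, 3, 112, 112)  # two aligned face crops
    feats, logvar = net(img)  # feats: (2, 512); logvar: (2, 1), or (2, 8) with rts_plus=True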
+from .facial_expression_recognition import FacialExpressionRecognition diff --git a/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py b/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py new file mode 100644 index 0000000..42cbebe --- /dev/null +++ b/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py @@ -0,0 +1,71 @@ +# The implementation is based on Facial-Expression-Recognition, available at +# https://github.com/WuJie1010/Facial-Expression-Recognition.Pytorch +import os + +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.nn.functional as F +from PIL import Image +from torch.autograd import Variable + +from modelscope.metainfo import Models +from modelscope.models.base import Tensor, TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.constant import ModelFile, Tasks + +from . import transforms +from .vgg import VGG + + +@MODELS.register_module(Tasks.facial_expression_recognition, + module_name=Models.fer) +class FacialExpressionRecognition(TorchModel): + def __init__(self, model_path, device='cuda'): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.device = device + self.cfg_path = model_path.replace(ModelFile.TORCH_MODEL_FILE, + ModelFile.CONFIGURATION) + self.net = VGG('VGG19', cfg_path=self.cfg_path) + self.load_model() + self.net = self.net.to(device) + self.transform_test = transforms.Compose([ + transforms.TenCrop(44), + transforms.Lambda(lambda crops: torch.stack( + [transforms.ToTensor()(crop) for crop in crops])), + ]) + + self.mean = np.array([[104, 117, 123]]) + + def load_model(self, load_to_cpu=False): + pretrained_dict = torch.load(self.model_path, + map_location=torch.device('cpu')) + self.net.load_state_dict(pretrained_dict['net'], strict=True) + self.net.eval() + + def forward(self, input): + img = input['img'] + img = cv2.cvtColor(img.cpu().numpy(), cv2.COLOR_BGR2GRAY) + img = cv2.resize(img, (48, 48)) + img = img[:, :, np.newaxis] + img = np.concatenate((img, img, img), axis=2) + + img = Image.fromarray(np.uint8(img)) + inputs = self.transform_test(img) + + ncrops, c, h, w = inputs.shape + + inputs = inputs.view(-1, c, h, w) + inputs = inputs.to(self.device) + inputs = Variable(inputs, volatile=True) + outputs = self.net(inputs) + + outputs_avg = outputs.view(ncrops, -1).mean(0) # avg over crops + + score = F.softmax(outputs_avg) + _, predicted = torch.max(outputs_avg.data, 0) + + return score, predicted diff --git a/modelscope/models/cv/facial_expression_recognition/fer/transforms.py b/modelscope/models/cv/facial_expression_recognition/fer/transforms.py new file mode 100644 index 0000000..663ee55 --- /dev/null +++ b/modelscope/models/cv/facial_expression_recognition/fer/transforms.py @@ -0,0 +1,114 @@ +# The implementation is based on Facial-Expression-Recognition, available at +# https://github.com/WuJie1010/Facial-Expression-Recognition.Pytorch +import numbers +import types + +import numpy as np +import torch +from PIL import Image + + +def to_tensor(pic): + + # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif 
pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + # put it from HWC to CHW format + # yikes, this transpose takes 80% of the loading time/CPU + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + else: + return img + + +def center_crop(img, output_size): + if isinstance(output_size, numbers.Number): + output_size = (int(output_size), int(output_size)) + w, h = img.size + th, tw = output_size + i = int(round((h - th) / 2.)) + j = int(round((w - tw) / 2.)) + return img.crop((j, i, j + tw, i + th)) + + +def five_crop(img, size): + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + else: + assert len( + size) == 2, 'Please provide only two dimensions (h, w) for size.' + + w, h = img.size + crop_h, crop_w = size + if crop_w > w or crop_h > h: + raise ValueError( + 'Requested crop size {} is bigger than input size {}'.format( + size, (h, w))) + tl = img.crop((0, 0, crop_w, crop_h)) + tr = img.crop((w - crop_w, 0, w, crop_h)) + bl = img.crop((0, h - crop_h, crop_w, h)) + br = img.crop((w - crop_w, h - crop_h, w, h)) + center = center_crop(img, (crop_h, crop_w)) + return (tl, tr, bl, br, center) + + +class TenCrop(object): + def __init__(self, size, vertical_flip=False): + self.size = size + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + assert len( + size + ) == 2, 'Please provide only two dimensions (h, w) for size.' + self.size = size + self.vertical_flip = vertical_flip + + def __call__(self, img): + first_five = five_crop(img, self.size) + + if self.vertical_flip: + img = img.transpose(Image.FLIP_TOP_BOTTOM) + else: + img = img.transpose(Image.FLIP_LEFT_RIGHT) + + second_five = five_crop(img, self.size) + + return first_five + second_five + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img): + for t in self.transforms: + img = t(img) + return img + + +class ToTensor(object): + def __call__(self, pic): + return to_tensor(pic) + + +class Lambda(object): + def __init__(self, lambd): + assert isinstance(lambd, types.LambdaType) + self.lambd = lambd + + def __call__(self, img): + return self.lambd(img) diff --git a/modelscope/models/cv/facial_expression_recognition/fer/vgg.py b/modelscope/models/cv/facial_expression_recognition/fer/vgg.py new file mode 100644 index 0000000..f739bea --- /dev/null +++ b/modelscope/models/cv/facial_expression_recognition/fer/vgg.py @@ -0,0 +1,39 @@ +# The implementation is based on Facial-Expression-Recognition, available at +# https://github.com/WuJie1010/Facial-Expression-Recognition.Pytorch +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable + +from modelscope.utils.config import Config + + +class VGG(nn.Module): + def __init__(self, vgg_name, cfg_path): + super(VGG, self).__init__() + model_cfg = Config.from_file(cfg_path)['models'] + self.features = self._make_layers(model_cfg[vgg_name]) + self.classifier = nn.Linear(512, 7) + + def forward(self, x): + out = self.features(x) + out = out.view(out.size(0), -1) + out = F.dropout(out, p=0.5, training=self.training) + out = self.classifier(out) + return out + + def _make_layers(self, cfg): + layers = [] + in_channels = 3 + for x in cfg: + if x == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + layers += [ + nn.Conv2d(in_channels, x, kernel_size=3, padding=1), + 
nn.BatchNorm2d(x), + nn.ReLU(inplace=True) + ] + in_channels = x + layers += [nn.AvgPool2d(kernel_size=1, stride=1)] + return nn.Sequential(*layers) diff --git a/modelscope/models/cv/facial_landmark_confidence/__init__.py b/modelscope/models/cv/facial_landmark_confidence/__init__.py new file mode 100644 index 0000000..594e9ae --- /dev/null +++ b/modelscope/models/cv/facial_landmark_confidence/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .flc import FacialLandmarkConfidence + +else: + _import_structure = {'flc': ['FacialLandmarkConfidence']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/facial_landmark_confidence/flc/__init__.py b/modelscope/models/cv/facial_landmark_confidence/flc/__init__.py new file mode 100644 index 0000000..eaf7e3e --- /dev/null +++ b/modelscope/models/cv/facial_landmark_confidence/flc/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .facial_landmark_confidence import FacialLandmarkConfidence diff --git a/modelscope/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py b/modelscope/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py new file mode 100644 index 0000000..9065f3e --- /dev/null +++ b/modelscope/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py @@ -0,0 +1,93 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os + +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.nn.functional as F +from PIL import Image +from torch.autograd import Variable + +from modelscope.metainfo import Models +from modelscope.models.base import Tensor, TorchModel +from modelscope.models.builder import MODELS +from modelscope.utils.constant import ModelFile, Tasks + +from .manual_landmark_net import LandmarkConfidence + + +@MODELS.register_module(Tasks.face_2d_keypoints, module_name=Models.flc) +class FacialLandmarkConfidence(TorchModel): + def __init__(self, model_path, device='cuda'): + super().__init__(model_path) + cudnn.benchmark = True + self.model_path = model_path + self.device = device + self.cfg_path = model_path.replace(ModelFile.TORCH_MODEL_FILE, + ModelFile.CONFIGURATION) + self.landmark_count = 5 + self.net = LandmarkConfidence(landmark_count=self.landmark_count) + self.load_model() + self.net = self.net.to(device) + + def load_model(self, load_to_cpu=False): + pretrained_dict = torch.load( + self.model_path, map_location=torch.device('cpu'))['state_dict'] + pretrained_dict['rp_net.binary_cls.weight'] = 32.0 * F.normalize( + pretrained_dict['rp_net.binary_cls.weight'], dim=1).t() + self.net.load_state_dict(pretrained_dict, strict=True) + self.net.eval() + + def forward(self, input): + img_org = input['orig_img'] + bbox = input['bbox'] + img_org = img_org.cpu().numpy() + + image_height = img_org.shape[0] + image_width = img_org.shape[1] + x1 = max(0, int(bbox[0])) + y1 = max(0, int(bbox[1])) + x2 = min(image_width, int(bbox[2])) + y2 = min(image_height, int(bbox[3])) + box_w = x2 - x1 + 1 + box_h = y2 - y1 + 1 + if box_h > box_w: + delta = box_h - box_w + dy = edy = 0 + dx = delta // 2 + edx = delta - dx + else: + dx = edx = 0 + delta = box_w - box_h + dy = delta // 2 + edy = delta - dy + + cv_img = img_org[y1:y2, 
x1:x2] + if dx > 0 or dy > 0 or edx > 0 or edy > 0: + cv_img = cv2.copyMakeBorder(cv_img, dy, edy, dx, edx, + cv2.BORDER_CONSTANT, 0) + inter_x = cv_img.shape[1] + inter_y = cv_img.shape[0] + + cv_img = cv2.resize(cv_img, (120, 120)) + + cv_img = cv_img.transpose((2, 0, 1)) + + input_blob = torch.from_numpy(cv_img[np.newaxis, :, :, :].astype( + np.float32)) + + tmp_conf_lms, tmp_feat, tmp_conf_resp, tmp_nose = self.net( + input_blob.to(self.device)) + conf_lms = tmp_conf_lms.cpu().numpy().squeeze() + feat = tmp_feat.cpu().numpy().squeeze() + + pts5pt = [] + for i in range(feat.shape[0]): + if i < self.landmark_count: + pts5pt.append(feat[i] * inter_x - dx + x1) + else: + pts5pt.append(feat[i] * inter_y - dy + y1) + + lm5pt = np.array(pts5pt).reshape(2, 5).T + return lm5pt, conf_lms diff --git a/modelscope/models/cv/facial_landmark_confidence/flc/manual_landmark_net.py b/modelscope/models/cv/facial_landmark_confidence/flc/manual_landmark_net.py new file mode 100644 index 0000000..7a65ddf --- /dev/null +++ b/modelscope/models/cv/facial_landmark_confidence/flc/manual_landmark_net.py @@ -0,0 +1,146 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import math + +import torch +import torch.nn.functional as F +from torch.nn import (AdaptiveAvgPool2d, BatchNorm2d, Conv2d, Linear, + MaxPool2d, Module, Parameter, ReLU, Sequential) + + +class LandmarkConfidence(Module): + def __init__(self, landmark_count=5): + super(LandmarkConfidence, self).__init__() + self.landmark_net = LandmarkNetD(landmark_count) + self.landmark_net.eval() + self.cls_net = ClassNet() + self.cls_net.eval() + self.rp_net = RespiratorNet() + + def forward(self, x): + feat, nose_feat, lms = self.landmark_net(x) + cls_respirator, nose = self.rp_net(feat, nose_feat) + confidence = self.cls_net(feat) + return confidence, lms, cls_respirator, nose + + +class FC(Module): + def __init__(self, feat_dim=256, num_class=2): + super(FC, self).__init__() + self.weight = Parameter( + torch.zeros(num_class, feat_dim, dtype=torch.float32)) + + def forward(self, x): + cos_theta = F.linear(x, self.weight) + return F.softmax(cos_theta, dim=1) + + +class Flatten(Module): + def forward(self, x): + return torch.flatten(x, 1) + + +class RespiratorNet(Module): + def __init__(self): + super(RespiratorNet, self).__init__() + self.conv1 = Sequential(Conv2d(48, 48, 3, 2, 1), BatchNorm2d(48), + ReLU(True)) + self.conv2 = AdaptiveAvgPool2d( + (1, 1) + ) # Sequential(Conv2d(48, 48, 5, 1, 0), BatchNorm2d(48), ReLU(True)) + self.binary_cls = FC(feat_dim=48, num_class=2) + self.nose_layer = Sequential(Conv2d(48, 64, 3, 1, 0), BatchNorm2d(64), + ReLU(True), Conv2d(64, 64, 3, 1, 0), + BatchNorm2d(64), ReLU(True), Flatten(), + Linear(64, 96), ReLU(True), Linear(96, 6)) + + def train(self, mode=True): + self.conv1.train(mode) + self.conv2.train(mode) + # self.nose_feat.train(mode) + self.nose_layer.train(mode) + self.binary_cls.train(mode) + + def forward(self, x, y): + x = self.conv1(x) + x = self.conv2(x) + cls = self.binary_cls(torch.flatten(x, 1)) + # loc = self.nose_feat(y) + loc = self.nose_layer(y) + return cls, loc + + +class ClassNet(Module): + def __init__(self): + super(ClassNet, self).__init__() + self.conv1 = Sequential(Conv2d(48, 48, 3, 1, 1), BatchNorm2d(48), + ReLU(True)) + self.conv2 = Sequential(Conv2d(48, 54, 3, 2, 1), BatchNorm2d(54), + ReLU(True)) + self.conv3 = Sequential(Conv2d(54, 54, 5, 1, 0), BatchNorm2d(54), + ReLU(True)) + self.fc1 = Sequential(Flatten(), Linear(54, 54), ReLU(True)) + self.fc2 = Linear(54, 1) + + def forward(self, 
x): + y = self.conv1(x) + y = self.conv2(y) + y = self.conv3(y) + y = self.fc1(y) + y = self.fc2(y) + return y + + +class LandmarkNetD(Module): + def __init__(self, landmark_count=5): + super(LandmarkNetD, self).__init__() + self.conv_pre = Sequential(Conv2d(3, 16, 5, 2, 0), BatchNorm2d(16), + ReLU(True)) + self.pool_pre = MaxPool2d(2, 2) # output is 29 + + self.conv1 = Sequential(Conv2d(16, 32, 3, 1, 1), BatchNorm2d(32), + ReLU(True), Conv2d(32, 32, 3, 1, 1), + BatchNorm2d(32), ReLU(True)) + self.pool1 = MaxPool2d(2, 2) # 14 + + self.conv2 = Sequential(Conv2d(32, 48, 3, 1, 0), BatchNorm2d(48), + ReLU(True), Conv2d(48, 48, 3, 1, 0), + BatchNorm2d(48), ReLU(True)) + self.pool2 = MaxPool2d(2, 2) # 5 + + self.conv3 = Sequential(Conv2d(48, 80, 3, 1, 0), BatchNorm2d(80), + ReLU(True), Conv2d(80, 80, 3, 1, 0), + BatchNorm2d(80), ReLU(True)) + + self.fc1 = Sequential(Linear(80, 128), ReLU(True)) + self.fc2 = Sequential(Linear(128, 128), ReLU(True)) + + self.output = Linear(128, landmark_count * 2) + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + + def forward(self, x): + y = self.conv_pre(x) + y = self.pool_pre(y) + y = self.conv1(y) + y = self.pool1(y[:, :, :28, :28]) + feat = self.conv2(y) + y2 = self.pool2(feat) + y = self.conv3(y2) + y = torch.flatten(y, 1) + y = self.fc1(y) + y = self.fc2(y) + y = self.output(y) + return feat, y2, y diff --git a/modelscope/msdatasets/__init__.py b/modelscope/msdatasets/__init__.py new file mode 100644 index 0000000..073f939 --- /dev/null +++ b/modelscope/msdatasets/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from . import cv +from .ms_dataset import MsDataset diff --git a/modelscope/msdatasets/auth/__init__.py b/modelscope/msdatasets/auth/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/auth/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/auth/auth_config.py b/modelscope/msdatasets/auth/auth_config.py new file mode 100644 index 0000000..213f2a6 --- /dev/null +++ b/modelscope/msdatasets/auth/auth_config.py @@ -0,0 +1,33 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from http.cookiejar import CookieJar +from typing import Tuple + + +class BaseAuthConfig(object): + """Base authorization config class.""" + def __init__(self, cookies: CookieJar, git_token: str, + user_info: Tuple[str, str]): + self.cookies = cookies + self.git_token = git_token + self.user_info = user_info + + +class OssAuthConfig(BaseAuthConfig): + """The authorization config for oss dataset.""" + def __init__(self, cookies: CookieJar, git_token: str, + user_info: Tuple[str, str]): + super().__init__(cookies=cookies, + git_token=git_token, + user_info=user_info) + + +class MaxComputeAuthConfig(BaseAuthConfig): + # TODO: MaxCompute dataset to be supported. 
+ def __init__(self, cookies: CookieJar, git_token: str, + user_info: Tuple[str, str]): + super().__init__(cookies=cookies, + git_token=git_token, + user_info=user_info) + + self.max_compute_grant_cmd = None diff --git a/modelscope/msdatasets/context/__init__.py b/modelscope/msdatasets/context/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/context/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/context/dataset_context_config.py b/modelscope/msdatasets/context/dataset_context_config.py new file mode 100644 index 0000000..b7daa08 --- /dev/null +++ b/modelscope/msdatasets/context/dataset_context_config.py @@ -0,0 +1,99 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Mapping, Sequence, Union + +from modelscope.msdatasets.auth.auth_config import BaseAuthConfig +from modelscope.msdatasets.download.download_config import DataDownloadConfig +from modelscope.msdatasets.meta.data_meta_config import DataMetaConfig +from modelscope.utils.constant import DownloadMode, Hubs + + +class DatasetContextConfig: + """Context configuration of dataset.""" + def __init__(self, dataset_name: Union[str, list], namespace: str, + version: str, subset_name: str, split: Union[str, list], + target: str, hub: Hubs, data_dir: str, + data_files: Union[str, Sequence[str], + Mapping[str, Union[str, Sequence[str]]]], + download_mode: DownloadMode, cache_root_dir: str, + use_streaming: bool, **kwargs): + + self._download_config = None + self._data_meta_config = None + self._config_kwargs = kwargs + self._dataset_version_cache_root_dir = None + self._auth_config = None + + # The lock file path for meta-files and data-files + self._global_meta_lock_file_path = None + self._global_data_lock_file_path = None + + # General arguments for dataset + self.hub = hub + self.download_mode = download_mode + self.dataset_name = dataset_name + self.namespace = namespace + self.version = version + self.subset_name = subset_name + self.split = split + self.target = target + self.data_dir = data_dir + self.data_files = data_files + self.cache_root_dir = cache_root_dir + self.use_streaming = use_streaming + + @property + def config_kwargs(self) -> dict: + return self._config_kwargs + + @config_kwargs.setter + def config_kwargs(self, val: dict): + self._config_kwargs = val + + @property + def download_config(self) -> DataDownloadConfig: + return self._download_config + + @download_config.setter + def download_config(self, val: DataDownloadConfig): + self._download_config = val + + @property + def data_meta_config(self) -> DataMetaConfig: + return self._data_meta_config + + @data_meta_config.setter + def data_meta_config(self, val: DataMetaConfig): + self._data_meta_config = val + + @property + def dataset_version_cache_root_dir(self) -> str: + return self._dataset_version_cache_root_dir + + @dataset_version_cache_root_dir.setter + def dataset_version_cache_root_dir(self, val: str): + self._dataset_version_cache_root_dir = val + + @property + def global_meta_lock_file_path(self) -> str: + return self._global_meta_lock_file_path + + @global_meta_lock_file_path.setter + def global_meta_lock_file_path(self, val: str): + self._global_meta_lock_file_path = val + + @property + def global_data_lock_file_path(self) -> str: + return self._global_data_lock_file_path + + @global_data_lock_file_path.setter + def global_data_lock_file_path(self, val: str): + self._global_data_lock_file_path = val + + @property + def 
auth_config(self) -> BaseAuthConfig: + return self._auth_config + + @auth_config.setter + def auth_config(self, val: BaseAuthConfig): + self._auth_config = val diff --git a/modelscope/msdatasets/cv/__init__.py b/modelscope/msdatasets/cv/__init__.py new file mode 100644 index 0000000..fad91bc --- /dev/null +++ b/modelscope/msdatasets/cv/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from . import (image_classification, image_semantic_segmentation, + object_detection) diff --git a/modelscope/msdatasets/cv/easycv_base.py b/modelscope/msdatasets/cv/easycv_base.py new file mode 100644 index 0000000..7b6df6e --- /dev/null +++ b/modelscope/msdatasets/cv/easycv_base.py @@ -0,0 +1,41 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp + + +class EasyCVBaseDataset(object): + """Adapt to MSDataset. + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + """ + DATA_ROOT_PATTERN = '${data_root}' + + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + args=(), + kwargs={}) -> None: + self.split_config = split_config + self.preprocessor = preprocessor + self.mode = mode + if self.split_config is not None: + self._update_data_source(kwargs['data_source']) + + def _update_data_root(self, input_dict, data_root): + for k, v in input_dict.items(): + if isinstance(v, str) and self.DATA_ROOT_PATTERN in v: + input_dict.update( + {k: v.replace(self.DATA_ROOT_PATTERN, data_root)}) + elif isinstance(v, dict): + self._update_data_root(v, data_root) + + def _update_data_source(self, data_source): + data_root = next(iter(self.split_config.values())) + data_root = data_root.rstrip(osp.sep) + + self._update_data_root(data_source, data_root) diff --git a/modelscope/msdatasets/cv/face_2d_keypoins/__init__.py b/modelscope/msdatasets/cv/face_2d_keypoins/__init__.py new file mode 100644 index 0000000..e9d76b7 --- /dev/null +++ b/modelscope/msdatasets/cv/face_2d_keypoins/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .face_2d_keypoints_dataset import FaceKeypointDataset + +else: + _import_structure = {'face_2d_keypoints_dataset': ['FaceKeypointDataset']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/face_2d_keypoins/face_2d_keypoints_dataset.py b/modelscope/msdatasets/cv/face_2d_keypoins/face_2d_keypoints_dataset.py new file mode 100644 index 0000000..d26ff05 --- /dev/null +++ b/modelscope/msdatasets/cv/face_2d_keypoins/face_2d_keypoints_dataset.py @@ -0,0 +1,34 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
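# Editorial note (not part of the patch): a small sketch of how
# EasyCVBaseDataset (easycv_base.py above) rewrites the '${data_root}'
# placeholder with the local cache path supplied by MSDataset. The
# data_source keys and paths below are made-up illustrative values.
from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset

data_source = {
    'type': 'FaceKeypointSource',  # hypothetical EasyCV source type
    'data_cfg': {
        'img_prefix': '${data_root}/images',
        'ann_file': '${data_root}/annotations/train.json',
    },
}
ds = EasyCVBaseDataset(split_config={'train': '/tmp/ms_cache/face_2d/'},
                       kwargs={'data_source': data_source})
# After __init__, every '${data_root}' has been replaced in place, e.g.
# data_source['data_cfg']['img_prefix'] == '/tmp/ms_cache/face_2d/images'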
+from easycv.datasets.face import FaceKeypointDataset as _FaceKeypointDataset + +from modelscope.metainfo import Datasets +from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset +from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.utils.constant import Tasks + + +@TASK_DATASETS.register_module(group_key=Tasks.face_2d_keypoints, + module_name=Datasets.Face2dKeypointsDataset) +class FaceKeypointDataset(EasyCVBaseDataset, _FaceKeypointDataset): + """EasyCV dataset for face 2d keypoints. + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _FaceKeypointDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/cv/hand_2d_keypoints/__init__.py b/modelscope/msdatasets/cv/hand_2d_keypoints/__init__.py new file mode 100644 index 0000000..5c1c72c --- /dev/null +++ b/modelscope/msdatasets/cv/hand_2d_keypoints/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .hand_2d_keypoints_dataset import Hand2DKeypointDataset + +else: + _import_structure = { + 'hand_2d_keypoints_dataset': ['Hand2DKeypointDataset'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/hand_2d_keypoints/hand_2d_keypoints_dataset.py b/modelscope/msdatasets/cv/hand_2d_keypoints/hand_2d_keypoints_dataset.py new file mode 100644 index 0000000..85cf0d4 --- /dev/null +++ b/modelscope/msdatasets/cv/hand_2d_keypoints/hand_2d_keypoints_dataset.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from easycv.datasets.pose import \ + HandCocoWholeBodyDataset as _HandCocoWholeBodyDataset + +from modelscope.metainfo import Datasets +from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset +from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.utils.constant import Tasks + + +@TASK_DATASETS.register_module(group_key=Tasks.hand_2d_keypoints, + module_name=Datasets.HandCocoWholeBodyDataset) +class HandCocoWholeBodyDataset(EasyCVBaseDataset, _HandCocoWholeBodyDataset): + """EasyCV dataset for human hand 2d keypoints. + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. 
+ """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _HandCocoWholeBodyDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/cv/human_wholebody_keypoint/__init__.py b/modelscope/msdatasets/cv/human_wholebody_keypoint/__init__.py new file mode 100644 index 0000000..472ed2d --- /dev/null +++ b/modelscope/msdatasets/cv/human_wholebody_keypoint/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .human_wholebody_keypoint_dataset import WholeBodyCocoTopDownDataset + +else: + _import_structure = { + 'human_wholebody_keypoint_dataset': ['WholeBodyCocoTopDownDataset'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py b/modelscope/msdatasets/cv/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py new file mode 100644 index 0000000..31bca1d --- /dev/null +++ b/modelscope/msdatasets/cv/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from easycv.datasets.pose import \ + WholeBodyCocoTopDownDataset as _WholeBodyCocoTopDownDataset + +from modelscope.metainfo import Datasets +from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset +from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.utils.constant import Tasks + + +@TASK_DATASETS.register_module( + group_key=Tasks.human_wholebody_keypoint, + module_name=Datasets.HumanWholeBodyKeypointDataset) +class WholeBodyCocoTopDownDataset(EasyCVBaseDataset, + _WholeBodyCocoTopDownDataset): + """EasyCV dataset for human whole body 2d keypoints. + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _WholeBodyCocoTopDownDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/cv/image_classification/__init__.py b/modelscope/msdatasets/cv/image_classification/__init__.py new file mode 100644 index 0000000..95e8d7a --- /dev/null +++ b/modelscope/msdatasets/cv/image_classification/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
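# Editorial note (not part of the patch): the dataset __init__.py files in
# this patch share the same LazyImportModule pattern; each module replaces
# itself in sys.modules, so the wrapped dataset class and its easycv
# dependency are only imported on first attribute access. Illustrative use,
# assuming easycv is installed:
from modelscope.msdatasets.cv.face_2d_keypoins import FaceKeypointDataset
# The attribute access above is what triggers the lazy import of
# face_2d_keypoints_dataset and the registration of FaceKeypointDataset.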
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .classification_dataset import ClsDataset + +else: + _import_structure = {'classification_dataset': ['ClsDataset']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/image_classification/classification_dataset.py b/modelscope/msdatasets/cv/image_classification/classification_dataset.py new file mode 100644 index 0000000..7fcf17d --- /dev/null +++ b/modelscope/msdatasets/cv/image_classification/classification_dataset.py @@ -0,0 +1,34 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from easycv.datasets.classification import ClsDataset as _ClsDataset + +from modelscope.metainfo import Datasets +from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset +from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.utils.constant import Tasks + + +@TASK_DATASETS.register_module(group_key=Tasks.image_classification, + module_name=Datasets.ClsDataset) +class ClsDataset(_ClsDataset): + """EasyCV dataset for classification. + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _ClsDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/cv/image_semantic_segmentation/__init__.py b/modelscope/msdatasets/cv/image_semantic_segmentation/__init__.py new file mode 100644 index 0000000..26121bd --- /dev/null +++ b/modelscope/msdatasets/cv/image_semantic_segmentation/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .segmentation_dataset import SegDataset + +else: + _import_structure = {'easycv_segmentation': ['SegDataset']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py b/modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py new file mode 100644 index 0000000..6345ae4 --- /dev/null +++ b/modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py @@ -0,0 +1,40 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from easycv.datasets.segmentation import SegDataset as _SegDataset + +from modelscope.metainfo import Datasets +from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset +from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.utils.constant import Tasks + + +@TASK_DATASETS.register_module(group_key=Tasks.image_segmentation, + module_name=Datasets.SegDataset) +class SegDataset(EasyCVBaseDataset, _SegDataset): + """EasyCV dataset for Sementic segmentation. 
+ For more details, please refer to : + https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/segmentation/raw.py . + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + data_source: Data source config to parse input data. + pipeline: Sequence of transform object or config dict to be composed. + ignore_index (int): Label index to be ignored. + profiling: If set True, will print transform time. + """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _SegDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/cv/object_detection/__init__.py b/modelscope/msdatasets/cv/object_detection/__init__.py new file mode 100644 index 0000000..403163e --- /dev/null +++ b/modelscope/msdatasets/cv/object_detection/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .detection_dataset import DetDataset, DetImagesMixDataset + +else: + _import_structure = { + 'detection_dataset': ['DetDataset', 'DetImagesMixDataset'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/object_detection/detection_dataset.py b/modelscope/msdatasets/cv/object_detection/detection_dataset.py new file mode 100644 index 0000000..8d6e807 --- /dev/null +++ b/modelscope/msdatasets/cv/object_detection/detection_dataset.py @@ -0,0 +1,92 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp + +from easycv.datasets.detection import DetDataset as _DetDataset +from easycv.datasets.detection import \ + DetImagesMixDataset as _DetImagesMixDataset + +from modelscope.metainfo import Datasets +from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset +from modelscope.msdatasets.task_datasets import TASK_DATASETS +from modelscope.utils.constant import Tasks + + +@TASK_DATASETS.register_module(group_key=Tasks.image_object_detection, + module_name=Datasets.DetDataset) +@TASK_DATASETS.register_module(group_key=Tasks.image_segmentation, + module_name=Datasets.DetDataset) +class DetDataset(EasyCVBaseDataset, _DetDataset): + """EasyCV dataset for object detection. + For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py . + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + data_source: Data source config to parse input data. 
+ pipeline: Transform config list + profiling: If set True, will print pipeline time + classes: A list of class names, used in evaluation for result and groundtruth visualization + """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _DetDataset.__init__(self, *args, **kwargs) + + +@TASK_DATASETS.register_module(group_key=Tasks.image_object_detection, + module_name=Datasets.DetImagesMixDataset) +@TASK_DATASETS.register_module( + group_key=Tasks.domain_specific_object_detection, + module_name=Datasets.DetImagesMixDataset) +class DetImagesMixDataset(EasyCVBaseDataset, _DetImagesMixDataset): + """EasyCV dataset for object detection, a wrapper of multiple images mixed dataset. + Suitable for training on multiple images mixed data augmentation like + mosaic and mixup. For the augmentation pipeline of mixed image data, + the `get_indexes` method needs to be provided to obtain the image + indexes, and you can set `skip_flags` to change the pipeline running + process. At the same time, we provide the `dynamic_scale` parameter + to dynamically change the output image size. + output boxes format: cx, cy, w, h + + For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/mix.py . + + Args: + split_config (dict): Dataset root path from MSDataset, e.g. + {"train":"local cache path"} or {"evaluation":"local cache path"}. + preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for + the model if supplied. Not support yet. + mode: Training or Evaluation. + data_source (:obj:`DetSourceCoco`): Data source config to parse input data. + pipeline (Sequence[dict]): Sequence of transform object or + config dict to be composed. + dynamic_scale (tuple[int], optional): The image scale can be changed + dynamically. Default to None. + skip_type_keys (list[str], optional): Sequence of type string to + be skip pipeline. Default to None. + label_padding: out labeling padding [N, 120, 5] + """ + def __init__(self, + split_config=None, + preprocessor=None, + mode=None, + *args, + **kwargs) -> None: + EasyCVBaseDataset.__init__(self, + split_config=split_config, + preprocessor=preprocessor, + mode=mode, + args=args, + kwargs=kwargs) + _DetImagesMixDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/data_files/__init__.py b/modelscope/msdatasets/data_files/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/data_files/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/data_files/data_files_manager.py b/modelscope/msdatasets/data_files/data_files_manager.py new file mode 100644 index 0000000..88f5f1c --- /dev/null +++ b/modelscope/msdatasets/data_files/data_files_manager.py @@ -0,0 +1,113 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
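# Editorial note (not part of the patch): DetDataset above is registered with
# TASK_DATASETS under both the image_object_detection and the
# image_segmentation group keys, which appears intended to let a trainer
# configuration select it by module name for either task. The fragment below
# is an assumed, illustrative config shape, not an API from this patch.
train_dataset_cfg = {
    'type': 'DetDataset',  # Datasets.DetDataset module name
    'data_source': {},  # an EasyCV detection source config would go here
    'pipeline': [],  # EasyCV transform configs would go here
}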
+ +import os +from typing import Union + +from datasets import DatasetBuilder + +from modelscope.hub.api import HubApi +from modelscope.msdatasets.context.dataset_context_config import \ + DatasetContextConfig +from modelscope.msdatasets.download.dataset_builder import ( + CsvDatasetBuilder, IterableDatasetBuilder, TaskSpecificDatasetBuilder) +from modelscope.msdatasets.download.download_config import DataDownloadConfig +from modelscope.msdatasets.download.download_manager import ( + DataDownloadManager, DataStreamingDownloadManager) +from modelscope.utils.constant import (DatasetPathName, DownloadMode, + MetaDataFields) + + +class DataFilesManager(object): + """The modelscope data-files manager.""" + def __init__(self, dataset_context_config: DatasetContextConfig): + + # Get dataset config info + self.dataset_name = dataset_context_config.dataset_name + self.namespace = dataset_context_config.namespace + self.version = dataset_context_config.version + self.subset_name = dataset_context_config.subset_name + self.split = dataset_context_config.split + self.meta_data_files = dataset_context_config.data_meta_config.meta_data_files + self.meta_args_map = dataset_context_config.data_meta_config.meta_args_map + self.zip_data_files = dataset_context_config.data_meta_config.zip_data_files + self.download_mode = dataset_context_config.download_mode + self.use_streaming = dataset_context_config.use_streaming + self.input_config_kwargs = dataset_context_config.config_kwargs + + # Get download_config + download_config = dataset_context_config.download_config or DataDownloadConfig( + ) + download_config.dataset_name = dataset_context_config.dataset_name + download_config.namespace = dataset_context_config.namespace + download_config.version = dataset_context_config.version + download_config.split = dataset_context_config.split + download_config.cache_dir = os.path.join( + dataset_context_config.cache_root_dir, self.namespace, + self.dataset_name, self.version, DatasetPathName.DATA_FILES_NAME) + + is_force_download = dataset_context_config.download_mode == DownloadMode.FORCE_REDOWNLOAD + download_config.force_download = bool(is_force_download) + download_config.force_extract = bool(is_force_download) + download_config.use_etag = False + + # Get oss config + api = HubApi() + self.oss_config = api.get_dataset_access_config( + self.dataset_name, self.namespace, self.version) + + # Set context. Note: no need to update context_config. + download_config.oss_config = self.oss_config + dataset_context_config.download_config = download_config + self.dataset_context_config = dataset_context_config + os.makedirs(download_config.cache_dir, exist_ok=True) + + def get_data_files_builder(self) -> Union[DatasetBuilder, None]: + """ Build download manager. 
""" + + if self.use_streaming: + return IterableDatasetBuilder.get_builder_instance( + dataset_context_config=self.dataset_context_config) + + if not self.meta_data_files: + return None + + meta_data_file = next(iter(self.meta_data_files.values())) + meta_args_map_file = next(iter(self.meta_args_map.values())) + if meta_args_map_file is None: + meta_args_map_file = {} + + if not meta_data_file or meta_args_map_file.get( + MetaDataFields.ARGS_BIG_DATA): + meta_args_map_file.update(self.input_config_kwargs) + self.dataset_context_config.data_meta_config.meta_args_map = meta_args_map_file + + builder = TaskSpecificDatasetBuilder( + dataset_context_config=self.dataset_context_config) + elif meta_data_file.endswith('.csv'): + builder = CsvDatasetBuilder( + dataset_context_config=self.dataset_context_config) + else: + raise NotImplementedError( + f'Dataset meta file extensions "{os.path.splitext(meta_data_file)[-1]}" is not implemented yet' + ) + return builder + + def fetch_data_files(self, builder): + """ Fetch the data-files from dataset-hub. """ + + if self.dataset_context_config.use_streaming: + dl_manager = DataStreamingDownloadManager( + download_config=self.dataset_context_config.download_config) + return builder.as_streaming_dataset(dl_manager) + else: + + self.dataset_context_config.download_config.meta_args_map = \ + self.dataset_context_config.data_meta_config.meta_args_map + + dl_manager = DataDownloadManager( + download_config=self.dataset_context_config.download_config) + builder.download_and_prepare( + dl_manager=dl_manager, + download_mode=self.download_mode.value, + try_from_hf_gcs=False) + return builder.as_dataset() diff --git a/modelscope/msdatasets/data_loader/__init__.py b/modelscope/msdatasets/data_loader/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/data_loader/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py new file mode 100644 index 0000000..1d9d50a --- /dev/null +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -0,0 +1,166 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from abc import ABC, abstractmethod +from typing import Optional, Union + +from datasets import (Dataset, DatasetBuilder, DatasetDict, IterableDataset, + IterableDatasetDict) +from datasets import load_dataset as hf_data_loader + +from modelscope.hub.api import ModelScopeConfig +from modelscope.msdatasets.auth.auth_config import OssAuthConfig +from modelscope.msdatasets.context.dataset_context_config import \ + DatasetContextConfig +from modelscope.msdatasets.data_files.data_files_manager import \ + DataFilesManager +from modelscope.msdatasets.meta.data_meta_manager import DataMetaManager +from modelscope.utils.constant import DatasetFormations + + +class BaseDataLoader(ABC): + """Base dataset loader to load data.""" + def __init__(self, dataset_context_config: DatasetContextConfig): + self.dataset_context_config = dataset_context_config + + @abstractmethod + def process(self): + """The entity processing pipeline for fetching the data. """ + raise NotImplementedError( + f'No default implementation provided for {BaseDataLoader.__name__}.process.' + ) + + @abstractmethod + def _authorize(self): + raise NotImplementedError( + f'No default implementation provided for {BaseDataLoader.__name__}._authorize.' 
+ ) + + @abstractmethod + def _build(self): + raise NotImplementedError( + f'No default implementation provided for {BaseDataLoader.__name__}._build.' + ) + + @abstractmethod + def _prepare_and_download(self): + raise NotImplementedError( + f'No default implementation provided for {BaseDataLoader.__name__}._prepare_and_download.' + ) + + @abstractmethod + def _post_process(self): + raise NotImplementedError( + f'No default implementation provided for {BaseDataLoader.__name__}._post_process.' + ) + + +class OssDataLoader(BaseDataLoader): + def __init__(self, dataset_context_config: DatasetContextConfig): + super().__init__(dataset_context_config) + + self.data_files_builder: Optional[DataFilesManager] = None + self.dataset: Optional[Union[Dataset, IterableDataset, DatasetDict, + IterableDatasetDict]] = None + self.builder: Optional[DatasetBuilder] = None + self.data_files_manager: Optional[DataFilesManager] = None + + def process(self) -> None: + """ Sequential data fetching process: authorize -> build -> prepare_and_download -> post_process, + to keep dataset_context_config updated. """ + + self._authorize() + self._build() + self._prepare_and_download() + self._post_process() + + def _authorize(self) -> None: + """ Authorization of target dataset. + Get credentials from cache and send to the modelscope-hub in the future. """ + # TODO: obtain credentials from loacl cache when available. + cookies = ModelScopeConfig.get_cookies() + git_token = ModelScopeConfig.get_token() + user_info = ModelScopeConfig.get_user_info() + + if not self.dataset_context_config.auth_config: + auth_config = OssAuthConfig(cookies=cookies, + git_token=git_token, + user_info=user_info) + else: + auth_config = self.dataset_context_config.auth_config + auth_config.cookies = cookies + auth_config.git_token = git_token + auth_config.user_info = user_info + + self.dataset_context_config.auth_config = auth_config + + def _build(self) -> None: + """ Sequential data files building process: build_meta -> build_data_files , to keep context_config updated. """ + # Build meta data + meta_manager = DataMetaManager(self.dataset_context_config) + meta_manager.fetch_meta_files() + meta_manager.parse_dataset_structure() + self.dataset_context_config = meta_manager.dataset_context_config + + # Build data-files manager + self.data_files_manager = DataFilesManager( + dataset_context_config=self.dataset_context_config) + self.builder = self.data_files_manager.get_data_files_builder() + + def _prepare_and_download(self) -> None: + """ Fetch data-files from modelscope dataset-hub. """ + dataset_py_script = self.dataset_context_config.data_meta_config.dataset_py_script + dataset_formation = self.dataset_context_config.data_meta_config.dataset_formation + dataset_name = self.dataset_context_config.dataset_name + subset_name = self.dataset_context_config.subset_name + version = self.dataset_context_config.version + split = self.dataset_context_config.split + data_dir = self.dataset_context_config.data_dir + data_files = self.dataset_context_config.data_files + cache_dir = self.dataset_context_config.cache_root_dir + download_mode = self.dataset_context_config.download_mode + input_kwargs = self.dataset_context_config.config_kwargs + + if self.builder is None and not dataset_py_script: + raise f'meta-file: {dataset_name}.py not found on the modelscope hub.' 
+ + if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible: + self.dataset = hf_data_loader(dataset_py_script, + name=subset_name, + revision=version, + split=split, + data_dir=data_dir, + data_files=data_files, + cache_dir=cache_dir, + download_mode=download_mode.value, + ignore_verifications=True, + **input_kwargs) + else: + self.dataset = self.data_files_manager.fetch_data_files( + self.builder) + + def _post_process(self) -> None: + ... + + +class MaxComputeDataLoader(BaseDataLoader): + """Data loader for MaxCompute data source.""" + + # TODO: MaxCompute data source to be supported . + def __init__(self, dataset_context_config: DatasetContextConfig): + super().__init__(dataset_context_config) + self.dataset = None + + def process(self): + ... + + def _authorize(self): + ... + + def _build(self): + ... + + def _prepare_and_download(self): + ... + + def _post_process(self): + ... diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py new file mode 100644 index 0000000..d600794 --- /dev/null +++ b/modelscope/msdatasets/data_loader/data_loader_manager.py @@ -0,0 +1,134 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import enum +import os +from abc import ABC, abstractmethod + +from datasets import load_dataset as hf_data_loader + +from modelscope.hub.api import HubApi +from modelscope.msdatasets.context.dataset_context_config import \ + DatasetContextConfig +from modelscope.msdatasets.data_loader.data_loader import OssDataLoader +from modelscope.utils.constant import EXTENSIONS_TO_LOAD +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class LocalDataLoaderType(enum.Enum): + """ Supported data loader types for local dataset: huggingface, PyTorch, Tensorflow """ + HF_DATA_LOADER = 'hf_data_loader' + TORCH_DATA_LOADER = 'torch_data_loader' + TF_DATA_LOADER = 'tf_data_loader' + + +class RemoteDataLoaderType(enum.Enum): + """ Supported data loader types for remote dataset: huggingface, modelscope """ + HF_DATA_LOADER = 'hf_data_loader' + MS_DATA_LOADER = 'ms_data_loader' + + +class DataLoaderManager(ABC): + """Data loader manager, base class.""" + def __init__(self, dataset_context_config: DatasetContextConfig): + self.dataset_context_config = dataset_context_config + + @abstractmethod + def load_dataset(self, data_loader_type: enum.Enum): + ... 
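For orientation, the sketch below shows how the loader managers defined above are meant to be driven: a DatasetContextConfig is assembled and handed to a manager together with a loader-type enum value. This mirrors the wiring that MsDataset.load performs later in this commit; the dataset and namespace names are placeholders, and the remote call assumes access to the ModelScope hub.

from modelscope.msdatasets.context.dataset_context_config import DatasetContextConfig
from modelscope.msdatasets.data_loader.data_loader_manager import (
    RemoteDataLoaderManager, RemoteDataLoaderType)
from modelscope.utils.config_ds import MS_DATASETS_CACHE
from modelscope.utils.constant import DownloadMode, Hubs

# Assemble the shared context object (placeholder dataset/namespace names).
context_config = DatasetContextConfig(
    dataset_name='my_dataset',
    namespace='my_namespace',
    version='master',
    subset_name=None,
    split='train',
    target=None,
    hub=Hubs.modelscope,
    data_dir=None,
    data_files=None,
    download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS,
    cache_root_dir=MS_DATASETS_CACHE,
    use_streaming=False)

# The enum value selects the backend: MS_DATA_LOADER goes through OssDataLoader,
# while HF_DATA_LOADER falls back to the Hugging Face load_dataset path.
dataset = RemoteDataLoaderManager(context_config).load_dataset(
    RemoteDataLoaderType.MS_DATA_LOADER)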
+ + +class LocalDataLoaderManager(DataLoaderManager): + """Data loader manager for loading local data.""" + def __init__(self, dataset_context_config: DatasetContextConfig): + super().__init__(dataset_context_config=dataset_context_config) + + def load_dataset(self, data_loader_type: enum.Enum): + # Get args from context + dataset_name = self.dataset_context_config.dataset_name + subset_name = self.dataset_context_config.subset_name + version = self.dataset_context_config.version + split = self.dataset_context_config.split + data_dir = self.dataset_context_config.data_dir + data_files = self.dataset_context_config.data_files + cache_root_dir = self.dataset_context_config.cache_root_dir + download_mode = self.dataset_context_config.download_mode + use_streaming = self.dataset_context_config.use_streaming + input_config_kwargs = self.dataset_context_config.config_kwargs + + # load local single file + if os.path.isfile(dataset_name): + file_ext = os.path.splitext(dataset_name)[1].strip('.') + if file_ext in EXTENSIONS_TO_LOAD: + split = None + data_files = [dataset_name] + dataset_name = EXTENSIONS_TO_LOAD.get(file_ext) + + # Select local data loader + # TODO: more loaders to be supported. + if data_loader_type == LocalDataLoaderType.HF_DATA_LOADER: + # Build huggingface data loader and return dataset. + return hf_data_loader(dataset_name, + name=subset_name, + revision=version, + split=split, + data_dir=data_dir, + data_files=data_files, + cache_dir=cache_root_dir, + download_mode=download_mode.value, + streaming=use_streaming, + ignore_verifications=True, + **input_config_kwargs) + raise f'Expected local data loader type: {LocalDataLoaderType.HF_DATA_LOADER.value}.' + + +class RemoteDataLoaderManager(DataLoaderManager): + """Data loader manager for loading remote data.""" + def __init__(self, dataset_context_config: DatasetContextConfig): + super().__init__(dataset_context_config=dataset_context_config) + self.api = HubApi() + + def load_dataset(self, data_loader_type: enum.Enum): + # Get args from context + dataset_name = self.dataset_context_config.dataset_name + namespace = self.dataset_context_config.namespace + subset_name = self.dataset_context_config.subset_name + version = self.dataset_context_config.version + split = self.dataset_context_config.split + data_dir = self.dataset_context_config.data_dir + data_files = self.dataset_context_config.data_files + download_mode_val = self.dataset_context_config.download_mode.value + use_streaming = self.dataset_context_config.use_streaming + input_config_kwargs = self.dataset_context_config.config_kwargs + + # To use the huggingface data loader + if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER: + dataset_ret = hf_data_loader(dataset_name, + name=subset_name, + revision=version, + split=split, + data_dir=data_dir, + data_files=data_files, + download_mode=download_mode_val, + streaming=use_streaming, + ignore_verifications=True, + **input_config_kwargs) + # download statistics + self.api.dataset_download_statistics(dataset_name=dataset_name, + namespace=namespace, + use_streaming=use_streaming) + return dataset_ret + # To use the modelscope data loader + elif data_loader_type == RemoteDataLoaderType.MS_DATA_LOADER: + oss_data_loader = OssDataLoader( + dataset_context_config=self.dataset_context_config) + oss_data_loader.process() + # download statistics + self.api.dataset_download_statistics(dataset_name=dataset_name, + namespace=namespace, + use_streaming=use_streaming) + return oss_data_loader.dataset + else: + raise f'Expected 
remote data loader type: {RemoteDataLoaderType.HF_DATA_LOADER.value}/' \ + f'{RemoteDataLoaderType.MS_DATA_LOADER.value}, but got {data_loader_type} .' diff --git a/modelscope/msdatasets/dataset_cls/__init__.py b/modelscope/msdatasets/dataset_cls/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/dataset_cls/dataset.py b/modelscope/msdatasets/dataset_cls/dataset.py new file mode 100644 index 0000000..1ec796e --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/dataset.py @@ -0,0 +1,99 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import copy +import os + +import datasets +from datasets import IterableDataset +from PIL import Image + +from modelscope.utils.constant import EXTENSIONS_TO_LOAD +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class ExternalDataset(object): + def __init__(self, split_path_dict, config_kwargs): + self.split_path_dict = split_path_dict + self.config_kwargs = copy.deepcopy(config_kwargs) + self.config_kwargs.update({'split_config': split_path_dict}) + self.ext_dataset = None + self.split_data_files = {k: [] for k, _ in split_path_dict.items()} + file_ext = '' + + for split_name, split_dir in split_path_dict.items(): + if isinstance(split_dir, str) and os.path.isdir(split_dir): + split_file_names = os.listdir(split_dir) + set_files_exts = set([ + os.path.splitext(file_name)[-1].strip('.') + for file_name in split_file_names + ]) + if '' in set_files_exts: + continue + # ensure these files have same extensions + if len(set_files_exts) != 1: + supported_exts = ','.join(EXTENSIONS_TO_LOAD.keys()) + logger.error( + f'Split-{split_name} has been ignored, please flatten your folder structure, ' + f'and make sure these files have same extensions. 
' + f'Supported extensions: {supported_exts} .') + continue + file_ext = list(set_files_exts)[0] + if file_ext not in EXTENSIONS_TO_LOAD: + continue + + split_file_paths = [ + os.path.join(split_dir, file_name) + for file_name in split_file_names + ] + self.split_data_files[split_name] = split_file_paths + + if file_ext: + file_ext = EXTENSIONS_TO_LOAD.get(file_ext) + self.ext_dataset = datasets.load_dataset( + file_ext, data_files=self.split_data_files, **config_kwargs) + + def __len__(self): + return len(self.split_path_dict + ) if not self.ext_dataset else self.ext_dataset.__len__() + + def __getitem__(self, item): + if not self.ext_dataset: + return self.split_path_dict.get(item) + else: + return self.ext_dataset.__getitem__(item) + + def __iter__(self): + if not self.ext_dataset: + for k, v in self.split_path_dict.items(): + yield k, v + else: + for k, v in self.ext_dataset.items(): + yield k, v + + +class NativeIterableDataset(IterableDataset): + """The modelscope iterable dataset class.""" + def __init__(self, ex_iterable, info, split): + super().__init__(ex_iterable=ex_iterable, info=info, split=split) + + def __iter__(self): + for key, entity in self._iter(): + if isinstance(entity, dict): + ret = {} + for k, v in entity.items(): + ret[k] = v + if k.endswith(':FILE'): + dl_manager = self._ex_iterable.kwargs.get('dl_manager') + ex_cache_path = dl_manager.download_and_extract(v) + ret[k] = ex_cache_path + if k.endswith('Image:FILE'): + ret[k + ':Object'] = Image.open(fp=ex_cache_path) + if k.endswith('Audio:FILE'): + import torchaudio + waveform_and_rate = torchaudio.load(ex_cache_path) + ret[k + ':Object'] = waveform_and_rate + entity = ret + + yield entity diff --git a/modelscope/msdatasets/download/__init__.py b/modelscope/msdatasets/download/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/download/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/download/dataset_builder.py b/modelscope/msdatasets/download/dataset_builder.py new file mode 100644 index 0000000..1aa53a5 --- /dev/null +++ b/modelscope/msdatasets/download/dataset_builder.py @@ -0,0 +1,395 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
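As a small illustration of the ExternalDataset wrapper defined above: when the split directories contain no files with a loadable extension, the object simply exposes the split-to-path mapping. The path below is a placeholder.

from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset

# Placeholder split directory; nothing is loaded because no known file extension is found.
ds = ExternalDataset({'train': '/tmp/my_dataset/train'}, {})
print(len(ds))      # -> 1, the number of splits
print(ds['train'])  # -> '/tmp/my_dataset/train'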
+ +import os +from typing import Dict, Union + +import datasets +import pandas as pd +import pyarrow as pa +from datasets import (ArrowBasedBuilder, GeneratorBasedBuilder, + IterableDataset, IterableDatasetDict) +from datasets.filesystems import is_remote_filesystem +from datasets.info import DatasetInfo +from datasets.naming import camelcase_to_snakecase +from datasets.packaged_modules import csv +from datasets.utils.filelock import FileLock +from datasets.utils.py_utils import map_nested + +from modelscope.hub.api import HubApi +from modelscope.msdatasets.context.dataset_context_config import \ + DatasetContextConfig +from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, + NativeIterableDataset) +from modelscope.msdatasets.download.download_manager import \ + DataStreamingDownloadManager +from modelscope.msdatasets.utils.dataset_utils import \ + get_subdir_hash_from_split +from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, + DatasetPathName, DownloadMode) +from modelscope.utils.logger import get_logger + +logger = get_logger() + +DELIMITER_NAME = 'delimiter' +DEFAULT_CSV_DELIMITER = ',' + + +class CsvDatasetBuilder(csv.Csv): + def __init__(self, dataset_context_config: DatasetContextConfig): + # Init config args + self.dataset_name = dataset_context_config.dataset_name + self.cache_root_dir = dataset_context_config.cache_root_dir + self.namespace = dataset_context_config.namespace + self.version = dataset_context_config.version + self.subset_name = dataset_context_config.subset_name + self.split = dataset_context_config.split + self.meta_data_files = dataset_context_config.data_meta_config.meta_data_files + self.zip_data_files = dataset_context_config.data_meta_config.zip_data_files + self.input_config_kwargs = dataset_context_config.config_kwargs + + self.cache_build_dir = os.path.join(self.cache_root_dir, + self.namespace, self.dataset_name, + self.version, + DatasetPathName.META_NAME) + self.csv_delimiter = DEFAULT_CSV_DELIMITER + if DELIMITER_NAME in self.input_config_kwargs: + self.csv_delimiter = self.input_config_kwargs[DELIMITER_NAME] + + split = self.split or list(dataset_context_config.data_meta_config. 
+ target_dataset_structure.keys()) + sub_dir_hash = get_subdir_hash_from_split(split=split, + version=self.version) + + super().__init__(cache_dir=self.cache_build_dir, + config_name=self.namespace, + hash=sub_dir_hash, + data_files=self.meta_data_files, + **self.input_config_kwargs) + + self.info.builder_name = self.dataset_name + self.name = camelcase_to_snakecase(self.dataset_name) + + def _build_cache_dir(self, namespace=DEFAULT_DATASET_NAMESPACE): + builder_data_dir = os.path.join( + self._cache_dir_root, + self._relative_data_dir(with_version=False, + with_hash=True, + namespace=namespace)) + + return builder_data_dir + + def _relative_data_dir(self, + with_version=True, + with_hash=True, + namespace=DEFAULT_DATASET_NAMESPACE) -> str: + """Relative path of this dataset in cache_dir: + Will be: + self.name/self.config.version/self.hash/ + or if a namespace has been specified: + self.namespace___self.name/self.config.version/self.hash/ + """ + builder_data_dir = self.info.builder_name if namespace is None else f'{namespace}___{self.info.builder_name}' + builder_config = self.config + hash = self.hash + if builder_config: + builder_data_dir = os.path.join(builder_data_dir, self.config_id) + if with_version: + builder_data_dir = os.path.join(builder_data_dir, + str(self.config.version)) + if with_hash and hash and isinstance(hash, str): + builder_data_dir = os.path.join(builder_data_dir, hash) + return builder_data_dir + + def _split_generators(self, dl_manager): + if not self.config.data_files: + raise ValueError( + 'At least one data file must be specified, but got none.') + data_files = dl_manager.download_and_extract(self.config.data_files) + zip_data_files = dl_manager.download_and_extract(self.zip_data_files) + splits = [] + for split_name, files in data_files.items(): + if isinstance(files, str): + files = [files] + splits.append( + datasets.SplitGenerator(name=split_name, + gen_kwargs={ + 'files': + dl_manager.iter_files(files), + 'base_dir': + zip_data_files.get(split_name) + })) + return splits + + def _generate_tables(self, files, base_dir): + schema = pa.schema(self.config.features.type + ) if self.config.features is not None else None + dtype = { + name: dtype.to_pandas_dtype() + for name, dtype in zip(schema.names, schema.types) + } if schema else None + for file_idx, file in enumerate(files): + csv_file_reader = pd.read_csv(file, + iterator=True, + dtype=dtype, + delimiter=self.csv_delimiter) + transform_fields = [] + for field_name in csv_file_reader._engine.names: + if field_name.endswith(':FILE'): + transform_fields.append(field_name) + try: + for batch_idx, df in enumerate(csv_file_reader): + for field_name in transform_fields: + if base_dir: + df[field_name] = df[field_name].apply( + lambda x: os.path.join(base_dir, x)) + pa_table = pa.Table.from_pandas(df, schema=schema) + yield (file_idx, batch_idx), pa_table + except ValueError as e: + logger.error( + f"Failed to read file '{file}' with error {type(e)}: {e}") + raise + + +class TaskSpecificDatasetBuilder(CsvDatasetBuilder): + def __init__(self, dataset_context_config: DatasetContextConfig): + + # Init args + self.name = dataset_context_config.dataset_name + self.subset_name = dataset_context_config.subset_name + self.namespace = dataset_context_config.namespace + self.split = dataset_context_config.split + self.version = dataset_context_config.version + split = self.split or list(dataset_context_config.data_meta_config. 
+ target_dataset_structure.keys()) + self.hash = get_subdir_hash_from_split(split=split, + version=self.version) + self.data_files = dataset_context_config.data_meta_config.meta_data_files + self.zip_data_files = dataset_context_config.data_meta_config.zip_data_files + self.split_path_dict = None + self.config = None + self.info = DatasetInfo.from_dict( + {'builder_name': dataset_context_config.dataset_name}) + self._cache_dir_root = os.path.expanduser( + dataset_context_config.cache_root_dir) + self._cache_dir = self._build_cache_dir() + self._config_kwargs = dataset_context_config.data_meta_config.meta_args_map + + def download_and_prepare(self, download_mode, dl_manager, + **download_kwargs): + # Prevent parallel disk operations + lock_path = os.path.join( + self._cache_dir_root, + self._cache_dir.replace(os.sep, '_') + '.lock') + with FileLock(lock_path): + data_exists = os.path.exists(self._cache_dir) + if data_exists and download_mode == DownloadMode.REUSE_DATASET_IF_EXISTS: + logger.warning( + f'Reusing dataset {self.name} ({self._cache_dir})') + return + logger.info(f'Generating dataset {self.name} ({self._cache_dir})') + self._download_and_prepare(dl_manager=dl_manager) + + def _download_and_prepare(self, dl_manager): + self.split_path_dict = dl_manager.download_and_extract( + self.zip_data_files) + + def as_dataset(self): + return ExternalDataset(self.split_path_dict, self._config_kwargs) + + +class IterableDatasetBuilder(csv.Csv): + def __init__(self, dataset_context_config: DatasetContextConfig): + # Init config args + self.dataset_name = dataset_context_config.dataset_name + self.cache_root_dir = dataset_context_config.cache_root_dir + self.namespace = dataset_context_config.namespace + self.version = dataset_context_config.version + self.subset_name = dataset_context_config.subset_name + self.split = dataset_context_config.split + self.meta_data_files = dataset_context_config.data_meta_config.meta_data_files + self.zip_data_files = dataset_context_config.data_meta_config.zip_data_files + self.input_config_kwargs = dataset_context_config.config_kwargs + + self.cache_build_dir = os.path.join(self.cache_root_dir, + self.namespace, self.dataset_name, + self.version, + DatasetPathName.META_NAME) + self.csv_delimiter = DEFAULT_CSV_DELIMITER + if DELIMITER_NAME in self.input_config_kwargs: + self.csv_delimiter = self.input_config_kwargs[DELIMITER_NAME] + + split = self.split or list(dataset_context_config.data_meta_config. 
+ target_dataset_structure.keys()) + sub_dir_hash = get_subdir_hash_from_split(split=split, + version=self.version) + + super().__init__( + cache_dir=self.cache_build_dir, + config_name=self.namespace, + hash=sub_dir_hash, + data_files=None, # TODO: self.meta_data_files, + **self.input_config_kwargs) + + self.info.builder_name = self.dataset_name + self.name = camelcase_to_snakecase(self.dataset_name) + + @staticmethod + def get_builder_instance( + dataset_context_config: DatasetContextConfig) -> csv.Csv: + builder_instance = IterableDatasetBuilder( + dataset_context_config=dataset_context_config) + return builder_instance + + def as_streaming_dataset( + self, dl_manager: DataStreamingDownloadManager + ) -> Union[Dict[str, IterableDataset], IterableDataset]: + + if not isinstance(self, (GeneratorBasedBuilder, ArrowBasedBuilder)): + raise ValueError(f'Builder {self.name} is not streamable.') + + is_local = not is_remote_filesystem(self._fs) + if not is_local: + raise NotImplementedError( + f'Loading a streaming dataset cached in a {type(self._fs).__name__} is not supported yet.' + ) + + self._check_manual_download(dl_manager) + splits_generators = { + sg.name: sg + for sg in self._split_generators(dl_manager) + } + + # By default, return all splits + split = dl_manager.download_config.split + if split is None: + splits_generator = splits_generators + elif split in splits_generators: + splits_generator = splits_generators[split] + else: + raise ValueError( + f'Bad split: {split}. Available splits: {list(splits_generators)}' + ) + + # Create a dataset for each of the given splits + streaming_datasets = map_nested( + self._as_streaming_dataset_single, + splits_generator, + map_tuple=True, + ) + if isinstance(streaming_datasets, dict): + streaming_datasets = IterableDatasetDict(streaming_datasets) + return streaming_datasets + + def _split_generators(self, dl_manager: DataStreamingDownloadManager): + splits = [] + meta_data_file = '' + zip_data_file = '' + if self.meta_data_files: + meta_data_file = next(iter(self.meta_data_files.values())) + if self.zip_data_files: + zip_data_file = next(iter(self.zip_data_files.values())) + if meta_data_file and not zip_data_file: + for split_name, meta_file_url in self.meta_data_files.items(): + splits.append( + datasets.SplitGenerator(name=split_name, + gen_kwargs={ + 'meta': meta_file_url, + 'files': [], + 'dl_manager': dl_manager, + })) + + elif meta_data_file and zip_data_file: + for split_name, files in self.zip_data_files.items(): + if isinstance(files, str): + files = [files] + meta_file_url = self.meta_data_files.get(split_name) + splits.append( + datasets.SplitGenerator(name=split_name, + gen_kwargs={ + 'meta': meta_file_url, + 'files': files, + 'dl_manager': dl_manager, + })) + + elif not meta_data_file and zip_data_file: + for split_name, files in self.zip_data_files.items(): + if isinstance(files, str): + files = [files] + splits.append( + datasets.SplitGenerator(name=split_name, + gen_kwargs={ + 'meta': '', + 'files': files, + 'dl_manager': dl_manager, + })) + + else: + raise f'Neither column meta nor data file found in {self.dataset_name}.json, specify at least one column.' 
+ + return splits + + def _as_streaming_dataset_single( + self, + splits_generator, + ) -> NativeIterableDataset: + + ex_iterable = self._get_examples_iterable_for_split(splits_generator) + return NativeIterableDataset(ex_iterable, + info=self.info, + split=splits_generator.name) + + def _generate_tables(self, **gen_kwargs): + + meta_file_url = gen_kwargs.get('meta') + files = gen_kwargs.get('files') + dl_manager = gen_kwargs.get('dl_manager') + + hub_api = HubApi() + is_zip = False + zip_file_name = '' + + if files: + zip_file = str(next(iter(files))) + if zip_file.endswith('.zip'): + is_zip = True + zip_file_name = os.path.splitext(zip_file)[0] + + if meta_file_url and not files: + headers, texts = hub_api.fetch_single_csv_script(meta_file_url) + meta_csv_mapping = IterableDatasetBuilder.trans_data_to_mapping( + headers, texts, self.csv_delimiter) + pa_table = pa.Table.from_pydict(meta_csv_mapping) + yield 0, pa_table + + elif meta_file_url and files: + # Get meta file + headers, texts = hub_api.fetch_single_csv_script(meta_file_url) + meta_csv_mapping = IterableDatasetBuilder.trans_data_to_mapping( + headers, texts, self.csv_delimiter) + + if is_zip: + oss_config_for_unzipped = hub_api.get_dataset_access_config_for_unzipped( + self.dataset_name, self.namespace, self.version, + zip_file_name) + dl_manager.download_config.oss_config = oss_config_for_unzipped + + pa_table = pa.Table.from_pydict(meta_csv_mapping) + yield 0, pa_table + + elif not meta_file_url and files: + pa_table = pa.Table.from_pydict({'Input:FILE': files}) + yield 0, pa_table + + else: + raise f'Neither column meta nor data file found in {self.dataset_name}.json .' + + @staticmethod + def trans_data_to_mapping(headers: str, texts: list, delimiter: str): + res = {} + headers = headers.split(delimiter) + for idx in range(0, len(headers)): + col_list = [] + for line in texts: + col_list.append(line.split(delimiter)[idx]) + res[headers[idx]] = col_list + return res diff --git a/modelscope/msdatasets/download/download_config.py b/modelscope/msdatasets/download/download_config.py new file mode 100644 index 0000000..9d12da7 --- /dev/null +++ b/modelscope/msdatasets/download/download_config.py @@ -0,0 +1,19 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Optional, Union + +from datasets.download.download_config import DownloadConfig + + +class DataDownloadConfig(DownloadConfig): + def __init__(self): + self.dataset_name: Optional[str] = None + self.namespace: Optional[str] = None + self.version: Optional[str] = None + self.split: Optional[Union[str, list]] = None + self.data_dir: Optional[str] = None + self.oss_config: Optional[dict] = {} + self.meta_args_map: Optional[dict] = {} + + def copy(self) -> 'DataDownloadConfig': + return self diff --git a/modelscope/msdatasets/download/download_manager.py b/modelscope/msdatasets/download/download_manager.py new file mode 100644 index 0000000..b85c530 --- /dev/null +++ b/modelscope/msdatasets/download/download_manager.py @@ -0,0 +1,59 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
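A quick, self-contained check of the trans_data_to_mapping helper above, which turns a CSV header line plus raw rows into a column-name-to-values mapping; the header and rows here are made-up examples.

from modelscope.msdatasets.download.dataset_builder import IterableDatasetBuilder

headers = 'Image:FILE,Label'
texts = ['train/img_001.jpg,cat', 'train/img_002.jpg,dog']
mapping = IterableDatasetBuilder.trans_data_to_mapping(headers, texts, ',')
# mapping == {'Image:FILE': ['train/img_001.jpg', 'train/img_002.jpg'],
#             'Label': ['cat', 'dog']}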
+ +from datasets.download.download_manager import DownloadManager +from datasets.download.streaming_download_manager import \ + StreamingDownloadManager +from datasets.utils.file_utils import cached_path, is_relative_path + +from modelscope.msdatasets.download.download_config import DataDownloadConfig +from modelscope.msdatasets.utils.oss_utils import OssUtilities + + +class DataDownloadManager(DownloadManager): + def __init__(self, download_config: DataDownloadConfig): + super().__init__(dataset_name=download_config.dataset_name, + data_dir=download_config.data_dir, + download_config=download_config, + record_checksums=True) + + def _download(self, url_or_filename: str, + download_config: DataDownloadConfig) -> str: + url_or_filename = str(url_or_filename) + + oss_utilities = OssUtilities(oss_config=download_config.oss_config, + dataset_name=download_config.dataset_name, + namespace=download_config.namespace, + revision=download_config.version) + + if is_relative_path(url_or_filename): + # fetch oss files + return oss_utilities.download(url_or_filename, + download_config=download_config) + else: + return cached_path(url_or_filename, + download_config=download_config) + + +class DataStreamingDownloadManager(StreamingDownloadManager): + """The data streaming download manager.""" + def __init__(self, download_config: DataDownloadConfig): + super().__init__(dataset_name=download_config.dataset_name, + data_dir=download_config.data_dir, + download_config=download_config, + base_path=download_config.cache_dir) + + def _download(self, url_or_filename: str) -> str: + url_or_filename = str(url_or_filename) + oss_utilities = OssUtilities( + oss_config=self.download_config.oss_config, + dataset_name=self.download_config.dataset_name, + namespace=self.download_config.namespace, + revision=self.download_config.version) + + if is_relative_path(url_or_filename): + # fetch oss files + return oss_utilities.download(url_or_filename, + download_config=self.download_config) + else: + return cached_path(url_or_filename, + download_config=self.download_config) diff --git a/modelscope/msdatasets/meta/__init__.py b/modelscope/msdatasets/meta/__init__.py new file mode 100644 index 0000000..b937315 --- /dev/null +++ b/modelscope/msdatasets/meta/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. diff --git a/modelscope/msdatasets/meta/data_meta_config.py b/modelscope/msdatasets/meta/data_meta_config.py new file mode 100644 index 0000000..b50e961 --- /dev/null +++ b/modelscope/msdatasets/meta/data_meta_config.py @@ -0,0 +1,14 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + + +class DataMetaConfig(object): + """Modelscope data-meta config class.""" + def __init__(self): + self.dataset_scripts = None + self.dataset_formation = None + self.meta_cache_dir = None + self.meta_data_files = None + self.zip_data_files = None + self.meta_args_map = None + self.target_dataset_structure = None + self.dataset_py_script = None diff --git a/modelscope/msdatasets/meta/data_meta_manager.py b/modelscope/msdatasets/meta/data_meta_manager.py new file mode 100644 index 0000000..475653f --- /dev/null +++ b/modelscope/msdatasets/meta/data_meta_manager.py @@ -0,0 +1,174 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
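The DataDownloadConfig and DataDownloadManager introduced above are wired together roughly as follows. This is an illustrative sketch: the field values are placeholders, and oss_config would normally be filled from HubApi.get_dataset_access_config, as DataFilesManager does earlier in this commit.

from modelscope.msdatasets.download.download_config import DataDownloadConfig
from modelscope.msdatasets.download.download_manager import DataDownloadManager

download_config = DataDownloadConfig()
download_config.dataset_name = 'my_dataset'      # placeholder
download_config.namespace = 'my_namespace'       # placeholder
download_config.version = 'master'
download_config.cache_dir = '/tmp/msdatasets/data_files'
download_config.oss_config = {}                  # normally obtained from the hub

# Relative file names are resolved through OSS; absolute URLs fall back to cached_path.
dl_manager = DataDownloadManager(download_config=download_config)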
+ +import json +import os +import shutil +from collections import defaultdict + +from datasets.utils.filelock import FileLock + +from modelscope.hub.api import HubApi +from modelscope.msdatasets.context.dataset_context_config import \ + DatasetContextConfig +from modelscope.msdatasets.meta.data_meta_config import DataMetaConfig +from modelscope.msdatasets.utils.dataset_utils import ( + get_dataset_files, get_target_dataset_structure) +from modelscope.utils.constant import (DatasetFormations, DatasetPathName, + DownloadMode) + + +class DataMetaManager(object): + """Data-meta manager.""" + def __init__(self, dataset_context_config: DatasetContextConfig): + self.dataset_context_config = dataset_context_config + self.api = HubApi() + + def fetch_meta_files(self) -> None: + + # Init meta infos + dataset_name = self.dataset_context_config.dataset_name + namespace = self.dataset_context_config.namespace + download_mode = self.dataset_context_config.download_mode + version = self.dataset_context_config.version + cache_root_dir = self.dataset_context_config.cache_root_dir + subset_name = self.dataset_context_config.subset_name + split = self.dataset_context_config.split + + dataset_version_cache_root_dir = os.path.join(cache_root_dir, + namespace, dataset_name, + version) + meta_cache_dir = os.path.join(dataset_version_cache_root_dir, + DatasetPathName.META_NAME) + data_meta_config = self.dataset_context_config.data_meta_config or DataMetaConfig( + ) + + # Get lock file path + if not subset_name: + lock_subset_name = DatasetPathName.LOCK_FILE_NAME_ANY + else: + lock_subset_name = subset_name + if not split: + lock_split = DatasetPathName.LOCK_FILE_NAME_ANY + else: + lock_split = split + lock_file_name = f'{DatasetPathName.META_NAME}{DatasetPathName.LOCK_FILE_NAME_DELIMITER}{dataset_name}' \ + f'{DatasetPathName.LOCK_FILE_NAME_DELIMITER}{version}' \ + f'{DatasetPathName.LOCK_FILE_NAME_DELIMITER}' \ + f'{lock_subset_name}{DatasetPathName.LOCK_FILE_NAME_DELIMITER}{lock_split}.lock' + lock_file_path = os.path.join(dataset_version_cache_root_dir, + lock_file_name) + os.makedirs(dataset_version_cache_root_dir, exist_ok=True) + + # Fetch meta from cache or hub if reuse dataset + if download_mode == DownloadMode.REUSE_DATASET_IF_EXISTS: + if os.path.exists(meta_cache_dir) and os.listdir(meta_cache_dir): + dataset_scripts, dataset_formation = self._fetch_meta_from_cache( + meta_cache_dir) + else: + # Fetch meta-files from modelscope-hub if cache does not exist + with FileLock(lock_file=lock_file_path): + os.makedirs(meta_cache_dir, exist_ok=True) + dataset_scripts, dataset_formation = self._fetch_meta_from_hub( + dataset_name, namespace, version, meta_cache_dir) + # Fetch meta from hub if force download + elif download_mode == DownloadMode.FORCE_REDOWNLOAD: + # Clean meta-files + if os.path.exists(meta_cache_dir) and os.listdir(meta_cache_dir): + shutil.rmtree(meta_cache_dir) + # Re-download meta-files + with FileLock(lock_file=lock_file_path): + os.makedirs(meta_cache_dir, exist_ok=True) + dataset_scripts, dataset_formation = self._fetch_meta_from_hub( + dataset_name, namespace, version, meta_cache_dir) + else: + raise ValueError( + f'Expected values of download_mode: ' + f'{DownloadMode.REUSE_DATASET_IF_EXISTS.value} or ' + f'{DownloadMode.FORCE_REDOWNLOAD.value}, but got {download_mode} .' 
+ ) + + # Set data_meta_config + data_meta_config.meta_cache_dir = meta_cache_dir + data_meta_config.dataset_scripts = dataset_scripts + data_meta_config.dataset_formation = dataset_formation + + # Set dataset_context_config + self.dataset_context_config.data_meta_config = data_meta_config + self.dataset_context_config.dataset_version_cache_root_dir = dataset_version_cache_root_dir + self.dataset_context_config.global_meta_lock_file_path = lock_file_path + + def parse_dataset_structure(self): + # Get dataset_name.json + dataset_name = self.dataset_context_config.dataset_name + subset_name = self.dataset_context_config.subset_name + split = self.dataset_context_config.split + namespace = self.dataset_context_config.namespace + version = self.dataset_context_config.version + data_meta_config = self.dataset_context_config.data_meta_config or DataMetaConfig( + ) + + dataset_json = None + dataset_py_script = None + dataset_scripts = data_meta_config.dataset_scripts + if not dataset_scripts or len(dataset_scripts) == 0: + raise 'Cannot find dataset meta-files, please fetch meta from modelscope hub.' + if '.py' in dataset_scripts: + dataset_py_script = dataset_scripts['.py'][0] + for json_path in dataset_scripts['.json']: + if json_path.endswith(f'{dataset_name}.json'): + with open(json_path, encoding='utf-8') as dataset_json_file: + dataset_json = json.load(dataset_json_file) + break + if not dataset_json and not dataset_py_script: + raise f'File {dataset_name}.json and {dataset_name}.py not found, please specify at least one meta-file.' + + # Parse meta and get dataset structure + if dataset_py_script: + data_meta_config.dataset_py_script = dataset_py_script + else: + target_subset_name, target_dataset_structure = get_target_dataset_structure( + dataset_json, subset_name, split) + meta_map, file_map, args_map = get_dataset_files( + target_dataset_structure, dataset_name, namespace, version) + + data_meta_config.meta_data_files = meta_map + data_meta_config.zip_data_files = file_map + data_meta_config.meta_args_map = args_map + data_meta_config.target_dataset_structure = target_dataset_structure + + self.dataset_context_config.data_meta_config = data_meta_config + + def _fetch_meta_from_cache(self, meta_cache_dir): + local_paths = defaultdict(list) + dataset_type = None + for meta_file_name in os.listdir(meta_cache_dir): + file_ext = os.path.splitext(meta_file_name)[-1] + if file_ext == DatasetFormations.formation_mark_ext.value: + dataset_type = int(os.path.splitext(meta_file_name)[0]) + continue + local_paths[file_ext].append( + os.path.join(meta_cache_dir, meta_file_name)) + if not dataset_type: + raise FileNotFoundError( + f'{DatasetFormations.formation_mark_ext.value} file does not exist, ' + f'please use {DownloadMode.FORCE_REDOWNLOAD.value} .') + + return local_paths, DatasetFormations(dataset_type) + + def _fetch_meta_from_hub(self, dataset_name: str, namespace: str, + revision: str, meta_cache_dir: str): + + # Fetch id and type of dataset + dataset_id, dataset_type = self.api.get_dataset_id_and_type( + dataset_name, namespace) + + # Fetch meta file-list of dataset + file_list = self.api.get_dataset_meta_file_list( + dataset_name, namespace, dataset_id, revision) + + # Fetch urls of meta-files + local_paths, dataset_formation = self.api.get_dataset_meta_files_local_paths( + dataset_name, namespace, revision, meta_cache_dir, dataset_type, + file_list) + + return local_paths, dataset_formation diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py new file 
mode 100644 index 0000000..45765c5 --- /dev/null +++ b/modelscope/msdatasets/ms_dataset.py @@ -0,0 +1,693 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import warnings +from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional, + Sequence, Union) + +import numpy as np +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES +from datasets.utils.file_utils import is_relative_path + +from modelscope.hub.repository import DatasetRepository +from modelscope.msdatasets.context.dataset_context_config import \ + DatasetContextConfig +from modelscope.msdatasets.data_loader.data_loader_manager import ( + LocalDataLoaderManager, LocalDataLoaderType, RemoteDataLoaderManager, + RemoteDataLoaderType) +from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, + NativeIterableDataset) +from modelscope.msdatasets.task_datasets.builder import build_task_dataset +from modelscope.msdatasets.utils.delete_utils import DatasetDeleteManager +from modelscope.msdatasets.utils.upload_utils import DatasetUploadManager +from modelscope.utils.config import ConfigDict +from modelscope.utils.config_ds import MS_DATASETS_CACHE +from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, + DEFAULT_DATASET_REVISION, DownloadMode, + Hubs, UploadMode) +from modelscope.utils.import_utils import is_tf_available, is_torch_available +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def format_list(para) -> List: + if para is None: + para = [] + elif isinstance(para, str): + para = [para] + elif len(set(para)) < len(para): + raise ValueError(f'List columns contains duplicates: {para}') + return para + + +class MsDataset: + """ + ModelScope Dataset (aka, MsDataset) is backed by a huggingface Dataset to + provide efficient data access and local storage managements. On top of + that, MsDataset supports the data integration and interactions with multiple + remote hubs, particularly, ModelScope's own Dataset-hub. MsDataset also + abstracts away data-access details with other remote storage, including both + general external web-hosted data and cloud storage such as OSS. 
+ """ + # the underlying huggingface Dataset + _hf_ds = None + + def __init__(self, + ds_instance: Union[Dataset, IterableDataset, ExternalDataset], + target: Optional[str] = None): + self._hf_ds = ds_instance + if target is not None and target not in self._hf_ds.features: + raise TypeError( + f'"target" must be a column of the dataset({list(self._hf_ds.features.keys())}, but got {target}' + ) + self.target = target + + def __iter__(self): + for item in self._hf_ds: + if self.target is not None: + yield item[self.target] + else: + yield item + + def __getitem__(self, key): + return self._hf_ds[key] + + def __len__(self): + if isinstance(self._hf_ds, IterableDataset) or isinstance( + self._hf_ds, NativeIterableDataset): + logger.error( + f'object of type `{self._hf_ds.__class__.__name__}` has no __len__()' + ) + return None + return len(self._hf_ds) + + @property + def ds_instance(self): + return self._hf_ds + + @property + def config_kwargs(self): + if isinstance(self._hf_ds, ExternalDataset): + return self._hf_ds.config_kwargs + else: + return None + + @classmethod + def from_hf_dataset(cls, + hf_ds: Union[Dataset, DatasetDict, ExternalDataset], + target: str = None) -> Union[dict, 'MsDataset']: + r""" + @deprecated + This method is deprecated and may be removed in future releases, please use `to_ms_dataset()` instead. + """ + warnings.warn( + 'from_hf_dataset is deprecated, please use to_ms_dataset instead.', + DeprecationWarning) + if isinstance(hf_ds, Dataset): + return cls(hf_ds, target) + elif isinstance(hf_ds, DatasetDict): + if len(hf_ds.keys()) == 1: + return cls(next(iter(hf_ds.values())), target) + return {k: cls(v, target) for k, v in hf_ds.items()} + elif isinstance(hf_ds, ExternalDataset): + return cls(hf_ds) + else: + raise TypeError( + f'"hf_ds" must be a Dataset or DatasetDict, but got {type(hf_ds)}' + ) + + @classmethod + def to_ms_dataset(cls, + ds_instance: Union[Dataset, DatasetDict, ExternalDataset, + NativeIterableDataset, + IterableDataset, IterableDatasetDict], + target: str = None) -> Union[dict, 'MsDataset']: + """Convert input to `MsDataset` instance.""" + if isinstance(ds_instance, Dataset): + return cls(ds_instance, target) + elif isinstance(ds_instance, DatasetDict): + if len(ds_instance.keys()) == 1: + return cls(next(iter(ds_instance.values())), target) + return {k: cls(v, target) for k, v in ds_instance.items()} + elif isinstance(ds_instance, ExternalDataset): + return cls(ds_instance) + elif isinstance(ds_instance, NativeIterableDataset): + return cls(ds_instance) + elif isinstance(ds_instance, IterableDataset): + return cls(ds_instance) + elif isinstance(ds_instance, IterableDatasetDict): + if len(ds_instance.keys()) == 1: + return cls(next(iter(ds_instance.values())), target) + return {k: cls(v, target) for k, v in ds_instance.items()} + else: + raise TypeError( + f'"ds_instance" must be a Dataset or DatasetDict, but got {type(ds_instance)}' + ) + + @staticmethod + def load( + dataset_name: Union[str, list], + namespace: Optional[str] = DEFAULT_DATASET_NAMESPACE, + target: Optional[str] = None, + version: Optional[str] = DEFAULT_DATASET_REVISION, + hub: Optional[Hubs] = Hubs.modelscope, + subset_name: Optional[str] = None, + split: Optional[str] = None, + data_dir: Optional[str] = None, + data_files: Optional[Union[str, Sequence[str], + Mapping[str, Union[str, + Sequence[str]]]]] = None, + download_mode: Optional[DownloadMode] = DownloadMode. 
+ REUSE_DATASET_IF_EXISTS, + cache_dir: Optional[str] = MS_DATASETS_CACHE, + use_streaming: Optional[bool] = False, + **config_kwargs, + ) -> Union[dict, 'MsDataset', NativeIterableDataset]: + """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. + + Args: + dataset_name (str): Path or name of the dataset. + The form of `namespace/dataset_name` is also supported. + namespace(str, optional): Namespace of the dataset. It should not be None if you load a remote dataset + from Hubs.modelscope, + namespace (str, optional): + Namespace of the dataset. It should not be None if you load a remote dataset + from Hubs.modelscope, + target (str, optional): Name of the column to output. + version (str, optional): Version of the dataset script to load: + subset_name (str, optional): Defining the subset_name of the dataset. + data_dir (str, optional): Defining the data_dir of the dataset configuration. I + data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). + split (str, optional): Which split of the data to load. + hub (Hubs or str, optional): When loading from a remote hub, where it is from. default Hubs.modelscope + download_mode (DownloadMode or str, optional): + How to treat existing datasets. default DownloadMode.REUSE_DATASET_IF_EXISTS + config_kwargs (additional keyword arguments): Keyword arguments to be passed + download_mode (DownloadMode or str, optional): How to treat existing datasets. default + DownloadMode.REUSE_DATASET_IF_EXISTS + cache_dir (str, Optional): User-define local cache directory. + use_streaming (bool, Optional): If set to True, no need to download all data files. + Instead, it streams the data progressively, and returns + NativeIterableDataset or a dict of NativeIterableDataset. + **config_kwargs (additional keyword arguments): Keyword arguments to be passed + + Returns: + MsDataset (MsDataset): MsDataset object for a certain dataset. + """ + + download_mode = DownloadMode(download_mode + or DownloadMode.REUSE_DATASET_IF_EXISTS) + hub = Hubs(hub or Hubs.modelscope) + + if not isinstance(dataset_name, str) and not isinstance( + dataset_name, list): + raise TypeError( + f'dataset_name must be `str` or `list`, but got {type(dataset_name)}' + ) + + if isinstance(dataset_name, list): + if target is None: + target = 'target' + dataset_inst = Dataset.from_dict({target: dataset_name}) + return MsDataset.to_ms_dataset(dataset_inst, target=target) + + dataset_name = os.path.expanduser(dataset_name) + if is_relative_path(dataset_name) and dataset_name.count('/') == 1: + dataset_name_split = dataset_name.split('/') + namespace = dataset_name_split[0].strip() + dataset_name = dataset_name_split[1].strip() + if not namespace or not dataset_name: + raise 'The dataset_name should be in the form of `namespace/dataset_name` or `dataset_name`.' 
+ + # Init context config + dataset_context_config = DatasetContextConfig( + dataset_name=dataset_name, + namespace=namespace, + version=version, + subset_name=subset_name, + split=split, + target=target, + hub=hub, + data_dir=data_dir, + data_files=data_files, + download_mode=download_mode, + cache_root_dir=cache_dir, + use_streaming=use_streaming, + **config_kwargs) + + # Load from local disk + if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir( + dataset_name) or os.path.isfile(dataset_name): + dataset_inst = LocalDataLoaderManager( + dataset_context_config).load_dataset( + LocalDataLoaderType.HF_DATA_LOADER) + return MsDataset.to_ms_dataset(dataset_inst, target=target) + # Load from the huggingface hub + elif hub == Hubs.huggingface: + dataset_inst = RemoteDataLoaderManager( + dataset_context_config).load_dataset( + RemoteDataLoaderType.HF_DATA_LOADER) + return MsDataset.to_ms_dataset(dataset_inst, target=target) + # Load from the modelscope hub + elif hub == Hubs.modelscope: + dataset_inst = RemoteDataLoaderManager( + dataset_context_config).load_dataset( + RemoteDataLoaderType.MS_DATA_LOADER) + return MsDataset.to_ms_dataset(dataset_inst, target=target) + else: + raise 'Please adjust input args to specify a loading mode, we support following scenes: ' \ + 'loading from local disk, huggingface hub and modelscope hub.' + + def to_torch_dataset_with_processors( + self, + preprocessors: Union[Callable, List[Callable]], + columns: Union[str, List[str]] = None, + to_tensor: bool = True, + ): + import torch + preprocessor_list = preprocessors if isinstance( + preprocessors, list) else [preprocessors] + + columns = format_list(columns) + + columns = [ + key for key in self._hf_ds.features.keys() if key in columns + ] + retained_columns = [] + if to_tensor: + sample = next(iter(self._hf_ds)) + + sample_res = {k: np.array(sample[k]) for k in columns} + for processor in preprocessor_list: + sample_res.update( + {k: np.array(v) + for k, v in processor(sample).items()}) + + def is_numpy_number(value): + return np.issubdtype(value.dtype, np.integer) or np.issubdtype( + value.dtype, np.floating) + + for k in sample_res.keys(): + if not is_numpy_number(sample_res[k]): + logger.warning( + f'Data of column {k} is non-numeric, will be removed') + continue + retained_columns.append(k) + + class MsMapDataset(torch.utils.data.Dataset): + def __init__(self, dataset: Iterable, preprocessor_list, + retained_columns, columns, to_tensor): + super(MsDataset).__init__() + self.dataset = dataset + self.preprocessor_list = preprocessor_list + self.to_tensor = to_tensor + self.retained_columns = retained_columns + self.columns = columns + + def __len__(self): + return len(self.dataset) + + def type_converter(self, x): + import torch + if self.to_tensor: + return torch.tensor(x) + else: + return x + + def __getitem__(self, index): + item_dict = self.dataset[index] + res = { + k: self.type_converter(item_dict[k]) + for k in self.columns + if (not self.to_tensor) or k in self.retained_columns + } + for preprocessor in self.preprocessor_list: + res.update({ + k: self.type_converter(v) + for k, v in preprocessor(item_dict).items() + if (not self.to_tensor) or k in self.retained_columns + }) + return res + + return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns, + columns, to_tensor) + + def to_torch_dataset( + self, + columns: Union[str, List[str]] = None, + preprocessors: Union[Callable, List[Callable]] = None, + task_name: str = None, + task_data_config: ConfigDict = None, + to_tensor: bool 
= True, + **format_kwargs, + ): + """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to + torch.utils.data.DataLoader. + + Args: + preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process + every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict + will be used as a field of torch.utils.data.Dataset. + columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if + `to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column. + If the `preprocessors` is not None, the output fields of processors will also be added. + task_name (str, default None): task name, refer to :obj:`Tasks` for more details + task_data_config (ConfigDict, default None): config dict for model object. + to_tensor (bool, default None): whether convert the data types of dataset column(s) to torch.tensor or not. + format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`. + + Returns: + :class:`tf.data.Dataset` + + """ + if not is_torch_available(): + raise ImportError( + 'The function to_torch_dataset requires pytorch to be installed' + ) + if isinstance(self._hf_ds, ExternalDataset): + task_data_config.update({'preprocessor': preprocessors}) + task_data_config.update(self._hf_ds.config_kwargs) + return build_task_dataset(task_data_config, task_name) + if preprocessors is not None: + return self.to_torch_dataset_with_processors(preprocessors, + columns=columns, + to_tensor=to_tensor) + else: + self._hf_ds.reset_format() + self._hf_ds.set_format(type='torch', + columns=columns, + format_kwargs=format_kwargs) + return self._hf_ds + + def to_tf_dataset_with_processors( + self, + batch_size: int, + shuffle: bool, + preprocessors: Union[Callable, List[Callable]], + drop_remainder: bool = None, + prefetch: bool = True, + label_cols: Union[str, List[str]] = None, + columns: Union[str, List[str]] = None, + ): + preprocessor_list = preprocessors if isinstance( + preprocessors, list) else [preprocessors] + + label_cols = format_list(label_cols) + columns = format_list(columns) + cols_to_retain = list(set(label_cols + columns)) + retained_columns = [ + key for key in self._hf_ds.features.keys() if key in cols_to_retain + ] + import tensorflow as tf + tf_dataset = tf.data.Dataset.from_tensor_slices( + np.arange(len(self._hf_ds), dtype=np.int64)) + if shuffle: + tf_dataset = tf_dataset.shuffle(buffer_size=len(self._hf_ds)) + + def func(i, return_dict=False): + i = int(i) + res = {k: np.array(self._hf_ds[i][k]) for k in retained_columns} + for preprocessor in preprocessor_list: + # TODO preprocessor output may have the same key + res.update({ + k: np.array(v) + for k, v in preprocessor(self._hf_ds[i]).items() + }) + if return_dict: + return res + return tuple(list(res.values())) + + sample_res = func(0, True) + + @tf.function(input_signature=[tf.TensorSpec(None, tf.int64)]) + def fetch_function(i): + output = tf.numpy_function( + func, + inp=[i], + Tout=[ + tf.dtypes.as_dtype(val.dtype) + for val in sample_res.values() + ], + ) + return {key: output[i] for i, key in enumerate(sample_res)} + + from tensorflow.data.experimental import AUTOTUNE + tf_dataset = tf_dataset.map(fetch_function, + num_parallel_calls=AUTOTUNE) + if label_cols: + + def split_features_and_labels(input_batch): + labels = { + key: tensor + for key, tensor in input_batch.items() if key in label_cols + } + if len(input_batch) == 1: + 
input_batch = next(iter(input_batch.values())) + if len(labels) == 1: + labels = next(iter(labels.values())) + return input_batch, labels + + tf_dataset = tf_dataset.map(split_features_and_labels) + + elif len(columns) == 1: + tf_dataset = tf_dataset.map(lambda x: next(iter(x.values()))) + if batch_size > 1: + tf_dataset = tf_dataset.batch(batch_size, + drop_remainder=drop_remainder) + + if prefetch: + tf_dataset = tf_dataset.prefetch(AUTOTUNE) + return tf_dataset + + def to_tf_dataset( + self, + batch_size: int, + shuffle: bool, + preprocessors: Union[Callable, List[Callable]] = None, + columns: Union[str, List[str]] = None, + collate_fn: Callable = None, + drop_remainder: bool = None, + collate_fn_args: Dict[str, Any] = None, + label_cols: Union[str, List[str]] = None, + prefetch: bool = True, + ): + """Create a tf.data.Dataset from the MS Dataset. This tf.data.Dataset can be passed to tf methods like + model.fit() or model.predict(). + + Args: + batch_size (int): Number of samples in a single batch. + shuffle(bool): Shuffle the dataset order. + preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process + every sample of the dataset. The output type of processors is dict, and each field of the dict will be + used as a field of the tf.data. Dataset. If the `preprocessors` is None, the `collate_fn` + shouldn't be None. + columns (str or List[str], default None): Dataset column(s) to be loaded. If the preprocessor is None, + the arg columns must have at least one column. If the `preprocessors` is not None, the output fields of + processors will also be added. + collate_fn(Callable, default None): A callable object used to collect lists of samples into a batch. If + the `preprocessors` is None, the `collate_fn` shouldn't be None. + drop_remainder(bool, default None): Drop the last incomplete batch when loading. + collate_fn_args (Dict, optional): A `dict` of arguments to be passed to the`collate_fn`. + label_cols (str or List[str], defalut None): Dataset column(s) to load as labels. + prefetch (bool, default True): Prefetch data. + + Returns: + :class:`tf.data.Dataset` + + """ + if not is_tf_available(): + raise ImportError( + 'The function to_tf_dataset requires Tensorflow to be installed.' + ) + if preprocessors is not None: + return self.to_tf_dataset_with_processors( + batch_size, + shuffle, + preprocessors, + drop_remainder=drop_remainder, + prefetch=prefetch, + label_cols=label_cols, + columns=columns) + + if collate_fn is None: + logger.error( + 'The `preprocessors` and the `collate_fn` should`t be both None.' + ) + return None + self._hf_ds.reset_format() + return self._hf_ds.to_tf_dataset(columns, + batch_size, + shuffle, + collate_fn, + drop_remainder=drop_remainder, + collate_fn_args=collate_fn_args, + label_cols=label_cols, + prefetch=prefetch) + + def to_hf_dataset(self) -> Dataset: + self._hf_ds.reset_format() + return self._hf_ds + + def remap_columns(self, column_mapping: Dict[str, str]) -> Dataset: + """ + Rename columns and return the underlying hf dataset directly + TODO: support native MsDataset column rename. 
+ Args: + column_mapping: the mapping of the original and new column names + Returns: + underlying hf dataset + """ + self._hf_ds.reset_format() + return self._hf_ds.rename_columns(column_mapping) + + @staticmethod + def upload( + object_name: str, + local_file_path: str, + dataset_name: str, + namespace: Optional[str] = DEFAULT_DATASET_NAMESPACE, + version: Optional[str] = DEFAULT_DATASET_REVISION, + num_processes: Optional[int] = None, + chunksize: Optional[int] = 1, + filter_hidden_files: Optional[bool] = True, + upload_mode: Optional[UploadMode] = UploadMode.OVERWRITE) -> None: + """Upload dataset file or directory to the ModelScope Hub. Please log in to the ModelScope Hub first. + + Args: + object_name (str): The object name on ModelScope, in the form of your-dataset-name.zip or your-dataset-name + local_file_path (str): Local file or directory to upload + dataset_name (str): Name of the dataset + namespace(str, optional): Namespace of the dataset + version: Optional[str]: Version of the dataset + num_processes: Optional[int]: The number of processes used for multiprocess uploading. + This is only applicable when local_file_path is a directory, and we are uploading mutliple-files + insided the directory. When None provided, the number returned by os.cpu_count() is used as default. + chunksize: Optional[int]: The chunksize of objects to upload. + For very long iterables using a large value for chunksize can make the job complete much faster than + using the default value of 1. Available if local_file_path is a directory. + filter_hidden_files: Optional[bool]: Whether to filter hidden files. + Available if local_file_path is a directory. + upload_mode: Optional[UploadMode]: How to upload objects from local. Default: UploadMode.OVERWRITE, upload + all objects from local, existing remote objects may be overwritten. + + Returns: + None + + """ + if not object_name: + raise ValueError('object_name cannot be empty!') + + _upload_manager = DatasetUploadManager(dataset_name=dataset_name, + namespace=namespace, + version=version) + + upload_mode = UploadMode(upload_mode or UploadMode.OVERWRITE) + + if os.path.isfile(local_file_path): + _upload_manager.upload(object_name=object_name, + local_file_path=local_file_path, + upload_mode=upload_mode) + elif os.path.isdir(local_file_path): + _upload_manager.upload_dir(object_dir_name=object_name, + local_dir_path=local_file_path, + num_processes=num_processes, + chunksize=chunksize, + filter_hidden_files=filter_hidden_files, + upload_mode=upload_mode) + else: + raise ValueError( + f'{local_file_path} is not a valid file path or directory') + + @staticmethod + def clone_meta(dataset_work_dir: str, + dataset_id: str, + revision: Optional[str] = DEFAULT_DATASET_REVISION, + auth_token: Optional[str] = None, + git_path: Optional[str] = None) -> None: + """Clone meta-file of dataset from the ModelScope Hub. + + Args: + dataset_work_dir (str): Current git working directory. + dataset_id (str): Dataset id, in the form of your-namespace/your-dataset-name . + revision (str, optional): + revision of the model you want to clone from. Can be any of a branch, tag or commit hash + auth_token (str, optional): + token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter + as the token is already saved when you login the first time, if None, we will use saved token. 
+ git_path (str, optional): + The git command line path, if None, we use 'git' + Returns: + None + """ + + _repo = DatasetRepository(repo_work_dir=dataset_work_dir, + dataset_id=dataset_id, + revision=revision, + auth_token=auth_token, + git_path=git_path) + clone_work_dir = _repo.clone() + if clone_work_dir: + logger.info('Already cloned repo to: {}'.format(clone_work_dir)) + else: + logger.warning( + 'Repo dir already exists: {}'.format(clone_work_dir)) + + @staticmethod + def upload_meta(dataset_work_dir: str, + commit_message: str, + revision: Optional[str] = DEFAULT_DATASET_REVISION, + auth_token: Optional[str] = None, + git_path: Optional[str] = None, + force: bool = False) -> None: + """Upload meta-file of dataset to the ModelScope Hub. Please clone the meta-data from the ModelScope Hub first. + + Args: + dataset_work_dir (str): Current working directory. + commit_message (str): Commit message. + revision(`Optional[str]`): + revision of the model you want to clone from. Can be any of a branch, tag or commit hash + auth_token(`Optional[str]`): + token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter + as the token is already saved when you log in the first time, if None, we will use saved token. + git_path:(`Optional[str]`): + The git command line path, if None, we use 'git' + force (Optional[bool]): whether to use forced-push. + + Returns: + None + + """ + _repo = DatasetRepository(repo_work_dir=dataset_work_dir, + dataset_id='', + revision=revision, + auth_token=auth_token, + git_path=git_path) + _repo.push(commit_message=commit_message, branch=revision, force=force) + + @staticmethod + def delete(object_name: str, + dataset_name: str, + namespace: Optional[str] = DEFAULT_DATASET_NAMESPACE, + version: Optional[str] = DEFAULT_DATASET_REVISION) -> str: + """ Delete object of dataset. Please log in first and make sure you have permission to manage the dataset. + + Args: + object_name (str): The object name of dataset to be deleted. Could be a name of file or directory. If it's + directory, then ends with `/`. + For example: your-data-name.zip, train/001/img_001.png, train/, ... + dataset_name (str): Path or name of the dataset. + namespace(str, optional): Namespace of the dataset. + version (str, optional): Version of the dataset. + + Returns: + res_msg (str): Response message. + + """ + _delete_manager = DatasetDeleteManager(dataset_name=dataset_name, + namespace=namespace, + version=version) + resp_msg = _delete_manager.delete(object_name=object_name) + logger.info(f'Object {object_name} successfully removed!') + return resp_msg diff --git a/modelscope/msdatasets/task_datasets/__init__.py b/modelscope/msdatasets/task_datasets/__init__.py new file mode 100644 index 0000000..558abb8 --- /dev/null +++ b/modelscope/msdatasets/task_datasets/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
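+
+# Illustrative usage of the MsDataset.upload / MsDataset.delete helpers
+# documented above (a sketch only; the dataset name, namespace and file paths
+# are hypothetical):
+#
+#   from modelscope.msdatasets import MsDataset
+#
+#   MsDataset.upload(object_name='train.zip',
+#                    local_file_path='/path/to/train.zip',
+#                    dataset_name='your-dataset',
+#                    namespace='your-namespace')
+#   MsDataset.delete(object_name='train.zip',
+#                    dataset_name='your-dataset',
+#                    namespace='your-namespace')
+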
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule, is_torch_available + +if TYPE_CHECKING: + from .base import TaskDataset + from .builder import TASK_DATASETS, build_task_dataset + from .torch_base_dataset import TorchTaskDataset +else: + _import_structure = { + 'base': ['TaskDataset'], + 'builder': ['TASK_DATASETS', 'build_task_dataset'], + 'torch_base_dataset': ['TorchTaskDataset'], + } + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/task_datasets/base.py b/modelscope/msdatasets/task_datasets/base.py new file mode 100644 index 0000000..9048f25 --- /dev/null +++ b/modelscope/msdatasets/task_datasets/base.py @@ -0,0 +1,47 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from abc import ABC, abstractmethod +from typing import Any, List, Tuple, Union + + +class TaskDataset(ABC): + """The task dataset base class for all the task specific dataset processors. + """ + def __init__(self, + datasets: Union[Any, List[Any]], + mode, + preprocessor=None, + **kwargs): + super().__init__() + self.mode = mode + self.preprocessor = preprocessor + self._inner_dataset = self.prepare_dataset(datasets) + + @abstractmethod + def prepare_dataset(self, datasets: Union[Any, List[Any]]) -> Any: + """Prepare a dataset. + + User can process the input datasets in a whole dataset perspective. + This method also helps to merge several datasets to one. + + Args: + datasets: The original dataset(s) + + Returns: A single dataset, which may be created after merging. + + """ + pass + + @abstractmethod + def prepare_sample(self, data): + """Preprocess the data fetched from the inner_dataset. + + If the preprocessor is None, the original data will be returned, else the preprocessor will be called. + User can override this method to implement custom logics. + + Args: + data: The data fetched from the dataset. + + Returns: The processed data. + + """ + pass diff --git a/modelscope/msdatasets/task_datasets/builder.py b/modelscope/msdatasets/task_datasets/builder.py new file mode 100644 index 0000000..62f984d --- /dev/null +++ b/modelscope/msdatasets/task_datasets/builder.py @@ -0,0 +1,23 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from modelscope.utils.config import ConfigDict +from modelscope.utils.registry import Registry, build_from_cfg + +TASK_DATASETS = Registry('task_datasets') + + +def build_task_dataset(cfg: ConfigDict, + task_name: str = None, + default_args: dict = None): + """ Build task specific dataset processor given model config dict and the task name. + + Args: + cfg (:obj:`ConfigDict`): config dict for model object. + task_name (str, optional): task name, refer to + :obj:`Tasks` for more details + default_args (dict, optional): Default initialization arguments. + """ + return build_from_cfg(cfg, + TASK_DATASETS, + group_key=task_name, + default_args=default_args) diff --git a/modelscope/msdatasets/task_datasets/torch_base_dataset.py b/modelscope/msdatasets/task_datasets/torch_base_dataset.py new file mode 100644 index 0000000..5f6b7fd --- /dev/null +++ b/modelscope/msdatasets/task_datasets/torch_base_dataset.py @@ -0,0 +1,63 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
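+
+# Illustrative sketch of registering and building a task-specific dataset via
+# the TASK_DATASETS registry from builder.py (the task name and dataset class
+# are hypothetical; register_module follows the usual modelscope Registry
+# pattern):
+#
+#   from modelscope.msdatasets.task_datasets import TASK_DATASETS, build_task_dataset
+#
+#   @TASK_DATASETS.register_module(group_key='my-task', module_name='my-dataset')
+#   class MyTaskDataset(TorchTaskDataset):
+#       pass
+#
+#   dataset = build_task_dataset(dict(type='my-dataset'), task_name='my-task',
+#                                default_args=dict(datasets=raw_datasets, mode='train'))
+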
+from typing import Any, List, Tuple, Union + +from torch.utils.data import ConcatDataset, Dataset + +from .base import TaskDataset + + +class TorchTaskDataset(TaskDataset, Dataset): + """The task dataset base class for all the torch-based task processors. + + This base class is enough for most cases, except there are procedures which can not be executed in + preprocessors and Datasets like dataset merging. + """ + def __init__(self, + datasets: Union[Any, List[Any]], + mode, + preprocessor=None, + **kwargs): + TaskDataset.__init__(self, datasets, mode, preprocessor, **kwargs) + self.trainer = None + + def __getitem__(self, index) -> Any: + return self.prepare_sample(self._inner_dataset[index]) + + def __len__(self): + return len(self._inner_dataset) + + def prepare_dataset(self, datasets: Union[Any, List[Any]]) -> Any: + """Prepare a dataset. + + User can process the input datasets in a whole dataset perspective. + This method gives a default implementation of datasets merging, user can override this + method to write custom logics. + + Args: + datasets: The original dataset(s) + + Returns: A single dataset, which may be created after merging. + + """ + if isinstance(datasets, List): + if len(datasets) == 1: + return datasets[0] + elif len(datasets) > 1: + return ConcatDataset(datasets) + else: + return datasets + + def prepare_sample(self, data): + """Preprocess the data fetched from the inner_dataset. + + If the preprocessor is None, the original data will be returned, else the preprocessor will be called. + User can override this method to implement custom logics. + + Args: + data: The data fetched from the dataset. + + Returns: The processed data. + + """ + return self.preprocessor( + data) if self.preprocessor is not None else data diff --git a/modelscope/msdatasets/utils/__init__.py b/modelscope/msdatasets/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/msdatasets/utils/dataset_utils.py b/modelscope/msdatasets/utils/dataset_utils.py new file mode 100644 index 0000000..a4571a2 --- /dev/null +++ b/modelscope/msdatasets/utils/dataset_utils.py @@ -0,0 +1,222 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from collections import defaultdict +from typing import Optional, Union + +from modelscope.hub.api import HubApi +from modelscope.utils.constant import DEFAULT_DATASET_REVISION, MetaDataFields +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def format_dataset_structure(dataset_structure): + return { + k: v + for k, v in dataset_structure.items() + if (v.get('meta') or v.get('file')) + } + + +def get_target_dataset_structure(dataset_structure: dict, + subset_name: Optional[str] = None, + split: Optional[str] = None): + """ + Args: + dataset_structure (dict): Dataset Structure, like + { + "default":{ + "train":{ + "meta":"my_train.csv", + "file":"pictures.zip" + } + }, + "subsetA":{ + "test":{ + "meta":"mytest.csv", + "file":"pictures.zip" + } + } + } + subset_name (str, optional): Defining the subset_name of the dataset. + split (str, optional): Which split of the data to load. + Returns: + target_subset_name (str): Name of the chosen subset. + target_dataset_structure (dict): Structure of the chosen split(s), like + { + "test":{ + "meta":"mytest.csv", + "file":"pictures.zip" + } + } + """ + # verify dataset subset + if (subset_name and subset_name not in dataset_structure) or ( + not subset_name and len(dataset_structure.keys()) > 1): + raise ValueError( + f'subset_name {subset_name} not found. 
Available: {dataset_structure.keys()}' + ) + target_subset_name = subset_name + if not subset_name: + target_subset_name = next(iter(dataset_structure.keys())) + logger.info( + f'No subset_name specified, defaulting to {target_subset_name}' + ) + # verify dataset split + target_dataset_structure = format_dataset_structure( + dataset_structure[target_subset_name]) + if split and split not in target_dataset_structure: + raise ValueError( + f'split {split} not found. Available: {target_dataset_structure.keys()}' + ) + if split: + target_dataset_structure = {split: target_dataset_structure[split]} + return target_subset_name, target_dataset_structure + + +def list_dataset_objects(hub_api: HubApi, max_limit: int, is_recursive: bool, + dataset_name: str, namespace: str, + version: str) -> list: + """ + List all objects for a specific dataset. + + Args: + hub_api (HubApi): HubApi instance. + max_limit (int): Max number of objects. + is_recursive (bool): Whether to list objects recursively. + dataset_name (str): Dataset name. + namespace (str): Namespace. + version (str): Dataset version. + Returns: + res (list): List of objects, e.g., ['train/images/001.png', 'train/images/002.png', 'val/images/001.png', ...] + """ + res = [] + objects = hub_api.list_oss_dataset_objects(dataset_name=dataset_name, + namespace=namespace, + max_limit=max_limit, + is_recursive=is_recursive, + is_filter_dir=True, + revision=version) + + for item in objects: + object_key = item.get('Key') + if not object_key: + continue + res.append(object_key) + + return res + + +def contains_dir(file_map) -> bool: + """ + Check whether the input contains at least one directory. + + Args: + file_map (dict): Structure of data files, e.g., {'train': 'train.zip', 'validation': 'val.zip'} + Returns: + True if the input contains at least one directory, False otherwise. + """ + res = False + for k, v in file_map.items(): + if isinstance(v, str) and not v.endswith('.zip'): + res = True + break + return res + + +def get_subdir_hash_from_split(split: Union[str, list], version: str) -> str: + if isinstance(split, str): + split = [split] + return os.path.join(version, '_'.join(split)) + + +def get_split_list(split: Union[str, list]) -> list: + """ Unify the split into list format. """ + if isinstance(split, str): + return [split] + elif isinstance(split, list): + return split + else: + raise TypeError( + f'Expected split to be str or list, but got {type(split)}.') + + +def get_split_objects_map(file_map, objects): + """ + Get the map between dataset splits and oss objects. + + Args: + file_map (dict): Structure of data files, e.g., {'train': 'train', 'validation': 'val'}, where both train and val + are directories. + objects (list): List of oss objects, e.g., ['train/001/1_123.png', 'train/001/1_124.png', 'val/003/3_38.png'] + Returns: + A map of split-objects.
e.g., {'train': ['train/001/1_123.png', 'train/001/1_124.png'], + 'validation': ['val/003/3_38.png']} + """ + res = {} + for k, v in file_map.items(): + res[k] = [] + + for obj_key in objects: + for k, v in file_map.items(): + if obj_key.startswith(v + '/'): + res[k].append(obj_key) + + return res + + +def get_dataset_files(subset_split_into: dict, + dataset_name: str, + namespace: str, + revision: Optional[str] = DEFAULT_DATASET_REVISION): + """ + Returns: + meta_map: Structure of meta files (.csv); each meta file name is replaced by its URL, like + { + "test": "https://xxx/mytest.csv" + } + file_map: Structure of data files (.zip), like + { + "test": "pictures.zip" + } + """ + meta_map = defaultdict(dict) + file_map = defaultdict(dict) + args_map = defaultdict(dict) + modelscope_api = HubApi() + + for split, info in subset_split_into.items(): + meta_map[split] = modelscope_api.get_dataset_file_url( + info.get('meta', ''), dataset_name, namespace, revision) + if info.get('file'): + file_map[split] = info['file'] + args_map[split] = info.get('args') + + objects = [] + # If `big_data` is true, then fetch objects from the meta-csv file directly. + for split, args_dict in args_map.items(): + if args_dict and args_dict.get(MetaDataFields.ARGS_BIG_DATA): + meta_csv_file_url = meta_map[split] + _, script_content = modelscope_api.fetch_single_csv_script( + meta_csv_file_url) + if not script_content: + raise ValueError( + 'Meta-csv file cannot be empty when the meta-arg `big_data` is true.') + for item in script_content: + if not item: + continue + item = item.strip().split(',')[0] + if item: + objects.append(item) + file_map[split] = objects + # More general but less efficient. + if not objects: + objects = list_dataset_objects(hub_api=modelscope_api, + max_limit=-1, + is_recursive=True, + dataset_name=dataset_name, + namespace=namespace, + version=revision) + if contains_dir(file_map): + file_map = get_split_objects_map(file_map, objects) + + return meta_map, file_map, args_map diff --git a/modelscope/msdatasets/utils/delete_utils.py b/modelscope/msdatasets/utils/delete_utils.py new file mode 100644 index 0000000..5afc15e --- /dev/null +++ b/modelscope/msdatasets/utils/delete_utils.py @@ -0,0 +1,31 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from modelscope.hub.api import HubApi + + +class DatasetDeleteManager(object): + def __init__(self, dataset_name: str, namespace: str, version: str): + self.api = HubApi() + self.dataset_name = dataset_name + self.namespace = namespace + self.version = version + + def delete(self, object_name: str) -> str: + + # single object + if not object_name.endswith('/'): + resp_msg = self.api.delete_oss_dataset_object( + object_name=object_name, + dataset_name=self.dataset_name, + namespace=self.namespace, + revision=self.version) + else: + # directory of objects + object_name = object_name.strip('/') + resp_msg = self.api.delete_oss_dataset_dir( + object_name=object_name, + dataset_name=self.dataset_name, + namespace=self.namespace, + revision=self.version) + + return resp_msg diff --git a/modelscope/msdatasets/utils/oss_utils.py b/modelscope/msdatasets/utils/oss_utils.py new file mode 100644 index 0000000..420388b --- /dev/null +++ b/modelscope/msdatasets/utils/oss_utils.py @@ -0,0 +1,161 @@ +# Copyright (c) Alibaba, Inc. and its affiliates.
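+
+# Illustrative behaviour of the helpers in dataset_utils.py above (a sketch;
+# the inputs are hypothetical and follow the docstring examples):
+#
+#   structure = {'default': {'train': {'meta': 'train.csv', 'file': 'pics.zip'}}}
+#   subset, target = get_target_dataset_structure(structure, split='train')
+#   # subset == 'default'; target == {'train': {'meta': 'train.csv', 'file': 'pics.zip'}}
+#
+#   file_map = {'train': 'train', 'validation': 'val'}
+#   objects = ['train/001/1_123.png', 'val/003/3_38.png']
+#   get_split_objects_map(file_map, objects)
+#   # -> {'train': ['train/001/1_123.png'], 'validation': ['val/003/3_38.png']}
+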
+ +from __future__ import print_function + +import multiprocessing +import os + +import oss2 +from datasets.utils.file_utils import hash_url_to_filename + +from modelscope.hub.api import HubApi +from modelscope.msdatasets.download.download_config import DataDownloadConfig +from modelscope.utils.config_ds import MS_CACHE_HOME +from modelscope.utils.constant import (DEFAULT_DATA_ACCELERATION_ENDPOINT, + MetaDataFields, UploadMode) +from modelscope.utils.logger import get_logger + +logger = get_logger() + +ACCESS_ID = 'AccessId' +ACCESS_SECRET = 'AccessSecret' +SECURITY_TOKEN = 'SecurityToken' +BUCKET = 'Bucket' +BACK_DIR = 'BackupDir' +DIR = 'Dir' + + +class OssUtilities: + def __init__(self, oss_config, dataset_name, namespace, revision): + self._do_init(oss_config=oss_config) + + self.dataset_name = dataset_name + self.namespace = namespace + self.revision = revision + + self.resumable_store_root_path = os.path.join(MS_CACHE_HOME, + 'tmp/resumable_store') + self.num_threads = multiprocessing.cpu_count() + self.part_size = 1 * 1024 * 1024 + self.multipart_threshold = 50 * 1024 * 1024 + self.max_retries = 3 + + self.resumable_store_download = oss2.ResumableDownloadStore( + root=self.resumable_store_root_path) + self.resumable_store_upload = oss2.ResumableStore( + root=self.resumable_store_root_path) + self.api = HubApi() + + def _do_init(self, oss_config): + self.key = oss_config[ACCESS_ID] + self.secret = oss_config[ACCESS_SECRET] + self.token = oss_config[SECURITY_TOKEN] + if os.getenv('ENABLE_DATASET_ACCELERATION') == 'True': + self.endpoint = DEFAULT_DATA_ACCELERATION_ENDPOINT + else: + self.endpoint = f"https://{oss_config['Region']}.aliyuncs.com" + self.bucket_name = oss_config[BUCKET] + auth = oss2.StsAuth(self.key, self.secret, self.token) + self.bucket = oss2.Bucket(auth, self.endpoint, self.bucket_name) + self.oss_dir = oss_config[DIR] + self.oss_backup_dir = oss_config[BACK_DIR] + + def _reload_sts(self): + logger.info('Reloading sts token automatically.') + oss_config_refresh = self.api.get_dataset_access_config_session( + dataset_name=self.dataset_name, + namespace=self.namespace, + check_cookie=True, + revision=self.revision) + self._do_init(oss_config_refresh) + + @staticmethod + def _percentage(consumed_bytes, total_bytes): + if total_bytes: + rate = int(100 * (float(consumed_bytes) / float(total_bytes))) + print('\r{0}% '.format(rate), end='', flush=True) + + def download(self, oss_file_name: str, + download_config: DataDownloadConfig): + cache_dir = download_config.cache_dir + candidate_key = os.path.join(self.oss_dir, oss_file_name) + candidate_key_backup = os.path.join(self.oss_backup_dir, oss_file_name) + split = download_config.split + + big_data = False + if split: + args_dict = download_config.meta_args_map.get(split) + if args_dict: + big_data = args_dict.get(MetaDataFields.ARGS_BIG_DATA) + + retry_count = 0 + while True: + try: + retry_count += 1 + # big_data is True when the dataset contains large number of objects + if big_data: + file_oss_key = candidate_key + else: + file_oss_key = candidate_key if self.bucket.object_exists( + candidate_key) else candidate_key_backup + filename = hash_url_to_filename(file_oss_key, etag=None) + local_path = os.path.join(cache_dir, filename) + + if download_config.force_download or not os.path.exists( + local_path): + oss2.resumable_download( + self.bucket, + file_oss_key, + local_path, + store=self.resumable_store_download, + multiget_threshold=self.multipart_threshold, + part_size=self.part_size, + 
progress_callback=self._percentage, + num_threads=self.num_threads) + break + except Exception as e: + if e.__dict__.get('status') == 403: + self._reload_sts() + if retry_count >= self.max_retries: + raise + + return local_path + + def upload(self, oss_object_name: str, local_file_path: str, + indicate_individual_progress: bool, + upload_mode: UploadMode) -> str: + retry_count = 0 + object_key = os.path.join(self.oss_dir, oss_object_name) + + if indicate_individual_progress: + progress_callback = self._percentage + else: + progress_callback = None + + while True: + try: + retry_count += 1 + exist = self.bucket.object_exists(object_key) + if upload_mode == UploadMode.APPEND and exist: + logger.info( + f'Skip {oss_object_name} in case of {upload_mode.value} mode.' + ) + break + + oss2.resumable_upload( + self.bucket, + object_key, + local_file_path, + store=self.resumable_store_upload, + multipart_threshold=self.multipart_threshold, + part_size=self.part_size, + progress_callback=progress_callback, + num_threads=self.num_threads) + break + except Exception as e: + if e.__dict__.get('status') == 403: + self._reload_sts() + if retry_count >= self.max_retries: + raise + + return object_key diff --git a/modelscope/msdatasets/utils/upload_utils.py b/modelscope/msdatasets/utils/upload_utils.py new file mode 100644 index 0000000..4ab875d --- /dev/null +++ b/modelscope/msdatasets/utils/upload_utils.py @@ -0,0 +1,63 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from multiprocessing.dummy import Pool as ThreadPool + +from tqdm import tqdm + +from modelscope.msdatasets.utils.oss_utils import OssUtilities +from modelscope.utils.constant import UploadMode + + +class DatasetUploadManager(object): + def __init__(self, dataset_name: str, namespace: str, version: str): + from modelscope.hub.api import HubApi + _hub_api = HubApi() + _oss_config = _hub_api.get_dataset_access_config_session( + dataset_name=dataset_name, + namespace=namespace, + check_cookie=False, + revision=version) + + self.oss_utilities = OssUtilities(oss_config=_oss_config, + dataset_name=dataset_name, + namespace=namespace, + revision=version) + + def upload(self, object_name: str, local_file_path: str, + upload_mode: UploadMode) -> str: + object_key = self.oss_utilities.upload( + oss_object_name=object_name, + local_file_path=local_file_path, + indicate_individual_progress=True, + upload_mode=upload_mode) + return object_key + + def upload_dir(self, object_dir_name: str, local_dir_path: str, + num_processes: int, chunksize: int, + filter_hidden_files: bool, upload_mode: UploadMode) -> int: + def run_upload(args): + self.oss_utilities.upload(oss_object_name=args[0], + local_file_path=args[1], + indicate_individual_progress=False, + upload_mode=upload_mode) + + files_list = [] + for root, dirs, files in os.walk(local_dir_path): + for file_name in files: + if filter_hidden_files and file_name.startswith('.'): + continue + # Concatenate directory name and relative path into oss object key. 
e.g., train/001/1_1230.png + object_name = os.path.join( + object_dir_name, + root.replace(local_dir_path, '', 1).strip('/'), file_name) + + local_file_path = os.path.join(root, file_name) + files_list.append((object_name, local_file_path)) + + with ThreadPool(processes=num_processes) as pool: + result = list( + tqdm(pool.imap(run_upload, files_list, chunksize=chunksize), + total=len(files_list))) + + return len(result) diff --git a/modelscope/outputs/__init__.py b/modelscope/outputs/__init__.py new file mode 100644 index 0000000..dbfdac0 --- /dev/null +++ b/modelscope/outputs/__init__.py @@ -0,0 +1,3 @@ +from .cv_outputs import * # noqa +from .nlp_outputs import * # noqa +from .outputs import TASK_OUTPUTS, ModelOutputBase, OutputKeys diff --git a/modelscope/outputs/cv_outputs.py b/modelscope/outputs/cv_outputs.py new file mode 100644 index 0000000..0283cfa --- /dev/null +++ b/modelscope/outputs/cv_outputs.py @@ -0,0 +1,29 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import numpy as np + +from modelscope.outputs.outputs import ModelOutputBase + +Tensor = Union['torch.Tensor', 'tf.Tensor'] + + +@dataclass +class DetectionOutput(ModelOutputBase): + """The output class for object detection models. + + Args: + class_ids (`Tensor`, *optional*): class id for each object. + boxes (`Tensor`, *optional*): Bounding box for each detected object in [left, top, right, bottom] format. + scores (`Tensor`, *optional*): Detection score for each object. + keypoints (`Tensor`, *optional*): Keypoints for each object using four corner points in a 8-dim tensor + in the order of (x, y) for each corner point. + + """ + + class_ids: Tensor = None + scores: Tensor = None + boxes: Tensor = None + keypoints: Tensor = None diff --git a/modelscope/outputs/nlp_outputs.py b/modelscope/outputs/nlp_outputs.py new file mode 100644 index 0000000..a48e3b0 --- /dev/null +++ b/modelscope/outputs/nlp_outputs.py @@ -0,0 +1,440 @@ +from dataclasses import dataclass +from typing import List, Optional, Tuple, Union + +import numpy as np + +from modelscope.outputs.outputs import ModelOutputBase + +Tensor = Union['torch.Tensor', 'tf.Tensor'] + + +@dataclass +class BackboneModelOutput(ModelOutputBase): + """The output class for text classification models. + + Args: + last_hidden_state (`Tensor`, *optional*): Sequence of hidden-states at + the output of the last layer of the model. + pooler_output (`Tensor`, *optional*) The tensor of the pooled hidden state. + hidden_states (`Tensor`, *optional*) Hidden-states of the model at + the output of each layer plus the optional initial embedding outputs. + """ + + last_hidden_state: Tensor = None + pooler_output: Tensor = None + hidden_states: Tensor = None + + +@dataclass +class AttentionBackboneModelOutput(BackboneModelOutput): + """The output class for backbones of attention based models. + + Args: + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when + `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the + weighted average in the self-attention heads. 
+ cross_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when + `output_attentions=True` and `config.add_cross_attention=True` is passed + or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the decoder's cross-attention layer, after the + attention softmax, used to compute the weighted average in the + cross-attention heads. + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned + when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, + with each tuple having 2 tensors of shape `(batch_size, num_heads, + sequence_length, embed_size_per_head)`) and optionally if + `config.is_encoder_decoder=True` 2 additional tensors of shape + `(batch_size, num_heads, encoder_sequence_length, + embed_size_per_head)`. + + Contains pre-computed hidden-states (key and values in the + self-attention blocks and optionally if + `config.is_encoder_decoder=True` in the cross-attention blocks) that + can be used (see `past_key_values` input) to speed up sequential + decoding. + """ + attentions: Tensor = None + past_key_values: Tensor = None + cross_attentions: Tensor = None + + +@dataclass +class Seq2SeqModelOutput(ModelOutputBase): + """ + Base class for model encoder's outputs that also contains : pre-computed + hidden states that can speed up sequential decoding. + + Args: + last_hidden_state (`torch.FloatTensor` of shape `(batch_size, + sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the + decoder of the model. + + If `past_key_values` is used only the last hidden-state of the + sequences of shape `(batch_size, 1, hidden_size)` is output. + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned + when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, + with each tuple having 2 tensors of shape `(batch_size, num_heads, + sequence_length, embed_size_per_head)`) and 2 additional tensors of + shape `(batch_size, num_heads, encoder_sequence_length, + embed_size_per_head)`. + + Contains pre-computed hidden-states (key and values in the + self-attention blocks and in the cross-attention blocks) that can be + used (see `past_key_values` input) to speed up sequential decoding. + decoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_hidden_states=True` is passed or when + `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, + if the model has an embedding layer, + one for the output of each + layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the decoder at the output of each layer plus the + optional initial embedding outputs. + decoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the decoder, after the attention softmax, used + to compute the weighted average in the self-attention heads. 
+ cross_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when + `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the decoder's cross-attention layer, after the + attention softmax, used to compute the weighted average in the + cross-attention heads. + encoder_last_hidden_state (`torch.FloatTensor` of shape `(batch_size, + sequence_length, hidden_size)`, *optional*): + Sequence of hidden-states at the output of the last layer of the + encoder of the model. + encoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_hidden_states=True` is passed or when + `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, + if the model has an embedding layer, + one for the output of each + layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the encoder at the output of each layer plus the + optional initial embedding outputs. + encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the encoder, after the attention softmax, used + to compute the weighted average in the self-attention heads. + """ + + last_hidden_state: Tensor = None + past_key_values: Optional[Tuple[Tuple[Tensor]]] = None + decoder_hidden_states: Optional[Tuple[Tensor]] = None + decoder_attentions: Optional[Tuple[Tensor]] = None + cross_attentions: Optional[Tuple[Tensor]] = None + encoder_last_hidden_state: Optional[Tensor] = None + encoder_hidden_states: Optional[Tuple[Tensor]] = None + encoder_attentions: Optional[Tuple[Tensor]] = None + + +@dataclass +class FaqQuestionAnsweringOutput(ModelOutputBase): + """The output class for faq QA models. + """ + + scores: Tensor = None + labels: Tensor = None + loss: Tensor = None + logits: Tensor = None + + +@dataclass +class FeatureExtractionOutput(ModelOutputBase): + """The output class for feature extraction models. + """ + + text_embedding: Tensor = None + + +@dataclass +class FillMaskModelOutput(ModelOutputBase): + """The output class for fill mask models. + + Args: + logits (`Tensor`): The logits output of the model. + loss (`Tensor`, *optional*) The loss of the model, available when training. + input_ids (`Tensor`, *optional*) The input id tensor fed into the model. + hidden_states (`Tensor`, *optional*) Hidden-states of the model at the + output of each layer plus the optional initial embedding outputs. + """ + + logits: Tensor = None + loss: Tensor = None + input_ids: Tensor = None + hidden_states: Tensor = None + + +@dataclass +class AttentionFillMaskModelOutput(FillMaskModelOutput): + """The output class for the fill mask and attention based models. + + Args: + attentions (`tuple(Tensor)`, *optional* Attentions weights after the + attention softmax, used to compute the weighted average in the + self-attention heads. + """ + attentions: Tensor = None + + +@dataclass +class InformationExtractionOutput(ModelOutputBase): + """The output class for information extraction models. + """ + + spo_list: np.ndarray = None + + +@dataclass +class Seq2SeqLMOutput(ModelOutputBase): + """ + Base class for sequence-to-sequence language models outputs. 
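+
+    A minimal illustrative construction (tensor values are hypothetical):
+
+        >>> import torch
+        >>> out = Seq2SeqLMOutput(logits=torch.randn(1, 5, 100))
+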
+ + Args: + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when + `labels` is provided): + Language modeling loss. + logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, + config.vocab_size)`): + Prediction scores of the language modeling head (scores for each + vocabulary token before SoftMax). + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned + when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, + with each tuple having 2 tensors of shape `(batch_size, num_heads, + sequence_length, embed_size_per_head)`) and 2 additional tensors of + shape `(batch_size, num_heads, encoder_sequence_length, + embed_size_per_head)`. + + Contains pre-computed hidden-states (key and values in the + self-attention blocks and in the cross-attention blocks) that can be + used (see `past_key_values` input) to speed up sequential decoding. + decoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_hidden_states=True` is passed or when + `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, + if the model has an embedding layer, + one for the output of each + layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the decoder at the output of each layer plus the + initial embedding outputs. + decoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the decoder, after the attention softmax, used + to compute the weighted average in the self-attention heads. + cross_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when + `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the decoder's cross-attention layer, after the + attention softmax, used to compute the weighted average in the + cross-attention heads. + encoder_last_hidden_state (`torch.FloatTensor` of shape `(batch_size, + sequence_length, hidden_size)`, *optional*): + Sequence of hidden-states at the output of the last layer of the + encoder of the model. + encoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_hidden_states=True` is passed or when + `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, + if the model has an embedding layer, + one for the output of each + layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the encoder at the output of each layer plus the + initial embedding outputs. + encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned + when `output_attentions=True` is passed or when + `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape + `(batch_size, num_heads, sequence_length, sequence_length)`. + + Attentions weights of the encoder, after the attention softmax, used + to compute the weighted average in the self-attention heads. 
+ """ + + loss: Optional[Tensor] = None + logits: Tensor = None + past_key_values: Optional[Tuple[Tuple[Tensor]]] = None + decoder_hidden_states: Optional[Tuple[Tensor]] = None + decoder_attentions: Optional[Tuple[Tensor]] = None + cross_attentions: Optional[Tuple[Tensor]] = None + encoder_last_hidden_state: Optional[Tensor] = None + encoder_hidden_states: Optional[Tuple[Tensor]] = None + encoder_attentions: Optional[Tuple[Tensor]] = None + + +@dataclass +class TextClassificationModelOutput(ModelOutputBase): + """The output class for text classification models. + + Args: + logits (`Tensor`): The logits output of the model. loss (`Tensor`, + *optional*) The loss of the model, available when training. + hidden_states (`Tensor`, *optional*) Hidden-states of the model at the + output of each layer plus the optional initial embedding outputs. + """ + + logits: Tensor = None + loss: Tensor = None + + +@dataclass +class AttentionTextClassificationModelOutput(TextClassificationModelOutput): + """The output class for backbones of attention based models. + + Args: + attentions (`tuple(Tensor)`, *optional* Attentions weights after the + attention softmax, used to compute the weighted average in the + self-attention heads. + """ + attentions: Tensor = None + hidden_states: Tensor = None + + +@dataclass +class TextErrorCorrectionOutput(ModelOutputBase): + """The output class for information extraction models. + """ + + predictions: np.ndarray = None + + +@dataclass +class WordAlignmentOutput(ModelOutputBase): + """The output class for word alignment models. + """ + + predictions: List = None + + +@dataclass +class TextGenerationModelOutput(ModelOutputBase): + """The output class for text generation models. + + Args: + logits (`Tensor`): The logits output of the model. loss (`Tensor`, + *optional*) The loss of the model, available when training. + hidden_states (`Tensor`, *optional*) Hidden-states of the model at the + output of each layer plus the optional initial embedding outputs. + """ + + logits: Tensor = None + loss: Tensor = None + + +@dataclass +class TokenGeneratorOutput(ModelOutputBase): + """ + The output class for generate method of text generation models. + + + Args: + sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`): + The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter + if all batches finished early due to the `eos_token_id`. + scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` + is passed or when `config.output_scores=True`): + Processed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax) + at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for + each generated token), with each tensor of shape `(batch_size*num_return_sequences, config.vocab_size)`. + attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` + is passed or `config.output_attentions=True`): + Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of + `torch.FloatTensor` of shape `(num_return_sequences*batch_size, num_heads, generated_length, + sequence_length)`. 
+ hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` + is passed or when `config.output_hidden_states=True`): + Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of + `torch.FloatTensor` of shape `(num_return_sequences*batch_size, generated_length, hidden_size)`. + """ + + sequences: Tensor = None + scores: Optional[Tuple[Tensor]] = None + attentions: Optional[Tuple[Tuple[Tensor]]] = None + hidden_states: Optional[Tuple[Tuple[Tensor]]] = None + + +@dataclass +class TokenClassificationModelOutput(ModelOutputBase): + """The output class for token classification models. + logits (`Tensor`): The logits output of the model. + loss (`Tensor`, *optional*) The loss of the model, available when training. + predictions: A PyTorch tensor of the best tag sequence for each batch of shape + (nbest, batch_size, seq_length) + offset_mapping (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, + sequence_length)`, `optional`): + Indices of positions of each input sequence tokens in the sentence. + Selected in the range ``[0, sequence_length - 1]``. + """ + + logits: Tensor = None + loss: Tensor = None + offset_mapping: Tensor = None + predictions: Tensor = None + label_mask: Tensor = None + + +@dataclass +class AttentionTokenClassificationModelOutput(TokenClassificationModelOutput): + """The output class for backbones of attention based models. + + Args: + attentions (`tuple(Tensor)`, *optional* Attentions weights after the attention softmax, + used to compute the weighted average in the self-attention heads. + """ + attentions: Tensor = None + hidden_states: Tensor = None + + +@dataclass +class DialogueUserSatisfactionEstimationModelOutput(ModelOutputBase): + """The output class for user satisfaction estimation. + + Args: + logits (`Tensor`): The logits output of the model. + """ + logits: Tensor = None + + +@dataclass +class SentencEmbeddingModelOutput(ModelOutputBase): + """The output class for text classification models. + + Args: + query_embs (`Tensor`, *optional*): The tensor of the query embeddings. + doc_embs (`Tensor`, *optional*) Then tensor of the doc embeddings. + loss (`torch.FloatTensor` of shape `(1,)`, *optional*): Sentence Embedding modeling loss. + """ + + query_embeddings: Tensor = None + doc_embeddings: Tensor = None + loss: Tensor = None diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py new file mode 100644 index 0000000..deecaee --- /dev/null +++ b/modelscope/outputs/outputs.py @@ -0,0 +1,1135 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
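+
+# Illustrative sketch of how the dataclasses in nlp_outputs.py above are
+# typically instantiated by a model's forward pass (values are hypothetical):
+#
+#   import torch
+#   from modelscope.outputs import TextClassificationModelOutput
+#
+#   logits = torch.tensor([[0.1, 0.9]])
+#   output = TextClassificationModelOutput(logits=logits, loss=None)
+#   probabilities = output.logits.softmax(-1)
+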
+from collections import OrderedDict, namedtuple +from dataclasses import dataclass, fields + +from modelscope.utils.constant import Tasks + + +class OutputKeys(object): + LOSS = 'loss' + LOGITS = 'logits' + SCORES = 'scores' + SCORE = 'score' + LABEL = 'label' + LABELS = 'labels' + INPUT_IDS = 'input_ids' + LABEL_POS = 'label_pos' + POSES = 'poses' + CAPTION = 'caption' + BOXES = 'boxes' + KEYPOINTS = 'keypoints' + MASKS = 'masks' + DEPTHS = 'depths' + DEPTHS_COLOR = 'depths_color' + LAYOUT = 'layout' + TEXT = 'text' + POLYGONS = 'polygons' + OUTPUT = 'output' + OUTPUT_IMG = 'output_img' + OUTPUT_IMGS = 'output_imgs' + OUTPUT_VIDEO = 'output_video' + OUTPUT_PCM = 'output_pcm' + OUTPUT_PCM_LIST = 'output_pcm_list' + OUTPUT_WAV = 'output_wav' + IMG_EMBEDDING = 'img_embedding' + SPK_EMBEDDING = 'spk_embedding' + SPO_LIST = 'spo_list' + TEXT_EMBEDDING = 'text_embedding' + TRANSLATION = 'translation' + RESPONSE = 'response' + PREDICTION = 'prediction' + PREDICTIONS = 'predictions' + PROBABILITIES = 'probabilities' + DIALOG_STATES = 'dialog_states' + VIDEO_EMBEDDING = 'video_embedding' + UUID = 'uuid' + WORD = 'word' + KWS_LIST = 'kws_list' + SQL_STRING = 'sql_string' + SQL_QUERY = 'sql_query' + HISTORY = 'history' + QUERT_RESULT = 'query_result' + TIMESTAMPS = 'timestamps' + SHOT_NUM = 'shot_num' + SCENE_NUM = 'scene_num' + SCENE_META_LIST = 'scene_meta_list' + SHOT_META_LIST = 'shot_meta_list' + MATCHES = 'matches' + PCD12 = 'pcd12' + PCD12_ALIGN = 'pcd12_align' + + +TASK_OUTPUTS = { + + # ============ vision tasks =================== + + # ocr detection result for single sample + # { + # "polygons": np.array with shape [num_text, 8], each polygon is + # [x1, y1, x2, y2, x3, y3, x4, y4] + # } + Tasks.ocr_detection: [OutputKeys.POLYGONS], + Tasks.table_recognition: [OutputKeys.POLYGONS], + Tasks.license_plate_detection: [OutputKeys.POLYGONS, OutputKeys.TEXT], + + # ocr recognition result for single sample + # { + # "text": "电子元器件提供BOM配单" + # } + Tasks.ocr_recognition: [OutputKeys.TEXT], + Tasks.sudoku: [OutputKeys.TEXT], + Tasks.text2sql: [OutputKeys.TEXT], + + # document vl embedding for single sample + # { + # "img_embedding": np.array with shape [M, D], + # "text_embedding": np.array with shape [N, D] + # } + Tasks.document_vl_embedding: + [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING], + + # face 2d keypoint result for single sample + # { + # "keypoints": [ + # [[x, y]*106], + # [[x, y]*106], + # [[x, y]*106], + # ], + # "poses": [ + # [pitch, roll, yaw], + # [pitch, roll, yaw], + # [pitch, roll, yaw], + # ], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ] + # } + Tasks.face_2d_keypoints: + [OutputKeys.KEYPOINTS, OutputKeys.POSES, OutputKeys.BOXES], + + # face detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # "keypoints": [ + # [x1, y1, x2, y2, x3, y3, x4, y4, x5, y5], + # [x1, y1, x2, y2, x3, y3, x4, y4, x5, y5], + # [x1, y1, x2, y2, x3, y3, x4, y4, x5, y5], + # [x1, y1, x2, y2, x3, y3, x4, y4, x5, y5], + # ], + # } + Tasks.face_detection: + [OutputKeys.SCORES, OutputKeys.BOXES, OutputKeys.KEYPOINTS], + + # card detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # "keypoints": [ + # [x1, y1, x2, y2, x3, y3, x4, y4], + # [x1, y1, x2, y2, x3, y3, x4, y4], + # [x1, y1, 
x2, y2, x3, y3, x4, y4], + # [x1, y1, x2, y2, x3, y3, x4, y4], + # ], + # } + Tasks.card_detection: + [OutputKeys.SCORES, OutputKeys.BOXES, OutputKeys.KEYPOINTS], + + # content check result for single sample + # { + # "scores": [0.9] # non sexy probability + # } + Tasks.content_check: [OutputKeys.SCORES], + + # image driving perception result for single sample + # { + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # "masks": [ + # [np.array], # with fixed shape(h=720, w=1280, 3) containing only 0, 1 + # [np.array], # with fixed shape(h=720, w=1280, 3) containing only 0, 1 + # ] + # } + Tasks.image_driving_perception: [OutputKeys.BOXES, OutputKeys.MASKS], + + # facial expression recognition result for single sample + # { + # "scores": [0.9] + # "boxes": [x1, y1, x2, y2] + # } + Tasks.face_liveness: [OutputKeys.SCORES, OutputKeys.BOXES], + + # face quality assessment for single sample + # { + # "scores": [0.9] + # "boxes": [x1, y1, x2, y2] + # } + Tasks.face_quality_assessment: [OutputKeys.SCORES, OutputKeys.BOXES], + + # facial expression recognition result for single sample + # { + # "scores": [0.9, 0.1, 0.02, 0.02, 0.02, 0.02, 0.02], + # "labels": ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] + # } + Tasks.facial_expression_recognition: + [OutputKeys.SCORES, OutputKeys.LABELS], + + # face processing base result for single img + # { + # "scores": [0.85] + # "boxes": [x1, y1, x2, y2] + # "keypoints": [x1, y1, x2, y2, x3, y3, x4, y4] + # } + Tasks.face_processing_base: [ + OutputKeys.OUTPUT_IMG, OutputKeys.SCORES, OutputKeys.BOXES, + OutputKeys.KEYPOINTS + ], + + # face attribute recognition result for single sample + # { + # "scores": [[0.9, 0.1], [0.92, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01] + # "labels": [['Male', 'Female'], [0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+]] + # } + Tasks.face_attribute_recognition: [OutputKeys.SCORES, OutputKeys.LABELS], + + # face recognition result for single sample + # { + # "img_embedding": np.array with shape [1, D], + # } + Tasks.face_recognition: [OutputKeys.IMG_EMBEDDING], + + # human detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["person", "person", "person", "person"], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # } + # + Tasks.human_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + + # face generation result for single sample + # { + # "output_img": np.array with shape(h, w, 3) + # } + Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG], + + # image classification result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # } + Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS], + + # object detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # } + Tasks.image_object_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + Tasks.domain_specific_object_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + Tasks.open_vocabulary_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + + # video object detection result for single sample + # { + + # "scores": [[0.8, 0.25, 0.05, 0.05], [0.9, 0.1, 0.05, 0.05]] + # "labels": [["person", "traffic light", 
"car", "bus"], + # ["person", "traffic light", "car", "bus"]] + # "boxes": + # [ + # [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ] + # ], + + # } + Tasks.video_object_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + + # 3d object detection result for single sample + # { + # "output_img": np.array with shape(h, w, 3) + # } + Tasks.object_detection_3d: [OutputKeys.OUTPUT_IMG], + + # instance segmentation result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05], + # "labels": ["dog", "horse", "cow", "cat"], + # "masks": [ + # np.array # 2D array containing only 0, 1 + # ] + # } + Tasks.image_segmentation: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS], + + # video panoptic segmentation result for single sample + # "scores": [[0.8, 0.25, 0.05, 0.05], [0.9, 0.1, 0.05, 0.05]] + # "labels": [["person", "traffic light", "car", "bus"], + # ["person", "traffic light", "car", "bus"]] + # "masks": [ #array containing only 0, 1 + # [np.array, np.array, np.array, np.array], + # [np.array, np.array, np.array, np.array], + # ] + # "boxes": + # [ + # [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ] + # ], + # "uuid": [[0, 1, 2, 3],[0, 1, 2, 3]] + # } + Tasks.video_panoptic_segmentation: [ + OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS, + OutputKeys.BOXES, OutputKeys.UUID + ], + + # semantic segmentation result for single sample + # { + # "masks": [np.array # 2D array with shape [height, width]] + # } + Tasks.semantic_segmentation: [OutputKeys.MASKS], + + # image matting result for single sample + # { + # "output_img": np.array with shape(h, w, 4) + # for matting or (h, w, 3) for general purpose + # , shape(h, w) for crowd counting + # } + Tasks.portrait_matting: [OutputKeys.OUTPUT_IMG], + Tasks.universal_matting: [OutputKeys.OUTPUT_IMG], + + # image_quality_assessment_mos result for a single image is a score in range [0, 1] + # {0.5} + Tasks.image_quality_assessment_mos: [OutputKeys.SCORE], + + # image editing task result for a single image + # {"output_img": np.array with shape (h, w, 3)} + Tasks.skin_retouching: [OutputKeys.OUTPUT_IMG], + Tasks.image_super_resolution: [OutputKeys.OUTPUT_IMG], + Tasks.image_colorization: [OutputKeys.OUTPUT_IMG], + Tasks.image_color_enhancement: [OutputKeys.OUTPUT_IMG], + Tasks.image_denoising: [OutputKeys.OUTPUT_IMG], + Tasks.image_portrait_enhancement: [OutputKeys.OUTPUT_IMG], + Tasks.crowd_counting: [OutputKeys.SCORES, OutputKeys.OUTPUT_IMG], + Tasks.image_inpainting: [OutputKeys.OUTPUT_IMG], + Tasks.image_paintbyexample: [OutputKeys.OUTPUT_IMG], + + # image generation task result for a single image + # {"output_img": np.array with shape (h, w, 3)} + Tasks.image_to_image_generation: [OutputKeys.OUTPUT_IMG], + Tasks.image_to_image_translation: [OutputKeys.OUTPUT_IMG], + Tasks.image_style_transfer: [OutputKeys.OUTPUT_IMG], + Tasks.image_portrait_stylization: [OutputKeys.OUTPUT_IMG], + Tasks.image_body_reshaping: [OutputKeys.OUTPUT_IMG], + + # video editing task result for a single video + # {"output_video": "path_to_rendered_video"} + Tasks.video_frame_interpolation: [OutputKeys.OUTPUT_VIDEO], + Tasks.video_super_resolution: [OutputKeys.OUTPUT_VIDEO], + Tasks.video_deinterlace: [OutputKeys.OUTPUT_VIDEO], + 
Tasks.nerf_recon_acc: [OutputKeys.OUTPUT_VIDEO], + Tasks.video_colorization: [OutputKeys.OUTPUT_VIDEO], + + # image quality assessment degradation result for single image + # { + # "scores": [0.885272, 0.014790631, 0.014558001] + # "labels": ['噪声强度', '模糊程度', '压缩强度'], + # } + Tasks.image_quality_assessment_degradation: + [OutputKeys.SCORES, OutputKeys.LABELS], + + # live category recognition result for single video + # { + # "scores": [0.885272, 0.014790631, 0.014558001] + # "labels": ['女装/女士精品>>棉衣/棉服', '女装/女士精品>>牛仔裤', '女装/女士精品>>裤子>>休闲裤'], + # } + Tasks.live_category: [OutputKeys.SCORES, OutputKeys.LABELS], + + # action recognition result for single video + # { + # "output_label": "abseiling" + # } + Tasks.action_recognition: [OutputKeys.LABELS], + + # human body keypoints detection result for single sample + # { + # "keypoints": [ + # [[x, y]*15], + # [[x, y]*15], + # [[x, y]*15] + # ] + # "scores": [ + # [[score]*15], + # [[score]*15], + # [[score]*15] + # ] + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ] + # } + Tasks.body_2d_keypoints: [ + OutputKeys.KEYPOINTS, OutputKeys.SCORES, OutputKeys.BOXES + ], + + # 3D human body keypoints detection result for single sample + # { + # "keypoints": [ # 3d pose coordinate in camera coordinate + # [[x, y, z]*17], # joints of per image + # [[x, y, z]*17], + # ... + # ], + # "timestamps": [ # timestamps of all frames + # "00:00:0.230", + # "00:00:0.560", + # "00:00:0.690", + # ], + # "output_video": "path_to_rendered_video" , this is optional + # and is only available when the "render" option is enabled. + # } + Tasks.body_3d_keypoints: [ + OutputKeys.KEYPOINTS, OutputKeys.TIMESTAMPS, OutputKeys.OUTPUT_VIDEO + ], + + # 3D face reconstruction result for single sample + # { + # "output": { + # "vertices": np.array with shape(n, 3), + # "faces": np.array with shape(n, 3), + # "faces_uv": np.array with shape(n, 3), + # "faces_normal": np.array with shape(n, 3), + # "colors": np.array with shape(n, 3), + # "UVs": np.array with shape(n, 2), + # "normals": np.array with shape(n, 3), + # "texture_map": np.array with shape(h, w, 3), + # } + # } + Tasks.face_reconstruction: [OutputKeys.OUTPUT], + + # 2D hand keypoints result for single sample + # { + # "keypoints": [ + # [[x, y, score] * 21], + # [[x, y, score] * 21], + # [[x, y, score] * 21], + # ], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ] + # } + Tasks.hand_2d_keypoints: [OutputKeys.KEYPOINTS, OutputKeys.BOXES], + + # video single object tracking result for single video + # { + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # "timestamps": ["hh:mm:ss", "hh:mm:ss", "hh:mm:ss"] + # } + Tasks.video_single_object_tracking: [ + OutputKeys.BOXES, OutputKeys.TIMESTAMPS + ], + + # video multi object tracking result for single video + # { + # "boxes": [ + # [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ... + # ], + # [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ... + # ], + # [ + # [x1, y1, x2, y2] + # ... 
+ # ] + # ], + # "labels": [[obj_id0, obj_id1, ...], [obj_id1, obj_id2, ...], [obj_id3, ...]], + # "timestamps": ["hh:mm:ss", "hh:mm:ss", "hh:mm:ss"] + # } + Tasks.video_multi_object_tracking: [ + OutputKeys.BOXES, OutputKeys.LABELS, OutputKeys.TIMESTAMPS + ], + + # live category recognition result for single video + # { + # "scores": [0.885272, 0.014790631, 0.014558001], + # 'labels': ['修身型棉衣', '高腰牛仔裤', '休闲连体裤'] + # } + Tasks.live_category: [OutputKeys.SCORES, OutputKeys.LABELS], + + # video category recognition result for single video + # { + # "scores": [0.7716429233551025], + # "labels": ['生活>>好物推荐'] + # } + Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS], + + # image embedding result for a single image + # { + # "image_bedding": np.array with shape [D] + # } + Tasks.product_retrieval_embedding: [OutputKeys.IMG_EMBEDDING], + + # video embedding result for single video + # { + # "video_embedding": np.array with shape [D], + # } + Tasks.video_embedding: [OutputKeys.VIDEO_EMBEDDING], + + # video stabilization task result for a single video + # {"output_video": "path_to_rendered_video"} + Tasks.video_stabilization: [OutputKeys.OUTPUT_VIDEO], + + # virtual_try_on result for a single sample + # { + # "output_img": np.ndarray with shape [height, width, 3] + # } + Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG], + # text driven segmentation result for single sample + # { + # "masks": [ + # np.array # 2D array containing only 0, 255 + # ] + # } + Tasks.text_driven_segmentation: [OutputKeys.MASKS], + # shop segmentation result for single sample + # { + # "masks": [ + # np.array # 2D array containing only 0, 255 + # ] + # } + Tasks.shop_segmentation: [OutputKeys.MASKS], + # movide scene segmentation result for a single video + # { + # "shot_num":15, + # "shot_meta_list": + # [ + # { + # "frame": [start_frame, end_frame], + # "timestamps": [start_timestamp, end_timestamp] # ['00:00:01.133', '00:00:02.245'] + # + # } + # ] + # "scene_num":3, + # "scene_meta_list": + # [ + # { + # "shot": [0,1,2], + # "frame": [start_frame, end_frame], + # "timestamps": [start_timestamp, end_timestamp] # ['00:00:01.133', '00:00:02.245'] + # } + # ] + # + # } + Tasks.movie_scene_segmentation: [ + OutputKeys.SHOT_NUM, OutputKeys.SHOT_META_LIST, OutputKeys.SCENE_NUM, + OutputKeys.SCENE_META_LIST + ], + + # human whole body keypoints detection result for single sample + # { + # "keypoints": [ + # [[x, y]*133], + # [[x, y]*133], + # [[x, y]*133] + # ] + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ] + # } + Tasks.human_wholebody_keypoint: [OutputKeys.KEYPOINTS, OutputKeys.BOXES], + + # video summarization result for a single video + # { + # "output": + # [ + # { + # "frame": [start_frame, end_frame] + # "timestamps": [start_time, end_time] + # }, + # { + # "frame": [start_frame, end_frame] + # "timestamps": [start_time, end_time] + # } + # ] + # } + Tasks.video_summarization: [OutputKeys.OUTPUT], + + # referring video object segmentation result for a single video + # { + # "masks": [np.array # 3D array with shape [frame_num, height, width]] + # "timestamps": ["hh:mm:ss", "hh:mm:ss", "hh:mm:ss"] + # "output_video": "path_to_rendered_video" , this is optional + # and is only available when the "render" option is enabled. 
+ # } + Tasks.referring_video_object_segmentation: [ + OutputKeys.MASKS, OutputKeys.TIMESTAMPS, OutputKeys.OUTPUT_VIDEO + ], + + # video human matting result for a single video + # { + # "masks": [np.array # 2D array with shape [height, width]] + # "output_video": "path_to_matting_video" + # } + Tasks.video_human_matting: [OutputKeys.MASKS, OutputKeys.OUTPUT_VIDEO], + + # ============ nlp tasks =================== + + # text classification result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["happy", "sad", "calm", "angry"], + # } + Tasks.text_classification: [OutputKeys.SCORES, OutputKeys.LABELS], + + # sentence similarity result for single sample + # { + # "scores": 0.9 + # "labels": "1", + # } + Tasks.sentence_similarity: [OutputKeys.SCORES, OutputKeys.LABELS], + + # nli result for single sample + # { + # "labels": ["happy", "sad", "calm", "angry"], + # "scores": [0.9, 0.1, 0.05, 0.05] + # } + Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS], + + # sentiment classification result for single sample + # { + # 'scores': [0.07183828949928284, 0.9281617403030396], + # 'labels': ['1', '0'] + # } + Tasks.sentiment_classification: [OutputKeys.SCORES, OutputKeys.LABELS], + + # zero-shot classification result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["happy", "sad", "calm", "angry"], + # } + Tasks.zero_shot_classification: [OutputKeys.SCORES, OutputKeys.LABELS], + + # relation extraction result for a single sample + # { + # "uuid": "人生信息-1", + # "text": "《父老乡亲》是由是由由中国人民解放军海政文工团创作的军旅歌曲,石顺义作词,王锡仁作曲,范琳琳演唱", + # "spo_list": [{"subject": "石顺义", "predicate": "国籍", "object": "中国"}] + # } + Tasks.relation_extraction: [OutputKeys.SPO_LIST], + + # translation result for a source sentence + # { + # "translation": “北京是中国的首都” + # } + Tasks.translation: [OutputKeys.TRANSLATION], + + # word segmentation result for single sample + # { + # "output": ["今天", "天气", "不错", ",", "适合", "出去", "游玩"] + # } + # { + # 'output': ['รถ', 'คัน', 'เก่า', 'ก็', 'ยัง', 'เก็บ', 'เอา'] + # } + Tasks.word_segmentation: [OutputKeys.OUTPUT], + + # TODO @wenmeng.zwm support list of result check + # named entity recognition result for single sample + # { + # "output": [ + # {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"}, + # {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"} + # ] + # } + Tasks.named_entity_recognition: [OutputKeys.OUTPUT], + Tasks.part_of_speech: [OutputKeys.OUTPUT], + + # text_error_correction result for a single sample + # { + # "output": "我想吃苹果" + # } + Tasks.text_error_correction: [OutputKeys.OUTPUT], + # word_alignment result for a single sample + # { + # "output": "0-0 1-3 2-4 3-1 4-2 5-5" + # } + Tasks.word_alignment: [OutputKeys.OUTPUT], + Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING, OutputKeys.SCORES], + Tasks.text_ranking: [OutputKeys.SCORES], + + # text generation result for single sample + # { + # "text": "this is the text generated by a model." + # } + Tasks.text_generation: [OutputKeys.TEXT], + + # fid dialogue result for single sample + # { + # "text": "My name is Mike" + # } + Tasks.fid_dialogue: [OutputKeys.TEXT], + + # summarization result for single sample + # { + # "text": "this is the text generated by a model." + # } + Tasks.text_summarization: [OutputKeys.TEXT], + + # text generation result for single sample + # { + # "text": "北京" + # } + Tasks.text2text_generation: [OutputKeys.TEXT], + + # fill mask result for single sample + # { + # "text": "this is the text which masks filled by model." 
+ # } + Tasks.fill_mask: [OutputKeys.TEXT], + + # feature extraction result for single sample + # { + # "text_embedding": [[ + # [1.08599677e-04, 1.72710388e-05, 2.95618793e-05, 1.93638436e-04], + # [6.45841064e-05, 1.15997791e-04, 5.11605394e-05, 9.87020373e-01], + # [2.66957268e-05, 4.72324500e-05, 9.74208378e-05, 4.18022355e-05] + # ], + # [ + # [2.97343540e-05, 5.81317654e-05, 5.44203431e-05, 6.28319322e-05], + # [8.24327726e-05, 4.66077945e-05, 5.32869453e-05, 4.16190960e-05], + # [3.61441926e-05, 3.38475402e-05, 3.44323053e-05, 5.70138109e-05] + # ] + # ] + # } + Tasks.feature_extraction: [OutputKeys.TEXT_EMBEDDING], + + # (Deprecated) dialog intent prediction result for single sample + # {'output': {'prediction': array([2.62349960e-03, 4.12110658e-03, 4.12748595e-05, 3.77560973e-05, + # 1.08599677e-04, 1.72710388e-05, 2.95618793e-05, 1.93638436e-04, + # 6.45841064e-05, 1.15997791e-04, 5.11605394e-05, 9.87020373e-01, + # 2.66957268e-05, 4.72324500e-05, 9.74208378e-05, 4.18022355e-05, + # 2.97343540e-05, 5.81317654e-05, 5.44203431e-05, 6.28319322e-05, + # 7.34537680e-05, 6.61411541e-05, 3.62534920e-05, 8.58885178e-05, + # 8.24327726e-05, 4.66077945e-05, 5.32869453e-05, 4.16190960e-05, + # 5.97518992e-05, 3.92273068e-05, 3.44069012e-05, 9.92335918e-05, + # 9.25978165e-05, 6.26462061e-05, 3.32317031e-05, 1.32061413e-03, + # 2.01607945e-05, 3.36636294e-05, 3.99156743e-05, 5.84108493e-05, + # 2.53432900e-05, 4.95731190e-04, 2.64443643e-05, 4.46992999e-05, + # 2.42672231e-05, 4.75615161e-05, 2.66230145e-05, 4.00083954e-05, + # 2.90536875e-04, 4.23891543e-05, 8.63691166e-05, 4.98188965e-05, + # 3.47019341e-05, 4.52718523e-05, 4.20905781e-05, 5.50173208e-05, + # 4.92360487e-05, 3.56021264e-05, 2.13957210e-05, 6.17428886e-05, + # 1.43893281e-04, 7.32152112e-05, 2.91354867e-04, 2.46623786e-05, + # 3.61441926e-05, 3.38475402e-05, 3.44323053e-05, 5.70138109e-05, + # 4.31488479e-05, 4.94503947e-05, 4.30105974e-05, 1.00963116e-04, + # 2.82062047e-05, 1.15582036e-04, 4.48261271e-05, 3.99339879e-05, + # 7.27692823e-05], dtype=float32), 'label_pos': array([11]), 'label': 'lost_or_stolen_card'}} + + # (Deprecated) dialog modeling prediction result for single sample + # {'output' : ['you', 'are', 'welcome', '.', 'have', 'a', 'great', 'day', '!']} + + # (Deprecated) dialog state tracking result for single sample + # { + # "output":{ + # "dialog_states": { + # "taxi-leaveAt": "none", + # "taxi-destination": "none", + # "taxi-departure": "none", + # "taxi-arriveBy": "none", + # "restaurant-book_people": "none", + # "restaurant-book_day": "none", + # "restaurant-book_time": "none", + # "restaurant-food": "none", + # "restaurant-pricerange": "none", + # "restaurant-name": "none", + # "restaurant-area": "none", + # "hotel-book_people": "none", + # "hotel-book_day": "none", + # "hotel-book_stay": "none", + # "hotel-name": "none", + # "hotel-area": "none", + # "hotel-parking": "none", + # "hotel-pricerange": "cheap", + # "hotel-stars": "none", + # "hotel-internet": "none", + # "hotel-type": "true", + # "attraction-type": "none", + # "attraction-name": "none", + # "attraction-area": "none", + # "train-book_people": "none", + # "train-leaveAt": "none", + # "train-destination": "none", + # "train-day": "none", + # "train-arriveBy": "none", + # "train-departure": "none" + # } + # } + # } + Tasks.task_oriented_conversation: [OutputKeys.OUTPUT], + + # table-question-answering result for single sample + # { + # "sql": "SELECT shop.Name FROM shop." 
+ # "sql_history": {sel: 0, agg: 0, conds: [[0, 0, 'val']]} + # } + Tasks.table_question_answering: [OutputKeys.OUTPUT], + + # ============ audio tasks =================== + # asr result for single sample + # { "text": "每一天都要快乐喔"} + Tasks.auto_speech_recognition: [OutputKeys.TEXT], + + # itn result for single sample + # {"text": "123"} + Tasks.inverse_text_processing: [OutputKeys.TEXT], + + # speaker verification for single compare task + # {'score': 84.2332} + Tasks.speaker_verification: [OutputKeys.SCORES], + + # punctuation result for single sample + # { "text": "你好,明天!"} + Tasks.punctuation: [OutputKeys.TEXT], + # language model result for single sample + # { "text": " hel@@ lo 大 家 好 呀 + # p( hel@@ | ) = 0.00057767 [ -7.45650959 ] + # p( lo | hel@@ ) = 0.99832278 [ -0.00167861 ] + # p( 大 | lo ) = 0.49116334 [ -0.71097857 ] + # p( 家 | 大 ) = 0.99691027 [ -0.00309453 ] + # p( 好 | 家 ) = 0.97999156 [ -0.02021134 ] + # p( 呀 | 好 ) = 0.00461205 [ -5.37908363 ] + # p( | 呀 ) = 0.01524554 [ -4.18346834 ] + # logprob= -17.755 ppl= 12.6345 + # "} + Tasks.language_model: [OutputKeys.TEXT], + + # audio processed for single file in PCM format + # { + # "output_pcm": pcm encoded audio bytes + # } + Tasks.speech_signal_process: [OutputKeys.OUTPUT_PCM], + Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM], + Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM], + Tasks.speech_separation: [OutputKeys.OUTPUT_PCM_LIST], + + # text_to_speech result for a single sample + # { + # "output_wav": {"input_label" : bytes} + # } + Tasks.text_to_speech: [OutputKeys.OUTPUT_WAV], + + # { + # "kws_list": [ + # { + # 'keyword': '', # the keyword spotted + # 'offset': 19.4, # the keyword start time in second + # 'length': 0.68, # the keyword length in second + # 'confidence': 0.85 # the possibility if it is the keyword + # }, + # ... + # ] + # } + Tasks.keyword_spotting: [OutputKeys.KWS_LIST], + + # ============ multi-modal tasks =================== + + # image caption result for single sample + # { + # "caption": "this is an image caption text." + # } + Tasks.image_captioning: [OutputKeys.CAPTION], + + # video caption result for single sample + # { + # "caption": "this is an video caption text." + # } + Tasks.video_captioning: [OutputKeys.CAPTION], + Tasks.ocr_recognition: [OutputKeys.TEXT], + + # visual grounding result for single sample + # { + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # "scores": [0.9, 0.1, 0.05, 0.05] + # } + Tasks.visual_grounding: [OutputKeys.BOXES, OutputKeys.SCORES], + + # text_to_image result for samples + # { + # "output_imgs": np.ndarray list with shape [[height, width, 3], ...] + # } + Tasks.text_to_image_synthesis: [OutputKeys.OUTPUT_IMGS], + + # text_to_speech result for a single sample + # { + # "output_wav": {"input_label" : bytes} + # } + Tasks.text_to_speech: [OutputKeys.OUTPUT_WAV], + + # multi-modal embedding result for single sample + # { + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D] + # } + Tasks.multi_modal_embedding: [ + OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING + ], + + # generative multi-modal embedding result for single sample + # { + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D], + # "caption": "this is an image caption text." 
+ # } + Tasks.generative_multi_modal_embedding: [ + OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION + ], + + # multi-modal similarity result for single sample + # { + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D], + # "similarity": float + # } + Tasks.multi_modal_similarity: [ + OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.SCORES + ], + + # VQA result for a sample + # {"text": "this is a text answser. "} + Tasks.visual_question_answering: [OutputKeys.TEXT], + + # VideoQA result for a sample + # {"text": "this is a text answser. "} + Tasks.video_question_answering: [OutputKeys.TEXT], + + # auto_speech_recognition result for a single sample + # { + # "text": "每天都要快乐喔" + # } + Tasks.auto_speech_recognition: [OutputKeys.TEXT], + + # { + # "scores": [0.9, 0.1, 0.1], + # "labels": ["entailment", "contradiction", "neutral"] + # } + Tasks.visual_entailment: [OutputKeys.SCORES, OutputKeys.LABELS], + + # { + # 'labels': ['吸烟', '打电话', '吸烟'], + # 'scores': [0.7527753114700317, 0.753358006477356, 0.6880350708961487], + # 'boxes': [[547, 2, 1225, 719], [529, 8, 1255, 719], [584, 0, 1269, 719]], + # 'timestamps': [1, 3, 5] + # } + Tasks.action_detection: [ + OutputKeys.TIMESTAMPS, + OutputKeys.LABELS, + OutputKeys.SCORES, + OutputKeys.BOXES, + ], + + # { + # 'output': [ + # [{'label': '6527856', 'score': 0.9942756295204163}, {'label': '1000012000', 'score': 0.0379515215754509}, + # {'label': '13421097', 'score': 2.2825044965202324e-08}], + # [{'label': '1000012000', 'score': 0.910681426525116}, {'label': '6527856', 'score': 0.0005046309670433402}, + # {'label': '13421097', 'score': 2.75914817393641e-06}], + # [{'label': '1000012000', 'score': 0.910681426525116}, {'label': '6527856', 'score': 0.0005046309670433402}, + # {'label': '13421097', 'score': 2.75914817393641e-06}]] + # } + Tasks.faq_question_answering: [OutputKeys.OUTPUT], + + # image person reid result for single sample + # { + # "img_embedding": np.array with shape [1, D], + # } + Tasks.image_reid_person: [OutputKeys.IMG_EMBEDDING], + + # { + # 'output': ['Done' / 'Decode_Error'] + # } + Tasks.video_inpainting: [OutputKeys.OUTPUT], + + # { + # 'output': ['bixin'] + # } + Tasks.hand_static: [OutputKeys.OUTPUT], + + # { 'labels': [2, 1, 0], + # 'boxes':[[[78, 282, 240, 504], [127, 87, 332, 370], [0, 0, 367, 639]] + # 'scores':[0.8202137351036072, 0.8987470269203186, 0.9679114818572998] + # } + Tasks.face_human_hand_detection: [ + OutputKeys.LABELS, OutputKeys.BOXES, OutputKeys.SCORES + ], + + # { + # {'output': 'Happiness', 'boxes': (203, 104, 663, 564)} + # } + Tasks.face_emotion: [OutputKeys.OUTPUT, OutputKeys.BOXES], + + # { + # "masks": [ + # np.array # 2D array containing only 0, 255 + # ] + # } + Tasks.product_segmentation: [OutputKeys.MASKS], + + # image_skychange result for a single sample + # { + # "output_img": np.ndarray with shape [height, width, 3] + # } + Tasks.image_skychange: [OutputKeys.OUTPUT_IMG], + # { + # 'scores': [0.1, 0.2, 0.3, ...] 
+ # } + Tasks.translation_evaluation: [OutputKeys.SCORES], + + # video object segmentation result for a single video + # { + # "masks": [np.array # 3D array with shape [frame_num, height, width]] + # } + Tasks.video_object_segmentation: [OutputKeys.MASKS], + + # motion generation result for a single input + # { + # "keypoints": [np.array # 3D array with shape [frame_num, joint_num, 3]] + # "output_video": "path_to_rendered_video" + # } + Tasks.motion_generation: [OutputKeys.KEYPOINTS, OutputKeys.OUTPUT_VIDEO], + + # bad image detecting for a single input + # { + # "scores": [0.8, 0.1, 0.1] + # "labels": ["正常", "花屏", "绿屏"], + Tasks.bad_image_detecting: [OutputKeys.SCORES, OutputKeys.LABELS], + + # vision efficient tuning result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # } + Tasks.vision_efficient_tuning: [OutputKeys.SCORES, OutputKeys.LABELS], + Tasks.document_grounded_dialog_generate: [OutputKeys.TEXT], + Tasks.document_grounded_dialog_rerank: [OutputKeys.OUTPUT], + Tasks.document_grounded_dialog_retrieval: [OutputKeys.OUTPUT], +} + + +class ModelOutputBase(list): + def __post_init__(self): + self.reconstruct() + self.post_init = True + + def reconstruct(self): + # Low performance, but low frequency. + self.clear() + for idx, key in enumerate(self.keys()): + self.append(getattr(self, key)) + + def __getitem__(self, item): + if isinstance(item, str): + if hasattr(self, item): + return getattr(self, item) + elif isinstance(item, (int, slice)): + return super().__getitem__(item) + raise IndexError(f'No Index {item} found in the dataclass.') + + def __setitem__(self, key, value): + if isinstance(key, str): + if key in [f.name for f in fields(self)]: + if key not in self.keys(): + super().__setattr__(key, value) + self.reconstruct() + elif id(getattr(self, key)) != id(value): + super().__setattr__(key, value) + super().__setitem__(self.keys().index(key), value) + else: + super().__setattr__(key, value) + elif isinstance(key, int): + super().__setitem__(key, value) + key_name = self.keys()[key] + super().__setattr__(key_name, value) + + def __setattr__(self, key, value): + if getattr(self, 'post_init', False): + return self.__setitem__(key, value) + else: + return super().__setattr__(key, value) + + def keys(self): + return [ + f.name for f in fields(self) if getattr(self, f.name) is not None + ] + + def items(self): + return self.to_dict().items() + + def to_dict(self): + output = OrderedDict() + for key in self.keys(): + output[key] = getattr(self, key) + return output diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py new file mode 100644 index 0000000..6bd5e83 --- /dev/null +++ b/modelscope/pipeline_inputs.py @@ -0,0 +1,67 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
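The `ModelOutputBase` container defined above (in the outputs module) gives task outputs attribute, key and positional access at once. A minimal sketch of how a dataclass built on it behaves; the `TextClassificationOutput` name and its fields are illustrative, not part of this commit:

```python
from dataclasses import dataclass
from typing import List

from modelscope.outputs import ModelOutputBase


@dataclass
class TextClassificationOutput(ModelOutputBase):
    # illustrative fields; declaration order defines positional (int) indexing
    scores: List[float] = None
    labels: List[str] = None


out = TextClassificationOutput(scores=[0.9, 0.1], labels=['positive', 'negative'])
assert out['scores'] is out.scores   # str key and attribute access return the same object
assert out[0] is out.scores          # int index follows field declaration order
print(out.to_dict())                 # OrderedDict([('scores', [0.9, 0.1]), ('labels', ['positive', 'negative'])])
```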
+ +import numpy as np +from PIL import Image + +from modelscope.utils.constant import Tasks + + +class InputKeys(object): + IMAGE = 'image' + TEXT = 'text' + VIDEO = 'video' + + +class InputType(object): + IMAGE = 'image' + TEXT = 'text' + AUDIO = 'audio' + VIDEO = 'video' + BOX = 'box' + DICT = 'dict' + LIST = 'list' + INT = 'int' + + +INPUT_TYPE = { + InputType.IMAGE: (str, np.ndarray, Image.Image), + InputType.TEXT: str, + InputType.AUDIO: (str, bytes, np.ndarray), + InputType.VIDEO: (str, np.ndarray, 'cv2.VideoCapture'), + InputType.BOX: (list, np.ndarray), + InputType.DICT: (dict, type(None)), + InputType.LIST: (list, type(None)), + InputType.INT: int, +} + + +def check_input_type(input_type, input): + expected_type = INPUT_TYPE[input_type] + if input_type == InputType.VIDEO: + # special type checking using class name, to avoid introduction of opencv dependency into fundamental framework. + assert type(input).__name__ == 'VideoCapture' or isinstance(input, expected_type),\ + f'invalid input type for {input_type}, expected {expected_type} but got {type(input)}\n {input}' + else: + assert isinstance(input, expected_type), \ + f'invalid input type for {input_type}, expected {expected_type} but got {type(input)}\n {input}' + + +TASK_INPUTS = { + # if task input is single var, value is InputType + # if task input is a tuple, value is tuple of InputType + # if task input is a dict, value is a dict of InputType, where key + # equals the one needed in pipeline input dict + # if task input is a list, value is a set of input format, in which + # each element corresponds to one input format as described above. + # ============ face tasks =================== + Tasks.face_2d_keypoints: + InputType.IMAGE, + Tasks.face_detection: + InputType.IMAGE, + Tasks.facial_expression_recognition: + InputType.IMAGE, + Tasks.face_attribute_recognition: + InputType.IMAGE, + Tasks.face_recognition: + InputType.IMAGE +} diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py new file mode 100644 index 0000000..47856e6 --- /dev/null +++ b/modelscope/pipelines/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +from . import cv +from .base import Pipeline +from .builder import pipeline diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py new file mode 100644 index 0000000..63c6338 --- /dev/null +++ b/modelscope/pipelines/base.py @@ -0,0 +1,535 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
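Before moving on to the `Pipeline` base class, a small sketch of how the declarations in `pipeline_inputs.py` above are consumed: each `TASK_INPUTS` entry boils down to `check_input_type` assertions at call time. The dict-style spec at the end is illustrative of the multi-input format described in the comment, not an entry from this commit:

```python
import numpy as np

from modelscope.pipeline_inputs import InputType, check_input_type

# single-input declarations accept any of the types registered in INPUT_TYPE
check_input_type(InputType.IMAGE, 'data/images/face.png')                   # str path: ok
check_input_type(InputType.IMAGE, np.zeros((224, 224, 3), dtype=np.uint8))  # ndarray: ok
# check_input_type(InputType.IMAGE, 42)                                     # would raise AssertionError

# dict-style declarations map each pipeline-input key to the expected type of its value
spec = {'image': InputType.IMAGE, 'text': InputType.TEXT}
sample = {'image': 'data/images/face.png', 'text': 'a query about the image'}
for key, expected in spec.items():
    check_input_type(expected, sample[key])
```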
+ +import os +import os.path as osp +from abc import ABC, abstractmethod +from functools import partial +from multiprocessing import Pool +from threading import Lock +from typing import Any, Dict, Generator, List, Mapping, Union + +import numpy as np + +from modelscope.models.base import Model +from modelscope.msdatasets import MsDataset +from modelscope.outputs import TASK_OUTPUTS, ModelOutputBase +from modelscope.pipeline_inputs import TASK_INPUTS, check_input_type +from modelscope.preprocessors import Preprocessor +from modelscope.utils.config import Config +from modelscope.utils.constant import Frameworks, Invoke, ModelFile +from modelscope.utils.device import (create_device, device_placement, + verify_device) +from modelscope.utils.hub import read_config, snapshot_download +from modelscope.utils.import_utils import is_tf_available, is_torch_available +from modelscope.utils.logger import get_logger + +from .util import is_model, is_official_hub_path + +if is_torch_available(): + import torch + +if is_tf_available(): + pass + +Tensor = Union['torch.Tensor', 'tf.Tensor'] +Input = Union[str, tuple, MsDataset, 'Image.Image', 'numpy.ndarray'] +InputModel = Union[str, Model, 'torch.nn.Module'] + +logger = get_logger() + + +class Pipeline(ABC): + """Pipeline base. + """ + def initiate_single_model(self, model): + if isinstance(model, str): + logger.info(f'initiate model from {model}') + if isinstance(model, str) and is_official_hub_path(model): + logger.info(f'initiate model from location {model}.') + # expecting model has been prefetched to local cache beforehand + return Model.from_pretrained( + model, + device=self.device_name, + model_prefetched=True, + invoked_by=Invoke.PIPELINE) if is_model(model) else model + else: + return model + + def initiate_multiple_models(self, input_models: List[InputModel]): + models = [] + for model in input_models: + models.append(self.initiate_single_model(model)) + return models + + def __init__(self, + config_file: str = None, + model: Union[InputModel, List[InputModel]] = None, + preprocessor: Union[Preprocessor, List[Preprocessor]] = None, + device: str = 'gpu', + auto_collate=True, + **kwargs): + """ Base class for pipeline. + + If config_file is provided, model and preprocessor will be + instantiated from corresponding config. Otherwise, model + and preprocessor will be constructed separately. + + Args: + config_file(str, optional): Filepath to configuration file. + model: (list of) Model name or model object + preprocessor: (list of) Preprocessor object + device (str): device str, should be either cpu, cuda, gpu, gpu:X or cuda:X + auto_collate (bool): automatically to convert data to tensor or not. 
+ """ + verify_device(device) + self.device_name = device + + if not isinstance(model, List): + self.model = self.initiate_single_model(model) + self.models = [self.model] + else: + self.model = None + self.models = self.initiate_multiple_models(model) + + self.has_multiple_models = len(self.models) > 1 + + if config_file is not None: + self.cfg = Config.from_file(config_file) + model_dir = os.path.dirname(config_file) + elif not self.has_multiple_models: + if isinstance(self.model, str): + model_dir = self.model + else: + model_dir = self.model.model_dir + self.cfg = read_config(model_dir) + + if preprocessor is None and not self.has_multiple_models: + self.preprocessor = Preprocessor.from_pretrained(model_dir) + else: + self.preprocessor = preprocessor + + if self.model or (self.has_multiple_models and self.models[0]): + self.framework = self._get_framework() + else: + self.framework = None + + if self.framework == Frameworks.torch: + self.device = create_device(self.device_name) + self._model_prepare = False + self._model_prepare_lock = Lock() + self._auto_collate = auto_collate + + def prepare_model(self): + """ Place model on certain device for pytorch models before first inference + """ + self._model_prepare_lock.acquire(timeout=600) + + def _prepare_single(model): + if isinstance(model, torch.nn.Module): + model.to(self.device) + model.eval() + elif hasattr(model, 'model') and isinstance( + model.model, torch.nn.Module): + model.model.to(self.device) + model.model.eval() + + if not self._model_prepare: + # prepare model for pytorch + if self.framework == Frameworks.torch: + if self.has_multiple_models: + for m in self.models: + _prepare_single(m) + else: + _prepare_single(self.model) + self._model_prepare = True + self._model_prepare_lock.release() + + def _get_framework(self) -> str: + frameworks = [] + for m in self.models: + if isinstance(m, str): + model_dir = m + else: + model_dir = m.model_dir + cfg_file = osp.join(model_dir, ModelFile.CONFIGURATION) + cfg = Config.from_file(cfg_file) + frameworks.append(cfg.framework) + if not all(x == frameworks[0] for x in frameworks): + logger.warning( + f'got multiple models, but they are in different frameworks {frameworks}' + ) + return None + + return frameworks[0] + + def __call__(self, input: Union[Input, List[Input]], *args, + **kwargs) -> Union[Dict[str, Any], Generator]: + # model provider should leave it as it is + # modelscope library developer will handle this function + # place model to cpu or gpu + if (self.model or (self.has_multiple_models and self.models[0])): + if not self._model_prepare: + self.prepare_model() + + # simple showcase, need to support iterator type for both tensorflow and pytorch + # input_dict = self._handle_input(input) + + # sanitize the parameters + batch_size = kwargs.pop('batch_size', None) + preprocess_params, forward_params, postprocess_params = self._sanitize_parameters( + **kwargs) + kwargs['preprocess_params'] = preprocess_params + kwargs['forward_params'] = forward_params + kwargs['postprocess_params'] = postprocess_params + if isinstance(input, list): + if batch_size is None: + output = [] + for ele in input: + output.append(self._process_single(ele, *args, **kwargs)) + else: + output = self._process_batch(input, batch_size, **kwargs) + + elif isinstance(input, MsDataset): + return self._process_iterator(input, *args, **kwargs) + + else: + output = self._process_single(input, *args, **kwargs) + return output + + def _sanitize_parameters(self, **pipeline_parameters): + """ + this method should 
sanitize the keyword args to preprocessor params, + forward params and postprocess params on '__call__' or '_process_single' method + considered to be a normal classmethod with default implementation / output + + Default Returns: + Dict[str, str]: preprocess_params = {} + Dict[str, str]: forward_params = {} + Dict[str, str]: postprocess_params = pipeline_parameters + """ + return {}, {}, pipeline_parameters + + def _process_iterator(self, input: Input, *args, **kwargs): + for ele in input: + yield self._process_single(ele, *args, **kwargs) + + def _collate_fn(self, data): + return collate_fn(data, self.device) + + def _process_single(self, input: Input, *args, **kwargs) -> Dict[str, Any]: + preprocess_params = kwargs.get('preprocess_params', {}) + forward_params = kwargs.get('forward_params', {}) + postprocess_params = kwargs.get('postprocess_params', {}) + self._check_input(input) + out = self.preprocess(input, **preprocess_params) + + with device_placement(self.framework, self.device_name): + if self.framework == Frameworks.torch: + with torch.no_grad(): + if self._auto_collate: + out = self._collate_fn(out) + out = self.forward(out, **forward_params) + else: + out = self.forward(out, **forward_params) + + out = self.postprocess(out, **postprocess_params) + self._check_output(out) + return out + + def _batch(self, data_list): + batch_data = {} + for sample_preprocessed in data_list: + for k, v in sample_preprocessed.items(): + value_list = batch_data.get(k, []) + value_list.append(v) + batch_data[k] = value_list + for k in batch_data.keys(): + if isinstance(batch_data[k][0], torch.Tensor): + batch_data[k] = torch.cat(batch_data[k]) + return batch_data + + def _process_batch(self, input: List[Input], batch_size, + **kwargs) -> Dict[str, Any]: + preprocess_params = kwargs.get('preprocess_params') + forward_params = kwargs.get('forward_params') + postprocess_params = kwargs.get('postprocess_params') + + # batch data + output_list = [] + for i in range(0, len(input), batch_size): + end = min(i + batch_size, len(input)) + real_batch_size = end - i + preprocessed_list = [ + self.preprocess(i, **preprocess_params) for i in input[i:end] + ] + + with device_placement(self.framework, self.device_name): + if self.framework == Frameworks.torch: + with torch.no_grad(): + batched_out = self._batch(preprocessed_list) + if self._auto_collate: + batched_out = self._collate_fn(batched_out) + batched_out = self.forward(batched_out, + **forward_params) + else: + batched_out = self._batch(preprocessed_list) + batched_out = self.forward(batched_out, **forward_params) + + for batch_idx in range(real_batch_size): + out = {} + for k, element in batched_out.items(): + if element is not None: + out[k] = element[batch_idx] + out = self.postprocess(out, **postprocess_params) + self._check_output(out) + output_list.append(out) + + return output_list + + def _check_input(self, input): + task_name = self.group_key + if task_name in TASK_INPUTS: + input_type = TASK_INPUTS[task_name] + + # if multiple input formats are defined, we first + # found the one that match input data and check + if isinstance(input_type, list): + matched_type = None + for t in input_type: + if isinstance(input, (dict, tuple)): + if type(t) == type(input): + matched_type = t + break + elif isinstance(t, str): + matched_type = t + break + if matched_type is None: + err_msg = 'input data format for current pipeline should be one of following: \n' + for t in input_type: + err_msg += f'{t}\n' + raise ValueError(err_msg) + else: + input_type = 
matched_type + + if isinstance(input_type, str): + check_input_type(input_type, input) + elif isinstance(input_type, tuple): + for t, input_ele in zip(input_type, input): + check_input_type(t, input_ele) + elif isinstance(input_type, dict): + for k in input_type.keys(): + # allow single input for multi-modal models + if k in input: + check_input_type(input_type[k], input[k]) + else: + raise ValueError(f'invalid input_type definition {input_type}') + else: + logger.warning(f'task {task_name} input definition is missing') + + def _check_output(self, input): + # this attribute is dynamically attached by registry + # when cls is registered in registry using task name + task_name = self.group_key + if task_name not in TASK_OUTPUTS: + logger.warning(f'task {task_name} output keys are missing') + return + output_keys = TASK_OUTPUTS[task_name] + missing_keys = [] + input = input.keys() if isinstance(input, + (dict, ModelOutputBase)) else input + for k in output_keys: + if k not in input: + missing_keys.append(k) + if len(missing_keys) > 0: + raise ValueError(f'expected output keys are {output_keys}, ' + f'those {missing_keys} are missing') + + def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]: + """ Provide default implementation based on preprocess_cfg and user can reimplement it + """ + assert self.preprocessor is not None, 'preprocess method should be implemented' + assert not isinstance(self.preprocessor, List),\ + 'default implementation does not support using multiple preprocessors.' + return self.preprocessor(inputs, **preprocess_params) + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + """ Provide default implementation using self.model and user can reimplement it + """ + assert self.model is not None, 'forward method should be implemented' + assert not self.has_multiple_models, 'default implementation does not support multiple models in a pipeline.' + return self.model(inputs, **forward_params) + + @abstractmethod + def postprocess(self, inputs: Dict[str, Any], + **post_params) -> Dict[str, Any]: + """ If current pipeline support model reuse, common postprocess + code should be write here. + + Args: + inputs: input data + post_params: post process parameters + + Return: + dict of results: a dict containing outputs of model, each + output should have the standard output name. + """ + raise NotImplementedError('postprocess') + + +class DistributedPipeline(Pipeline): + """This pipeline is used to load multi gpu models. + + What will this class do: + 1. Read the global config from the configuration.json + 2. Set the multiprocessing method to spawn + 3. Open a multiprocessing pool of the world_size to instantiate model pieces. + 4. Set the master port and ip + 5. Call _instantiate_one to instantiate one model piece, + This method should be implemented by the derived class. + 6. After the forward method is called, do preprocess in main process and + call _forward_one to collect results, and do post process in main process. + + NOTE: _instantiate_one and _forward_one are class methods, any derived class should implement them and + store the model handler in the class field. 
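+
+    A minimal sketch of a derived class (the class name and `build_model_piece` helper are
+    illustrative placeholders):
+
+    ```python
+    class MyDistributedPipeline(DistributedPipeline):
+        model = None  # the model handler kept in a class field
+
+        @classmethod
+        def _instantiate_one(cls, rank, model_dir, **kwargs):
+            # build this rank's model piece and keep the handler on the class
+            cls.model = build_model_piece(model_dir, rank, **kwargs)
+
+        @classmethod
+        def _forward_one(cls, inputs):
+            # `inputs` carries the preprocessed data and forward params, see forward() below
+            return cls.model(inputs['inputs'], **inputs['forward_params'])
+    ```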
+ """ + def __init__(self, + model: str = None, + preprocessor: Union[Preprocessor, List[Preprocessor]] = None, + auto_collate=True, + **kwargs): + # DistributedPipeline uses classmethod to initialize model + # without calling super().__init__ method + self.preprocessor = preprocessor + self._model_prepare = False + self._model_prepare_lock = Lock() + self._auto_collate = auto_collate + + if os.path.exists(model): + self.model_dir = model + else: + self.model_dir = snapshot_download(model) + self.cfg = read_config(self.model_dir) + self.world_size = self.cfg.model.world_size + self.model_pool = None + self.device_name = 'cpu' + self.device = create_device(self.device_name) + self.has_multiple_models = False + self.framework = self.cfg.framework + torch.multiprocessing.set_start_method('spawn', force=True) + + ranks = list(range(self.world_size)) + self.model_pool = Pool(self.world_size) + master_ip = '127.0.0.1' if 'master_ip' not in kwargs else kwargs[ + 'master_ip'] + master_port = '29500' if 'master_port' not in kwargs else kwargs[ + 'master_port'] + from modelscope.utils.torch_utils import _find_free_port, _is_free_port + if not _is_free_port(int(master_port)): + master_port = str(_find_free_port()) + self.model_pool.map( + partial(self.__class__._instantiate_one, + model_dir=self.model_dir, + master_ip=master_ip, + master_port=master_port, + **self.cfg.model, + **kwargs), ranks) + self.models = [] + + def __del__(self): + if hasattr(self, 'model_pool') and self.model_pool is not None: + self.model_pool.terminate() + + def __getstate__(self): + self_dict = self.__dict__.copy() + del self_dict['model_pool'] + del self_dict['preprocessor'] + del self_dict['_model_prepare_lock'] + return self_dict + + @classmethod + def _instantiate_one(cls, rank, model_dir, **kwargs): + """Instantiate one model piece. + + Args: + rank: The model rank. + model_dir: The model_dir in the node. + kwargs: Any extra args. + + Returns: + None. The model handler should be kept in the class field. + """ + pass + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + inputs = { + 'inputs': inputs, + 'forward_params': forward_params, + } + res = self.model_pool.map(self.__class__._forward_one, + [inputs] * self.world_size) + return res[0] + + @classmethod + def _forward_one(cls, inputs): + """Forward the inputs to one model piece. + + Use the model handler kept in the class field to forward. + + Args: + inputs: The inputs after the preprocessing. + + Returns: + The forward results. + """ + pass + + +def collate_fn(data, device): + """Prepare the input just before the forward function. + This method will move the tensors to the right device. + Usually this method does not need to be overridden. + + Args: + data: The data out of the dataloader. + device: The device to move data to. + + Returns: The processed data. 
+ + """ + from torch.utils.data.dataloader import default_collate + + def get_class_name(obj): + return obj.__class__.__name__ + + if isinstance(data, dict) or isinstance(data, Mapping): + # add compatibility for img_metas for mmlab models + return type(data)({ + k: collate_fn(v, device) if k != 'img_metas' else v + for k, v in data.items() + }) + elif isinstance(data, (tuple, list)): + if 0 == len(data): + return torch.Tensor([]) + if isinstance(data[0], (int, float)): + return default_collate(data).to(device) + else: + return type(data)(collate_fn(v, device) for v in data) + elif isinstance(data, np.ndarray): + if data.dtype.type is np.str_: + return data + else: + return collate_fn(torch.from_numpy(data), device) + elif isinstance(data, torch.Tensor): + return data.to(device) + elif isinstance(data, (bytes, str, int, float, bool, type(None))): + return data + elif get_class_name(data) == 'InputFeatures': + # modelscope.preprocessors.nlp.InputFeatures + return data + elif get_class_name(data) == 'DataContainer': + # mmcv.parallel.DataContainer + return data + else: + raise ValueError(f'Unsupported data type {type(data)}') diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py new file mode 100644 index 0000000..2177248 --- /dev/null +++ b/modelscope/pipelines/builder.py @@ -0,0 +1,189 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from typing import List, Optional, Union + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import DEFAULT_MODEL_FOR_PIPELINE, Pipelines +from modelscope.models.base import Model +from modelscope.utils.config import ConfigDict, check_config +from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, Tasks +from modelscope.utils.hub import read_config +from modelscope.utils.registry import Registry, build_from_cfg + +from .base import Pipeline +from .util import is_official_hub_path + +PIPELINES = Registry('pipelines') + + +def normalize_model_input(model, model_revision): + """ normalize the input model, to ensure that a model str is a valid local path: in other words, + for model represented by a model id, the model shall be downloaded locally + """ + if isinstance(model, str) and is_official_hub_path(model, model_revision): + # skip revision download if model is a local directory + if not os.path.exists(model): + # note that if there is already a local copy, snapshot_download will check and skip downloading + model = snapshot_download(model, + revision=model_revision, + user_agent={Invoke.KEY: Invoke.PIPELINE}) + elif isinstance(model, list) and isinstance(model[0], str): + for idx in range(len(model)): + if is_official_hub_path( + model[idx], + model_revision) and not os.path.exists(model[idx]): + model[idx] = snapshot_download( + model[idx], + revision=model_revision, + user_agent={Invoke.KEY: Invoke.PIPELINE}) + return model + + +def build_pipeline(cfg: ConfigDict, + task_name: str = None, + default_args: dict = None): + """ build pipeline given model config dict. + + Args: + cfg (:obj:`ConfigDict`): config dict for model object. + task_name (str, optional): task name, refer to + :obj:`Tasks` for more details. + default_args (dict, optional): Default initialization arguments. 
+ """ + return build_from_cfg(cfg, + PIPELINES, + group_key=task_name, + default_args=default_args) + + +def pipeline(task: str = None, + model: Union[str, List[str], Model, List[Model]] = None, + preprocessor=None, + config_file: str = None, + pipeline_name: str = None, + framework: str = None, + device: str = 'gpu', + model_revision: Optional[str] = DEFAULT_MODEL_REVISION, + plugins: List[str] = None, + **kwargs) -> Pipeline: + """ Factory method to build an obj:`Pipeline`. + + + Args: + task (str): Task name defining which pipeline will be returned. + model (str or List[str] or obj:`Model` or obj:list[`Model`]): (list of) model name or model object. + preprocessor: preprocessor object. + config_file (str, optional): path to config file. + pipeline_name (str, optional): pipeline class name or alias name. + framework (str, optional): framework type. + model_revision: revision of model(s) if getting from model hub, for multiple models, expecting + all models to have the same revision + device (str, optional): whether to use gpu or cpu is used to do inference. + + Return: + pipeline (obj:`Pipeline`): pipeline object for certain task. + + Examples: + >>> # Using default model for a task + >>> p = pipeline('image-classification') + >>> # Using pipeline with a model name + >>> p = pipeline('text-classification', model='damo/distilbert-base-uncased') + >>> # Using pipeline with a model object + >>> resnet = Model.from_pretrained('Resnet') + >>> p = pipeline('image-classification', model=resnet) + >>> # Using pipeline with a list of model names + >>> p = pipeline('audio-kws', model=['damo/audio-tts', 'damo/auto-tts2']) + """ + if task is None and pipeline_name is None: + raise ValueError('task or pipeline_name is required') + + try_import_plugins(plugins) + + model = normalize_model_input(model, model_revision) + pipeline_props = {'type': pipeline_name} + if pipeline_name is None: + # get default pipeline for this task + if isinstance(model, str) \ + or (isinstance(model, list) and isinstance(model[0], str)): + if is_official_hub_path(model, revision=model_revision): + # read config file from hub and parse + cfg = read_config( + model, revision=model_revision) if isinstance( + model, str) else read_config(model[0], + revision=model_revision) + check_config(cfg) + try_import_plugins(cfg.safe_get('plugins')) + pipeline_props = cfg.pipeline + elif model is not None: + # get pipeline info from Model object + first_model = model[0] if isinstance(model, list) else model + if not hasattr(first_model, 'pipeline'): + # model is instantiated by user, we should parse config again + cfg = read_config(first_model.model_dir) + check_config(cfg) + try_import_plugins(cfg.safe_get('plugins')) + first_model.pipeline = cfg.pipeline + pipeline_props = first_model.pipeline + else: + pipeline_name, default_model_repo = get_default_pipeline_info(task) + model = normalize_model_input(default_model_repo, model_revision) + pipeline_props = {'type': pipeline_name} + + pipeline_props['model'] = model + pipeline_props['device'] = device + cfg = ConfigDict(pipeline_props) + + if kwargs: + cfg.update(kwargs) + + if preprocessor is not None: + cfg.preprocessor = preprocessor + + return build_pipeline(cfg, task_name=task) + + +def add_default_pipeline_info(task: str, + model_name: str, + modelhub_name: str = None, + overwrite: bool = False): + """ Add default model for a task. + + Args: + task (str): task name. + model_name (str): model_name. + modelhub_name (str): name for default modelhub. 
+ overwrite (bool): overwrite default info. + """ + if not overwrite: + assert task not in DEFAULT_MODEL_FOR_PIPELINE, \ + f'task {task} already has default model.' + + DEFAULT_MODEL_FOR_PIPELINE[task] = (model_name, modelhub_name) + + +def get_default_pipeline_info(task): + """ Get default info for certain task. + + Args: + task (str): task name. + + Return: + A tuple: first element is pipeline name(model_name), second element + is modelhub name. + """ + + if task not in DEFAULT_MODEL_FOR_PIPELINE: + # support pipeline which does not register default model + pipeline_name = list(PIPELINES.modules[task].keys())[0] + default_model = None + else: + pipeline_name, default_model = DEFAULT_MODEL_FOR_PIPELINE[task] + return pipeline_name, default_model + + +def try_import_plugins(plugins: List[str]) -> None: + """ Try to import plugins """ + if plugins is not None: + from modelscope.utils.plugins import import_plugins + import_plugins(plugins) diff --git a/modelscope/pipelines/cv/arc_face_recognition_pipeline.py b/modelscope/pipelines/cv/arc_face_recognition_pipeline.py new file mode 100644 index 0000000..b28969a --- /dev/null +++ b/modelscope/pipelines/cv/arc_face_recognition_pipeline.py @@ -0,0 +1,65 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.face_recognition.torchkit.backbone.arcface_backbone import \ + _iresnet +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_recognition, + module_name=Pipelines.arc_face_recognition) +class ArcFaceRecognitionPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face recognition pipeline for prediction + Args: + model: model id on modelscope hub. + """ + + # face recong model + super().__init__(model=model, **kwargs) + face_model = _iresnet('arcface_i50', [3, 4, 14, 3]) + face_model.load_state_dict( + torch.load(osp.join(model, ModelFile.TORCH_MODEL_FILE), + map_location=self.device)) + face_model = face_model.to(self.device) + face_model.eval() + self.face_model = face_model + logger.info('face recognition model loaded!') + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = np.transpose(face_img, axes=(2, 0, 1)) + face_img = (face_img / 255. 
- 0.5) / 0.5 + face_img = face_img.astype(np.float32) + result['img'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + img = input['img'].unsqueeze(0) + emb = self.face_model(img).detach().cpu().numpy() + emb /= np.sqrt(np.sum(emb**2, -1, keepdims=True)) # l2 norm + return {OutputKeys.IMG_EMBEDDING: emb} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_attribute_recognition_pipeline.py b/modelscope/pipelines/cv/face_attribute_recognition_pipeline.py new file mode 100644 index 0000000..ef8ae30 --- /dev/null +++ b/modelscope/pipelines/cv/face_attribute_recognition_pipeline.py @@ -0,0 +1,64 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_attribute_recognition import \ + FaceAttributeRecognition +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_attribute_recognition, + module_name=Pipelines.face_attribute_recognition) +class FaceAttributeRecognitionPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face attribute recognition pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {ckpt_path}') + device = torch.device( + f'cuda:{0}' if torch.cuda.is_available() else 'cpu') + fairface = FaceAttributeRecognition(model_path=ckpt_path, + device=device) + self.fairface = fairface + self.device = device + logger.info('load model done') + + male_list = ['Male', 'Female'] + age_list = [ + '0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', + '70+' + ] + self.map_list = [male_list, age_list] + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + scores = self.fairface(input['img']) + assert scores is not None + return {OutputKeys.SCORES: scores, OutputKeys.LABELS: self.map_list} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_detection_pipeline.py b/modelscope/pipelines/cv/face_detection_pipeline.py new file mode 100644 index 0000000..ee8a9bc --- /dev/null +++ b/modelscope/pipelines/cv/face_detection_pipeline.py @@ -0,0 +1,99 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
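The ArcFace recognition pipeline above L2-normalises its embeddings before returning them, so comparing two faces reduces to a dot product. A usage sketch; the model id and file names are placeholders:

```python
import numpy as np

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

face_recognition = pipeline(Tasks.face_recognition, model='<arcface-model-id>')
emb_a = face_recognition('face_a.jpg')[OutputKeys.IMG_EMBEDDING]  # shape (1, D), unit length
emb_b = face_recognition('face_b.jpg')[OutputKeys.IMG_EMBEDDING]
cosine = float(np.dot(emb_a[0], emb_b[0]))  # in [-1, 1]; larger means more likely the same identity
```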
+import os.path as osp
+from typing import Any, Dict, List, Union
+
+import cv2
+import numpy as np
+import PIL
+import torch
+
+from modelscope.metainfo import Pipelines
+from modelscope.models.base.base_model import Model
+from modelscope.models.cv.face_detection import ScrfdDetect, SCRFDPreprocessor
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import Input, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+from modelscope.utils.typing import Image
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(Tasks.face_detection,
+                           module_name=Pipelines.face_detection)
+class FaceDetectionPipeline(Pipeline):
+    def __init__(self, model: str, **kwargs):
+        """
+        Use `model` to create a face detection pipeline for prediction.
+        Args:
+            model (`str` or `Model`): model_id, or a `ScrfdDetect` or `TinyMogDetect` model object.
+            preprocessor (`Preprocessor`, *optional*, defaults to None): `SCRFDPreprocessor`.
+        """
+        super().__init__(model=model, **kwargs)
+        config_path = osp.join(model, ModelFile.CONFIGURATION)
+        cfg = Config.from_file(config_path)
+        cfg_model = getattr(cfg, 'model', None)
+        if cfg_model is None:
+            # backward compatibility
+            detector = ScrfdDetect(model_dir=model, **kwargs)
+        else:
+            assert isinstance(self.model,
+                              Model), 'model object is not initialized.'
+            detector = self.model.to(self.device)
+
+        # backward compatibility
+        if self.preprocessor is None:
+            self.preprocessor = SCRFDPreprocessor()
+
+        self.detector = detector
+
+    def __call__(self, input: Union[Image, List[Image]], **kwargs):
+        """
+        Detect faces (bounding boxes and keypoints) in the image(s) passed as inputs.
+
+        Args:
+            input (`Image` or `List[Image]`):
+                The pipeline handles three types of images:
+
+                - A string containing an HTTP(S) link pointing to an image
+                - A string containing a local path to an image
+                - An image loaded in PIL directly
+
+                The pipeline accepts either a single image or a batch of images. Images in a batch must all be
+                in the same format.
+
+        Return:
+            A dictionary of results, or a list of such dictionaries. If the input is a single image, a
+            dictionary is returned; if the input is a list of images, a list of dictionaries is returned.
+
+            The dictionary contains the following keys:
+
+            - **scores** (`List[float]`) -- The detection score for each face in the image.
+            - **boxes** (`List[float]`) -- The bounding box [x1, y1, x2, y2] of each detected face, in the
+              image's original size.
+            - **keypoints** (`List[Dict[str, int]]`, optional) -- The corner keypoints [x1, y1, x2, y2, x3, y3, x4, y4]
+              of each detected face, in the image's original size.
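+
+            Example (the model id below is a placeholder):
+
+            ```python
+            >>> from modelscope.pipelines import pipeline
+            >>> from modelscope.utils.constant import Tasks
+            >>> detector = pipeline(Tasks.face_detection, model='<scrfd-model-id>')
+            >>> result = detector('path/to/image.jpg')
+            >>> result['boxes'][0]  # [x1, y1, x2, y2] of the first detected face
+            ```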
+ """ + return super().__call__(input, **kwargs) + + def preprocess(self, input: Image) -> Dict[str, Any]: + result = self.preprocessor(input) + + # openmmlab model compatibility + if 'img_metas' in result: + from mmcv.parallel import collate, scatter + result = collate([result], samples_per_gpu=1) + if next(self.model.parameters()).is_cuda: + # scatter to specified GPU + result = scatter(result, + [next(self.model.parameters()).device])[0] + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + return self.detector(**input) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_liveness_ir_pipeline.py b/modelscope/pipelines/cv/face_liveness_ir_pipeline.py new file mode 100644 index 0000000..964489f --- /dev/null +++ b/modelscope/pipelines/cv/face_liveness_ir_pipeline.py @@ -0,0 +1,84 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import onnxruntime +import PIL +import torch +import torch.nn.functional as F + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_landmark_confidence import \ + FacialLandmarkConfidence +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_liveness, + module_name=Pipelines.face_liveness_ir) +class FaceLivenessIrPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face lievness ir pipeline for prediction + Args: + model: model id on modelscope hub. 
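+
+            A usage sketch, mirroring the example in the sibling `face_liveness_xc` pipeline
+            below (the model id is a placeholder):
+
+            ```python
+            >>> from modelscope.pipelines import pipeline
+            >>> fl_ir = pipeline('face_liveness', '<face-liveness-ir-model-id>')
+            >>> fl_ir('path/to/face.jpg')  # {'scores': [...], 'boxes': [[x1, y1, x2, y2]]}
+            ```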
+ """ + super().__init__(model=model, **kwargs) + onnx_path = osp.join(model, ModelFile.ONNX_MODEL_FILE) + logger.info(f'loading model from {onnx_path}') + self.sess, self.input_node_name, self.out_node_name = self.load_onnx_model( + onnx_path) + logger.info('load model done') + + def load_onnx_model(self, onnx_path): + sess = onnxruntime.InferenceSession(onnx_path) + out_node_name = [] + input_node_name = [] + for node in sess.get_outputs(): + out_node_name.append(node.name) + + for node in sess.get_inputs(): + input_node_name.append(node.name) + + return sess, input_node_name, out_node_name + + def preprocess(self, input: Input) -> Dict[str, Any]: + + result = super().preprocess(input) + orig_img = LoadImage.convert_to_ndarray(input) + orig_img = orig_img[:, :, ::-1] + img = super(FaceLivenessIrPipeline, + self).align_face_padding(orig_img, result['bbox'], 16) + if img.shape[0] != 112: + img = img[8:120, 8:120, :] + img = (img - 127.5) * 0.0078125 + input_tensor = img.astype('float32').transpose( + (2, 0, 1))[np.newaxis, :] + result['input_tensor'] = input_tensor + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + input_feed = {} + input_feed[ + self.input_node_name[0]] = input['input_tensor'].cpu().numpy() + result = self.sess.run(self.out_node_name, input_feed=input_feed) + out = F.softmax(torch.FloatTensor(result), dim=-1)[0][0] + assert result is not None + scores = [1 - out[1].tolist()] + boxes = input['bbox'].cpu().numpy()[np.newaxis, :].tolist() + return {OutputKeys.SCORES: scores, OutputKeys.BOXES: boxes} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_liveness_xc_pipeline.py b/modelscope/pipelines/cv/face_liveness_xc_pipeline.py new file mode 100644 index 0000000..450a8c6 --- /dev/null +++ b/modelscope/pipelines/cv/face_liveness_xc_pipeline.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import onnxruntime +import PIL +import torch +import torch.nn.functional as F + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_landmark_confidence import \ + FacialLandmarkConfidence +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_liveness, + module_name=Pipelines.face_liveness_xc) +class FaceLivenessXcPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + FaceLivenessXcPipeline can judge the input face is a real or fake face. + use `model` to create a face lievness ir pipeline for prediction + Args: + model: model id on modelscope hub. 
+ ```python + >>> from modelscope.pipelines import pipeline + >>> fl_xc = pipeline('face_liveness', 'damo/cv_manual_face-liveness_flxc') + >>> fl_xc("https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_liveness_xc.png") + {'scores': [0.03821974992752075], 'boxes': [[12.569677352905273, 6.428711891174316, + 94.17887115478516, 106.74441528320312]]} + ``` + """ + super().__init__(model=model, **kwargs) + onnx_path = osp.join(model, ModelFile.ONNX_MODEL_FILE) + logger.info(f'loading model from {onnx_path}') + self.sess, self.input_node_name, self.out_node_name = self.load_onnx_model( + onnx_path) + logger.info('load model done') + + def load_onnx_model(self, onnx_path): + sess = onnxruntime.InferenceSession(onnx_path) + out_node_name = [] + input_node_name = [] + for node in sess.get_outputs(): + out_node_name.append(node.name) + + for node in sess.get_inputs(): + input_node_name.append(node.name) + + return sess, input_node_name, out_node_name + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + img = result['img'] + img = (img - 127.5) * 0.0078125 + img = np.expand_dims(img, 0).copy() + input_tensor = np.concatenate([img, img, img, img], axis=3) + input_tensor = np.transpose(input_tensor, + axes=(0, 3, 1, 2)).astype(np.float32) + result['input_tensor'] = input_tensor + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + input_feed = {} + input_feed[ + self.input_node_name[0]] = input['input_tensor'].cpu().numpy() + result = self.sess.run(self.out_node_name, input_feed=input_feed) + scores = [result[0][0][0].tolist()] + + boxes = input['bbox'].cpu().numpy()[np.newaxis, :].tolist() + return {OutputKeys.SCORES: scores, OutputKeys.BOXES: boxes} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_processing_base_pipeline.py b/modelscope/pipelines/cv/face_processing_base_pipeline.py new file mode 100644 index 0000000..0ded70c --- /dev/null +++ b/modelscope/pipelines/cv/face_processing_base_pipeline.py @@ -0,0 +1,198 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class FaceProcessingBasePipeline(Pipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face processing pipeline and output cropped img, scores, bbox and lmks. + + Args: + model: model id on modelscope hub. 
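+
+        When a face is found, `preprocess` returns a dict with keys `img` (the aligned
+        112x112 BGR crop), `scores`, `bbox` and `lmks` for the selected face; the recognition
+        and liveness pipelines above consume it via `super().preprocess(input)`.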
+ + """ + super().__init__(model=model, **kwargs) + # face detect pipeline + det_model_id = 'damo/cv_resnet50_face-detection_retinaface' + self.face_detection = pipeline(Tasks.face_detection, + model=det_model_id) + + def _choose_face(self, + det_result, + min_face=10, + top_face=1, + center_face=False, + img_shape=None): + ''' + choose face with maximum area + Args: + det_result: output of face detection pipeline + min_face: minimum size of valid face w/h + top_face: take faces with top max areas + center_face: choose the most centerd face from multi faces, only valid if top_face > 1 + ''' + bboxes = np.array(det_result[OutputKeys.BOXES]) + landmarks = np.array(det_result[OutputKeys.KEYPOINTS]) + scores = np.array(det_result[OutputKeys.SCORES]) + if bboxes.shape[0] == 0: + logger.info('Warning: No face detected!') + return None + # face idx with enough size + face_idx = [] + for i in range(bboxes.shape[0]): + box = bboxes[i] + if (box[2] - box[0]) >= min_face and (box[3] - box[1]) >= min_face: + face_idx += [i] + if len(face_idx) == 0: + logger.info( + f'Warning: Face size not enough, less than {min_face}x{min_face}!' + ) + return None + bboxes = bboxes[face_idx] + landmarks = landmarks[face_idx] + scores = scores[face_idx] + # find max faces + boxes = np.array(bboxes) + area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + sort_idx = np.argsort(area)[-top_face:] + # find center face + if top_face > 1 and center_face and bboxes.shape[0] > 1 and img_shape: + img_center = [img_shape[1] // 2, img_shape[0] // 2] + min_dist = float('inf') + sel_idx = -1 + for _idx in sort_idx: + box = boxes[_idx] + dist = np.square( + np.abs((box[0] + box[2]) / 2 - img_center[0])) + np.square( + np.abs((box[1] + box[3]) / 2 - img_center[1])) + if dist < min_dist: + min_dist = dist + sel_idx = _idx + sort_idx = [sel_idx] + main_idx = sort_idx[-1] + return scores[main_idx], bboxes[main_idx], landmarks[main_idx] + + def preprocess(self, input: Input) -> Dict[str, Any]: + img = LoadImage.convert_to_ndarray(input) + img = img[:, :, ::-1] + det_result = self.face_detection(img.copy()) + rtn = self._choose_face(det_result, img_shape=img.shape) + if rtn is not None: + scores, bboxes, face_lmks = rtn + face_lmks = face_lmks.reshape(5, 2) + align_img, _ = align_face(img, (112, 112), face_lmks) + + result = {} + result['img'] = np.ascontiguousarray(align_img) + result['scores'] = [scores] + result['bbox'] = bboxes + result['lmks'] = face_lmks + return result + + def align_face_padding(self, img, rect, padding_size=16, pad_pixel=127): + rect = np.reshape(rect, (-1, 4)) + if img is None: + return None + if img.ndim == 2: + w, h = img.shape + ret = np.empty((w, h, 3), dtype=np.uint8) + ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img + img = ret + img = img[:, :, 0:3] + img = img[..., ::-1] + nrof = np.zeros((5, ), dtype=np.int32) + + bounding_boxes = rect + nrof_faces = bounding_boxes.shape[0] + if nrof_faces > 0: + det = bounding_boxes[:, 0:4] + img_size = np.asarray(img.shape)[0:2] + bindex = 0 + if nrof_faces > 1: + img_center = img_size / 2 + offsets = np.vstack([ + (det[:, 0] + det[:, 2]) / 2 - img_center[1], + (det[:, 1] + det[:, 3]) / 2 - img_center[0] + ]) + offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) + bindex = np.argmax(0 - offset_dist_squared * 2.0) + _bbox = bounding_boxes[bindex, 0:4] + nrof[0] += 1 + else: + nrof[1] += 1 + if _bbox is None: + nrof[2] += 1 + return None + _bbox = [int(_bbox[0]), int(_bbox[1]), int(_bbox[2]), int(_bbox[3])] + x1 = _bbox[0] - int( + (_bbox[2] - _bbox[0] 
+ 1) * padding_size * 1.0 / 112) + x2 = _bbox[2] + int( + (_bbox[2] - _bbox[0] + 1) * padding_size * 1.0 / 112) + y1 = _bbox[1] - int( + (_bbox[3] - _bbox[1] + 1) * padding_size * 1.0 / 112) + y2 = _bbox[3] + int( + (_bbox[3] - _bbox[1] + 1) * padding_size * 1.0 / 112) + _bbox[0] = max(0, x1) + _bbox[1] = max(0, y1) + _bbox[2] = min(img.shape[1] - 1, x2) + _bbox[3] = min(img.shape[0] - 1, y2) + padding_h = _bbox[3] - _bbox[1] + 1 + padding_w = _bbox[2] - _bbox[0] + 1 + if padding_w > padding_h: + offset = int((padding_w - padding_h) / 2) + _bbox[1] = _bbox[1] - offset + _bbox[3] = _bbox[1] + padding_w - 1 + _bbox[1] = max(0, _bbox[1]) + _bbox[3] = min(img.shape[0] - 1, _bbox[3]) + dst_size = padding_w + else: + offset = int((padding_h - padding_w) / 2) + _bbox[0] = _bbox[0] - offset + _bbox[2] = _bbox[0] + padding_h - 1 + _bbox[0] = max(0, _bbox[0]) + _bbox[2] = min(img.shape[1] - 1, _bbox[2]) + dst_size = padding_h + + dst = np.full((dst_size, dst_size, 3), pad_pixel, dtype=np.uint8) + dst_x_offset = int((dst_size - (_bbox[2] - _bbox[0] + 1)) / 2) + dst_y_offset = int((dst_size - (_bbox[3] - _bbox[1] + 1)) / 2) + + y_start = dst_y_offset + y_end = dst_y_offset + _bbox[3] + 1 - _bbox[1] + x_start = dst_x_offset + x_end = dst_x_offset + _bbox[2] + 1 - _bbox[0] + dst[y_start:y_end, x_start:x_end, :] = img[_bbox[1]:_bbox[3] + 1, + _bbox[0]:_bbox[2] + 1, :] + + dst = cv2.resize(dst, (128, 128), interpolation=cv2.INTER_LINEAR) + + return dst + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + return { + OutputKeys.OUTPUT_IMG: input['img'].cpu().numpy(), + OutputKeys.SCORES: input['scores'].cpu().tolist(), + OutputKeys.BOXES: [input['bbox'].cpu().tolist()], + OutputKeys.KEYPOINTS: [input['lmks'].cpu().tolist()] + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_quality_assessment_pipeline.py b/modelscope/pipelines/cv/face_quality_assessment_pipeline.py new file mode 100644 index 0000000..bb33955 --- /dev/null +++ b/modelscope/pipelines/cv/face_quality_assessment_pipeline.py @@ -0,0 +1,95 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import onnxruntime +import PIL +import torch +import torch.nn.functional as F + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_landmark_confidence import \ + FacialLandmarkConfidence +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.pipelines.util import batch_process +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_quality_assessment, + module_name=Pipelines.face_quality_assessment) +class FaceQualityAssessmentPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face quality assessment pipeline for prediction + Args: + model: model id on modelscope hub. 
+ Example: + FaceQualityAssessmentPipeline can measure the quality of an input face image, + the higher output score represents the better quality + + ```python + >>> from modelscope.pipelines import pipeline + >>> fqa = pipeline('face-quality-assessment', 'damo/cv_manual_face-quality-assessment_fqa') + >>> frfm("https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_recognition_1.png") + {'scores': [0.99949193], 'boxes': [[157.72341918945312, 67.5608139038086, + 305.8574523925781, 271.25555419921875]]} + + ``` + """ + super().__init__(model=model, **kwargs) + onnx_path = osp.join(model, ModelFile.ONNX_MODEL_FILE) + logger.info(f'loading model from {onnx_path}') + self.sess, self.input_node_name, self.out_node_name = self.load_onnx_model( + onnx_path) + logger.info('load model done') + + def _batch(self, data): + return batch_process(self.model, data) + + def load_onnx_model(self, onnx_path): + sess = onnxruntime.InferenceSession(onnx_path) + out_node_name = [] + input_node_name = [] + for node in sess.get_outputs(): + out_node_name.append(node.name) + + for node in sess.get_inputs(): + input_node_name.append(node.name) + + return sess, input_node_name, out_node_name + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = (face_img / 255. - 0.5) / 0.5 + face_img = np.expand_dims(face_img, 0).copy() + face_img = np.transpose(face_img, axes=(0, 3, 1, 2)) + face_img = face_img.astype(np.float32) + result['input_tensor'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + input_feed = {} + input_feed[ + self.input_node_name[0]] = input['input_tensor'].cpu().numpy() + result = self.sess.run(self.out_node_name, input_feed=input_feed) + assert result is not None + scores = [result[0][0][0]] + boxes = input['bbox'].cpu().numpy()[np.newaxis, :].tolist() + return {OutputKeys.SCORES: scores, OutputKeys.BOXES: boxes} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py b/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py new file mode 100644 index 0000000..6fc8c42 --- /dev/null +++ b/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import onnxruntime +import PIL +import torch +import torch.nn.functional as F + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_landmark_confidence import \ + FacialLandmarkConfidence +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_recognition, + module_name=Pipelines.face_recognition_onnx_fm) +class FaceRecognitionOnnxFmPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + FaceRecognitionOnnxFmPipeline can extract 512-dim feature of mask or non-masked face image. 
use `model` + to create a face recognition face mask onnx pipeline for prediction. + + Args: + model: model id on modelscope hub. + + Examples: + + >>> from modelscope.pipelines import pipeline + >>> frfm = pipeline('face-recognition-ood', 'damo/cv_manual_face-recognition_frfm') + >>> frfm("https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_recognition_1.png") + >>> {{'img_embedding': array([[ 0.02276129, -0.00761525, ...,0.05735306]], + >>> dtype=float32)} } + """ + super().__init__(model=model, **kwargs) + onnx_path = osp.join(model, ModelFile.ONNX_MODEL_FILE) + logger.info(f'loading model from {onnx_path}') + self.sess, self.input_node_name, self.out_node_name = self.load_onnx_model( + onnx_path) + logger.info('load model done') + + def load_onnx_model(self, onnx_path): + sess = onnxruntime.InferenceSession(onnx_path) + out_node_name = [] + input_node_name = [] + for node in sess.get_outputs(): + out_node_name.append(node.name) + + for node in sess.get_inputs(): + input_node_name.append(node.name) + + return sess, input_node_name, out_node_name + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = (face_img / 255. - 0.5) / 0.5 + face_img = np.expand_dims(face_img, 0).copy() + face_img = np.transpose(face_img, axes=(0, 3, 1, 2)) + face_img = face_img.astype(np.float32) + result['input_tensor'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + input_feed = {} + input_feed[ + self.input_node_name[0]] = input['input_tensor'].cpu().numpy() + emb = self.sess.run(self.out_node_name, input_feed=input_feed)[0] + emb /= np.sqrt(np.sum(emb**2, -1, keepdims=True)) # l2 norm + return {OutputKeys.IMG_EMBEDDING: emb} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py b/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py new file mode 100644 index 0000000..f557bdb --- /dev/null +++ b/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py @@ -0,0 +1,84 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import onnxruntime +import PIL +import torch +import torch.nn.functional as F + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_landmark_confidence import \ + FacialLandmarkConfidence +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_recognition, + module_name=Pipelines.face_recognition_onnx_ir) +class FaceRecognitionOnnxIrPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + FaceRecognitionOnnxIrPipeline can extract 512-dim feature of IR face image. + use `model` to create a face recognition ir onnx pipeline for prediction. + Args: + model: model id on modelscope hub. 
+ Example: + + >>> from modelscope.pipelines import pipeline + >>> frir = pipeline('face-recognition-ood', 'damo/cv_manual_face-recognition_frir') + >>> frir("https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/ir_face_recognition_1.png") + >>> # {{'img_embedding': array([[ 0.02276129, -0.00761525, ...,0.05735306]], dtype=float32)} } + """ + super().__init__(model=model, **kwargs) + onnx_path = osp.join(model, ModelFile.ONNX_MODEL_FILE) + logger.info(f'loading model from {onnx_path}') + self.sess, self.input_node_name, self.out_node_name = self.load_onnx_model( + onnx_path) + logger.info('load model done') + + def load_onnx_model(self, onnx_path): + sess = onnxruntime.InferenceSession(onnx_path) + out_node_name = [] + input_node_name = [] + for node in sess.get_outputs(): + out_node_name.append(node.name) + + for node in sess.get_inputs(): + input_node_name.append(node.name) + + return sess, input_node_name, out_node_name + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = (face_img / 255. - 0.5) / 0.5 + face_img = np.expand_dims(face_img, 0).copy() + face_img = np.transpose(face_img, axes=(0, 3, 1, 2)) + face_img = face_img.astype(np.float32) + result['input_tensor'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + input_feed = {} + input_feed[ + self.input_node_name[0]] = input['input_tensor'].cpu().numpy() + emb = self.sess.run(self.out_node_name, input_feed=input_feed)[0] + emb /= np.sqrt(np.sum(emb**2, -1, keepdims=True)) # l2 norm + return {OutputKeys.IMG_EMBEDDING: emb} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_recognition_ood_pipeline.py b/modelscope/pipelines/cv/face_recognition_ood_pipeline.py new file mode 100644 index 0000000..3979948 --- /dev/null +++ b/modelscope/pipelines/cv/face_recognition_ood_pipeline.py @@ -0,0 +1,72 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models import Model +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_recognition, + module_name=Pipelines.face_recognition_ood) +class FaceRecognitionOodPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face recognition ood pipeline for prediction + Args: + model: model id on modelscope hub. 
+ + Examples: + + >>> from modelscope.pipelines import pipeline + >>> fr_ood= pipeline('face-recognition-ood', 'damo/cv_ir_face-recognition-ood_rts') + >>> fr_ood("https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/face_recognition_1.png") + {{'img_embedding': array([[ 0.02276129, -0.00761525, ...,0.05735306]], + dtype=float32, 'scores': [[0.7656678557395935]]} + """ + + # face recong model + super().__init__(model=model, **kwargs) + face_model = self.model + face_model = face_model.to(self.device) + face_model.eval() + self.face_model = face_model + logger.info('face recognition model loaded!') + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = np.transpose(face_img, axes=(2, 0, 1)) + face_img = (face_img / 255. - 0.5) / 0.5 + face_img = face_img.astype(np.float32) + result['img'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + assert input['img'] is not None + img = input['img'].unsqueeze(0) + output = self.face_model(img) + emb = output[0].detach().cpu().numpy() + emb /= np.sqrt(np.sum(emb**2, -1, keepdims=True)) # l2 norm + scores = output[1].exp().detach().cpu().numpy().tolist() + return {OutputKeys.IMG_EMBEDDING: emb, OutputKeys.SCORES: scores} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/face_recognition_pipeline.py b/modelscope/pipelines/cv/face_recognition_pipeline.py new file mode 100644 index 0000000..961b268 --- /dev/null +++ b/modelscope/pipelines/cv/face_recognition_pipeline.py @@ -0,0 +1,68 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.face_recognition.torchkit.backbone import get_model +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_recognition, + module_name=Pipelines.face_recognition) +class FaceRecognitionPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face recognition pipeline for prediction + Args: + model: model id on modelscope hub. + """ + + # face recong model + super().__init__(model=model, **kwargs) + device = torch.device( + f'cuda:{0}' if torch.cuda.is_available() else 'cpu') + self.device = device + face_model = get_model('IR_101')([112, 112]) + face_model.load_state_dict( + torch.load(osp.join(model, ModelFile.TORCH_MODEL_BIN_FILE), + map_location=device)) + face_model = face_model.to(device) + face_model.eval() + self.face_model = face_model + logger.info('face recognition model loaded!') + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = np.transpose(face_img, axes=(2, 0, 1)) + face_img = (face_img / 255. 
- 0.5) / 0.5 + face_img = face_img.astype(np.float32) + result['img'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + assert input['img'] is not None + img = input['img'].unsqueeze(0) + emb = self.face_model(img).detach().cpu().numpy() + emb /= np.sqrt(np.sum(emb**2, -1, keepdims=True)) # l2 norm + return {OutputKeys.IMG_EMBEDDING: emb} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/facial_expression_recognition_pipeline.py b/modelscope/pipelines/cv/facial_expression_recognition_pipeline.py new file mode 100644 index 0000000..6fa182b --- /dev/null +++ b/modelscope/pipelines/cv/facial_expression_recognition_pipeline.py @@ -0,0 +1,62 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_expression_recognition import \ + FacialExpressionRecognition +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.facial_expression_recognition, + module_name=Pipelines.facial_expression_recognition) +class FacialExpressionRecognitionPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face detection pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {ckpt_path}') + device = torch.device( + f'cuda:{0}' if torch.cuda.is_available() else 'cpu') + fer = FacialExpressionRecognition(model_path=ckpt_path, device=device) + self.fer = fer + self.device = device + logger.info('load model done') + + self.map_list = [ + 'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral' + ] + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super(FacialExpressionRecognitionPipeline, + self).preprocess(input) + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + result = self.fer(input) + assert result is not None + scores = result[0].tolist() + return {OutputKeys.SCORES: scores, OutputKeys.LABELS: self.map_list} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/facial_landmark_confidence_pipeline.py b/modelscope/pipelines/cv/facial_landmark_confidence_pipeline.py new file mode 100644 index 0000000..7eb5b4f --- /dev/null +++ b/modelscope/pipelines/cv/facial_landmark_confidence_pipeline.py @@ -0,0 +1,67 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
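The recognition pipelines above all return an L2-normalized 512-dim feature under `OutputKeys.IMG_EMBEDDING`, so two faces can be compared with a plain dot product, which equals cosine similarity for unit-length vectors. A minimal consumer sketch; the model id, image paths and the 0.6 threshold are illustrative assumptions rather than values taken from this commit:

```python
import numpy as np

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Illustrative model id; any of the Tasks.face_recognition pipelines above behaves the same way.
fr = pipeline(Tasks.face_recognition, model='damo/cv_ir101_facerecognition_cfglint')

emb1 = fr('face_a.jpg')[OutputKeys.IMG_EMBEDDING]  # shape (1, 512), already L2-normalized
emb2 = fr('face_b.jpg')[OutputKeys.IMG_EMBEDDING]

# Unit-length embeddings: the dot product is the cosine similarity.
similarity = float(np.dot(emb1[0], emb2[0]))
print('same identity' if similarity > 0.6 else 'different identity')  # 0.6 is only an illustrative threshold
```

The same pattern applies to `FaceRecognitionOodPipeline`, which additionally returns a per-image confidence under `OutputKeys.SCORES`.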
+import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.facial_landmark_confidence import \ + FacialLandmarkConfidence +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_2d_keypoints, + module_name=Pipelines.facial_landmark_confidence) +class FacialLandmarkConfidencePipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a facial landmrk confidence pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {ckpt_path}') + flcm = FacialLandmarkConfidence(model_path=ckpt_path, + device=self.device) + self.flcm = flcm + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + + result = super().preprocess(input) + img = LoadImage.convert_to_ndarray(input) + img = img[:, :, ::-1] + result['orig_img'] = img.astype(np.float32) + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + result = self.flcm(input) + assert result is not None + lms = result[0].reshape(-1, 10).tolist() + scores = [1 - result[1].tolist()] + boxes = input['bbox'].cpu().numpy()[np.newaxis, :].tolist() + output_poses = [] + return { + OutputKeys.SCORES: scores, + OutputKeys.POSES: output_poses, + OutputKeys.KEYPOINTS: lms, + OutputKeys.BOXES: boxes + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/mask_face_recognition_pipeline.py b/modelscope/pipelines/cv/mask_face_recognition_pipeline.py new file mode 100644 index 0000000..8c36195 --- /dev/null +++ b/modelscope/pipelines/cv/mask_face_recognition_pipeline.py @@ -0,0 +1,76 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from collections import OrderedDict +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_recognition.align_face import align_face +from modelscope.models.cv.face_recognition.torchkit.backbone.facemask_backbone import \ + iresnet286 +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +from . import FaceProcessingBasePipeline + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_recognition, + module_name=Pipelines.mask_face_recognition) +class MaskFaceRecognitionPipeline(FaceProcessingBasePipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a mask face recognition pipeline for prediction + Args: + model: model id on modelscope hub. 
+ """ + + # face recong model + super().__init__(model=model, **kwargs) + face_model = iresnet286() + state_dict = torch.load(osp.join(model, ModelFile.TORCH_MODEL_FILE)) + reviesed_state_dict = self._prefix_revision(state_dict) + face_model.load_state_dict(reviesed_state_dict, strict=True) + face_model = face_model.to(self.device) + face_model.eval() + self.face_model = face_model + logger.info('face recognition model loaded!') + + def _prefix_revision(self, state_dict): + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('module.'): + k = k[7:] + new_state_dict[k] = v + state = new_state_dict + return state + + def preprocess(self, input: Input) -> Dict[str, Any]: + result = super().preprocess(input) + align_img = result['img'] + face_img = align_img[:, :, ::-1] # to rgb + face_img = np.transpose(face_img, axes=(2, 0, 1)) + face_img = (face_img / 255. - 0.5) / 0.5 + face_img = face_img.astype(np.float32) + result['img'] = face_img + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + assert input['img'] is not None + img = input['img'].unsqueeze(0) + emb = self.face_model(img).detach().cpu().numpy() + emb /= np.sqrt(np.sum(emb**2, -1, keepdims=True)) # l2 norm + return {OutputKeys.IMG_EMBEDDING: emb} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/mog_face_detection_pipeline.py b/modelscope/pipelines/cv/mog_face_detection_pipeline.py new file mode 100644 index 0000000..4830387 --- /dev/null +++ b/modelscope/pipelines/cv/mog_face_detection_pipeline.py @@ -0,0 +1,55 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import numpy as np + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_detection import MogFaceDetector +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_detection, + module_name=Pipelines.mog_face_detection) +class MogFaceDetectionPipeline(Pipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face detection pipeline for prediction + Args: + model: model id on modelscope hub. 
+ """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {ckpt_path}') + kwargs.pop('device') + detector = MogFaceDetector(model_path=ckpt_path, device=self.device, **kwargs) + self.detector = detector + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + img = LoadImage.convert_to_ndarray(input) + img = img.astype(np.float32) + result = {'img': img} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + + result = self.detector(input) + assert result is not None + bboxes = result[:, :4].tolist() + scores = result[:, 4].tolist() + return { + OutputKeys.SCORES: scores, + OutputKeys.BOXES: bboxes, + OutputKeys.KEYPOINTS: None, + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/mtcnn_face_detection_pipeline.py b/modelscope/pipelines/cv/mtcnn_face_detection_pipeline.py new file mode 100644 index 0000000..16e3537 --- /dev/null +++ b/modelscope/pipelines/cv/mtcnn_face_detection_pipeline.py @@ -0,0 +1,57 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_detection import MtcnnFaceDetector +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_detection, + module_name=Pipelines.mtcnn_face_detection) +class MtcnnFaceDetectionPipeline(Pipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face detection pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, './weights') + logger.info(f'loading model from {ckpt_path}') + device = torch.device( + f'cuda:{0}' if torch.cuda.is_available() else 'cpu') + kwargs.pop('device') + detector = MtcnnFaceDetector(model_path=ckpt_path, device=device, **kwargs) + self.detector = detector + self.device = device + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + img = LoadImage.convert_to_ndarray(input) + result = {'img': img} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + result = self.detector(input) + assert result is not None + bboxes = result[0][:, :4].tolist() + scores = result[0][:, 4].tolist() + lms = result[1].tolist() + return { + OutputKeys.SCORES: scores, + OutputKeys.BOXES: bboxes, + OutputKeys.KEYPOINTS: lms, + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/retina_face_detection_pipeline.py b/modelscope/pipelines/cv/retina_face_detection_pipeline.py new file mode 100644 index 0000000..7daf0c0 --- /dev/null +++ b/modelscope/pipelines/cv/retina_face_detection_pipeline.py @@ -0,0 +1,59 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_detection import RetinaFaceDetection +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_detection, + module_name=Pipelines.retina_face_detection) +class RetinaFaceDetectionPipeline(Pipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face detection pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {ckpt_path}') + kwargs.pop('device') + detector = RetinaFaceDetection(model_path=ckpt_path, + device=self.device, **kwargs) + self.detector = detector + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + img = LoadImage.convert_to_ndarray(input) + img = img.astype(np.float32) + result = {'img': img} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + result = self.detector(input) + assert result is not None + bboxes = result[0][:, :4].tolist() + scores = result[0][:, 4].tolist() + lms = result[1].tolist() + return { + OutputKeys.SCORES: scores, + OutputKeys.BOXES: bboxes, + OutputKeys.KEYPOINTS: lms, + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/ulfd_face_detection_pipeline.py b/modelscope/pipelines/cv/ulfd_face_detection_pipeline.py new file mode 100644 index 0000000..4cc4fbd --- /dev/null +++ b/modelscope/pipelines/cv/ulfd_face_detection_pipeline.py @@ -0,0 +1,56 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.face_detection import UlfdFaceDetector +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.face_detection, + module_name=Pipelines.ulfd_face_detection) +class UlfdFaceDetectionPipeline(Pipeline): + def __init__(self, model: str, **kwargs): + """ + use `model` to create a face detection pipeline for prediction + Args: + model: model id on modelscope hub. 
+ """ + super().__init__(model=model, **kwargs) + ckpt_path = osp.join(model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {ckpt_path}') + detector = UlfdFaceDetector(model_path=ckpt_path, device=self.device) + self.detector = detector + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + img = LoadImage.convert_to_ndarray(input) + img = img.astype(np.float32) + result = {'img': img} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + result = self.detector(input) + assert result is not None + bboxes = result[0].tolist() + scores = result[1].tolist() + return { + OutputKeys.SCORES: scores, + OutputKeys.BOXES: bboxes, + OutputKeys.KEYPOINTS: None, + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py new file mode 100644 index 0000000..ed6b52a --- /dev/null +++ b/modelscope/pipelines/util.py @@ -0,0 +1,104 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import List, Optional, Union + +from modelscope.hub.api import HubApi +from modelscope.hub.file_download import model_file_download +from modelscope.utils.config import Config +from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def is_config_has_model(cfg_file): + try: + cfg = Config.from_file(cfg_file) + return hasattr(cfg, 'model') + except Exception as e: + logger.error(f'parse config file {cfg_file} failed: {e}') + return False + + +def is_official_hub_path(path: Union[str, List], + revision: Optional[str] = DEFAULT_MODEL_REVISION): + """ Whether path is an official hub name or a valid local + path to official hub directory. 
+ """ + def is_official_hub_impl(path): + if osp.exists(path): + cfg_file = osp.join(path, ModelFile.CONFIGURATION) + return osp.exists(cfg_file) + else: + try: + _ = HubApi().get_model(path, revision=revision) + return True + except Exception as e: + raise ValueError(f'invalid model repo path {e}') + + if isinstance(path, str): + return is_official_hub_impl(path) + else: + results = [is_official_hub_impl(m) for m in path] + all_true = all(results) + any_true = any(results) + if any_true and not all_true: + raise ValueError( + f'some model are hub address, some are not, model list: {path}' + ) + + return all_true + + +def is_model(path: Union[str, List]): + """ whether path is a valid modelhub path and containing model config + """ + def is_modelhub_path_impl(path): + if osp.exists(path): + cfg_file = osp.join(path, ModelFile.CONFIGURATION) + if osp.exists(cfg_file): + return is_config_has_model(cfg_file) + else: + return False + else: + try: + cfg_file = model_file_download(path, ModelFile.CONFIGURATION) + return is_config_has_model(cfg_file) + except Exception: + return False + + if isinstance(path, str): + return is_modelhub_path_impl(path) + else: + results = [is_modelhub_path_impl(m) for m in path] + all_true = all(results) + any_true = any(results) + if any_true and not all_true: + raise ValueError( + f'some models are hub address, some are not, model list: {path}' + ) + + return all_true + + +def batch_process(model, data): + import torch + if model.__class__.__name__ == 'OfaForAllTasks': + # collate batch data due to the nested data structure + assert isinstance(data, list) + batch_data = { + 'nsentences': len(data), + 'samples': [d['samples'][0] for d in data], + 'net_input': {} + } + for k in data[0]['net_input'].keys(): + batch_data['net_input'][k] = torch.cat( + [d['net_input'][k] for d in data]) + if 'w_resize_ratios' in data[0]: + batch_data['w_resize_ratios'] = torch.cat( + [d['w_resize_ratios'] for d in data]) + if 'h_resize_ratios' in data[0]: + batch_data['h_resize_ratios'] = torch.cat( + [d['h_resize_ratios'] for d in data]) + + return batch_data diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py new file mode 100644 index 0000000..2f3bfbe --- /dev/null +++ b/modelscope/preprocessors/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .base import Preprocessor + from .builder import PREPROCESSORS, build_preprocessor + from .common import Compose, ToTensor, Filter + from .image import (LoadImage, load_image, + ImageColorEnhanceFinetunePreprocessor, + ImageInstanceSegmentationPreprocessor, + ImageDenoisePreprocessor, ImageDeblurPreprocessor) +else: + _import_structure = { + 'base': ['Preprocessor'], + 'builder': ['PREPROCESSORS', 'build_preprocessor'], + 'common': ['Compose', 'ToTensor', 'Filter'], + 'image': [ + 'LoadImage', 'load_image', 'ImageColorEnhanceFinetunePreprocessor', + 'ImageInstanceSegmentationPreprocessor', + 'ImageDenoisePreprocessor', 'ImageDeblurPreprocessor' + ] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/preprocessors/base.py b/modelscope/preprocessors/base.py new file mode 100644 index 0000000..c2d6c3d --- /dev/null +++ b/modelscope/preprocessors/base.py @@ -0,0 +1,220 @@ +# Copyright (c) Alibaba, Inc. 
and its affiliates. +import os +from abc import ABC, abstractmethod +from copy import deepcopy +from typing import Any, Callable, Dict, Optional, Sequence, Union + +from modelscope.metainfo import Models, Preprocessors, TaskModels +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke, + ModeKeys, Tasks) +from modelscope.utils.hub import read_config, snapshot_download +from modelscope.utils.logger import get_logger + +from .builder import build_preprocessor + +logger = get_logger() + +PREPROCESSOR_MAP = { +} + + +class Preprocessor(ABC): + """Base of preprocessors. + """ + def __init__(self, mode=ModeKeys.INFERENCE, *args, **kwargs): + self._mode = mode + assert self._mode in (ModeKeys.INFERENCE, ModeKeys.TRAIN, + ModeKeys.EVAL) + self.device = int( + os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else None + pass + + @abstractmethod + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + pass + + @property + def mode(self): + return self._mode + + @mode.setter + def mode(self, value): + self._mode = value + + @classmethod + def from_pretrained(cls, + model_name_or_path: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + cfg_dict: Config = None, + preprocessor_mode=ModeKeys.INFERENCE, + **kwargs): + """Instantiate a preprocessor from local directory or remote model repo. Note + that when loading from remote, the model revision can be specified. + + Args: + model_name_or_path(str): A model dir or a model id used to load the preprocessor out. + revision(str, `optional`): The revision used when the model_name_or_path is + a model id of the remote hub. default `master`. + cfg_dict(Config, `optional`): An optional config. If provided, it will replace + the config read out of the `model_name_or_path` + preprocessor_mode(str, `optional`): Specify the working mode of the preprocessor, can be `train`, `eval`, + or `inference`. Default value `inference`. + The preprocessor field in the config may contain two sub preprocessors: + >>> { + >>> "train": { + >>> "type": "some-train-preprocessor" + >>> }, + >>> "val": { + >>> "type": "some-eval-preprocessor" + >>> } + >>> } + In this scenario, the `train` preprocessor will be loaded in the `train` mode, the `val` preprocessor + will be loaded in the `eval` or `inference` mode. The `mode` field in the preprocessor class + will be assigned in all the modes. + Or just one: + >>> { + >>> "type": "some-train-preprocessor" + >>> } + In this scenario, the sole preprocessor will be loaded in all the modes, + and the `mode` field in the preprocessor class will be assigned. + + **kwargs: + task(str, `optional`): The `Tasks` enumeration value to replace the task value + read out of config in the `model_name_or_path`. + This is useful when the preprocessor does not have a `type` field and the task to be used is not + equal to the task of which the model is saved. + Other kwargs will be directly fed into the preprocessor, to replace the default configs. + + Returns: + The preprocessor instance. 
+ + Examples: + >>> from modelscope.preprocessors import Preprocessor + >>> Preprocessor.from_pretrained('damo/nlp_debertav2_fill-mask_chinese-base') + + """ + if not os.path.exists(model_name_or_path): + model_dir = snapshot_download( + model_name_or_path, + revision=revision, + user_agent={Invoke.KEY: Invoke.PREPROCESSOR}, + ignore_file_pattern=[ + '.*.bin', + '.*.ts', + '.*.pt', + '.*.data-00000-of-00001', + '.*.onnx', + '.*.meta', + '.*.pb', + '.*.index', + ]) + else: + model_dir = model_name_or_path + if cfg_dict is None: + cfg = read_config(model_dir) + else: + cfg = cfg_dict + task = cfg.task + if 'task' in kwargs: + task = kwargs.pop('task') + field_name = Tasks.find_field_by_task(task) + if 'field' in kwargs: + field_name = kwargs.pop('field') + sub_key = 'train' if preprocessor_mode == ModeKeys.TRAIN else 'val' + + if not hasattr(cfg, 'preprocessor') or len(cfg.preprocessor) == 0: + logger.warning('No preprocessor field found in cfg.') + preprocessor_cfg = ConfigDict() + else: + preprocessor_cfg = cfg.preprocessor + + if 'type' not in preprocessor_cfg: + if sub_key in preprocessor_cfg: + sub_cfg = getattr(preprocessor_cfg, sub_key) + else: + logger.warning( + f'No {sub_key} key and type key found in ' + f'preprocessor domain of configuration.json file.') + sub_cfg = preprocessor_cfg + else: + sub_cfg = preprocessor_cfg + + # TODO @wenmeng.zwm refine this logic when preprocessor has no model_dir param + # for cv models. + sub_cfg.update({'model_dir': model_dir}) + sub_cfg.update(kwargs) + if 'type' in sub_cfg: + if isinstance(sub_cfg, Sequence): + # TODO: for Sequence, need adapt to `mode` and `mode_dir` args, + # and add mode for Compose or other plans + raise NotImplementedError('Not supported yet!') + + preprocessor = build_preprocessor(sub_cfg, field_name) + else: + logger.warning( + f'Cannot find available config to build preprocessor at mode {preprocessor_mode}, ' + f'current config: {sub_cfg}. trying to build by task and model information.' + ) + model_cfg = getattr(cfg, 'model', ConfigDict()) + model_type = model_cfg.type if hasattr( + model_cfg, 'type') else getattr(model_cfg, 'model_type', None) + if task is None or model_type is None: + logger.warning( + f'Find task: {task}, model type: {model_type}. ' + f'Insufficient information to build preprocessor, skip building preprocessor' + ) + return None + if (model_type, task) not in PREPROCESSOR_MAP: + logger.warning( + f'No preprocessor key {(model_type, task)} found in PREPROCESSOR_MAP, ' + f'skip building preprocessor.') + return None + + sub_cfg = ConfigDict({ + 'type': PREPROCESSOR_MAP[(model_type, task)], + **sub_cfg + }) + preprocessor = build_preprocessor(sub_cfg, field_name) + preprocessor.mode = preprocessor_mode + sub_cfg.pop('model_dir', None) + if not hasattr(preprocessor, 'cfg'): + preprocessor.cfg = cfg + return preprocessor + + def save_pretrained(self, + target_folder: Union[str, os.PathLike], + config: Optional[dict] = None, + save_config_function: Callable = None): + """Save the preprocessor, its configuration and other related files to a directory, + so that it can be re-loaded + + By default, this method will save the preprocessor's config with mode `inference`. + + Args: + target_folder (Union[str, os.PathLike]): + Directory to which to save. Will be created if it doesn't exist. + + config (Optional[dict], optional): + The config for the configuration.json + + save_config_function (Callable): The function used to save the configuration, call this function + after the config is updated. 
+ + """ + if config is None and hasattr(self, 'cfg'): + config = self.cfg + + if config is not None: + # Update the mode to `inference` in the preprocessor field. + if 'preprocessor' in config and config['preprocessor'] is not None: + if 'mode' in config['preprocessor']: + config['preprocessor']['mode'] = 'inference' + elif 'val' in config['preprocessor'] and 'mode' in config[ + 'preprocessor']['val']: + config['preprocessor']['val']['mode'] = 'inference' + + if save_config_function is None: + from modelscope.utils.checkpoint import save_configuration + save_config_function = save_configuration + save_config_function(target_folder, config) diff --git a/modelscope/preprocessors/builder.py b/modelscope/preprocessors/builder.py new file mode 100644 index 0000000..55e19db --- /dev/null +++ b/modelscope/preprocessors/builder.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import Fields +from modelscope.utils.registry import Registry, build_from_cfg + +PREPROCESSORS = Registry('preprocessors') + + +def build_preprocessor(cfg: ConfigDict, + field_name: str = None, + default_args: dict = None): + """ build preprocessor given model config dict + + Args: + cfg (:obj:`ConfigDict`): config dict for model object. + field_name (str, optional): application field name, refer to + :obj:`Fields` for more details + default_args (dict, optional): Default initialization arguments. + """ + return build_from_cfg(cfg, + PREPROCESSORS, + group_key=field_name, + default_args=default_args) diff --git a/modelscope/preprocessors/common.py b/modelscope/preprocessors/common.py new file mode 100644 index 0000000..dffe933 --- /dev/null +++ b/modelscope/preprocessors/common.py @@ -0,0 +1,140 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import time +from collections.abc import Sequence +from typing import Mapping + +import numpy as np +import torch + +from .builder import PREPROCESSORS, build_preprocessor + + +@PREPROCESSORS.register_module() +class Compose(object): + """Compose a data pipeline with a sequence of transforms. + Args: + transforms (list[dict | callable]): + Either config dicts of transforms or transform objects. + profiling (bool, optional): If set True, will profile and + print preprocess time for each step. 
+ """ + def __init__(self, transforms, field_name=None, profiling=False): + assert isinstance(transforms, Sequence) + self.profiling = profiling + self.transforms = [] + self.field_name = field_name + for transform in transforms: + if isinstance(transform, dict): + if self.field_name is None: + transform = build_preprocessor(transform, field_name) + else: + # if not found key in field_name, try field_name=None(default_group) + try: + transform = build_preprocessor(transform, field_name) + except KeyError: + transform = build_preprocessor(transform, None) + elif callable(transform): + pass + else: + raise TypeError('transform must be callable or a dict, but got' + f' {type(transform)}') + self.transforms.append(transform) + + def __call__(self, data): + for t in self.transforms: + if self.profiling: + start = time.time() + + data = t(data) + + if self.profiling: + print(f'{t} time {time.time()-start}') + + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += f'\n {t}' + format_string += '\n)' + return format_string + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. + """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not isinstance(data, str): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PREPROCESSORS.register_module() +class ToTensor(object): + """Convert target object to tensor. + + Args: + keys (Sequence[str]): Key of data to be converted to Tensor. + Only valid when data is type of `Mapping`. If `keys` is None, + all values of keys ​​will be converted to tensor by default. + """ + def __init__(self, keys=None): + self.keys = keys + + def __call__(self, data): + if isinstance(data, Mapping): + if self.keys is None: + self.keys = list(data.keys()) + + for key in self.keys: + data[key] = to_tensor(data[key]) + else: + data = to_tensor(data) + + return data + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PREPROCESSORS.register_module() +class Filter(object): + """This is usually the last stage of the dataloader transform. + Only data of reserved keys will be kept and passed directly to the model, others will be removed. + + Args: + keys (Sequence[str]): Keys of data to be reserved, others will be removed. + """ + def __init__(self, reserved_keys): + self.reserved_keys = reserved_keys + + def __call__(self, data): + assert isinstance(data, Mapping) + + reserved_data = {} + for key in self.reserved_keys: + reserved_data[key] = data[key] + + return reserved_data + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.reserved_keys})' diff --git a/modelscope/preprocessors/image.py b/modelscope/preprocessors/image.py new file mode 100644 index 0000000..7366bad --- /dev/null +++ b/modelscope/preprocessors/image.py @@ -0,0 +1,399 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import io +from typing import Any, Dict, Union + +import cv2 +import numpy as np +import PIL +from numpy import ndarray +from PIL import Image, ImageOps + +from modelscope.fileio import File +from modelscope.metainfo import Preprocessors +from modelscope.utils.constant import Fields +from modelscope.utils.type_assert import type_assert + +from .base import Preprocessor +from .builder import PREPROCESSORS + + +@PREPROCESSORS.register_module(Fields.cv, Preprocessors.load_image) +class LoadImage: + """Load an image from file or url. + Added or updated keys are "filename", "img", "img_shape", + "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), + "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + Args: + mode (str): See :ref:`PIL.Mode`. + """ + def __init__(self, mode='rgb'): + self.mode = mode.upper() + + def __call__(self, input: Union[str, Dict[str, str]]): + """Call functions to load image and get image meta information. + Args: + input (str or dict): input image path or input dict with + a key `filename`. + Returns: + dict: The dict contains loaded image. + """ + if isinstance(input, dict): + image_path_or_url = input['filename'] + else: + image_path_or_url = input + + bytes = File.read(image_path_or_url) + # TODO @wenmeng.zwm add opencv decode as optional + # we should also look at the input format which is the most commonly + # used in Mind' image related models + with io.BytesIO(bytes) as infile: + img = Image.open(infile) + img = ImageOps.exif_transpose(img) + img = img.convert(self.mode) + + results = { + 'filename': image_path_or_url, + 'img': img, + 'img_shape': (img.size[1], img.size[0], 3), + 'img_field': 'img', + } + return results + + def __repr__(self): + repr_str = f'{self.__class__.__name__}(' f'mode={self.mode})' + return repr_str + + @staticmethod + def convert_to_ndarray(input) -> ndarray: + if isinstance(input, str): + img = np.array(load_image(input)) + elif isinstance(input, PIL.Image.Image): + img = np.array(input.convert('RGB')) + elif isinstance(input, np.ndarray): + if len(input.shape) == 2: + input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) + img = input[:, :, ::-1] + else: + raise TypeError(f'input should be either str, PIL.Image,' + f' np.array, but got {type(input)}') + return img + + @staticmethod + def convert_to_img(input) -> ndarray: + if isinstance(input, str): + img = load_image(input) + elif isinstance(input, PIL.Image.Image): + img = input.convert('RGB') + elif isinstance(input, np.ndarray): + if len(input.shape) == 2: + img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) + img = input[:, :, ::-1] + img = Image.fromarray(img.astype('uint8')).convert('RGB') + else: + raise TypeError(f'input should be either str, PIL.Image,' + f' np.array, but got {type(input)}') + return img + + +def load_image(image_path_or_url: str) -> Image.Image: + """ simple interface to load an image from file or url + + Args: + image_path_or_url (str): image file path or http url + """ + loader = LoadImage() + return loader(image_path_or_url)['img'] + + +@PREPROCESSORS.register_module( + Fields.cv, module_name=Preprocessors.object_detection_tinynas_preprocessor) +class ObjectDetectionTinynasPreprocessor(Preprocessor): + def __init__(self, size_divisible=32, **kwargs): + """Preprocess the image. + + What this preprocessor will do: + 1. Transpose the image matrix to make the channel the first dim. + 2. If the size_divisible is gt than 0, it will be used to pad the image. + 3. Expand an extra image dim as dim 0. 
+ + Args: + size_divisible (int): The number will be used as a length unit to pad the image. + Formula: int(math.ceil(shape / size_divisible) * size_divisible) + Default 32. + """ + + super().__init__(**kwargs) + self.size_divisible = size_divisible + + @type_assert(object, object) + def __call__(self, data: np.ndarray) -> Dict[str, ndarray]: + """Preprocess the image. + + Args: + data: The input image with 3 dimensions. + + Returns: + The processed data in dict. + {'img': np.ndarray} + + """ + image = data.astype(np.float32) + image = image.transpose((2, 0, 1)) + shape = image.shape # c, h, w + if self.size_divisible > 0: + import math + stride = self.size_divisible + shape = list(shape) + shape[1] = int(math.ceil(shape[1] / stride) * stride) + shape[2] = int(math.ceil(shape[2] / stride) * stride) + shape = tuple(shape) + pad_img = np.zeros(shape).astype(np.float32) + pad_img[:, :image.shape[1], :image.shape[2]] = image + pad_img = np.expand_dims(pad_img, 0) + return {'img': pad_img} + + +@PREPROCESSORS.register_module( + Fields.cv, module_name=Preprocessors.image_color_enhance_preprocessor) +class ImageColorEnhanceFinetunePreprocessor(Preprocessor): + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data from the `model_dir` path + + Args: + model_dir (str): model path + """ + + super().__init__(*args, **kwargs) + self.model_dir: str = model_dir + + @type_assert(object, object) + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + """process the raw input data + + Args: + data (tuple): [sentence1, sentence2] + sentence1 (str): a sentence + Example: + 'you are so handsome.' + sentence2 (str): a sentence + Example: + 'you are so beautiful.' + Returns: + Dict[str, Any]: the preprocessed data + """ + + return data + + +@PREPROCESSORS.register_module( + Fields.cv, module_name=Preprocessors.image_denoise_preprocessor) +class ImageDenoisePreprocessor(Preprocessor): + def __init__(self, model_dir: str, *args, **kwargs): + """ + + Args: + model_dir (str): model path + """ + super().__init__(*args, **kwargs) + self.model_dir: str = model_dir + + from .common import Filter + + # TODO: `Filter` should be moved to configurarion file of each model + self._transforms = [Filter(reserved_keys=['input', 'target'])] + + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + """process the raw input data + + Args: + data Dict[str, Any] + + Returns: + Dict[str, Any]: the preprocessed data + """ + for t in self._transforms: + data = t(data) + + return data + + +@PREPROCESSORS.register_module( + Fields.cv, module_name=Preprocessors.image_deblur_preprocessor) +class ImageDeblurPreprocessor(Preprocessor): + def __init__(self, model_dir: str, *args, **kwargs): + """ + + Args: + model_dir (str): model path + """ + super().__init__(*args, **kwargs) + self.model_dir: str = model_dir + + from .common import Filter + + # TODO: `Filter` should be moved to configurarion file of each model + self._transforms = [Filter(reserved_keys=['input', 'target'])] + + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + """process the raw input data + + Args: + data Dict[str, Any] + + Returns: + Dict[str, Any]: the preprocessed data + """ + for t in self._transforms: + data = t(data) + + return data + + +@PREPROCESSORS.register_module( + Fields.cv, + module_name=Preprocessors.image_portrait_enhancement_preprocessor) +class ImagePortraitEnhancementPreprocessor(Preprocessor): + def __init__(self, model_dir: str, *args, **kwargs): + """ + + Args: + model_dir (str): model path + """ + 
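To make the size_divisible padding above concrete, a small sketch of the shape arithmetic (the input size is illustrative):

import math

import numpy as np

size_divisible = 32
img = np.random.rand(500, 353, 3).astype(np.float32)                      # h, w, c
chw = img.transpose((2, 0, 1))                                            # c, h, w
pad_h = int(math.ceil(chw.shape[1] / size_divisible) * size_divisible)    # 500 -> 512
pad_w = int(math.ceil(chw.shape[2] / size_divisible) * size_divisible)    # 353 -> 384
pad_img = np.zeros((chw.shape[0], pad_h, pad_w), dtype=np.float32)
pad_img[:, :chw.shape[1], :chw.shape[2]] = chw
batch = np.expand_dims(pad_img, 0)                                        # (1, 3, 512, 384)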
super().__init__(*args, **kwargs) + self.model_dir: str = model_dir + + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + """process the raw input data + + Args: + data Dict[str, Any] + + Returns: + Dict[str, Any]: the preprocessed data + """ + return data + + +@PREPROCESSORS.register_module( + Fields.cv, + module_name=Preprocessors.image_instance_segmentation_preprocessor) +class ImageInstanceSegmentationPreprocessor(Preprocessor): + def __init__(self, *args, **kwargs): + """image instance segmentation preprocessor in the fine-tune scenario + """ + + super().__init__(*args, **kwargs) + + self.training = kwargs.pop('training', True) + self.preprocessor_train_cfg = kwargs.pop('train', None) + self.preprocessor_test_cfg = kwargs.pop('val', None) + + self.train_transforms = [] + self.test_transforms = [] + + from modelscope.models.cv.image_instance_segmentation.datasets import \ + build_preprocess_transform + + if self.preprocessor_train_cfg is not None: + if isinstance(self.preprocessor_train_cfg, dict): + self.preprocessor_train_cfg = [self.preprocessor_train_cfg] + for cfg in self.preprocessor_train_cfg: + transform = build_preprocess_transform(cfg) + self.train_transforms.append(transform) + + if self.preprocessor_test_cfg is not None: + if isinstance(self.preprocessor_test_cfg, dict): + self.preprocessor_test_cfg = [self.preprocessor_test_cfg] + for cfg in self.preprocessor_test_cfg: + transform = build_preprocess_transform(cfg) + self.test_transforms.append(transform) + + def train(self): + self.training = True + return + + def eval(self): + self.training = False + return + + @type_assert(object, object) + def __call__(self, results: Dict[str, Any]): + """process the raw input data + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + Dict[str, Any] | None: the preprocessed data + """ + + if self.training: + transforms = self.train_transforms + else: + transforms = self.test_transforms + + for t in transforms: + + results = t(results) + + if results is None: + return None + + return results + + +@PREPROCESSORS.register_module( + Fields.cv, module_name=Preprocessors.video_summarization_preprocessor) +class VideoSummarizationPreprocessor(Preprocessor): + def __init__(self, model_dir: str, *args, **kwargs): + """ + + Args: + model_dir (str): model path + """ + super().__init__(*args, **kwargs) + self.model_dir: str = model_dir + + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + """process the raw input data + + Args: + data Dict[str, Any] + + Returns: + Dict[str, Any]: the preprocessed data + """ + return data + + +@PREPROCESSORS.register_module( + Fields.cv, + module_name=Preprocessors.image_classification_bypass_preprocessor) +class ImageClassificationBypassPreprocessor(Preprocessor): + def __init__(self, *args, **kwargs): + """image classification bypass preprocessor in the fine-tune scenario + """ + super().__init__(*args, **kwargs) + + self.training = kwargs.pop('training', True) + self.preprocessor_train_cfg = kwargs.pop('train', None) + self.preprocessor_val_cfg = kwargs.pop('val', None) + + def train(self): + self.training = True + return + + def eval(self): + self.training = False + return + + def __call__(self, results: Dict[str, Any]): + """process the raw input data + + Args: + results (dict): Result dict from loading pipeline. 
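A sketch of how the train/eval switch in ImageInstanceSegmentationPreprocessor above might be used; the transform config dicts and the `results` dict are hypothetical stand-ins for what the model's configuration file and loading pipeline would provide.

# hypothetical transform configs; real ones come from the model's configuration file
train_cfg = [{'type': 'SomeTrainTransform'}]
val_cfg = [{'type': 'SomeValTransform'}]

preprocessor = ImageInstanceSegmentationPreprocessor(train=train_cfg, val=val_cfg)
preprocessor.train()            # apply the transforms built from the 'train' config
out = preprocessor(results)
preprocessor.eval()             # switch to the transforms built from the 'val' config
out = preprocessor(results)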
+ + Returns: + Dict[str, Any] | None: the preprocessed data + """ + pass diff --git a/modelscope/trainers/__init__.py b/modelscope/trainers/__init__.py new file mode 100644 index 0000000..dd49e9d --- /dev/null +++ b/modelscope/trainers/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .builder import build_trainer + from .trainer import EpochBasedTrainer + +else: + _import_structure = { + 'base': ['DummyTrainer'], + 'builder': ['build_trainer'], + 'trainer': ['EpochBasedTrainer'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/base.py b/modelscope/trainers/base.py new file mode 100644 index 0000000..6420da7 --- /dev/null +++ b/modelscope/trainers/base.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import time +from abc import ABC, abstractmethod +from typing import Callable, Dict, List, Optional, Tuple, Union + +from modelscope.hub.check_model import check_local_model_is_latest +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.trainers.builder import TRAINERS +from modelscope.utils.config import Config +from modelscope.utils.constant import Invoke + +from .utils.log_buffer import LogBuffer + + +class BaseTrainer(ABC): + """ Base class for trainer which can not be instantiated. + + BaseTrainer defines necessary interface + and provide default implementation for basic initialization + such as parsing config file and parsing commandline args. + """ + def __init__(self, cfg_file: str, arg_parse_fn: Optional[Callable] = None): + """ Trainer basic init, should be called in derived class + + Args: + cfg_file: Path to configuration file. + arg_parse_fn: Same as ``parse_fn`` in :obj:`Config.to_args`. + """ + self.cfg = Config.from_file(cfg_file) + if arg_parse_fn: + self.args = self.cfg.to_args(arg_parse_fn) + else: + self.args = None + self.log_buffer = LogBuffer() + self.visualization_buffer = LogBuffer() + self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + + def get_or_download_model_dir(self, model, model_revision=None): + if os.path.exists(model): + model_cache_dir = model if os.path.isdir( + model) else os.path.dirname(model) + check_local_model_is_latest( + model_cache_dir, user_agent={Invoke.KEY: Invoke.LOCAL_TRAINER}) + else: + model_cache_dir = snapshot_download( + model, + revision=model_revision, + user_agent={Invoke.KEY: Invoke.TRAINER}) + return model_cache_dir + + @abstractmethod + def train(self, *args, **kwargs): + """ Train (and evaluate) process + + Train process should be implemented for specific task or + model, related parameters have been initialized in + ``BaseTrainer.__init__`` and should be used in this function + """ + pass + + @abstractmethod + def evaluate(self, checkpoint_path: str, *args, + **kwargs) -> Dict[str, float]: + """ Evaluation process + + Evaluation process should be implemented for specific task or + model, related parameters have been initialized in + ``BaseTrainer.__init__`` and should be used in this function + """ + pass + + +@TRAINERS.register_module(module_name='dummy') +class DummyTrainer(BaseTrainer): + def __init__(self, cfg_file: str, *args, **kwargs): + """ Dummy Trainer. + + Args: + cfg_file: Path to configuration file. 
+ """ + super().__init__(cfg_file) + + def train(self, *args, **kwargs): + """ Train (and evaluate) process + + Train process should be implemented for specific task or + model, related parameters have been initialized in + ``BaseTrainer.__init__`` and should be used in this function + """ + cfg = self.cfg.train + print(f'train cfg {cfg}') + + def evaluate(self, + checkpoint_path: str = None, + *args, + **kwargs) -> Dict[str, float]: + """ Evaluation process + + Evaluation process should be implemented for specific task or + model, related parameters have been initialized in + ``BaseTrainer.__init__`` and should be used in this function + """ + cfg = self.cfg.evaluation + print(f'eval cfg {cfg}') + print(f'checkpoint_path {checkpoint_path}') diff --git a/modelscope/trainers/builder.py b/modelscope/trainers/builder.py new file mode 100644 index 0000000..387024a --- /dev/null +++ b/modelscope/trainers/builder.py @@ -0,0 +1,19 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import Trainers +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import Tasks +from modelscope.utils.registry import Registry, build_from_cfg + +TRAINERS = Registry('trainers') + + +def build_trainer(name: str = Trainers.default, default_args: dict = None): + """ build trainer given a trainer name + + Args: + name (str, optional): Trainer name, if None, default trainer + will be used. + default_args (dict, optional): Default initialization arguments. + """ + cfg = dict(type=name) + return build_from_cfg(cfg, TRAINERS, default_args=default_args) diff --git a/modelscope/trainers/cv/__init__.py b/modelscope/trainers/cv/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/trainers/cv/face_detection_scrfd_trainer.py b/modelscope/trainers/cv/face_detection_scrfd_trainer.py new file mode 100644 index 0000000..b157fd8 --- /dev/null +++ b/modelscope/trainers/cv/face_detection_scrfd_trainer.py @@ -0,0 +1,152 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import copy +import os +import os.path as osp +import time +from typing import Callable, Dict, Optional + +from modelscope.metainfo import Trainers +from modelscope.trainers.base import BaseTrainer +from modelscope.trainers.builder import TRAINERS + + +@TRAINERS.register_module(module_name=Trainers.face_detection_scrfd) +class FaceDetectionScrfdTrainer(BaseTrainer): + def __init__(self, + cfg_file: str, + cfg_modify_fn: Optional[Callable] = None, + *args, + **kwargs): + """ High-level finetune api for SCRFD. + + Args: + cfg_file: Path to configuration file. + cfg_modify_fn: An input fn which is used to modify the cfg read out of the file. 
+ """ + import mmcv + from mmcv.runner import get_dist_info, init_dist + from mmcv.utils import get_git_hash + from mmdet.utils import collect_env, get_root_logger + from mmdet.apis import set_random_seed + from mmdet.models import build_detector + from mmdet.datasets import build_dataset + from mmdet import __version__ + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets import RetinaFaceDataset + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines import DefaultFormatBundleV2 + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines import LoadAnnotationsV2 + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines import RotateV2 + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines import RandomSquareCrop + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones import ResNetV1e + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads import SCRFDHead + from modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors import SCRFD + super().__init__(cfg_file) + cfg = self.cfg + if 'work_dir' in kwargs: + cfg.work_dir = kwargs['work_dir'] + else: + # use config filename as default work_dir if work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(cfg_file))[0]) + mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) + + if 'resume_from' in kwargs: # pretrain model for finetune + cfg.resume_from = kwargs['resume_from'] + cfg.device = 'cuda' + if 'gpu_ids' in kwargs: + cfg.gpu_ids = kwargs['gpu_ids'] + else: + cfg.gpu_ids = range(1) + labelfile_name = kwargs.pop('labelfile_name', 'labelv2.txt') + imgdir_name = kwargs.pop('imgdir_name', 'images/') + if 'train_root' in kwargs: + cfg.data.train.ann_file = kwargs['train_root'] + labelfile_name + cfg.data.train.img_prefix = kwargs['train_root'] + imgdir_name + if 'val_root' in kwargs: + cfg.data.val.ann_file = kwargs['val_root'] + labelfile_name + cfg.data.val.img_prefix = kwargs['val_root'] + imgdir_name + if 'total_epochs' in kwargs: + cfg.total_epochs = kwargs['total_epochs'] + if cfg_modify_fn is not None: + cfg = cfg_modify_fn(cfg) + if 'launcher' in kwargs: + distributed = True + init_dist(kwargs['launcher'], **cfg.dist_params) + # re-set gpu_ids with distributed training mode + _, world_size = get_dist_info() + cfg.gpu_ids = range(world_size) + else: + distributed = False + # no_validate=True will not evaluate checkpoint during training + cfg.no_validate = kwargs.get('no_validate', False) + # init the logger before other steps + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + log_file = osp.join(cfg.work_dir, f'{timestamp}.log') + logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) + # init the meta dict to record some important information such as + # environment info and seed, which will be logged + meta = dict() + # log env info + env_info_dict = collect_env() + env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) + dash_line = '-' * 60 + '\n' + logger.info('Environment info:\n' + dash_line + env_info + '\n' + + dash_line) + meta['env_info'] = env_info + meta['config'] = cfg.pretty_text + # log some basic info + logger.info(f'Distributed training: {distributed}') + logger.info(f'Config:\n{cfg.pretty_text}') + + # set random seeds + if 'seed' in kwargs: + cfg.seed = kwargs['seed'] + _deterministic = kwargs.get('deterministic', False) + logger.info(f'Set random seed to {kwargs["seed"]}, ' + 
f'deterministic: {_deterministic}') + set_random_seed(kwargs['seed'], deterministic=_deterministic) + else: + cfg.seed = None + meta['seed'] = cfg.seed + meta['exp_name'] = osp.basename(cfg_file) + + model = build_detector(cfg.model) + model.init_weights() + datasets = [build_dataset(cfg.data.train)] + if len(cfg.workflow) == 2: + val_dataset = copy.deepcopy(cfg.data.val) + val_dataset.pipeline = cfg.data.train.pipeline + datasets.append(build_dataset(val_dataset)) + if cfg.checkpoint_config is not None: + # save mmdet version, config file content and class names in + # checkpoints as meta data + cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + + get_git_hash()[:7], + CLASSES=datasets[0].CLASSES) + # add an attribute for visualization convenience + model.CLASSES = datasets[0].CLASSES + + self.cfg = cfg + self.datasets = datasets + self.model = model + self.distributed = distributed + self.timestamp = timestamp + self.meta = meta + self.logger = logger + + def train(self, *args, **kwargs): + from mmdet.apis import train_detector + train_detector(self.model, + self.datasets, + self.cfg, + distributed=self.distributed, + validate=(not self.cfg.no_validate), + timestamp=self.timestamp, + meta=self.meta) + + def evaluate(self, + checkpoint_path: str = None, + *args, + **kwargs) -> Dict[str, float]: + cfg = self.cfg.evaluation + logger.info(f'eval cfg {cfg}') + logger.info(f'checkpoint_path {checkpoint_path}') diff --git a/modelscope/trainers/default_config.py b/modelscope/trainers/default_config.py new file mode 100644 index 0000000..7619633 --- /dev/null +++ b/modelscope/trainers/default_config.py @@ -0,0 +1,97 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Dict, List, Optional + +from modelscope.utils.config import Config + +DEFAULT_CONFIG = Config({ + 'framework': 'pytorch', + 'train': { + 'work_dir': '/tmp', + 'max_epochs': 10, + 'dataloader': { + 'batch_size_per_gpu': 16, + 'workers_per_gpu': 0 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 1e-3 + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2 + }, + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 16, + 'workers_per_gpu': 0, + 'shuffle': False + }, + } +}) + +DEFAULT_HOOKS_CONFIG = { + 'train': { + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 10 + }, { + 'type': 'IterTimerHook' + }] + } +} + + +def merge_cfg(cfg: Config): + """Merge the default config into the input cfg. + + This function will pop the default CheckpointHook when the BestCkptSaverHook exists in the input cfg. + + Aegs: + cfg: The input cfg to be merged into. 
+ """ + cfg.merge_from_dict(DEFAULT_HOOKS_CONFIG, force=False) + + +def merge_hooks(cfg: Config) -> List[Dict]: + key_chain_hook_map = { + 'train.logging': 'TextLoggerHook', + 'train.checkpoint.period': 'CheckpointHook', + 'train.checkpoint.best': 'BestCkptSaverHook', + 'evaluation.period': 'EvaluationHook' + } + hooks = cfg.train.hooks.copy() + for key_chain, hook_type in key_chain_hook_map.items(): + hook = _key_chain_to_hook(cfg, key_chain, hook_type) + if hook is not None: + hooks.append(hook) + return hooks + + +def _key_chain_to_hook(cfg: Config, key_chain: str, + hook_type: str) -> Optional[Dict]: + if not _check_basic_hook(cfg, key_chain, hook_type): + return None + hook_params: Dict = cfg.safe_get(key_chain) + hook = {'type': hook_type} + hook.update(hook_params) + return hook + + +def _check_basic_hook(cfg: Config, key_chain: str, hook_type: str) -> bool: + if cfg.safe_get(key_chain) is None: + return False + hooks = list( + filter(lambda hook: hook['type'] == hook_type, cfg.train.hooks)) + assert len(hooks) == 0, f'The key_chain {key_chain} and the traditional hook ' \ + f'cannot exist at the same time, ' \ + f'please delete {hook_type} in the configuration file.' + return True diff --git a/modelscope/trainers/hooks/__init__.py b/modelscope/trainers/hooks/__init__.py new file mode 100644 index 0000000..51677f2 --- /dev/null +++ b/modelscope/trainers/hooks/__init__.py @@ -0,0 +1,46 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .builder import HOOKS, build_hook + from .checkpoint_hook import BestCkptSaverHook, CheckpointHook, LoadCheckpointHook + from .early_stop_hook import EarlyStopHook + from .compression import SparsityHook + from .evaluation_hook import EvaluationHook + from .hook import Hook + from .iter_timer_hook import IterTimerHook + from .logger import TensorboardHook, TextLoggerHook + from .lr_scheduler_hook import LrSchedulerHook + from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook, + OptimizerHook, TorchAMPOptimizerHook) + from .priority import Priority, get_priority + +else: + _import_structure = { + 'builder': ['HOOKS', 'build_hook'], + 'checkpoint_hook': + ['BestCkptSaverHook', 'CheckpointHook', 'LoadCheckpointHook'], + 'compression': ['SparsityHook'], + 'evaluation_hook': ['EvaluationHook'], + 'hook': ['Hook'], + 'iter_timer_hook': ['IterTimerHook'], + 'logger': ['TensorboardHook', 'TextLoggerHook'], + 'lr_scheduler_hook': ['LrSchedulerHook', 'NoneLrSchedulerHook'], + 'optimizer': [ + 'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook', + 'TorchAMPOptimizerHook' + ], + 'priority': ['Priority', 'get'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/builder.py b/modelscope/trainers/hooks/builder.py new file mode 100644 index 0000000..90e84c8 --- /dev/null +++ b/modelscope/trainers/hooks/builder.py @@ -0,0 +1,11 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from modelscope.utils.registry import Registry, build_from_cfg, default_group + +HOOKS = Registry('hooks') + + +def build_hook(cfg, default_args=None): + return build_from_cfg(cfg, + HOOKS, + group_key=default_group, + default_args=default_args) diff --git a/modelscope/trainers/hooks/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint_hook.py new file mode 100644 index 0000000..ccf16cf --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint_hook.py @@ -0,0 +1,506 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import importlib +import os +import random + +import numpy as np +import torch + +from modelscope import __version__ +from modelscope.metainfo import Hooks, Pipelines +from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint, + save_configuration) +from modelscope.utils.constant import LogKeys, ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.torch_utils import is_master + +from .builder import HOOKS +from .hook import Hook +from .priority import Priority + + +@HOOKS.register_module(module_name=Hooks.CheckpointHook) +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + interval (int): The frequency to save model. If `by_epoch=True`, + it means the number of epochs, else means the number of iterations + by_epoch (bool): Saving checkpoints by epoch or by iteration. + save_optimizer (bool): Whether to save optimizer state dict. Default: True. + save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir` + output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference. + Default 'output'. + save_last (bool): Whether to save the last checkpoint. Default: True. + max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. + If the number exceeding the limit, earlier checkpoints will be deleted first. 
+ """ + + PRIORITY = Priority.LOW + + def __init__(self, + interval=0, + by_epoch=True, + save_optimizer=True, + save_dir=None, + output_sub_dir=ModelFile.TRAIN_OUTPUT_DIR, + save_last=True, + max_checkpoint_num=None, + **kwargs): + self.interval = interval + self.by_epoch = by_epoch + self.save_optimizer = save_optimizer + self.save_dir = save_dir + self.output_sub_dir = output_sub_dir + self.save_last = save_last + self.rng_state = None + self.max_checkpoint_num = None + if max_checkpoint_num is not None: + self.max_checkpoint_num = max(int(max_checkpoint_num), 1) + self.history_checkpoints = [] + + def before_run(self, trainer): + if not self.save_dir: + self.save_dir = trainer.work_dir + + if not os.path.exists(self.save_dir) and is_master(): + os.makedirs(self.save_dir) + + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + if is_master(): + self.logger.info(f'Checkpoints will be saved to {self.save_dir}') + + def after_train_epoch(self, trainer): + if not self.by_epoch: + return + + if self._should_save(trainer): + if is_master() or trainer.cfg.model.get('model_parallel_size', + 1) != 1: + self.logger.info( + f'Saving checkpoint at {trainer.epoch + 1} epoch') + self._save_checkpoint(trainer) + + def _save_checkpoint(self, trainer): + if self.by_epoch: + cur_save_name = os.path.join( + self.save_dir, f'{LogKeys.EPOCH}_{trainer.epoch + 1}.pth') + else: + cur_save_name = os.path.join( + self.save_dir, f'{LogKeys.ITER}_{trainer.iter + 1}.pth') + cur_save_name = extend_save_name_for_parallel(cur_save_name) + + self.rng_state = { + 'random': random.getstate(), + 'numpy': np.random.get_state(), + 'cpu': torch.random.get_rng_state(), + 'cuda': torch.cuda.get_rng_state_all(), + } + meta = { + 'epoch': trainer.epoch, + 'iter': trainer.iter + 1, + 'inner_iter': trainer.inner_iter + 1, + 'rng_state': self.rng_state, + } + + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'state_dict') and getattr(hook, '_should_save', + True): + meta[f'{hook.__class__}-{i}'] = hook.state_dict() + i += 1 + + save_checkpoint(trainer.model, + cur_save_name, + trainer.optimizer, + trainer.lr_scheduler, + meta=meta) + if (self.is_last_epoch(trainer) + and self.by_epoch) or (self.is_last_iter(trainer) + and not self.by_epoch): + self._save_pretrained(trainer) + + self.history_checkpoints.append(cur_save_name) + self.remove_obsolete_checkpoints() + + def remove_obsolete_checkpoints(self): + if self.max_checkpoint_num is not None and \ + len(self.history_checkpoints) > self.max_checkpoint_num: + history_checkpoints = [ckpt for ckpt in self.history_checkpoints] + self.history_checkpoints.clear() + for i, ckpt_file in enumerate(history_checkpoints): + if i < len(history_checkpoints) - self.max_checkpoint_num: + if os.path.isfile(ckpt_file): + os.remove(ckpt_file) + else: + self.history_checkpoints.append(ckpt_file) + + def _save_pretrained(self, trainer): + output_dir = os.path.join(self.save_dir, self.output_sub_dir) + from modelscope.trainers.parallel.utils import is_parallel + + if is_parallel(trainer.model): + model = trainer.model.module + else: + model = trainer.model + + config = trainer.cfg.to_dict() + # override pipeline by tasks name after finetune done, + # avoid case like fill mask pipeline with a text cls task + if config['task'] in [ + getattr(Pipelines, attr) for attr in dir(Pipelines) + if not attr.startswith('__') + ]: + # TODO a temp fix to avoid pipeline_name and task mismatch + config['pipeline'] = {'type': config['task']} + + # remove 
parallel module that is not JSON serializable + if 'parallel' in config and 'module' in config['parallel']: + del config['parallel']['module'] + + class SaveConfig: + def __init__(self, output_dir, config): + self.output_dir = output_dir + self.config = config + + def __call__(self, _output_dir, _config): + self.config = _config + + def save_config(self): + save_configuration(self.output_dir, self.config) + + save_config_fn = SaveConfig(output_dir, config) + + if hasattr(model, 'save_pretrained'): + # Now support two binary files: pytorch_model.bin and pytorch_model.pt + default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE + if hasattr( + model, + 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir( + model.model_dir): + default_bin_file = ModelFile.TORCH_MODEL_FILE + model.save_pretrained(output_dir, + default_bin_file, + save_function=save_checkpoint, + config=save_config_fn.config, + save_config_function=save_config_fn, + with_meta=False) + if trainer.train_preprocessor is not None: + trainer.train_preprocessor.save_pretrained( + output_dir, + save_config_fn.config, + save_config_function=save_config_fn) + if trainer.eval_preprocessor is not None: + trainer.eval_preprocessor.save_pretrained( + output_dir, + save_config_fn.config, + save_config_function=save_config_fn) + save_config_fn.save_config() + + def after_train_iter(self, trainer): + if self.by_epoch: + return + + if self._should_save(trainer): + if is_master() or trainer.cfg.model.get('model_parallel_size', + 1) != 1: + self.logger.info( + f'Saving checkpoint at {trainer.iter + 1} iterations') + self._save_checkpoint(trainer) + + def _should_save(self, trainer): + if self.by_epoch: + check_last = self.is_last_epoch + check_frequency = self.every_n_epochs + else: + check_last = self.is_last_iter + check_frequency = self.every_n_iters + + if check_frequency(trainer, + self.interval) or (self.save_last + and check_last(trainer)): + return True + return False + + +@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) +class BestCkptSaverHook(CheckpointHook): + """ + Save best checkpoints hook. + + Args: + metric_key (str): Metric key to compare rule for best score. + rule (str): Comparison rule for best score. Support "max" and "min". If rule is "max", the checkpoint + at the maximum `metric_key` will be saved, If rule is "min", the checkpoint at the minimum `metric_key` + will be saved. + by_epoch (bool): Save best checkpoints by epoch or by iteration. + save_optimizer (bool): Whether to save optimizer state dict. Default: True. + save_dir (str): Output directory to save best checkpoint. + output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference. + Default 'output_best'. + restore_best (bool): Whether to restore the best checkpoint after training. + max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. + If the number exceeding the limit, checkpoints with worse metric will be deleted, which is judged by the + `rule` and `metric_key` arguments. + """ + + PRIORITY = Priority.LOW + rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y} + + def __init__(self, + metric_key, + rule='max', + by_epoch=True, + save_optimizer=True, + save_dir=None, + output_sub_dir=ModelFile.TRAIN_BEST_OUTPUT_DIR, + save_file_name=None, + restore_best=False, + max_checkpoint_num=1, + interval=0, + **kwargs): + assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.' 
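For context, a sketch of how the two checkpoint hooks above are typically declared in a trainer configuration; the values are illustrative and the argument names mirror the __init__ signatures.

train_hooks = [
    {'type': 'CheckpointHook', 'interval': 1, 'by_epoch': True, 'max_checkpoint_num': 3},
    {'type': 'BestCkptSaverHook', 'metric_key': 'accuracy', 'rule': 'max', 'restore_best': True},
]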
+ super().__init__( + interval=interval, + by_epoch=by_epoch, + save_optimizer=save_optimizer, + save_dir=save_dir, + output_sub_dir=output_sub_dir, + max_checkpoint_num=max_checkpoint_num, + **kwargs, + ) + self.metric_key = metric_key + self.rule = rule + self._best_metric = None + self._best_ckpt_file = None + self.save_file_name = save_file_name + self.restore_best = restore_best + self.history_checkpoints = set() + + def _should_save(self, trainer): + return self._is_best_metric(trainer.metric_values) + + def _is_best_metric(self, metric_values): + if metric_values is None: + return False + + if self.metric_key not in metric_values: + raise ValueError( + f'Not find metric_key: {self.metric_key} in {metric_values}') + + if self._best_metric is None: + self._best_metric = metric_values[self.metric_key] + return True + else: + compare_fn = self.rule_map[self.rule] + if compare_fn(metric_values[self.metric_key], self._best_metric): + self._best_metric = metric_values[self.metric_key] + return True + return False + + def _save_checkpoint(self, trainer): + cur_save_name = self.save_file_name + if cur_save_name is None: + if self.by_epoch: + cur_save_name = os.path.join( + self.save_dir, + f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}.pth' + ) + else: + cur_save_name = os.path.join( + self.save_dir, + f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}.pth' + ) + else: + if '.' not in cur_save_name: + cur_save_name = f'{cur_save_name}.pth' + cur_save_name = os.path.join(self.save_dir, cur_save_name) + cur_save_name = extend_save_name_for_parallel(cur_save_name) + + meta = { + 'epoch': trainer.epoch, + 'iter': trainer.iter + 1, + 'inner_iter': trainer.inner_iter + 1, + 'rng_state': self.rng_state, + } + + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'state_dict') and getattr(hook, '_should_save', + True): + meta[f'{hook.__class__}-{i}'] = hook.state_dict() + i += 1 + + if os.path.isfile(cur_save_name): + os.remove(cur_save_name) + save_checkpoint(trainer.model, cur_save_name, trainer.optimizer, + trainer.lr_scheduler, meta) + self._best_ckpt_file = cur_save_name + self._save_pretrained(trainer) + self.history_checkpoints.add(cur_save_name) + self.remove_obsolete_checkpoints() + + def remove_obsolete_checkpoints(self): + def extract_metric_from_filename(name1): + metric1 = float('.'.join( + name1.split(self.metric_key)[1].split('.')[:-1])) + if self.rule == 'max': + return -metric1 + else: + return metric1 + + if self.max_checkpoint_num is not None and \ + len(self.history_checkpoints) > self.max_checkpoint_num: + history_checkpoints = sorted(self.history_checkpoints, + key=extract_metric_from_filename) + self.history_checkpoints.clear() + for i, ckpt_file in enumerate(history_checkpoints): + if i < self.max_checkpoint_num: + self.history_checkpoints.add(ckpt_file) + elif os.path.isfile(ckpt_file): + os.remove(ckpt_file) + + def state_dict(self): + return { + 'best_metric': self._best_metric, + } + + def load_state_dict(self, state_dict): + if state_dict is not None and len(state_dict) > 0: + self._best_metric = state_dict.get('best_metric') + else: + self.logger.warning( + 'The state_dict is not available, the best metric value will be affected.' 
+ ) + + def after_run(self, trainer): + if self.restore_best: + if is_master(): + LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, + trainer) + + +@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook) +class LoadCheckpointHook(Hook): + """Load a checkpoint file at the beginning of training or evaluating. + + This hook does not need to be configured or saved in the config file. + User should use it by: + >>> trainer.train('some-checkpoint', load_all_state=True) + or + >>> trainer.evaluate('some-checkpoint') + instead. + + Args: + checkpoint_file (str): The checkpoint file to be loaded. + load_all_state (bool): Load all states(optimizer, epoch, lr_scheduler, random_state, etc.) when loading old + training state file or not. The model's state dict will only be loaded if False. + """ + + PRIORITY = Priority.HIGH + + _should_save = False + + def __init__( + self, + checkpoint_file=None, + load_all_state=True, + ): + self.checkpoint_file = checkpoint_file + self.rng_state = None + self.need_load_rng_state = False + self.load_all_state = load_all_state + + def before_run(self, trainer): + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + if self.checkpoint_file is not None and os.path.isfile( + self.checkpoint_file): + meta = self.load_checkpoint(self.checkpoint_file, trainer, + self.load_all_state) + self.rng_state = meta.get('rng_state') + self.need_load_rng_state = self.load_all_state + + def before_train_iter(self, trainer): + if self.need_load_rng_state: + if self.rng_state is not None: + random.setstate(self.rng_state['random']) + np.random.set_state(self.rng_state['numpy']) + torch.random.set_rng_state(self.rng_state['cpu']) + if torch.cuda.is_available(): + torch.cuda.random.set_rng_state_all(self.rng_state['cuda']) + self.need_load_rng_state = False + else: + self.logger.warning( + 'Random state cannot be found in checkpoint file, ' + 'this may cause a random data order or model initialization.' + ) + + @classmethod + def load_checkpoint(cls, filename, trainer, load_all_state=True): + from modelscope.trainers.parallel.utils import is_parallel + if is_parallel(trainer.model): + model = trainer.model.module + else: + model = trainer.model + meta = load_checkpoint( + filename, model, + getattr(trainer, 'optimizer', None) if load_all_state else None, + getattr(trainer, 'lr_scheduler', None) if load_all_state else None) + if load_all_state: + trainer._epoch = meta.get('epoch', trainer._epoch) + trainer._iter = meta.get('iter', trainer._iter) + trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter) + + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'load_state_dict') and getattr( + hook, '_should_save', True): + key = f'{hook.__class__}-{i}' + if key in meta: + hook.load_state_dict(meta.get(key, {})) + else: + trainer.logger.warning( + f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.' + ) + i += 1 + + version = meta.get('modelscope') + if version != __version__: + trainer.logger.warning( + f'The modelscope version of loaded checkpoint does not match the runtime version. 
' + f'The saved version: {version}, runtime version: {__version__}' + ) + trainer.logger.info( + f'Checkpoint {filename} saving time: {meta.get("time")}') + return meta + + +def extend_save_name_for_parallel(cur_save_name: str) -> str: + """Saving model parameters during tensor parallel training + requires each process to save its own parameters, + This function will try to get the local rank of the process + and extend save name for multi-slice model. + + Args: + cur_save_name (str): Original save name. + + Returns: + str: Extended save name. + """ + try: + mpu = importlib.import_module('megatron_util.mpu') + tp_world_size = mpu.get_tensor_model_parallel_world_size() + if tp_world_size == 1: + return cur_save_name + mp_rank = mpu.get_tensor_model_parallel_rank() + return cur_save_name.replace('.', '_mp_rank_{:02d}.'.format(mp_rank)) + except (ImportError, AssertionError): + return cur_save_name diff --git a/modelscope/trainers/hooks/clip_clamp_logit_scale_hook.py b/modelscope/trainers/hooks/clip_clamp_logit_scale_hook.py new file mode 100644 index 0000000..9677041 --- /dev/null +++ b/modelscope/trainers/hooks/clip_clamp_logit_scale_hook.py @@ -0,0 +1,18 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import torch + +from modelscope.metainfo import Hooks +from modelscope.trainers.multi_modal.clip.clip_trainer import CLIPTrainer + +from .builder import HOOKS +from .hook import Hook + + +@HOOKS.register_module(module_name=Hooks.ClipClampLogitScaleHook) +class ClipClampLogitScaleHook(Hook): + """ClipClampLogitScaleHook hook which performs clamp on CLIP logit scale parameter after update""" + def after_train_iter(self, trainer: CLIPTrainer): + """Called after every training iter to evaluate the results.""" + unwrapped_model = getattr(trainer.model, 'module', trainer.model) + logit_scale = unwrapped_model.clip_model.logit_scale + logit_scale.data = torch.clamp(logit_scale.data, 0, 4.6052) diff --git a/modelscope/trainers/hooks/compression/__init__.py b/modelscope/trainers/hooks/compression/__init__.py new file mode 100644 index 0000000..f755b2c --- /dev/null +++ b/modelscope/trainers/hooks/compression/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .sparsity_hook import SparsityHook + from .utils import SparseLinear, convert_sparse_network + +else: + _import_structure = { + 'sparsity_hook': ['SparsityHook'], + 'utils': ['convert_sparse_network', 'SparseLinear'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/compression/sparsity_hook.py b/modelscope/trainers/hooks/compression/sparsity_hook.py new file mode 100644 index 0000000..993488d --- /dev/null +++ b/modelscope/trainers/hooks/compression/sparsity_hook.py @@ -0,0 +1,131 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
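A small illustration of what extend_save_name_for_parallel above produces; the paths are placeholders and the behaviour depends on whether megatron_util is installed.

# tensor-parallel world size 1 (or megatron_util missing): name unchanged
extend_save_name_for_parallel('/tmp/work_dir/epoch_3.pth')   # -> '/tmp/work_dir/epoch_3.pth'

# tensor-parallel world size > 1: each rank writes its own slice, e.g. on rank 0
# '/tmp/work_dir/epoch_3.pth' -> '/tmp/work_dir/epoch_3_mp_rank_00.pth'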
+import os + +from modelscope import __version__ +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority +from modelscope.utils.checkpoint import save_checkpoint +from modelscope.utils.torch_utils import is_master + + +@HOOKS.register_module(module_name=Hooks.SparsityHook) +class SparsityHook(Hook): + + PRIORITY = Priority.HIGHEST + + def __init__(self, pruning_method, config={}, save_dir=None): + self.pruning_method = pruning_method + self.save_dir = save_dir + + self.compress_module = config.get('compress_module', []) + self.weight_rank = config.get('weight_rank', 8) + self.weight_beta = config.get('weight_beta', 1) + self.mask_rank = config.get('mask_rank', 8) + self.mask_alpha1 = config.get('mask_alpha1', 1) + self.mask_alpha2 = config.get('mask_alpha2', 1) + + self.step = 0 + self.total_step = 0 + self.frequency = config.get('frequency', 1) + self.initial_warmup = config.get('initial_warmup', 0.1) + self.final_warmup = config.get('final_warmup', 0.3) + self.initial_sparsity = config.get('initial_sparsity', 0.0) + self.final_sparsity = config.get('final_sparsity', 0.0) + + def before_run(self, trainer): + import torch + + from .utils import SparseLinear, convert_sparse_network + + if self.save_dir is None: + self.save_dir = trainer.work_dir + + if len(self.compress_module) == 0: + convert_sparse_network( + trainer.model, + pruning_method=self.pruning_method, + weight_rank=self.weight_rank, + weight_beta=self.weight_beta, + mask_rank=self.mask_rank, + mask_alpha1=self.mask_alpha1, + mask_alpha2=self.mask_alpha2, + logger=trainer.logger, + ) + else: + for cm in self.compress_module: + for name, module in trainer.model.named_modules(): + if name != cm: + continue + convert_sparse_network( + module, + pruning_method=self.pruning_method, + weight_rank=self.weight_rank, + weight_beta=self.weight_beta, + mask_rank=self.mask_rank, + mask_alpha1=self.mask_alpha1, + mask_alpha2=self.mask_alpha2, + logger=trainer.logger, + ) + + for i in range(len(trainer.optimizer.param_groups)): + new_train_params = [] + for param in trainer.optimizer.param_groups[i]['params']: + is_find = False + for name, module in trainer.model.named_modules(): + if isinstance(module, SparseLinear): + if torch.equal(param.half(), + module.weight.data.half()): + is_find = True + break + + if not is_find: + new_train_params.append(param) + + trainer.optimizer.param_groups[i]['params'] = new_train_params + + new_params = [] + for name, module in trainer.model.named_modules(): + if isinstance(module, SparseLinear): + new_params.extend( + [p for p in module.parameters() if p.requires_grad]) + + trainer.optimizer.add_param_group({'params': new_params}) + + self.total_step = trainer.iters_per_epoch * trainer._max_epochs + + def before_train_iter(self, trainer): + from .utils import schedule_sparsity_ratio, update_network_sparsity + + cur_sparsity = schedule_sparsity_ratio( + self.step, + self.total_step, + self.frequency, + self.initial_warmup, + self.final_warmup, + self.initial_sparsity, + self.final_sparsity, + ) + + update_network_sparsity(trainer.model, cur_sparsity) + + if is_master(): + trainer.logger.info( + f'Step[{self.step}/{self.total_step}] current sparsity ratio = {cur_sparsity}' + ) + + self.step += 1 + + def after_run(self, trainer): + from .utils import generate_sparse_model + + generate_sparse_model(trainer.model, logger=trainer.logger) + + self._save_checkpoint(trainer) + + 
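A sketch of the config dict the SparsityHook above consumes; the keys mirror the config.get calls in __init__ and the values are illustrative.

sparsity_cfg = {
    'compress_module': [],       # empty list: convert every supported Linear in the model
    'weight_rank': 8,
    'weight_beta': 1.0,
    'mask_rank': 8,
    'mask_alpha1': 1.0,
    'mask_alpha2': 1.0,
    'frequency': 10,
    'initial_warmup': 0.1,
    'final_warmup': 0.3,
    'initial_sparsity': 0.0,
    'final_sparsity': 0.9,
}
hook = SparsityHook(pruning_method='pst', config=sparsity_cfg)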
def _save_checkpoint(self, trainer): + if is_master(): + trainer.logger.info('Saving checkpoint at final compress') + cur_save_name = os.path.join(self.save_dir, 'compress_model.pth') + save_checkpoint(trainer.model, cur_save_name, trainer.optimizer) diff --git a/modelscope/trainers/hooks/compression/utils.py b/modelscope/trainers/hooks/compression/utils.py new file mode 100644 index 0000000..767a426 --- /dev/null +++ b/modelscope/trainers/hooks/compression/utils.py @@ -0,0 +1,210 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import torch +import torch.nn as nn + +from modelscope.utils.torch_utils import is_master + + +class SparseBinarizer(torch.autograd.Function): + @staticmethod + def forward(ctx, mask_scores, sparsity): + num_prune = int(mask_scores.numel() * sparsity) + prune_indices = torch.argsort(mask_scores.reshape(-1))[:num_prune] + mask = mask_scores.clone().fill_(1) + mask.reshape(-1)[prune_indices] = 0.0 + return mask + + @staticmethod + def backward(ctx, gradOutput): + return gradOutput, None + + +class SparseLinear(nn.Module): + """ + Fully Connected layer with on the fly adaptive mask. + """ + def __init__( + self, + module, + pruning_method='pst', + weight_rank=8, + weight_beta=1.0, + mask_rank=8, + mask_alpha1=1.0, + mask_alpha2=1.0, + ): + super(SparseLinear, self).__init__() + self.module = module + out_features = self.module.weight.shape[0] + in_features = self.module.weight.shape[1] + + self.weight = self.module.weight + self.module.weight = None + self.module._parameters.pop('weight') + + self.pruning_method = pruning_method + + self.cur_sparsity = 0.0 + + if self.pruning_method == 'pst': + self.weight_rank = weight_rank + self.weight_beta = weight_beta + self.mask_rank = mask_rank + self.mask_alpha1 = mask_alpha1 + self.mask_alpha2 = mask_alpha2 + + # create trainable params + self.weight_U = nn.Parameter( + torch.randn(out_features, + self.weight_rank).to(device=self.weight.device, + dtype=self.weight.dtype)) + self.weight_V = nn.Parameter( + torch.zeros(self.weight_rank, + in_features).to(device=self.weight.device, + dtype=self.weight.dtype)) + + self.mask_scores_A = nn.Parameter( + torch.randn(out_features, + self.mask_rank).to(device=self.weight.device, + dtype=self.weight.dtype)) + self.mask_scores_B = nn.Parameter( + torch.zeros(self.mask_rank, + in_features).to(device=self.weight.device, + dtype=self.weight.dtype)) + self.mask_scores_R = nn.Parameter( + torch.zeros(out_features).to(device=self.weight.device, + dtype=self.weight.dtype)) + self.mask_scores_C = nn.Parameter( + torch.zeros(in_features).to(device=self.weight.device, + dtype=self.weight.dtype)) + + self.weight.requires_grad = False + if self.module.bias is not None: + self.module.bias.requires_grad = False + + def forward(self, *inputs): + if self.pruning_method == 'pst': + weight = self.weight + self.weight_beta * self.weight_U @ self.weight_V + mask_scores = ( + weight.abs() + + self.mask_alpha1 * self.mask_scores_A @ self.mask_scores_B + + self.mask_alpha2 * (self.mask_scores_R.unsqueeze(1) + + self.mask_scores_C.unsqueeze(0))) + + mask = SparseBinarizer.apply(mask_scores, self.cur_sparsity) + masked_weight = mask * weight + + self.module.weight = masked_weight + return self.module(*inputs) + else: + return self.module(*inputs) + + def convert(self): + if self.pruning_method == 'pst': + weight = self.weight + self.weight_beta * self.weight_U @ self.weight_V + mask_scores = ( + weight.abs() + + self.mask_alpha1 * self.mask_scores_A @ self.mask_scores_B + + self.mask_alpha2 * 
(self.mask_scores_R.unsqueeze(1) + + self.mask_scores_C.unsqueeze(0))) + + mask = SparseBinarizer.apply(mask_scores, self.cur_sparsity) + + masked_weight = mask * weight + self.module.weight = nn.Parameter(masked_weight.data) + + +def _setattr(model, name, module): + name_list = name.split('.') + for name in name_list[:-1]: + model = getattr(model, name) + setattr(model, name_list[-1], module) + + +def convert_sparse_network( + model, + pruning_method, + weight_rank, + weight_beta, + mask_rank, + mask_alpha1, + mask_alpha2, + logger=None, +): + compress_module = [nn.Linear] + try: + from megatron_util import mpu + compress_module.extend( + [mpu.RowParallelLinear, mpu.ColumnParallelLinear]) + except ImportError: + pass + + for name, module in model.named_modules(): + if type(module) in compress_module: + new_module = SparseLinear( + module, + pruning_method, + weight_rank, + weight_beta, + mask_rank, + mask_alpha1, + mask_alpha2, + ) + + # replace original module by new sparse module + _setattr(model, name, new_module) + + if is_master(): + if logger: + logger.info(f'convert {name} to sparse module.') + else: + print(f'convert {name} to sparse module.') + + +def update_network_sparsity(model, sparsity): + for name, module in model.named_modules(): + if isinstance(module, SparseLinear): + module.cur_sparsity = sparsity + + +def schedule_sparsity_ratio( + step, + total_step, + frequency, + initial_warmup, + final_warmup, + initial_sparsity, + final_sparsity, +): + if step <= initial_warmup * total_step: + sparsity = initial_sparsity + elif step > (total_step - final_warmup * total_step): + sparsity = final_sparsity + else: + spars_warmup_steps = initial_warmup * total_step + spars_schedu_steps = (final_warmup + initial_warmup) * total_step + step = (step - spars_warmup_steps) // frequency * frequency + mul_coeff = 1 - step / (total_step - spars_schedu_steps) + sparsity = final_sparsity + (initial_sparsity - + final_sparsity) * (mul_coeff**3) + return sparsity + + +def generate_sparse_model(model, logger=None): + # generate sparse weight for saving + for name, module in model.named_modules(): + if isinstance(module, SparseLinear): + module.convert() + + _setattr(model, name, module.module) + + if is_master(): + if logger: + logger.info(f'convert {name} weight to sparse weight, \ + sparsity ratio={torch.mean(1.0*(module.module.weight==0)).item()}.' + ) + else: + print(f'convert {name} weight to sparse, \ + sparsity ratio={torch.mean(1.0*(module.module.weight==0)).item()}.' + ) diff --git a/modelscope/trainers/hooks/deepspeed_hook.py b/modelscope/trainers/hooks/deepspeed_hook.py new file mode 100644 index 0000000..4180b8c --- /dev/null +++ b/modelscope/trainers/hooks/deepspeed_hook.py @@ -0,0 +1,116 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
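To make the cubic sparsity schedule above concrete, a few sample values; total_step and the warmup fractions are illustrative.

from modelscope.trainers.hooks.compression.utils import schedule_sparsity_ratio

kwargs = dict(total_step=1000, frequency=1,
              initial_warmup=0.1, final_warmup=0.3,
              initial_sparsity=0.0, final_sparsity=0.9)
schedule_sparsity_ratio(50, **kwargs)     # 0.0     (inside the initial warmup)
schedule_sparsity_ratio(400, **kwargs)    # 0.7875  (cubic ramp: 0.9 - 0.9 * 0.5 ** 3)
schedule_sparsity_ratio(800, **kwargs)    # 0.9     (inside the final warmup)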
+import os +from types import MethodType + +import deepspeed +from megatron_util import mpu +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks import (BestCkptSaverHook, CheckpointHook, + LrSchedulerHook, NoneLrSchedulerHook, + NoneOptimizerHook, OptimizerHook) +from modelscope.trainers.lrscheduler.builder import build_lr_scheduler +from modelscope.utils.constant import LogKeys, ModelFile +from modelscope.utils.torch_utils import is_master + +from .builder import HOOKS +from .hook import Hook +from .priority import Priority + + +@HOOKS.register_module(module_name=Hooks.DeepspeedHook) +class DeepspeedHook(Hook): + PRIORITY = Priority.VERY_HIGH + + def __init__(self, + deepspeed_activation_checkpointing=True, + save_zero_checkpoint=False, + loss_key='loss'): + self.save_zero_checkpoint = save_zero_checkpoint + self.loss_key = loss_key + self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing + + def before_run(self, trainer): + # deepspeed init + args = trainer.cfg.train + args.deepspeed_config = os.path.join(trainer.model_dir, + args.deepspeed_config) + + trainer.model, _, _, _ = deepspeed.initialize( + model=trainer.model, + optimizer=trainer.optimizer, + args=args, + lr_scheduler=trainer.lr_scheduler, + mpu=mpu, + dist_init_required=False) + trainer.model.save_zero_checkpoint = self.save_zero_checkpoint + + if self.deepspeed_activation_checkpointing: + model = trainer.model + while hasattr(model, 'module'): + model = model.module + deepspeed.checkpointing.configure( + mpu, + deepspeed_config=args.deepspeed_config, + num_checkpoints=model.config.num_hidden_layers) + + mpu.checkpoint = deepspeed.checkpointing.checkpoint + mpu.get_cuda_rng_tracker = deepspeed.checkpointing.get_cuda_rng_tracker + mpu.model_parallel_cuda_manual_seed = deepspeed.checkpointing.model_parallel_cuda_manual_seed + + # modify hooks + for i, hook in enumerate(trainer._hooks): + # backward & step + if isinstance(hook, OptimizerHook): + trainer._hooks[i] = NoneOptimizerHook() + if isinstance(hook, LrSchedulerHook): + trainer._hooks[i] = NoneLrSchedulerHook() + + # save checkpoint + if isinstance(hook, CheckpointHook): + + def _save_checkpoint(self, trainer): + if self.by_epoch: + cur_save_dir = os.path.join( + self.save_dir, + f'{LogKeys.EPOCH}_{trainer.epoch + 1}') + else: + cur_save_dir = os.path.join( + self.save_dir, + f'{LogKeys.ITER}_{trainer.iter + 1}') + if (self.is_last_epoch(trainer) + and self.by_epoch) or (self.is_last_iter(trainer) + and not self.by_epoch): + cur_save_dir = os.path.join(self.save_dir, + ModelFile.TRAIN_OUTPUT_DIR) + trainer.model.save_checkpoint(cur_save_dir) + + trainer._hooks[i]._save_checkpoint = MethodType( + _save_checkpoint, trainer._hooks[i]) + + if isinstance(hook, BestCkptSaverHook): + + def _save_checkpoint(self, trainer): + if self.by_epoch: + cur_save_dir = os.path.join( + self.save_dir, + f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}' + ) + else: + cur_save_dir = os.path.join( + self.save_dir, + f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}.pth' + ) + trainer.model.save_checkpoint(cur_save_dir) + self._best_ckpt_file = cur_save_dir + + trainer._hooks[i]._save_checkpoint = MethodType( + _save_checkpoint, trainer._hooks[i]) + + def after_train_iter(self, trainer): + # The `trainer.model` here is actually a deepspeed engine object. 
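A sketch of how the hook above is typically enabled through the train section of the configuration; the deepspeed config file name is a placeholder.

train_cfg_fragment = {
    'deepspeed_config': 'ds_config.json',   # resolved relative to the model dir in before_run
    'hooks': [{
        'type': 'DeepspeedHook',
        'deepspeed_activation_checkpointing': True,
        'save_zero_checkpoint': False,
        'loss_key': 'loss',
    }],
}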
+ # backward step + loss = trainer.train_outputs[self.loss_key] + trainer.model.backward(loss) + + # update parameters + trainer.model.step() diff --git a/modelscope/trainers/hooks/early_stop_hook.py b/modelscope/trainers/hooks/early_stop_hook.py new file mode 100644 index 0000000..5222344 --- /dev/null +++ b/modelscope/trainers/hooks/early_stop_hook.py @@ -0,0 +1,110 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import numpy as np + +from modelscope.metainfo import Hooks +from modelscope.utils.logger import get_logger + +from .builder import HOOKS +from .hook import Hook +from .priority import Priority + + +@HOOKS.register_module(module_name=Hooks.EarlyStopHook) +class EarlyStopHook(Hook): + """Early stop when a specific metric stops improving. + + Args: + metric_key (str): Metric key to be monitored. + rule (str): Comparison rule for best score. Support "max" and "min". + If rule is "max", the training will stop when `metric_key` has stopped increaing. + If rule is "min", the training will stop when `metric_key` has stopped decreasing. + patience (int): Trainer will stop if the monitored metric did not improve for the last `patience` times. + min_delta (float): Minimum change in the monitored metric to quailfy as an improvement. + check_finite (bool): If true, stops training when the metric becomes NaN or infinite. + by_epoch (int): Saving checkpoints by epoch or by iteration. + interval (int): The frequency to trigger early stop check. If `by_epoch=True`, + it means the number of epochs, else means the number of iterations. + """ + + PRIORITY = Priority.VERY_LOW + rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y} + + def __init__(self, + metric_key: str, + rule: str = 'max', + patience: int = 3, + min_delta: float = 0.0, + check_finite: bool = True, + by_epoch: bool = True, + interval: int = 1): + self.metric_key = metric_key + self.rule = rule + self.patience = patience + self.min_delta = min_delta + self.check_finite = check_finite + self.by_epoch = by_epoch + self.interval = interval + + self.wait_count = 0 + self.best_score = float('inf') if rule == 'min' else -float('inf') + + def before_run(self, trainer): + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + def _should_stop(self, trainer): + metric_values = trainer.metric_values + + if metric_values is None: + return False + + if self.metric_key not in metric_values: + raise ValueError( + f'Metric not found: {self.metric_key} not in {metric_values}') + + should_stop = False + current_score = metric_values[self.metric_key] + if self.check_finite and not np.isfinite(current_score): + should_stop = True + self.logger.warning( + f'Metric {self.metric_key} = {current_score} is not finite. ' + f'Previous best metric: {self.best_score:.4f}.') + elif self.rule_map[self.rule](current_score - self.min_delta, + self.best_score): + self.best_score = current_score + self.wait_count = 0 + else: + self.wait_count += 1 + if self.wait_count >= self.patience: + should_stop = True + self.logger.info( + f'Metric {self.metric_key} did not improve in the last {self.wait_count} epochs or iterations. 
' + f'Best score: {self.best_score:.4f}.') + return should_stop + + def _stop_training(self, trainer): + self.logger.info('Early Stopping!') + trainer._stop_training = True + + def after_train_epoch(self, trainer): + if not self.by_epoch: + return + + if not self.every_n_epochs(trainer, self.interval): + return + + if self._should_stop(trainer): + self._stop_training(trainer) + + def after_train_iter(self, trainer): + if self.by_epoch: + return + + if not self.every_n_iters(trainer, self.interval): + return + + if self._should_stop(trainer): + self._stop_training(trainer) diff --git a/modelscope/trainers/hooks/evaluation_hook.py b/modelscope/trainers/hooks/evaluation_hook.py new file mode 100644 index 0000000..7a74f6a --- /dev/null +++ b/modelscope/trainers/hooks/evaluation_hook.py @@ -0,0 +1,82 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from collections import OrderedDict + +from modelscope.metainfo import Hooks + +from .builder import HOOKS +from .hook import Hook + + +@HOOKS.register_module(module_name=Hooks.EvaluationHook) +class EvaluationHook(Hook): + """ + Evaluation hook. + + Args: + interval (int): Evaluation interval. + by_epoch (bool): Evaluate by epoch or by iteration. + start_idx (int or None, optional): The epoch or iterations validation begins. + Default: None, validate every interval epochs/iterations from scratch. + """ + def __init__(self, interval=1, by_epoch=True, start_idx=None): + assert interval > 0, 'interval must be a positive number' + self.interval = interval + self.start_idx = start_idx + self.by_epoch = by_epoch + + def after_train_iter(self, trainer): + """Called after every training iter to evaluate the results.""" + if not self.by_epoch and self._should_evaluate(trainer): + self.do_evaluate(trainer) + + def after_train_epoch(self, trainer): + """Called after every training epoch to evaluate the results.""" + if self.by_epoch and self._should_evaluate(trainer): + self.do_evaluate(trainer) + + def add_visualization_info(self, trainer, results): + if trainer.visualization_buffer.output.get('eval_results', + None) is None: + trainer.visualization_buffer.output['eval_results'] = OrderedDict() + + trainer.visualization_buffer.output['eval_results'].update( + trainer.visualize(results)) + + def do_evaluate(self, trainer): + """Evaluate the results.""" + eval_res = trainer.evaluate() + for name, val in eval_res.items(): + trainer.log_buffer.output['evaluation/' + name] = val + + trainer.log_buffer.ready = True + + def _should_evaluate(self, trainer): + """Judge whether to perform evaluation. + + Here is the rule to judge whether to perform evaluation: + 1. It will not perform evaluation during the epoch/iteration interval, + which is determined by ``self.interval``. + 2. It will not perform evaluation if the ``start_idx`` is larger than + current epochs/iters. + 3. It will not perform evaluation when current epochs/iters is larger than + the ``start_idx`` but during epoch/iteration interval. + + Returns: + bool: The flag indicating whether to perform evaluation. 
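        For example, with ``by_epoch=True``, ``interval=2`` and ``start_idx=5``,
        evaluation is triggered after epochs 5, 7, 9, ... (epochs counted from 1).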
+ """ + if self.by_epoch: + current = trainer.epoch + check_time = self.every_n_epochs + else: + current = trainer.iter + check_time = self.every_n_iters + + if self.start_idx is None: + if not check_time(trainer, self.interval): + return False + elif (current + 1) < self.start_idx: + return False + else: + if (current + 1 - self.start_idx) % self.interval: + return False + return True diff --git a/modelscope/trainers/hooks/hook.py b/modelscope/trainers/hooks/hook.py new file mode 100644 index 0000000..1508fa9 --- /dev/null +++ b/modelscope/trainers/hooks/hook.py @@ -0,0 +1,224 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.utils.constant import TrainerStages +from modelscope.utils.import_utils import is_method_overridden + +from .priority import Priority + + +class Hook: + """ + The Hook base class of any modelscope trainer. You can build your own hook inherited from this class. + """ + + stages = (TrainerStages.before_run, TrainerStages.before_train_epoch, + TrainerStages.before_train_iter, TrainerStages.after_train_iter, + TrainerStages.after_train_epoch, TrainerStages.before_val_epoch, + TrainerStages.before_val_iter, TrainerStages.after_val_iter, + TrainerStages.after_val_epoch, TrainerStages.after_run) + + PRIORITY = Priority.NORMAL + + def before_run(self, trainer): + """ + Will be called before any loop begins. + Args: + trainer: The trainer instance. + + Returns: None + + """ + pass + + def after_run(self, trainer): + """ + Will be called after all loops end. + Args: + trainer: The trainer instance. + + Returns: None + + """ + pass + + def before_epoch(self, trainer): + """ + Will be called before every epoch begins. + Args: + trainer: The trainer instance. + + Returns: None + + """ + pass + + def after_epoch(self, trainer): + """ + Will be called after every epoch ends. + Args: + trainer: The trainer instance. + + Returns: None + + """ + pass + + def before_iter(self, trainer): + """ + Will be called before every loop begins. + Args: + trainer: The trainer instance. + + Returns: None + """ + pass + + def after_iter(self, trainer): + """ + Will be called after every loop ends. + Args: + trainer: The trainer instance. + + Returns: None + """ + pass + + def before_train_epoch(self, trainer): + """ + Will be called before every train epoch begins. Default call ``self.before_epoch`` + Args: + trainer: The trainer instance. + + Returns: None + + """ + self.before_epoch(trainer) + + def before_val_epoch(self, trainer): + """ + Will be called before every validation epoch begins. Default call ``self.before_epoch`` + Args: + trainer: The trainer instance. + + Returns: None + + """ + self.before_epoch(trainer) + + def after_train_epoch(self, trainer): + """ + Will be called after every train epoch ends. Default call ``self.after_epoch`` + Args: + trainer: The trainer instance. + + Returns: None + + """ + self.after_epoch(trainer) + + def after_val_epoch(self, trainer): + """ + Will be called after every validation epoch ends. Default call ``self.after_epoch`` + Args: + trainer: The trainer instance. + + Returns: None + + """ + self.after_epoch(trainer) + + def before_train_iter(self, trainer): + """ + Will be called before every train loop begins. Default call ``self.before_iter`` + Args: + trainer: The trainer instance. + + Returns: None + """ + self.before_iter(trainer) + + def before_val_iter(self, trainer): + """ + Will be called before every validation loop begins. 
Default call ``self.before_iter`` + Args: + trainer: The trainer instance. + + Returns: None + """ + self.before_iter(trainer) + + def after_train_iter(self, trainer): + """ + Will be called after every train loop ends. Default call ``self.after_iter`` + Args: + trainer: The trainer instance. + + Returns: None + """ + self.after_iter(trainer) + + def after_val_iter(self, trainer): + """ + Will be called after every validation loop ends. Default call ``self.after_iter`` + Args: + trainer: The trainer instance. + + Returns: None + """ + self.after_iter(trainer) + + def every_n_epochs(self, trainer, n): + """ + Whether to reach every ``n`` epochs + Returns: bool + """ + return (trainer.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + """ + Whether to reach every ``n`` iterations at every epoch + Returns: bool + """ + return (runner.inner_iter + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, trainer, n): + """ + Whether to reach every ``n`` iterations + Returns: bool + """ + return (trainer.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, trainer): + """ + Whether to reach the end of every epoch + Returns: bool + """ + return trainer.inner_iter + 1 == trainer.iters_per_epoch + + def is_last_epoch(self, trainer): + """ + Whether to reach the last epoch + Returns: bool + """ + return trainer.epoch + 1 == trainer.max_epochs + + def is_last_iter(self, trainer): + """ + Whether to reach the last iteration in the entire training process + Returns: bool + """ + return trainer.iter + 1 == trainer.max_iters + + def get_triggered_stages(self): + trigger_stages = set() + for stage in Hook.stages: + if is_method_overridden(stage, Hook, self): + trigger_stages.add(stage) + + return [stage for stage in Hook.stages if stage in trigger_stages] + + def state_dict(self): + return {} + + def load_state_dict(self, state_dict): + pass diff --git a/modelscope/trainers/hooks/iter_timer_hook.py b/modelscope/trainers/hooks/iter_timer_hook.py new file mode 100644 index 0000000..2f04e75 --- /dev/null +++ b/modelscope/trainers/hooks/iter_timer_hook.py @@ -0,0 +1,26 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import time + +from modelscope.metainfo import Hooks +from modelscope.utils.constant import LogKeys + +from .builder import HOOKS +from .hook import Hook +from .priority import Priority + + +@HOOKS.register_module(module_name=Hooks.IterTimerHook) +class IterTimerHook(Hook): + PRIORITY = Priority.LOW + + def before_epoch(self, trainer): + self.start_time = time.time() + + def before_iter(self, trainer): + trainer.log_buffer.update( + {LogKeys.DATA_LOAD_TIME: time.time() - self.start_time}) + + def after_iter(self, trainer): + trainer.log_buffer.update( + {LogKeys.ITER_TIME: time.time() - self.start_time}) + self.start_time = time.time() diff --git a/modelscope/trainers/hooks/logger/__init__.py b/modelscope/trainers/hooks/logger/__init__.py new file mode 100644 index 0000000..583cd32 --- /dev/null +++ b/modelscope/trainers/hooks/logger/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
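# Logger hooks are exposed lazily via ``LazyImportModule``: the ``base``,
# ``tensorboard_hook`` and ``text_logger_hook`` submodules are only imported
# on first attribute access rather than at package import time.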
+from typing import TYPE_CHECKING + +from modelscope.trainers.utils.log_buffer import LogBuffer +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .base import LoggerHook + from .tensorboard_hook import TensorboardHook + from .text_logger_hook import TextLoggerHook + +else: + _import_structure = { + 'base': ['LoggerHook'], + 'tensorboard_hook': ['TensorboardHook'], + 'text_logger_hook': ['TextLoggerHook'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/logger/base.py b/modelscope/trainers/hooks/logger/base.py new file mode 100644 index 0000000..684c4a8 --- /dev/null +++ b/modelscope/trainers/hooks/logger/base.py @@ -0,0 +1,129 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright (c) Alibaba, Inc. and its affiliates. +import numbers +from abc import ABCMeta, abstractmethod + +import numpy as np +import torch + +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority +from modelscope.utils.constant import ModeKeys + + +class LoggerHook(Hook): + """Base class for logger hooks. + + Args: + interval (int): Logging interval (every k iterations). It is interval of iterations even by_epoch is true. + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedtrainer is used. + """ + + __metaclass__ = ABCMeta + PRIORITY = Priority.VERY_LOW + + def __init__(self, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + self.interval = interval + self.ignore_last = ignore_last + self.reset_flag = reset_flag + self.by_epoch = by_epoch + + @abstractmethod + def log(self, trainer): + pass + + @staticmethod + def is_scalar(val, include_np=True, include_torch=True): + """Tell the input variable is a scalar or not. + + Args: + val: Input variable. + include_np (bool): Whether to treat 0-d np.ndarray as a scalar. + include_torch (bool): Whether to treat 0-d torch.Tensor as a scalar. + + Returns: + bool: True or False. 
+ """ + if isinstance(val, numbers.Number): + return True + elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: + return True + elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: + return True + else: + return False + + def fetch_tensor(self, trainer, n=0): + """Fetch latest n values or all values, process tensor type, convert to numpy for dump logs.""" + assert n >= 0 + for key in trainer.log_buffer.val_history: + values = trainer.log_buffer.val_history[key][-n:] + + for i, v in enumerate(values): + if isinstance(v, torch.Tensor): + values[i] = v.clone().detach().cpu().numpy() + + trainer.log_buffer.val_history[key][-n:] = values + + def get_epoch(self, trainer): + if trainer.mode in [ModeKeys.TRAIN, ModeKeys.EVAL]: + epoch = trainer.epoch + 1 + else: + raise ValueError( + f'trainer mode should be {ModeKeys.TRAIN} or {ModeKeys.EVAL}, ' + f'but got {trainer.mode}') + return epoch + + def get_iter(self, trainer, inner_iter=False): + """Get the current training iteration step.""" + if self.by_epoch and inner_iter: + current_iter = trainer.inner_iter + 1 + else: + current_iter = trainer.iter + 1 + return current_iter + + def before_run(self, trainer): + for hook in trainer.hooks[::-1]: + if isinstance(hook, LoggerHook): + hook.reset_flag = True + break + + def before_epoch(self, trainer): + trainer.log_buffer.clear() # clear logs of last epoch + + def after_train_iter(self, trainer): + if self.by_epoch and self.every_n_inner_iters(trainer, self.interval): + self.fetch_tensor(trainer, self.interval) + trainer.log_buffer.average(self.interval) + elif not self.by_epoch and self.every_n_iters(trainer, self.interval): + self.fetch_tensor(trainer, self.interval) + trainer.log_buffer.average(self.interval) + elif self.end_of_epoch(trainer) and not self.ignore_last: + # not precise but more stable + self.fetch_tensor(trainer, self.interval) + trainer.log_buffer.average(self.interval) + + if trainer.log_buffer.ready: + self.log(trainer) + if self.reset_flag: + trainer.log_buffer.clear_output() + + def after_train_epoch(self, trainer): + if trainer.log_buffer.ready: + self.log(trainer) + if self.reset_flag: + trainer.log_buffer.clear_output() + + def after_val_epoch(self, trainer): + self.fetch_tensor(trainer) + trainer.log_buffer.average() + self.log(trainer) + if self.reset_flag: + trainer.log_buffer.clear_output() diff --git a/modelscope/trainers/hooks/logger/tensorboard_hook.py b/modelscope/trainers/hooks/logger/tensorboard_hook.py new file mode 100644 index 0000000..df1dbda --- /dev/null +++ b/modelscope/trainers/hooks/logger/tensorboard_hook.py @@ -0,0 +1,115 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os + +import numpy as np +import torch + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.utils.constant import LogKeys +from modelscope.utils.torch_utils import master_only + +from .base import LoggerHook + + +@HOOKS.register_module(module_name=Hooks.TensorboardHook) +class TensorboardHook(LoggerHook): + """ + TensorBoard hook for visualization. + + Args: + out_dir: output directory to save tensorboard files + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedtrainer is used. 
+ skip_keys (list): list of keys which will not add to tensorboard + """ + def __init__(self, + out_dir=None, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True, + skip_keys=[LogKeys.ITER_TIME, LogKeys.DATA_LOAD_TIME]): + super(TensorboardHook, self).__init__(interval=interval, + ignore_last=ignore_last, + reset_flag=reset_flag, + by_epoch=by_epoch) + self.out_dir = out_dir + self.skip_keys = skip_keys + + @master_only + def before_run(self, trainer): + super(TensorboardHook, self).before_run(trainer) + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError as e: + raise ImportError( + e.msg + ' ' + 'Please pip install tensorboard by ``pip install future tensorboard`` ' + 'or upgrade version by ``pip install future tensorboard --upgrade``.' + ) + + if self.out_dir is None: + self.out_dir = os.path.join(trainer.work_dir, 'tensorboard_output') + trainer.logger.info( + f'tensorboard files will be saved to {self.out_dir}') + self.writer = SummaryWriter(self.out_dir) + + @master_only + def log(self, trainer): + if len(trainer.visualization_buffer.output) > 0: + self.visualization_log(trainer) + for key, val in trainer.log_buffer.output.items(): + if key in self.skip_keys: + continue + if isinstance(val, str): + self.writer.add_text(key, val, self.get_iter(trainer)) + elif self.is_scalar(val): + self.writer.add_scalar(key, val, self.get_iter(trainer)) + else: + pass + self.writer.flush() + + def visualization_log(self, trainer): + """ Images Visulization. + `visualization_buffer` is a dictionary containing: + images (list): list of visulaized images. + filenames (list of str, optional): image filenames. + """ + visual_results = trainer.visualization_buffer.output + for vis_key, vis_result in visual_results.items(): + images = vis_result.get('images', []) + filenames = vis_result.get('filenames', None) + if filenames is not None: + assert len(images) == len( + filenames + ), 'Output `images` and `filenames` must keep the same length!' + + for i, img in enumerate(images): + if isinstance(img, np.ndarray): + img = torch.from_numpy(img) + else: + assert isinstance( + img, torch.Tensor + ), f'Only support np.ndarray and torch.Tensor type! Got {type(img)} for img {filenames[i]}' + + default_name = 'image_%i' % i + filename = filenames[ + i] if filenames is not None else default_name + self.writer.add_image(f'{vis_key}/{filename}', + img, + self.get_iter(trainer), + dataformats='HWC') + + def after_train_iter(self, trainer): + super(TensorboardHook, self).after_train_iter(trainer) + # clear visualization_buffer after each iter to ensure that it is only written once, + # avoiding repeated writing of the same image buffer every self.interval + trainer.visualization_buffer.clear_output() + + @master_only + def after_run(self, trainer): + self.writer.close() diff --git a/modelscope/trainers/hooks/logger/text_logger_hook.py b/modelscope/trainers/hooks/logger/text_logger_hook.py new file mode 100644 index 0000000..23e536d --- /dev/null +++ b/modelscope/trainers/hooks/logger/text_logger_hook.py @@ -0,0 +1,188 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
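# In addition to console output, this hook appends one JSON object per log
# step to ``{trainer.timestamp}.log.json`` under ``out_dir``, e.g. (illustrative):
#   {"mode": "train", "epoch": 1, "iter": 10, "lr": 1e-05, "loss": 0.4321}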
+import datetime +import json +import os +import os.path as osp +from collections import OrderedDict + +import torch +from torch import distributed as dist + +from modelscope.metainfo import Hooks +from modelscope.outputs import OutputKeys +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.logger.base import LoggerHook +from modelscope.utils.constant import LogKeys, ModeKeys +from modelscope.utils.json_utils import EnhancedEncoder +from modelscope.utils.torch_utils import get_dist_info, is_master + + +@HOOKS.register_module(module_name=Hooks.TextLoggerHook) +class TextLoggerHook(LoggerHook): + """Logger hook in text, Output log to both console and local json file. + + Args: + by_epoch (bool, optional): Whether EpochBasedtrainer is used. + Default: True. + interval (int, optional): Logging interval (every k iterations). + It is interval of iterations even by_epoch is true. Default: 10. + ignore_last (bool, optional): Ignore the log of last iterations in each + epoch if less than :attr:`interval`. Default: True. + reset_flag (bool, optional): Whether to clear the output buffer after + logging. Default: False. + out_dir (str): The directory to save log. If is None, use `trainer.work_dir` + ignore_rounding_keys (`Union[str, List]`): The keys to ignore float rounding, default 'lr' + rounding_digits (`int`): The digits of rounding, exceeding parts will be ignored. + """ + def __init__(self, + by_epoch=True, + interval=10, + ignore_last=True, + reset_flag=False, + out_dir=None, + ignore_rounding_keys='lr', + rounding_digits=5): + super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.by_epoch = by_epoch + self.time_sec_tot = 0 + self.out_dir = out_dir + self._logged_keys = [] # store the key has been logged + if isinstance(ignore_rounding_keys, + str) or ignore_rounding_keys is None: + ignore_rounding_keys = [ignore_rounding_keys] + self.ignore_rounding_keys = ignore_rounding_keys + self.rounding_digits = rounding_digits + + def before_run(self, trainer): + super(TextLoggerHook, self).before_run(trainer) + + if self.out_dir is None: + self.out_dir = trainer.work_dir + + if not osp.exists(self.out_dir) and is_master(): + os.makedirs(self.out_dir) + + trainer.logger.info('Text logs will be saved to {}'.format( + self.out_dir)) + + self.start_iter = trainer.iter + self.json_log_path = osp.join(self.out_dir, + '{}.log.json'.format(trainer.timestamp)) + if hasattr(trainer, 'meta') and trainer.meta is not None: + self._dump_log(trainer.meta) + + def _get_max_memory(self, trainer): + device = torch.cuda.current_device() + mem = torch.cuda.max_memory_allocated(device=device) + mem_mb = torch.tensor([mem / (1024 * 1024)], + dtype=torch.int, + device=device) + _, world_size = get_dist_info() + if world_size > 1 and getattr(trainer.cfg.model, 'model_parallel_size', + 1) < world_size: + dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) + return mem_mb.item() + + def _log_info(self, log_dict, trainer): + lr_key = LogKeys.LR + epoch_key = LogKeys.EPOCH + iter_key = LogKeys.ITER + mode_key = LogKeys.MODE + iter_time_key = LogKeys.ITER_TIME + data_load_time_key = LogKeys.DATA_LOAD_TIME + eta_key = LogKeys.ETA + + if log_dict[mode_key] == ModeKeys.TRAIN: + if isinstance(log_dict[lr_key], dict): + lr_str = [] + for k, val in log_dict[lr_key].items(): + lr_str.append(f'{lr_key}_{k}: {val:.3e}') + lr_str = ' '.join(lr_str) + else: + lr_str = f'{lr_key}: {log_dict[lr_key]:.3e}' + + if self.by_epoch: + log_str = f'{epoch_key} 
[{log_dict[epoch_key]}][{log_dict[iter_key]}/{trainer.iters_per_epoch}]\t' + else: + log_str = f'{iter_key} [{log_dict[iter_key]}/{trainer.max_iters}]\t' + log_str += f'{lr_str}, ' + self._logged_keys.extend([lr_key, mode_key, iter_key, epoch_key]) + + if iter_time_key in log_dict.keys(): + self.time_sec_tot += (log_dict[iter_time_key] * self.interval) + time_sec_avg = self.time_sec_tot / (trainer.iter - + self.start_iter + 1) + eta_sec = time_sec_avg * (trainer.max_iters - trainer.iter - 1) + eta_str = str(datetime.timedelta(seconds=int(eta_sec))) + log_str += f'{eta_key}: {eta_str}, ' + log_str += f'{iter_time_key}: {log_dict[iter_time_key]:.3f}, ' + log_str += f'{data_load_time_key}: {log_dict[data_load_time_key]:.3f}, ' + self._logged_keys.extend([ + iter_time_key, + data_load_time_key, + ]) + else: + # val/test time + # here 1000 is the length of the val dataloader + # by epoch: epoch[val] [4][1000] + # by iter: iter[val] [1000] + if self.by_epoch: + log_str = f'{epoch_key}({log_dict[mode_key]}) [{log_dict[epoch_key]}][{log_dict[iter_key]}]\t' + else: + # TODO log_dict[iter_key] is not correct because of it's train_loop's inner_iter + log_str = f'{iter_key}({log_dict[mode_key]}) [{log_dict[iter_key]}]\t' + self._logged_keys.extend([mode_key, iter_key, epoch_key]) + + log_items = [] + for name, val in log_dict.items(): + if name in self._logged_keys: + continue + if isinstance(val, + float) and name not in self.ignore_rounding_keys: + val = f'{val:.4f}' + log_items.append(f'{name}: {val}') + log_str += ', '.join(log_items) + + if is_master(): + trainer.logger.info(log_str) + + def _dump_log(self, log_dict): + # dump log in json format + json_log = OrderedDict() + for k, v in log_dict.items(): + json_log[ + k] = v if k in self.ignore_rounding_keys else self._round_float( + v, self.rounding_digits) + + if is_master(): + with open(self.json_log_path, 'a+') as f: + json.dump(json_log, f, cls=EnhancedEncoder) + f.write('\n') + + def _round_float(self, items, ndigits=5): + if isinstance(items, list): + return [self._round_float(item, ndigits) for item in items] + elif isinstance(items, float): + return round(items, ndigits) + else: + return items + + def log(self, trainer): + cur_iter = self.get_iter( + trainer, inner_iter=True + ) if trainer.mode == ModeKeys.TRAIN else trainer.iters_per_epoch + + log_dict = OrderedDict(mode=trainer.mode, + epoch=self.get_epoch(trainer), + iter=cur_iter) + + # statistic memory + if torch.cuda.is_available(): + log_dict[LogKeys.MEMORY] = self._get_max_memory(trainer) + + log_dict = dict(log_dict, **trainer.log_buffer.output) + + self._log_info(log_dict, trainer) + self._dump_log(log_dict) + return log_dict diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py new file mode 100644 index 0000000..82bffa7 --- /dev/null +++ b/modelscope/trainers/hooks/lr_scheduler_hook.py @@ -0,0 +1,137 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import Hooks +from modelscope.trainers.lrscheduler.builder import build_lr_scheduler +from modelscope.utils.constant import LogKeys +from modelscope.utils.logger import get_logger +from modelscope.utils.torch_utils import is_master + +from .builder import HOOKS +from .hook import Hook +from .priority import Priority + + +@HOOKS.register_module(module_name=Hooks.LrSchedulerHook) +class LrSchedulerHook(Hook): + """Lr scheduler. 
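    A typical warmup config is a dict such as
    ``dict(type='LinearWarmup', warmup_iters=500, warmup_ratio=0.1)``; the
    ``type`` value is assumed to match the registered warmup scheduler name.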
+ + Args: + by_epoch (bool): Whether lr changes by epoch + warmup (dict): warm up config + """ + PRIORITY = Priority.VERY_HIGH + + def __init__(self, by_epoch=True, warmup=None) -> None: + super().__init__() + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_lr_scheduler = None + + def before_run(self, trainer): + if self.warmup is not None: + assert isinstance(self.warmup, dict) and 'type' in self.warmup + self.warmup_lr_scheduler = build_lr_scheduler( + cfg=self.warmup, + default_args={'base_scheduler': trainer.lr_scheduler}) + + def get_current_lr(self, trainer): + import torch + + if isinstance(trainer.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in trainer.optimizer.param_groups] + elif isinstance(trainer.optimizer, dict): + lr = dict() + for name, optim in trainer.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError( + 'lr is not applicable because optimizer does not exist.') + return lr + + def before_train_iter(self, trainer): + if not self.by_epoch and trainer.iter >= getattr( + trainer, 'cumulative_iters', 1): + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step() + else: + trainer.lr_scheduler.step() + trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) + + def before_train_epoch(self, trainer): + trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) + + def after_train_epoch(self, trainer): + if self.by_epoch: + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step() + else: + trainer.lr_scheduler.step() + + def _get_log_lr(self, trainer): + cur_lr = self.get_current_lr(trainer) + # only record lr of the first param group + if isinstance(cur_lr, list): + lr = cur_lr[0] + else: + assert isinstance(cur_lr, dict) + lr = {} + for k, lr_ in cur_lr.items(): + assert isinstance(lr_, list) + lr.update({k: lr_[0]}) + + return lr + + +@HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook) +class PlateauLrSchedulerHook(LrSchedulerHook): + """Lr scheduler hook for `ReduceLROnPlateau`. + + Args: + metric_key (str): Metric key returned from `trainer.metric_values`, + get the value of metric key and pass it to `ReduceLROnPlateau.step`. + by_epoch (bool): Whether lr changes by epoch + warmup (dict): warm up config + """ + PRIORITY = Priority.LOW # should be after EvaluationHook + + def __init__(self, metric_key, by_epoch=True, warmup=None) -> None: + super().__init__(by_epoch=by_epoch, warmup=warmup) + self.metric_key = metric_key + + def before_run(self, trainer): + super().before_run(trainer) + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + def after_train_epoch(self, trainer): + # adapt to evaluation intervel is greater than 1 + if trainer.metric_values is None: + if is_master(): + self.logger.warning( + f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !' 
+ ) + return + + metrics = trainer.metric_values[self.metric_key] + + if self.by_epoch: + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step(metrics=metrics) + else: + trainer.lr_scheduler.step(metrics=metrics) + + +@HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook) +class NoneLrSchedulerHook(LrSchedulerHook): + + PRIORITY = Priority.LOW # should be after EvaluationHook + + def __init__(self, by_epoch=True, warmup=None) -> None: + super().__init__(by_epoch=by_epoch, warmup=warmup) + + def before_run(self, trainer): + return + + def after_train_epoch(self, trainer): + return diff --git a/modelscope/trainers/hooks/optimizer/__init__.py b/modelscope/trainers/hooks/optimizer/__init__.py new file mode 100644 index 0000000..d7c8c86 --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .apex_optimizer_hook import ApexAMPOptimizerHook + from .base import OptimizerHook, NoneOptimizerHook + from .torch_optimizer_hook import TorchAMPOptimizerHook + +else: + _import_structure = { + 'apex_optimizer_hook': ['ApexAMPOptimizerHook'], + 'base': ['OptimizerHook', 'NoneOptimizerHook'], + 'torch_optimizer_hook': ['TorchAMPOptimizerHook'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py new file mode 100644 index 0000000..3702698 --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py @@ -0,0 +1,77 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS + +from .base import OptimizerHook + + +@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) +class ApexAMPOptimizerHook(OptimizerHook): + """ + Fp16 optimizer, if torch version is less than 1.6.0, + you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default + + Args: + cumulative_iters (int): interval of gradients accumulation. Default: 1 + grad_clip (dict): Default None. Containing keys: + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. + More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` + loss_keys (str | list): keys list of loss + opt_level (str): "O0" and "O3" are not true mixed precision, + but they are useful for establishing accuracy and speed baselines, respectively. + "O1" and "O2" are different implementations of mixed precision. + Try both, and see what gives the best speedup and accuracy for your model. + """ + def __init__(self, + cumulative_iters=1, + grad_clip=None, + loss_keys='loss', + opt_level='O1'): + + super(ApexAMPOptimizerHook, self).__init__(grad_clip=grad_clip, + loss_keys=loss_keys) + self.cumulative_iters = cumulative_iters + self.opt_level = opt_level + + try: + from apex import amp + except ImportError: + raise ValueError( + 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' 
+ ) + + def before_run(self, trainer): + from apex import amp + + logging.info('open fp16') + # TODO: fix it should initialze amp with model not wrapper by DDP or DP + if hasattr(trainer.model, 'module'): + trainer.model, trainer.optimizer = amp.initialize( + trainer.model.module, + trainer.optimizer, + opt_level=self.opt_level) + else: + trainer.model, trainer.optimizer = amp.initialize( + trainer.model, trainer.optimizer, opt_level=self.opt_level) + + trainer.optimizer.zero_grad() + + def after_train_iter(self, trainer): + for k in self.loss_keys: + trainer.train_outputs[k] /= self.cumulative_iters + + from apex import amp + for k in self.loss_keys: + with amp.scale_loss(trainer.train_outputs[k], + trainer.optimizer) as scaled_loss: + scaled_loss.backward() + + if self.every_n_iters(trainer, self.cumulative_iters): + if self.grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **self.grad_clip) + + trainer.optimizer.step() + trainer.optimizer.zero_grad() diff --git a/modelscope/trainers/hooks/optimizer/base.py b/modelscope/trainers/hooks/optimizer/base.py new file mode 100644 index 0000000..20c60bb --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/base.py @@ -0,0 +1,74 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +from torch.nn.utils import clip_grad + +from modelscope.metainfo import Hooks +from modelscope.outputs import OutputKeys +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority + + +@HOOKS.register_module(module_name=Hooks.OptimizerHook) +class OptimizerHook(Hook): + """Optimizer hook + + Args: + cumulative_iters (int): interval of gradients accumulation. Default: 1 + grad_clip (dict): Default None. Containing keys: + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. 
+ More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` + loss_keys (str | list): keys list of loss + """ + + PRIORITY = Priority.ABOVE_NORMAL + + def __init__(self, + cumulative_iters=1, + grad_clip=None, + loss_keys=OutputKeys.LOSS) -> None: + if isinstance(loss_keys, str): + loss_keys = [loss_keys] + assert isinstance(loss_keys, (tuple, list)) + self.loss_keys = loss_keys + self.cumulative_iters = cumulative_iters + self.grad_clip = grad_clip + + def clip_grads(self, params, **clip_args): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **clip_args) + + def before_run(self, trainer): + trainer.optimizer.zero_grad() + trainer.cumulative_iters = self.cumulative_iters + + def after_train_iter(self, trainer): + for k in self.loss_keys: + trainer.train_outputs[k] /= self.cumulative_iters + trainer.train_outputs[k].backward() + + if self.every_n_iters(trainer, self.cumulative_iters): + if self.grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **self.grad_clip) + + trainer.optimizer.step() + trainer.optimizer.zero_grad() + + +@HOOKS.register_module(module_name=Hooks.NoneOptimizerHook) +class NoneOptimizerHook(OptimizerHook): + def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): + + super(NoneOptimizerHook, self).__init__(grad_clip=grad_clip, + loss_keys=loss_keys) + self.cumulative_iters = cumulative_iters + + def before_run(self, trainer): + return + + def after_train_iter(self, trainer): + return diff --git a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py new file mode 100644 index 0000000..4e6b371 --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py @@ -0,0 +1,85 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS + +from .base import OptimizerHook + + +@HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook) +class TorchAMPOptimizerHook(OptimizerHook): + """ + Fp16 optimizer, if torch version is less than 1.6.0, + you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default + + Args: + cumulative_iters (int): interval of gradients accumulation. Default: 1 + grad_clip (dict): Default None. Containing keys: + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. + More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` + loss_keys (str | list): keys list of loss + loss_scale (float | dict): grade scale config. If loss_scale is a float, + static loss scaling will be used with the specified scale. + It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6, + we use official torch.cuda.amp.GradScaler. + please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. 
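        For example, ``loss_scale=512.0`` selects static scaling, while
        ``loss_scale=dict(init_scale=2.**16, growth_interval=2000)`` is passed
        through to ``torch.cuda.amp.GradScaler`` as keyword arguments.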
+ """ + def __init__(self, + cumulative_iters=1, + grad_clip=None, + loss_keys='loss', + loss_scale={}): + + super(TorchAMPOptimizerHook, self).__init__(grad_clip=grad_clip, + loss_keys=loss_keys) + self.cumulative_iters = cumulative_iters + self._scale_update_param = None + + from torch.cuda import amp + + if isinstance(loss_scale, float): + self._scale_update_param = loss_scale + self.scaler = amp.GradScaler(init_scale=loss_scale) + elif isinstance(loss_scale, dict): + self.scaler = amp.GradScaler(**loss_scale) + else: + raise ValueError( + '`loss_scale` type must be in [float, dict], but got {loss_scale}' + ) + + def before_run(self, trainer): + logging.info('open fp16') + trainer.optimizer.zero_grad() + + if hasattr(trainer.model, 'module'): + self._ori_model_forward = trainer.model.module.forward + self._model = trainer.model.module + else: + self._ori_model_forward = trainer.model.forward + self._model = trainer.model + + self.ori_model_forward = trainer.model.forward + + def before_train_iter(self, trainer): + from torch.cuda import amp + setattr(self._model, 'forward', amp.autocast()(self._model.forward)) + + def after_train_iter(self, trainer): + for k in self.loss_keys: + trainer.train_outputs[k] /= self.cumulative_iters + + for k in self.loss_keys: + self.scaler.scale(trainer.train_outputs[k]).backward() + + if self.every_n_iters(trainer, self.cumulative_iters): + self.scaler.unscale_(trainer.optimizer) + if self.grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **self.grad_clip) + + self.scaler.step(trainer.optimizer) + self.scaler.update(self._scale_update_param) + trainer.optimizer.zero_grad() + + setattr(self._model, 'forward', self._ori_model_forward) diff --git a/modelscope/trainers/hooks/priority.py b/modelscope/trainers/hooks/priority.py new file mode 100644 index 0000000..db74965 --- /dev/null +++ b/modelscope/trainers/hooks/priority.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright (c) Alibaba, Inc. and its affiliates. +from enum import Enum +from typing import Union + + +class Priority(Enum): + """Hook priority levels. + + +--------------+------------+ + | Level | Value | + +==============+============+ + | HIGHEST | 0 | + +--------------+------------+ + | VERY_HIGH | 10 | + +--------------+------------+ + | HIGH | 30 | + +--------------+------------+ + | ABOVE_NORMAL | 40 | + +--------------+------------+ + | NORMAL | 50 | + +--------------+------------+ + | BELOW_NORMAL | 60 | + +--------------+------------+ + | LOW | 70 | + +--------------+------------+ + | VERY_LOW | 90 | + +--------------+------------+ + | LOWEST | 100 | + +--------------+------------+ + """ + + HIGHEST = 0 + VERY_HIGH = 10 + HIGH = 30 + ABOVE_NORMAL = 40 + NORMAL = 50 + BELOW_NORMAL = 60 + LOW = 70 + VERY_LOW = 90 + LOWEST = 100 + + +def get_priority(priority: Union[int, str, Priority]) -> int: + """Get priority value. + + Args: + priority (int or str or :obj:`Priority`): Priority. + + Returns: + int: The priority value. 
+ """ + if isinstance(priority, int): + if priority < 0 or priority > 100: + raise ValueError('priority must be between 0 and 100') + return priority + elif isinstance(priority, Priority): + return priority.value + elif isinstance(priority, str): + return Priority[priority.upper()].value + else: + raise TypeError('priority must be an integer or Priority enum value') diff --git a/modelscope/trainers/lrscheduler/__init__.py b/modelscope/trainers/lrscheduler/__init__.py new file mode 100644 index 0000000..5457635 --- /dev/null +++ b/modelscope/trainers/lrscheduler/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .builder import LR_SCHEDULER, build_lr_scheduler + from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup + +else: + _import_structure = { + 'builder': ['LR_SCHEDULER', 'build_lr_scheduler'], + 'warmup': + ['BaseWarmup', 'ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/lrscheduler/builder.py b/modelscope/trainers/lrscheduler/builder.py new file mode 100644 index 0000000..d9e550f --- /dev/null +++ b/modelscope/trainers/lrscheduler/builder.py @@ -0,0 +1,50 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import inspect + +from modelscope.utils.config import ConfigDict +from modelscope.utils.registry import Registry, build_from_cfg, default_group + +LR_SCHEDULER = Registry('lr_scheduler') + + +def build_lr_scheduler(cfg: ConfigDict, default_args: dict = None): + """ build lr scheduler from given lr scheduler config dict + + Args: + cfg (:obj:`ConfigDict`): config dict for lr scheduler object. + default_args (dict, optional): Default initialization arguments. + """ + if cfg['type'].lower().endswith('warmup'): + # build warmup lr scheduler + if not hasattr(cfg, 'base_scheduler'): + if default_args is None or ('base_scheduler' not in default_args): + raise ValueError( + 'Must provide ``base_scheduler`` which is an instance of ``torch.optim.lr_scheduler._LRScheduler`` ' + 'for build warmup lr scheduler.') + else: + # build lr scheduler without warmup + if not hasattr(cfg, 'optimizer'): + if default_args is None or ('optimizer' not in default_args): + raise ValueError( + 'Must provide ``optimizer`` which is an instance of ``torch.optim.Optimizer`` ' + 'for build lr scheduler') + + return build_from_cfg(cfg, + LR_SCHEDULER, + group_key=default_group, + default_args=default_args) + + +def register_torch_lr_scheduler(): + from torch.optim import lr_scheduler + from torch.optim.lr_scheduler import _LRScheduler + + members = inspect.getmembers(lr_scheduler) + + for name, obj in members: + if (inspect.isclass(obj) and issubclass( + obj, _LRScheduler)) or name in ['ReduceLROnPlateau']: + LR_SCHEDULER.register_module(module_name=name, module_cls=obj) + + +register_torch_lr_scheduler() diff --git a/modelscope/trainers/lrscheduler/warmup/__init__.py b/modelscope/trainers/lrscheduler/warmup/__init__.py new file mode 100644 index 0000000..5263f2f --- /dev/null +++ b/modelscope/trainers/lrscheduler/warmup/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .base import BaseWarmup + from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup + +else: + _import_structure = { + 'base': ['BaseWarmup'], + 'warmup': ['ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/lrscheduler/warmup/base.py b/modelscope/trainers/lrscheduler/warmup/base.py new file mode 100644 index 0000000..b687268 --- /dev/null +++ b/modelscope/trainers/lrscheduler/warmup/base.py @@ -0,0 +1,75 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from torch.optim.lr_scheduler import _LRScheduler + + +class BaseWarmup(_LRScheduler): + """Base warmup scheduler + + Args: + base_scheduler (torch.optim._LRScheduler): an instance of torch.optim._LRScheduler type + warmup_iters (int | list): Warmup iterations + last_epoch (int): The index of last epoch. + """ + def __init__(self, + base_scheduler, + warmup_iters, + last_epoch=-1, + verbose=False): + self.base_scheduler = base_scheduler + self.warmup_iters = warmup_iters + optimizer = self.base_scheduler.optimizer + self._is_init_step = True + + super(BaseWarmup, self).__init__(optimizer, + last_epoch=last_epoch, + verbose=verbose) + + def get_lr(self): + return self.base_scheduler.get_lr() + + def state_dict(self): + return self.base_scheduler.state_dict() + + def load_state_dict(self, state_dict): + return self.base_scheduler.load_state_dict(state_dict) + + def scale(self): + """Scale the learning rates. + """ + scale_value = self.get_warmup_scale(self.base_scheduler._step_count - + 1) + if isinstance(scale_value, (int, float)): + scale_value = [ + scale_value for _ in range(len(self.optimizer.param_groups)) + ] + else: + assert isinstance( + scale_value, (list, tuple)), 'Only support list or tuple type!' + assert len(scale_value) == len( + self.optimizer.param_groups), ('Size mismatch {} != {}'.format( + len(scale_value), len(self.optimizer.param_groups))) + + for i, group in enumerate(self.optimizer.param_groups): + group['lr'] *= scale_value[i] + + def step(self, *args, **kwargs): + """ + When ``self.base_scheduler._step_count`` is less than ``self.warmup_iters``, multiply lr by scale + """ + if self.base_scheduler._step_count > self.warmup_iters: + return self.base_scheduler.step(*args, **kwargs) + + for group, lr in zip(self.optimizer.param_groups, self.base_lrs): + group['lr'] = lr + + # `base_scheduler` has done step() at init when build + if self._is_init_step: + self._is_init_step = False + else: + self.base_scheduler.step(*args, **kwargs) + + self.scale() + + @classmethod + def get_warmup_scale(self, cur_iter): + pass diff --git a/modelscope/trainers/lrscheduler/warmup/warmup.py b/modelscope/trainers/lrscheduler/warmup/warmup.py new file mode 100644 index 0000000..89bed91 --- /dev/null +++ b/modelscope/trainers/lrscheduler/warmup/warmup.py @@ -0,0 +1,81 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import LR_Schedulers +from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER + +from .base import BaseWarmup + + +@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ConstantWarmup) +class ConstantWarmup(BaseWarmup): + """Linear warmup scheduler. 
+ + Args: + base_scheduler (torch.optim._LRScheduler): an instance of torch.optim._LRScheduler type + warmup_ratio (float): Lr used at warmup stage equals to warmup_ratio * initial_lr + warmup_iters (int | list): Warmup iterations + last_epoch (int): The index of last epoch. + """ + def __init__(self, + base_scheduler, + warmup_iters, + warmup_ratio=0.1, + last_epoch=-1): + self.warmup_ratio = warmup_ratio + super(ConstantWarmup, self).__init__(base_scheduler, + warmup_iters=warmup_iters, + last_epoch=last_epoch) + + def get_warmup_scale(self, cur_iter): + if cur_iter >= self.warmup_iters: + return 1.0 + return self.warmup_ratio + + +@LR_SCHEDULER.register_module(module_name=LR_Schedulers.LinearWarmup) +class LinearWarmup(BaseWarmup): + """Linear warmup scheduler. + + Args: + base_scheduler (torch.optim._LRScheduler): an instance of torch.optim._LRScheduler type + warmup_iters (int | list): Warmup iterations + warmup_ratio (float): Lr used at the beginning of warmup equals to warmup_ratio * initial_lr + last_epoch (int): The index of last epoch. + """ + def __init__(self, + base_scheduler, + warmup_iters, + warmup_ratio=0.1, + last_epoch=-1): + self.warmup_ratio = warmup_ratio + super(LinearWarmup, self).__init__(base_scheduler, + warmup_iters=warmup_iters, + last_epoch=last_epoch) + + def get_warmup_scale(self, cur_iter): + k = (1 - cur_iter / self.warmup_iters) * (1 - self.warmup_ratio) + return 1 - k + + +@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ExponentialWarmup) +class ExponentialWarmup(BaseWarmup): + """Exponential warmup scheduler. + + Args: + base_scheduler (torch.optim._LRScheduler): an instance of torch.optim._LRScheduler type + warmup_iters (int | list): Warmup iterations + warmup_ratio (float): Lr used at the beginning of warmup equals to warmup_ratio * initial_lr + last_epoch (int): The index of last epoch. + """ + def __init__(self, + base_scheduler, + warmup_iters, + warmup_ratio=0.1, + last_epoch=-1): + self.warmup_ratio = warmup_ratio + super(ExponentialWarmup, self).__init__(base_scheduler, + warmup_iters=warmup_iters, + last_epoch=last_epoch) + + def get_warmup_scale(self, cur_iter): + k = self.warmup_ratio**(1 - cur_iter / self.warmup_iters) + return k diff --git a/modelscope/trainers/optimizer/__init__.py b/modelscope/trainers/optimizer/__init__.py new file mode 100644 index 0000000..9962c2c --- /dev/null +++ b/modelscope/trainers/optimizer/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .builder import OPTIMIZERS, build_optimizer +from .child_tuning_adamw_optimizer import ChildTuningAdamW + +__all__ = ['OPTIMIZERS', 'build_optimizer', 'ChildTuningAdamW'] diff --git a/modelscope/trainers/optimizer/builder.py b/modelscope/trainers/optimizer/builder.py new file mode 100644 index 0000000..acc1a51 --- /dev/null +++ b/modelscope/trainers/optimizer/builder.py @@ -0,0 +1,54 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import inspect +from typing import Iterable, Union + +import torch + +from modelscope.utils.config import ConfigDict +from modelscope.utils.registry import Registry, build_from_cfg, default_group + +OPTIMIZERS = Registry('optimizer') + + +def build_optimizer(model: Union[torch.nn.Module, + Iterable[torch.nn.parameter.Parameter]], + cfg: ConfigDict, + default_args: dict = None): + """ build optimizer from optimizer config dict + + Args: + model: A torch.nn.Module or an iterable of parameters. + cfg (:obj:`ConfigDict`): config dict for optimizer object. 
+ default_args (dict, optional): Default initialization arguments. + """ + if default_args is None: + default_args = {} + + if isinstance(model, torch.nn.Module) or (hasattr( + model, 'module') and isinstance(model.module, torch.nn.Module)): + if hasattr(model, 'module'): + model = model.module + + default_args['params'] = model.parameters() + else: + # Input is a iterable of parameters, this case fits for the scenario of user-defined parameter groups. + default_args['params'] = model + + return build_from_cfg(cfg, + OPTIMIZERS, + group_key=default_group, + default_args=default_args) + + +def register_torch_optimizers(): + for name, module in inspect.getmembers(torch.optim): + if name.startswith('__'): + continue + if inspect.isclass(module) and issubclass(module, + torch.optim.Optimizer): + OPTIMIZERS.register_module(default_group, + module_name=name, + module_cls=module) + + +register_torch_optimizers() diff --git a/modelscope/trainers/optimizer/child_tuning_adamw_optimizer.py b/modelscope/trainers/optimizer/child_tuning_adamw_optimizer.py new file mode 100644 index 0000000..652b808 --- /dev/null +++ b/modelscope/trainers/optimizer/child_tuning_adamw_optimizer.py @@ -0,0 +1,187 @@ +# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
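# Illustrative wiring of ChildTuning-D based on the helpers in this file
# (a sketch; ``model``, ``data_loader`` and ``forward_step`` are placeholders
# supplied by the caller, and ``forward_step(model, inputs)`` returns the loss):
#
#   optimizer = ChildTuningAdamW(model.parameters(), lr=2e-5,
#                                reserve_p=0.3, mode='ChildTuning-D')
#   mask = calculate_fisher(model, data_loader, forward_step, reserve_p=0.3)
#   optimizer.set_gradient_mask(mask)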
+import math +import types +from typing import Callable, Iterable, Tuple + +import numpy as np +import torch +from torch.distributions.bernoulli import Bernoulli +from torch.optim import Optimizer + +from modelscope.utils.logger import get_logger + +from .builder import OPTIMIZERS, default_group + +logger = get_logger() + +__all__ = ['calculate_fisher', 'ChildTuningAdamW'] + + +def calculate_fisher(model: torch.nn.Module, + data_loader, + forward_step, + reserve_p, + grad_clip=None): + + gradient_mask = dict() + model.train() + for name, params in model.named_parameters(): + if 'layer' in name: + gradient_mask[params] = params.new_zeros(params.size()) + + iters = len(data_loader) + for inputs in data_loader: + loss = forward_step(model, inputs) + loss.backward() + for name, params in model.named_parameters(): + if 'layer' in name: + if grad_clip is not None: + torch.nn.utils.clip_grad_norm_(params, **grad_clip) + gradient_mask[params] += (params.grad**2) / iters + model.zero_grad() + + logger.info('Calculate Fisher Information...') + + # Numpy + r = None + for k, v in gradient_mask.items(): + v = v.view(-1).cpu().numpy() + if r is None: + r = v + else: + r = np.append(r, v) + polar = np.percentile(r, (1 - reserve_p) * 100) + for k in gradient_mask: + gradient_mask[k] = gradient_mask[k] >= polar + print('Polar => {}'.format(polar)) + + # TODO: pytorch: torch.kthvalue + + return gradient_mask + + +@OPTIMIZERS.register_module(group_key=default_group, + module_name='ChildTuningAdamW') +class ChildTuningAdamW(Optimizer): + def __init__(self, + params: Iterable[torch.nn.parameter.Parameter], + lr: float = 1e-3, + betas: Tuple[float, float] = (0.9, 0.999), + eps: float = 1e-6, + weight_decay: float = 0.0, + correct_bias: bool = True, + reserve_p=1.0, + mode=None): + if lr < 0.0: + raise ValueError( + 'Invalid learning rate: {} - should be >= 0.0'.format(lr)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + 'Invalid beta parameter: {} - should be in [0.0, 1.0['.format( + betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + 'Invalid beta parameter: {} - should be in [0.0, 1.0['.format( + betas[1])) + if not 0.0 <= eps: + raise ValueError( + 'Invalid epsilon value: {} - should be >= 0.0'.format(eps)) + defaults = dict(lr=lr, + betas=betas, + eps=eps, + weight_decay=weight_decay, + correct_bias=correct_bias) + super().__init__(params, defaults) + + self.gradient_mask = None + self.reserve_p = reserve_p + self.mode = mode + + def set_gradient_mask(self, gradient_mask): + self.gradient_mask = gradient_mask + + def step(self, closure: Callable = None): + """ + Performs a single optimization step. + Arguments: + closure (:obj:`Callable`, `optional`): A closure that reevaluates the model and returns the loss. 
+ """ + loss = None + if closure is not None: + loss = closure() + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError( + 'Adam does not support sparse gradients, please consider SparseAdam instead' + ) + + # ChildTuning code + if self.mode is not None: + if self.mode == 'ChildTuning-D': + if p in self.gradient_mask: + grad *= self.gradient_mask[p] + else: + # ChildTuning-F + grad_mask = Bernoulli( + grad.new_full(size=grad.size(), + fill_value=self.reserve_p)) + grad *= grad_mask.sample() / self.reserve_p + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p.data) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + beta1, beta2 = group['betas'] + + state['step'] += 1 + + # Decay the first and second moment running average coefficient + # In-place operations to update the averages at the same time + exp_avg.mul_(beta1).add_(grad, alpha=1.0 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1.0 - beta2) + denom = exp_avg_sq.sqrt().add_(group['eps']) + + step_size = group['lr'] + if group['correct_bias']: # No bias correction for Bert + bias_correction1 = 1.0 - beta1**state['step'] + bias_correction2 = 1.0 - beta2**state['step'] + step_size = step_size * math.sqrt( + bias_correction2) / bias_correction1 + + p.data.addcdiv_(exp_avg, denom, value=-step_size) + + # Just adding the square of the weights to the loss function is *not* + # the correct way of using L2 regularization/weight decay with Adam, + # since that will interact with the m and v parameters in strange ways. + # + # Instead we want to decay the weights in a manner that doesn't interact + # with the m/v parameters. This is equivalent to adding the square + # of the weights to the loss with plain (non-momentum) SGD. + # Add weight decay at the end (fixed version) + p.data.add_(p.data, alpha=-group['lr'] * group['weight_decay']) + + return loss diff --git a/modelscope/trainers/parallel/__init__.py b/modelscope/trainers/parallel/__init__.py new file mode 100644 index 0000000..3d71a75 --- /dev/null +++ b/modelscope/trainers/parallel/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .builder import PARALLEL diff --git a/modelscope/trainers/parallel/builder.py b/modelscope/trainers/parallel/builder.py new file mode 100644 index 0000000..8a24369 --- /dev/null +++ b/modelscope/trainers/parallel/builder.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from torch.nn.parallel.distributed import DistributedDataParallel + +from modelscope.utils.config import ConfigDict +from modelscope.utils.registry import Registry, build_from_cfg + +PARALLEL = Registry('parallel') +PARALLEL.register_module(module_name='DistributedDataParallel', + module_cls=DistributedDataParallel) + + +def build_parallel(cfg: ConfigDict, default_args: dict = None): + """ build parallel + + Args: + cfg (:obj:`ConfigDict`): config dict for parallel object. + default_args (dict, optional): Default initialization arguments. 
+ """ + return build_from_cfg(cfg, PARALLEL, default_args=default_args) diff --git a/modelscope/trainers/parallel/utils.py b/modelscope/trainers/parallel/utils.py new file mode 100644 index 0000000..a80b43b --- /dev/null +++ b/modelscope/trainers/parallel/utils.py @@ -0,0 +1,23 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .builder import PARALLEL + + +def is_parallel(module): + """Check if a module is wrapped by parallel object. + + The following modules are regarded as parallel object: + - torch.nn.parallel.DataParallel + - torch.nn.parallel.distributed.DistributedDataParallel + You may add you own parallel object by registering it to `modelscope.parallel.PARALLEL`. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: True if the is wrapped by parallel object. + """ + module_wrappers = [] + for group, module_dict in PARALLEL.modules.items(): + module_wrappers.extend(list(module_dict.values())) + + return isinstance(module, tuple(module_wrappers)) diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py new file mode 100644 index 0000000..df36440 --- /dev/null +++ b/modelscope/trainers/trainer.py @@ -0,0 +1,1200 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import inspect +import json +import os +import time +from collections.abc import Mapping +from distutils.version import LooseVersion +from functools import partial +from typing import Callable, Dict, List, Optional, Tuple, Union + +import torch +from torch import distributed as dist +from torch import nn +from torch.utils.data import DataLoader, Dataset +from torch.utils.data.dataloader import default_collate +from torch.utils.data.distributed import DistributedSampler + +from modelscope.metainfo import Trainers +from modelscope.metrics import build_metric, task_default_metrics +from modelscope.metrics.prediction_saving_wrapper import \ + PredictionSavingWrapper +from modelscope.models.base import Model, TorchModel +from modelscope.msdatasets.ms_dataset import MsDataset +from modelscope.msdatasets.task_datasets.builder import build_task_dataset +from modelscope.msdatasets.task_datasets.torch_base_dataset import \ + TorchTaskDataset +from modelscope.outputs import ModelOutputBase +from modelscope.preprocessors.base import Preprocessor +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.priority import Priority, get_priority +from modelscope.trainers.lrscheduler.builder import build_lr_scheduler +from modelscope.trainers.optimizer.builder import build_optimizer +from modelscope.utils.config import Config, ConfigDict, JSONIteratorEncoder +from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigFields, + ConfigKeys, ModeKeys, ModelFile, + TrainerStages) +from modelscope.utils.data_utils import to_device +from modelscope.utils.device import create_device +from modelscope.utils.file_utils import func_receive_dict_inputs +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import build_from_cfg +from modelscope.utils.torch_utils import (broadcast, get_dist_info, + get_local_rank, init_dist, is_dist, + is_master, set_random_seed) + +from .base import BaseTrainer +from .builder import TRAINERS +from .default_config import merge_cfg, merge_hooks +from .hooks.hook import Hook +from .parallel.builder import build_parallel +from .parallel.utils import is_parallel + + +@TRAINERS.register_module(module_name=Trainers.default) +class EpochBasedTrainer(BaseTrainer): + """Epoch based Trainer, a training helper for 
PyTorch. + + Args: + cfg_file(str): The local config file. + model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir + or a model id. If model is None, build_model method will be called. + data_collator (`Callable`, *optional*): + The function to use to form a batch from a list of elements of `train_dataset` or `eval_dataset`. + train_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): + The dataset to use for training. + + Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a + distributed fashion, your iterable dataset should either use a internal attribute `generator` that is a + `torch.Generator` for the randomization that must be identical on all processes (and the Trainer will + manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally + sets the seed of the RNGs used. + eval_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): The dataset to use for evaluation. + preprocessor (:obj:`Preprocessor`, *optional*): The optional preprocessor. + NOTE: If the preprocessor has been called before the dataset fed into this trainer by user's custom code, + this parameter should be None, meanwhile remove the 'preprocessor' key from the cfg_file. + Else the preprocessor will be instantiated from the cfg_file or assigned from this parameter and + this preprocessing action will be executed every time the dataset's __getitem__ is called. + optimizers (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler._LRScheduler]`, *optional*): A tuple + containing the optimizer and the scheduler to use. + seed (int): The optional random seed for torch, cuda, numpy and random. + max_epochs: (int, optional): Total training epochs. + cfg_modify_fn: An input fn which is used to modify the cfg read out of the file. + remove_unused_data: Automatically remove unused data keys in mini-batches. + The remove action based on the `inspect` on the model's forward method, the removed columns will be + moved to the mini-batch's attributes. + + Examples of cfg_modify_fn: + >>> def cfg_modify_fn(cfg): + >>> cfg.preprocessor.first_sequence= 'text1' + >>> cfg.preprocessor.second_sequence='text2' + >>> return cfg + """ + def __init__( + self, + model: Optional[Union[TorchModel, nn.Module, str]] = None, + cfg_file: Optional[str] = None, + cfg_modify_fn: Optional[Callable] = None, + arg_parse_fn: Optional[Callable] = None, + data_collator: Optional[Union[Callable, Dict[str, + Callable]]] = None, + train_dataset: Optional[Union[MsDataset, Dataset]] = None, + eval_dataset: Optional[Union[MsDataset, Dataset]] = None, + preprocessor: Optional[Union[Preprocessor, + Dict[str, Preprocessor]]] = None, + optimizers: Tuple[torch.optim.Optimizer, + torch.optim.lr_scheduler._LRScheduler] = (None, + None), + model_revision: Optional[str] = DEFAULT_MODEL_REVISION, + seed: int = 42, + **kwargs): + + self._seed = seed + set_random_seed(self._seed) + self._metric_values = None + self.optimizers = optimizers + self._mode = ModeKeys.TRAIN + self._hooks: List[Hook] = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + self._stop_training = False + + if isinstance(model, str): + self.model_dir = self.get_or_download_model_dir( + model, model_revision) + if cfg_file is None: + cfg_file = os.path.join(self.model_dir, + ModelFile.CONFIGURATION) + else: + assert cfg_file is not None, 'Config file should not be None if model is not from pretrained!' 
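
A minimal construction sketch for this trainer (the model id, dataset objects and config fields below are placeholders; the class can also be built through the TRAINERS registry it is registered in):

def cfg_modify_fn(cfg):
    # tweak the configuration read from the model dir before training starts
    cfg.train.dataloader.batch_size_per_gpu = 16
    return cfg

trainer = EpochBasedTrainer(
    model='damo/some-model-id',      # or a TorchModel / nn.Module plus cfg_file
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    cfg_modify_fn=cfg_modify_fn,
    work_dir='./work_dir',           # kwargs override train.work_dir
    max_epochs=3,                    # kwargs override train.max_epochs
    seed=42)
trainer.train()
metrics = trainer.evaluate()
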
+ self.model_dir = os.path.dirname(cfg_file) + + super().__init__(cfg_file, arg_parse_fn) + self.cfg_modify_fn = cfg_modify_fn + # add default config + merge_cfg(self.cfg) + self.cfg = self.rebuild_config(self.cfg) + self.logger = get_logger(log_level=self.cfg.get('log_level', 'INFO')) + self.logger.info( + '==========================Training Config Start==========================' + ) + self.logger.info( + json.dumps(self.cfg._cfg_dict, indent=4, cls=JSONIteratorEncoder)) + self.logger.info( + '===========================Training Config End===========================' + ) + if 'cfg_options' in kwargs: + self.cfg.merge_from_dict(kwargs['cfg_options']) + + if isinstance(model, (TorchModel, nn.Module)): + self.model = model + else: + self.model = self.build_model() + + if 'work_dir' in kwargs: + self.work_dir = kwargs['work_dir'] + else: + self.work_dir = self.cfg.train.get('work_dir', './work_dir') + + self.train_preprocessor, self.eval_preprocessor = self.get_preprocessors( + preprocessor) + + self._dist = self.init_dist(kwargs.get('launcher')) + + if is_master() and not os.path.exists(self.work_dir): + os.makedirs(self.work_dir) + + self.device = self.get_device(kwargs.get('device')) + + # init logger after distribution init + log_file = os.path.join(self.work_dir, '{}.log'.format(self.timestamp)) + self.logger = get_logger(log_file=log_file, + log_level=self.cfg.get('log_level', 'INFO')) + + self.train_dataset = self.to_task_dataset( + train_dataset, + mode=ModeKeys.TRAIN, + task_data_config=self.cfg.safe_get('dataset.train'), + preprocessor=self.train_preprocessor, + **kwargs) + self.eval_dataset = self.to_task_dataset( + eval_dataset, + mode=ModeKeys.EVAL, + task_data_config=self.cfg.safe_get('dataset.val'), + preprocessor=self.eval_preprocessor, + **kwargs) + + self.train_data_collator, self.eval_data_collator = self.get_data_collator( + data_collator, + remove_unused_data=kwargs.get('remove_unused_data', False)) + self.metrics = self.get_metrics() + self._max_epochs = kwargs.get('max_epochs', + self.cfg.safe_get('train.max_epochs')) + assert self._max_epochs is not None, 'max_epochs should be provided by the init arguments or configured ' \ + 'in the `train.max_epochs` key in the configuration file.' + self._train_iters_per_epoch = kwargs.get( + 'train_iters_per_epoch', + self.cfg.safe_get('train.train_iters_per_epoch')) + self._eval_iters_per_epoch = kwargs.get( + 'val_iters_per_epoch', + self.cfg.safe_get('evaluation.val_iters_per_epoch')) + self.use_fp16 = kwargs.get('use_fp16', False) + # model placement + self.place_model() + + def place_model(self): + """Place model to device, or to DDP + """ + if self.device.type == 'cuda': + self.model.to(self.device) + if not is_parallel(self.model) and self._dist: + self.model = self.to_parallel(self.model) + + def get_data_collator(self, data_collator, remove_unused_data=False): + """Get the data collator for both training and evaluating. + + Args: + data_collator: The input data_collator param. + remove_unused_data: Remove the unused data with 'RemoveColumnsCollator'. + Returns: + The train_data_collator and eval_data_collator, can be None. 
+ """ + + train_data_collator, eval_data_collator = None, None + if isinstance(data_collator, Mapping): + if ConfigKeys.train in data_collator: + assert isinstance(data_collator[ConfigKeys.train], Callable) + train_data_collator = data_collator[ConfigKeys.train] + if ConfigKeys.val in data_collator: + assert isinstance(data_collator[ConfigKeys.val], Callable) + eval_data_collator = data_collator[ConfigKeys.val] + else: + collate_fn = default_collate if data_collator is None else data_collator + train_data_collator = collate_fn + eval_data_collator = collate_fn + + if remove_unused_data: + from modelscope.utils.data_collators import RemoveColumnsCollator + + def _set_signature_columns_if_needed(): + signature = inspect.signature(self.model.forward) + return list(signature.parameters.keys()) + + model_inputs = _set_signature_columns_if_needed() + train_data_collator = RemoveColumnsCollator( + train_data_collator, model_inputs) + eval_data_collator = RemoveColumnsCollator(eval_data_collator, + model_inputs) + return train_data_collator, eval_data_collator + + def init_dist(self, launcher=None): + """Init dist and returns the dist information. + + Args: + launcher: The launcher info. + + Returns: + _dist: If world_size is greater than 1. + """ + if launcher is not None: + init_dist(launcher) + + _, world_size = get_dist_info() + _dist = world_size > 1 + return _dist + + def get_device(self, device=None): + """Get the device information. + + Args: + device: The input device info. + + Returns: + device_name: The final device name. + """ + device_name = device if device is not None else 'gpu' + if is_dist(): + local_rank = get_local_rank() + device_name = f'cuda:{local_rank}' + + return create_device(device_name) + + def get_preprocessors(self, preprocessor): + """Get the preprocessors information. + + Args: + preprocessor: The input preprocessor info. + + Returns: + The train_preprocessor and eval_preprocessor, can be None. + """ + train_preprocessor = None + eval_preprocessor = None + if isinstance(preprocessor, Preprocessor): + train_preprocessor = preprocessor + eval_preprocessor = preprocessor + elif isinstance(preprocessor, Mapping): + if ConfigKeys.train in preprocessor: + assert isinstance(preprocessor[ConfigKeys.train], Callable) + train_preprocessor = preprocessor[ConfigKeys.train] + if ConfigKeys.val in preprocessor: + assert isinstance(preprocessor[ConfigKeys.val], Callable) + eval_preprocessor = preprocessor[ConfigKeys.val] + elif hasattr(self.cfg, ConfigFields.preprocessor + ) and self.cfg.preprocessor is not None: + train_preprocessor, eval_preprocessor = self.build_preprocessor() + + if train_preprocessor is not None: + train_preprocessor.mode = ModeKeys.TRAIN + if eval_preprocessor is not None: + eval_preprocessor.mode = ModeKeys.EVAL + return train_preprocessor, eval_preprocessor + + def rebuild_config(self, cfg: Config): + """A method used to rebuild the config, any subclass can override this method. 
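
Both get_data_collator and get_preprocessors above accept either a single callable or a mapping keyed by ConfigKeys, so training and evaluation can use different components; a sketch with placeholder callables:

from modelscope.utils.constant import ConfigKeys

trainer = EpochBasedTrainer(
    model='damo/some-model-id',
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    preprocessor={ConfigKeys.train: train_preprocessor,
                  ConfigKeys.val: eval_preprocessor},
    data_collator={ConfigKeys.train: train_collate_fn,
                   ConfigKeys.val: eval_collate_fn},
    # wrap both collators with RemoveColumnsCollator so that batch keys the
    # model's forward() does not accept are moved to batch attributes
    remove_unused_data=True)
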
+ + Returns: The rebuilt config + + """ + if hasattr(self, 'cfg_modify_fn') and self.cfg_modify_fn is not None: + cfg = self.cfg_modify_fn(cfg) + return cfg + + @property + def mode(self): + return self._mode + + @property + def hooks(self) -> List[Hook]: + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self) -> int: + """int: Current epoch.""" + return self._epoch + + @property + def iter(self) -> int: + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self) -> int: + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_epochs * self.iters_per_epoch + + @property + def iters_per_epoch(self): + """int: Total iterations of one epoch""" + def _get_data_len(data_loader): + try: + return len(data_loader) + except Exception as e: + self.logger.error(e) + raise ValueError( + 'Please implement ``__len__`` method for your dataset, ' + 'or add `train_iters_per_epoch` and `train_iters_per_epoch` ' + 'to your configuration file or kwargs') + + if self.mode == ModeKeys.TRAIN: + if self._train_iters_per_epoch is not None: + return self._train_iters_per_epoch + else: + return _get_data_len(self.train_dataloader) + elif self.mode == ModeKeys.EVAL: + if self._eval_iters_per_epoch is not None: + return self._eval_iters_per_epoch + else: + return _get_data_len(self.eval_dataloader) + + def to_task_dataset(self, + datasets: Union[Dataset, List[Dataset]], + mode: str, + task_data_config: Config = None, + preprocessor: Optional[Preprocessor] = None, + **kwargs): + """Build the task specific dataset processor for this trainer. + + Returns: The task dataset processor for the task. If no result for the very model-type and task, + the default TaskDataset will be returned. 
+ """ + try: + to_tensor = kwargs.get('to_tensor', True) + if not datasets: + return datasets + if isinstance(datasets, TorchTaskDataset): + return datasets + elif isinstance(datasets, MsDataset): + if task_data_config is None: + # adapt to some special models + task_data_config = ConfigDict( + type=self.cfg.model.type) if hasattr( + self.cfg, ConfigFields.model) else ConfigDict( + type=None) + task_data_config.update(dict(mode=mode)) + return datasets.to_torch_dataset( + task_data_config=task_data_config, + task_name=self.cfg.task, + preprocessors=preprocessor, + to_tensor=to_tensor) + elif isinstance(datasets, List) and isinstance( + datasets[0], MsDataset): + if task_data_config is None: + # adapt to some special models + task_data_config = ConfigDict( + type=self.cfg.model.type) if hasattr( + self.cfg, ConfigFields.model) else ConfigDict( + type=None) + task_data_config.update(dict(mode=mode)) + datasets = [ + d.to_torch_dataset(task_data_config=task_data_config, + task_name=self.cfg.task, + preprocessors=preprocessor, + to_tensor=to_tensor) for d in datasets + ] + cfg = ConfigDict(type=self.cfg.model.type, + mode=mode, + datasets=datasets) + task_dataset = build_task_dataset(cfg, self.cfg.task) + task_dataset.trainer = self + return task_dataset + else: + if task_data_config is None: + # adapt to some special models + task_data_config = {} + # avoid add no str value datasets, preprocessors in cfg + task_data_build_config = ConfigDict(type=self.cfg.model.type, + mode=mode, + datasets=datasets, + preprocessor=preprocessor) + task_data_build_config.update(task_data_config) + task_dataset = build_task_dataset(task_data_build_config, + self.cfg.task) + task_dataset.trainer = self + return task_dataset + except Exception: + if isinstance(datasets, (List, Tuple)) or preprocessor is not None: + task_dataset = TorchTaskDataset( + datasets, + mode=mode, + preprocessor=preprocessor, + **(dict(type=self.cfg.model.type) if hasattr( + self.cfg, 'model') else {})) + task_dataset.trainer = self + return task_dataset + else: + return datasets + + def build_preprocessor(self) -> Tuple[Preprocessor, Preprocessor]: + """Build train and eval preprocessor. + + User can override this method to implement custom logits. + + Returns: The train preprocessor and eval preprocessor instance. + + """ + train_preprocessor = Preprocessor.from_pretrained( + self.model_dir, + cfg_dict=self.cfg, + preprocessor_mode=ModeKeys.TRAIN) + eval_preprocessor = Preprocessor.from_pretrained( + self.model_dir, cfg_dict=self.cfg, preprocessor_mode=ModeKeys.EVAL) + return train_preprocessor, eval_preprocessor + + def get_metrics(self) -> List[Union[str, Dict]]: + """Get the metric class types. + + The first choice will be the metrics configured in the config file, if not found, the default metrics will be + used. + If no metrics is found and the eval dataset exists, the method will raise an error. + + Returns: The metric types. + + """ + metrics = self.cfg.evaluation.metrics if hasattr( + self.cfg, 'evaluation') and hasattr(self.cfg.evaluation, + 'metrics') else None + metrics = metrics if metrics is not None else task_default_metrics.get( + self.cfg.task) + if metrics is None and self.eval_dataset is not None: + raise ValueError( + f'Metrics are needed in evaluation, please try to either ' + f'add metrics in configuration.json or add the default metric for {self.cfg.task}.' 
+ ) + if isinstance(metrics, (str, Mapping)): + metrics = [metrics] + return metrics + + def set_checkpoint_file_to_hook(self, checkpoint_path, load_all_state): + if checkpoint_path is not None: + if os.path.isfile(checkpoint_path): + from modelscope.trainers.hooks import LoadCheckpointHook + load_ckpt_hooks = list( + filter(lambda hook: isinstance(hook, LoadCheckpointHook), + self.hooks)) + if len(load_ckpt_hooks) == 0: + load_ckpt_hook = LoadCheckpointHook() + self.hooks.append(load_ckpt_hook) + load_ckpt_hooks.append(load_ckpt_hook) + load_ckpt_hooks[0].checkpoint_file = checkpoint_path + load_ckpt_hooks[0].load_all_state = load_all_state + else: + self.logger.error( + f'No {checkpoint_path} found in local file system.') + + def train(self, + checkpoint_path=None, + load_all_state=True, + *args, + **kwargs): + """Start training. + + Args: + checkpoint_path(`str`, `optional`): The previous saving checkpoint to read, + usually it's a `some-file-name.pth` file generated by this trainer. + load_all_state(`bool`: `optional`): Load all state out of the `checkpoint_path` file, including the + state dict of model, optimizer, lr_scheduler, the random state and epoch/iter number. If False, only + the model's state dict will be read, and model will be trained again. + """ + + self._mode = ModeKeys.TRAIN + self.train_dataloader = self.get_train_dataloader() + self.data_loader = self.train_dataloader + self.register_optimizers_hook() + hooks = merge_hooks(self.cfg) + self.register_hook_from_cfg(hooks) + self.set_checkpoint_file_to_hook(checkpoint_path, load_all_state) + self.model.train() + + self.train_loop(self.train_dataloader) + + def predict(self, + predict_datasets: Union[Dataset, List[Dataset]], + saving_fn, + checkpoint_path=None): + """Start prediction. + + Args: + predict_datasets(Union[Dataset, List[Dataset]]): The datasets used to predict ground truth. + + saving_fn(`Callable`): The callable used to save the prediction values to files. Like: + >>> class SavingFn: + >>> def __init__(self): + >>> self.filename = '/tmp/results.txt' + >>> + >>> def __call__(self, inputs, outputs): + >>> import numpy as np + >>> ids = inputs.ids + >>> predictions = np.argmax(outputs['logits'].cpu().numpy(), axis=1) + >>> with open(self.filename, 'a') as f: + >>> for id, pred in zip(ids, predictions): + >>> f.writelines(f'{id}, {pred}') + + This saving_fn's result will not be collected to one file, Training with multiprocessing please + consider combining these files manually. + + checkpoint_path(`str`, `optional`): The previous saving checkpoint to read, + usually it's a `some-file-name.pth` file or a pure PyTorch `some-file.bin` file + generated by this trainer. + """ + + if checkpoint_path is not None and os.path.isfile(checkpoint_path): + from modelscope.trainers.hooks import LoadCheckpointHook + LoadCheckpointHook.load_checkpoint(checkpoint_path, self) + self.model.eval() + self._mode = ModeKeys.EVAL + predict_dataloader = self.get_predict_data_loader(predict_datasets) + metric_classes = [PredictionSavingWrapper(saving_fn=saving_fn)] + + for m in metric_classes: + m.trainer = self + + self.evaluation_loop(predict_dataloader, metric_classes) + + def evaluate(self, checkpoint_path=None, saving_fn=None, **kwargs): + """Start evaluation. + + Args: + checkpoint_path(`str`, `optional`): The previous saving checkpoint to read, + usually it's a `some-file-name.pth` file or a pure PyTorch `some-file.bin` file + generated by this trainer. 
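
The checkpoint_path/load_all_state pair above makes resuming explicit; a short sketch (the checkpoint file name is a placeholder):

# Resume a previous run: restores model, optimizer, lr_scheduler, RNG state and
# the epoch/iteration counters, then continues training.
trainer.train(checkpoint_path='./work_dir/epoch_2.pth', load_all_state=True)

# Warm-start from the weights only: counters and optimizer state start fresh.
trainer.train(checkpoint_path='./work_dir/epoch_2.pth', load_all_state=False)
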
+ + saving_fn(`Callable`): The callable used to save the prediction values to files. Like: + >>> class SavingFn: + >>> def __init__(self): + >>> self.filename = '/tmp/results.txt' + >>> + >>> def __call__(self, inputs, outputs): + >>> import numpy as np + >>> ids = inputs.ids + >>> predictions = np.argmax(outputs['logits'].cpu().numpy(), axis=1) + >>> with open(self.filename, 'a') as f: + >>> for id, pred in zip(ids, predictions): + >>> f.writelines(f'{id}, {pred}') + """ + if checkpoint_path is not None and os.path.isfile(checkpoint_path): + from modelscope.trainers.hooks import LoadCheckpointHook + LoadCheckpointHook.load_checkpoint(checkpoint_path, self) + self.model.eval() + self._mode = ModeKeys.EVAL + self.eval_dataloader = self.get_eval_data_loader() + self.data_loader = self.eval_dataloader + metric_classes = [build_metric(metric) for metric in self.metrics] + if saving_fn is not None: + metric_classes.append(PredictionSavingWrapper(saving_fn=saving_fn)) + for m in metric_classes: + m.trainer = self + + metric_values = self.evaluation_loop(self.eval_dataloader, + metric_classes) + + self._metric_values = metric_values + return metric_values + + @property + def metric_values(self): + return self._metric_values + + def build_model(self) -> Union[nn.Module, TorchModel]: + """ Instantiate a pytorch model and return. + + By default, we will create a model using config from configuration file. You can + override this method in a subclass. + + """ + model = Model.from_pretrained(self.model_dir, cfg_dict=self.cfg) + if not isinstance(model, nn.Module) and hasattr(model, 'model'): + return model.model + elif isinstance(model, nn.Module): + return model + + def to_parallel(self, model) -> Union[nn.Module, TorchModel]: + # config format to reserve custom ddp + if self.cfg.get('parallel', None) is not None: + self.cfg.parallel.update( + dict(module=model, device_ids=[torch.cuda.current_device()])) + return build_parallel(self.cfg.parallel) + + dp_cfg = dict(type='DistributedDataParallel', + module=model, + find_unused_parameters=True, + device_ids=[torch.cuda.current_device()]) + + return build_parallel(dp_cfg) + + def train_step(self, model, inputs): + """ Perform a training step on a batch of inputs. + + Subclass and override to inject custom behavior. + + Args: + model (`TorchModel`): The model to train. + inputs (`Dict[str, Union[torch.Tensor, Any]]`): + The inputs and targets of the model. + + The dictionary will be unpacked before being fed to the model. Most models expect the targets under the + argument `labels`. Check your model's documentation for all accepted arguments. + + Return: + `torch.Tensor`: The tensor with training loss on this batch. 
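
to_parallel above prefers a user-supplied `parallel` section of the configuration over the default DistributedDataParallel wrapper. A hedged sketch of registering a custom wrapper (the class and config section are illustrations only, and the decorator form of register_module is assumed to behave like the one used for ChildTuningAdamW above):

from torch.nn.parallel import DistributedDataParallel
from modelscope.trainers.parallel.builder import PARALLEL

# Hypothetical wrapper; it could override gradient bucketing, hooks, etc.
@PARALLEL.register_module(module_name='MyDistributedDataParallel')
class MyDistributedDataParallel(DistributedDataParallel):
    pass

# configuration.json sketch -- to_parallel() fills in module=model and
# device_ids=[torch.cuda.current_device()] before building this section:
#   "parallel": {
#       "type": "MyDistributedDataParallel",
#       "find_unused_parameters": true
#   }
# Registering the class also makes is_parallel() recognize wrapped models.
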
+ """ + # EvaluationHook will do evaluate and change mode to val, return to train mode + # TODO: find more pretty way to change mode + model.train() + self._mode = ModeKeys.TRAIN + # call model forward but not __call__ to skip postprocess + + if is_parallel(model): + receive_dict_inputs = func_receive_dict_inputs( + model.module.forward) + else: + receive_dict_inputs = func_receive_dict_inputs(model.forward) + + if isinstance(inputs, Mapping) and not receive_dict_inputs: + train_outputs = model.forward(**inputs) + else: + train_outputs = model.forward(inputs) + + if isinstance(train_outputs, ModelOutputBase): + train_outputs = train_outputs.to_dict() + if not isinstance(train_outputs, dict): + raise TypeError('"model.forward()" must return a dict') + + # add model output info to log + if 'log_vars' not in train_outputs: + default_keys_pattern = ['loss'] + match_keys = set([]) + for key_p in default_keys_pattern: + match_keys.update( + [key for key in train_outputs.keys() if key_p in key]) + + log_vars = {} + for key in match_keys: + value = train_outputs.get(key, None) + if value is not None: + if is_dist(): + value = value.data.clone().to('cuda') + dist.all_reduce(value.div_(dist.get_world_size())) + log_vars.update({key: value.item()}) + self.log_buffer.update(log_vars) + else: + self.log_buffer.update(train_outputs['log_vars']) + + self.train_outputs = train_outputs + + def prediction_step(self, model, inputs): + """ Perform forward step by `model` using `inputs`. + + Args: + model (`TorchModel`): The model to evaluate. + inputs (`Dict[str, Union[torch.Tensor, Any]]`): + The inputs and targets of the model. + + The dictionary will be unpacked before being fed to the model. Most models expect the targets under the + argument `labels`. Check your model's documentation for all accepted arguments. + prediction_loss_only (`bool`): + Whether or not to return the loss only. + ignore_keys (`Lst[str]`, *optional*): + A list of keys in the output of your model (if it is a dictionary) that should be ignored when + gathering predictions. + + Return: + Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, + logits and labels (each being optional). + """ + raise NotImplementedError + + def get_train_dataloader(self): + """ Builder torch dataloader for training. + + We provide a reasonable default that works well. If you want to use something else, you can change + the config for data.train in configuration file, or subclass and override this method + (or `get_train_dataloader` in a subclass. + """ + if self.train_dataset is None: + train_data = self.cfg.dataset.train + self.train_dataset = self.build_dataset( + train_data, + mode=ModeKeys.TRAIN, + preprocessor=self.train_preprocessor) + + data_loader = self._build_dataloader_with_dataset( + self.train_dataset, + dist=self._dist, + seed=self._seed, + collate_fn=self.train_data_collator, + **self.cfg.train.get('dataloader', {})) + return data_loader + + def get_eval_data_loader(self): + """ Builder torch dataloader for evaluation. + + We provide a reasonable default that works well. If you want to use something else, you can change + the config for dataset.eval in configuration file, or subclass and override this method in a subclass. 
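
train_step above unpacks a mapping batch into model.forward (unless the forward accepts a single dict) and requires a dict back; any key containing 'loss' is all-reduced across ranks and logged, or an explicit 'log_vars' dict of scalars can be returned instead. A toy model satisfying this contract (names and sizes are placeholders):

import torch
from torch import nn

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(16, 2)

    def forward(self, features, labels):
        # `features` and `labels` must match the keys produced by the data collator
        logits = self.linear(features)
        loss = nn.functional.cross_entropy(logits, labels)
        # 'loss' is picked up automatically; alternatively return
        # {'logits': logits, 'loss': loss, 'log_vars': {'loss': loss.item()}}
        return {'logits': logits, 'loss': loss}
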
+ pass + """ + if self.eval_dataset is None: + val_data = self.cfg.dataset.val + self.eval_dataset = self.build_dataset( + val_data, + mode=ModeKeys.EVAL, + preprocessor=self.eval_preprocessor) + + default_config = {'shuffle': False} + default_config.update(self.cfg.evaluation.get('dataloader', {})) + data_loader = self._build_dataloader_with_dataset( + self.eval_dataset, + dist=self._dist, + seed=self._seed, + collate_fn=self.eval_data_collator, + **default_config) + return data_loader + + def get_predict_data_loader(self, predict_datasets: Union[Dataset, + List[Dataset]]): + """ Builder torch dataloader for prediction with the config of evaluation. + + Args: + predict_datasets(Union[Dataset, List[Dataset]]): The datasets used to predict ground truth. + """ + dataset = self.to_task_dataset(predict_datasets, + mode=ModeKeys.EVAL, + preprocessor=self.eval_preprocessor) + + default_config = {'shuffle': False} + default_config.update(self.cfg.evaluation.get('dataloader', {})) + data_loader = self._build_dataloader_with_dataset( + dataset, + dist=self._dist, + seed=self._seed, + collate_fn=self.eval_data_collator, + **default_config) + return data_loader + + def build_dataset(self, data_cfg, mode, preprocessor=None): + """ Build torch dataset object using data config + """ + # TODO: support MsDataset load for cv + if hasattr(data_cfg, 'name'): + dataset_name = data_cfg.pop('name') + dataset = MsDataset.load( + dataset_name=dataset_name, + **data_cfg, + ) + cfg = ConfigDict(type=self.cfg.model.type, mode=mode) + torch_dataset = dataset.to_torch_dataset( + task_data_config=cfg, + task_name=self.cfg.task, + preprocessors=preprocessor) + else: + torch_dataset = build_task_dataset(data_cfg, self.cfg.task) + dataset = self.to_task_dataset(torch_dataset, mode) + return dataset + + def build_optimizer(self, cfg: ConfigDict, default_args: dict = None): + try: + return build_optimizer(self.model, + cfg=cfg, + default_args=default_args) + except KeyError as e: + self.logger.error( + f'Build optimizer error, the optimizer {cfg} is a torch native component, ' + f'please check if your torch with version: {torch.__version__} matches the config.' + ) + raise e + + def build_lr_scheduler(self, cfg: ConfigDict, default_args: dict = None): + try: + return build_lr_scheduler(cfg=cfg, default_args=default_args) + except KeyError as e: + self.logger.error( + f'Build lr_scheduler error, the lr_scheduler {cfg} is a torch native component, ' + f'please check if your torch with version: {torch.__version__} matches the config.' + ) + raise e + + def create_optimizer_and_scheduler(self): + """ Create optimizer and lr scheduler + + We provide a default implementation, if you want to customize your own optimizer + and lr scheduler, you can either pass a tuple through trainer init function or + subclass this class and override this method. 
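
create_optimizer_and_scheduler above only builds from the configuration when no instances were handed in, so a fully custom pair can be passed at construction time; a sketch reusing the ToyModel above (paths and hyper-parameters are placeholders):

import torch

model = ToyModel()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

trainer = EpochBasedTrainer(
    model=model,
    cfg_file='./configuration.json',        # required when model is not a model id
    train_dataset=train_dataset,
    optimizers=(optimizer, lr_scheduler),   # skips train.optimizer / train.lr_scheduler
    max_epochs=3,
    work_dir='./work_dir')
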
+ """ + optimizer, lr_scheduler = self.optimizers + if optimizer is None: + optimizer_cfg = self.cfg.train.get('optimizer', None) + else: + optimizer_cfg = None + + optim_options = {} + if optimizer_cfg is not None: + optim_options = optimizer_cfg.pop('options', {}) + optimizer = self.build_optimizer(cfg=optimizer_cfg) + + if lr_scheduler is None: + lr_scheduler_cfg = self.cfg.train.get('lr_scheduler', None) + else: + lr_scheduler_cfg = None + + lr_options = {} + if lr_scheduler_cfg is not None: + assert optimizer is not None + lr_options = lr_scheduler_cfg.pop('options', {}) + lr_scheduler = self.build_lr_scheduler( + cfg=lr_scheduler_cfg, default_args={'optimizer': optimizer}) + + self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + return self.optimizer, self.lr_scheduler, optim_options, lr_options + + def register_optimizers_hook(self): + """ Register optimizer hook and lr scheduler hook. + """ + _, lr_scheduler, optim_options, lr_options = self.create_optimizer_and_scheduler( + ) + + optim_hook = self.cfg.train.get('optimizer_hook', None) + lr_hook = self.cfg.train.get('lr_scheduler_hook', None) + + # adapt to `ReduceLROnPlateau` + from torch.optim.lr_scheduler import ReduceLROnPlateau + if isinstance(lr_scheduler, ReduceLROnPlateau) and lr_hook is None: + plateau_cfg = { + 'train': { + 'lr_scheduler_hook': { + 'type': 'PlateauLrSchedulerHook', + 'metric_key': + 'Metric Key used for PlateauLrSchedulerHook' + } + } + } + plateau_cfg = json.dumps(plateau_cfg, + sort_keys=False, + indent=4, + separators=(',', ':')) + raise ValueError( + 'Must add `lr_scheduler_hook` to configuration for `ReduceLROnPlateau` lr scheduler as follows:' + + '\n' + plateau_cfg) + + if lr_hook is None: + lr_hook = dict(type='LrSchedulerHook', **lr_options) + if optim_hook is None: + if self.use_fp16: + optim_hook = dict(type='TorchAMPOptimizerHook', + **optim_options) + else: + optim_hook = dict(type='OptimizerHook', **optim_options) + + self.register_hook_from_cfg([lr_hook, optim_hook]) + + def _build_dataloader_with_dataset(self, + dataset: Dataset, + batch_size_per_gpu: int, + workers_per_gpu: int, + dist: bool = False, + shuffle: bool = True, + seed: int = 0, + persistent_workers=False, + **kwargs) -> DataLoader: + """Build dataloader using input dataset and cfg. Used by `EpochBasedTrainer.train()` + and `EpochBasedTrainer.evaluate()`. + + In distributed training, each GPU/process has a dataloader. + In non-distributed training, there is only one dataloader for all GPUs. + + Args: + dataset (Dataset): A PyTorch dataset. + batch_size_per_gpu (int): Number of training samples on each GPU, i.e., + batch size of each GPU. + workers_per_gpu (int): How many subprocesses to use for data loading + for each GPU. + dist (bool): Distributed training/test or not. Default: True. + shuffle (bool): Whether to shuffle the data at every epoch. + Default: True. + seed (int, Optional): Seed to be used. Default: 0. + runner_type (str): Type of runner. Default: `EpochBasedRunner` + persistent_workers (bool): If True, the data loader will not shutdown + the worker processes after a dataset has been consumed once. + This allows to maintain the workers `Dataset` instances alive. + This argument is only valid when PyTorch>=1.7.0. Default: False. + kwargs: any keyword argument to be used to initialize DataLoader + + Returns: + DataLoader: A PyTorch dataloader. 
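
The nested `options` keys popped in create_optimizer_and_scheduler never reach the torch objects; they are forwarded to the LrSchedulerHook and OptimizerHook (or TorchAMPOptimizerHook when use_fp16 is set) registered right above. A hedged fragment of the `train` section written as a Python dict (types and values are placeholders):

train_fragment = {
    'optimizer': {
        'type': 'AdamW',        # a torch-native or registered optimizer type
        'lr': 2e-5,
        'weight_decay': 0.01,
        'options': {},          # popped and passed to the optimizer hook
    },
    'lr_scheduler': {
        'type': 'StepLR',
        'step_size': 1,
        'options': {},          # popped and passed to the lr scheduler hook
    },
}
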
+ """ + rank, world_size = get_dist_info() + + if dist: + # When model is :obj:`DistributedDataParallel`, + # `batch_size` of :obj:`dataloader` is the + # number of training samples on each GPU. + batch_size = batch_size_per_gpu + num_workers = workers_per_gpu + else: + batch_size = batch_size_per_gpu + num_workers = workers_per_gpu + + if dist and not isinstance( + dataset, + torch.utils.data.IterableDataset) and self.cfg.model.get( + 'model_parallel_size', 1) == 1: + sampler = DistributedSampler(dataset, + num_replicas=world_size, + rank=rank, + shuffle=shuffle) + else: + sampler = None + if not isinstance(dataset, torch.utils.data.IterableDataset): + kwargs['shuffle'] = shuffle + + batch_sampler = None + + init_fn = partial( + worker_init_fn, num_workers=num_workers, rank=rank, + seed=seed) if seed is not None else None + + if LooseVersion(torch.__version__) >= LooseVersion('1.7.0'): + kwargs['persistent_workers'] = persistent_workers + elif persistent_workers is True: + self.logger.warning( + 'persistent_workers is invalid because your pytorch ' + 'version is lower than 1.7.0') + + data_loader = DataLoader(dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + batch_sampler=batch_sampler, + pin_memory=kwargs.pop('pin_memory', False), + worker_init_fn=init_fn, + **kwargs) + + return data_loader + + def train_loop(self, data_loader): + """ Training loop used by `EpochBasedTrainer.train()` + """ + self.invoke_hook(TrainerStages.before_run) + kwargs = {} + self.model.train() + for _ in range(self._epoch, self._max_epochs): + self.invoke_hook(TrainerStages.before_train_epoch) + for i, data_batch in enumerate(data_loader): + if i < self.inner_iter: + # inner_iter may be read out from the checkpoint file, so skip the trained iters in the epoch. + continue + data_batch = to_device(data_batch, self.device) + self.data_batch = data_batch + self._inner_iter = i + self.invoke_hook(TrainerStages.before_train_iter) + self.train_step(self.model, data_batch, **kwargs) + self.invoke_hook(TrainerStages.after_train_iter) + # Value changed after the hooks are invoked, do not move them above the invoke_hook code. + del self.data_batch + self._iter += 1 + self._mode = ModeKeys.TRAIN + + if i + 1 >= self.iters_per_epoch: + break + + self.invoke_hook(TrainerStages.after_train_epoch) + # Value changed after the hooks are invoked, do not move them above the invoke_hook code. + self._inner_iter = 0 + self._epoch += 1 + if self._stop_training: + break + + self.invoke_hook(TrainerStages.after_run) + + def evaluation_step(self, data): + """Perform a training step on a batch of inputs. + + Subclass and override to inject custom behavior. + + """ + model = self.model.module if self._dist else self.model + model.eval() + + if is_parallel(model): + receive_dict_inputs = func_receive_dict_inputs( + model.module.forward) + else: + receive_dict_inputs = func_receive_dict_inputs(model.forward) + + with torch.no_grad(): + if isinstance(data, Mapping) and not receive_dict_inputs: + result = model.forward(**data) + else: + result = model.forward(data) + return result + + def evaluation_loop(self, data_loader, metric_classes): + """ Evaluation loop used by `EpochBasedTrainer.evaluate()`. 
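
_build_dataloader_with_dataset receives its keyword arguments directly from the `dataloader` sub-sections of the configuration (`train.dataloader` and `evaluation.dataloader`); a sketch of those fragments with placeholder values:

dataloader_fragments = {
    'train': {
        'dataloader': {
            'batch_size_per_gpu': 16,
            'workers_per_gpu': 2,
            'shuffle': True,
            'persistent_workers': True,   # only honoured on PyTorch >= 1.7.0
        }
    },
    'evaluation': {
        'dataloader': {
            'batch_size_per_gpu': 32,
            'workers_per_gpu': 2,
            'shuffle': False,             # the evaluation default
        }
    },
}
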
+ + """ + vis_closure = None + if hasattr(self.cfg.evaluation, 'visualization'): + vis_cfg = self.cfg.evaluation.visualization + vis_closure = partial(self.visualization, + dataset=self.eval_dataset, + **vis_cfg) + + if self._dist and self.cfg.model.get('model_parallel_size', 1) == 1: + from modelscope.trainers.utils.inference import multi_gpu_test + # list of batched result and data samples + metric_values = multi_gpu_test( + self, + data_loader, + device=self.device, + metric_classes=metric_classes, + vis_closure=vis_closure, + tmpdir=self.cfg.evaluation.get('cache_dir', None), + gpu_collect=self.cfg.evaluation.get('gpu_collect', False), + data_loader_iters_per_gpu=self._eval_iters_per_epoch) + else: + from modelscope.trainers.utils.inference import single_gpu_test + metric_values = single_gpu_test( + self, + data_loader, + device=self.device, + metric_classes=metric_classes, + vis_closure=vis_closure, + data_loader_iters=self._eval_iters_per_epoch) + + return metric_values + + def visualization(self, batch_result, dataset, **kwargs): + """ visualization function for evaluation results. + + Examples: + >>> # draw list of images as numpy array + >>> images = draw_images(num_of_visualization) + + >>> # set displayed name for each image + >>> filenames = get_image_display_names() + >>> vis_results = {'images': images, 'filenames' : filenames} + + >>> # visualization results will be displayed in group named eva_vis + >>> self.visualization_buffer.output['eval_vis'] = vis_results + + Args: + results (list(dict)): a list of result dict. + dataset (Dataset): torch dataset object to access original data. + """ + # TODO @wenmeng.zwm add visualization support for cv evaluation + raise NotImplementedError( + 'visualization for evaluation will be supported in the future') + + def register_hook(self, hook: Hook) -> None: + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + """ + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + p = hook.PRIORITY if hasattr(hook, 'PRIORITY') else Priority.NORMAL + p_i = self._hooks[i].PRIORITY if hasattr( + self._hooks[i], 'PRIORITY') else Priority.NORMAL + + if get_priority(p) > get_priority(p_i): + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg: List) -> None: + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Note: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + assert isinstance(hook_cfg, list) + for cfg_i in hook_cfg: + hook = build_from_cfg(cfg_i, HOOKS) + self.register_hook(hook) + + def invoke_hook(self, fn_name: str) -> None: + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". 
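
register_hook orders hooks by their PRIORITY attribute and invoke_hook dispatches each TrainerStages callback by name. A sketch of a small custom hook (class name, log message and import paths are assumptions based on the imports used in this file):

from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.hook import Hook
from modelscope.trainers.hooks.priority import Priority

@HOOKS.register_module(module_name='IterHintHook')
class IterHintHook(Hook):
    PRIORITY = Priority.NORMAL

    def after_train_iter(self, trainer):
        if trainer.iter % 100 == 0:
            trainer.logger.info(f'global iter {trainer.iter} finished')

# Register it from a config dict (or list it under train.hooks in configuration.json):
trainer.register_hook_from_cfg([dict(type='IterHintHook')])
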
+ """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def get_hook_info(self) -> str: + # Get hooks info in each stage + stage_hook_map: Dict[str, list] = {stage: [] for stage in Hook.stages} + for hook in self.hooks: + try: + priority = Priority(hook.priority).name # type: ignore + except ValueError: + priority = hook.priority # type: ignore + classname = hook.__class__.__name__ + hook_info = f'({priority:<12}) {classname:<35}' + for trigger_stage in hook.get_triggered_stages(): + stage_hook_map[trigger_stage].append(hook_info) + + stage_hook_infos = [] + for stage in Hook.stages: + hook_infos = stage_hook_map[stage] + if len(hook_infos) > 0: + info = f'{stage}:\n' + info += '\n'.join(hook_infos) + info += '\n -------------------- ' + stage_hook_infos.append(info) + return '\n'.join(stage_hook_infos) + + +def worker_init_fn(worker_id, num_workers, rank, seed): + # The seed of each worker equals to + # num_worker * rank + worker_id + user_seed + worker_seed = num_workers * rank + worker_id + seed + set_random_seed(worker_seed) diff --git a/modelscope/trainers/training_args.py b/modelscope/trainers/training_args.py new file mode 100644 index 0000000..6b4225e --- /dev/null +++ b/modelscope/trainers/training_args.py @@ -0,0 +1,715 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import re +from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser +from dataclasses import dataclass, field, fields +from functools import partial +from typing import Any, Dict, List, Tuple, Union + +from modelscope.trainers.default_config import DEFAULT_CONFIG +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.hub import read_config + + +def get_flatten_value(config: Config, metadata: Dict, exclusions=None): + cfg_node = metadata['cfg_node'] + if exclusions is None: + exclusions = [] + + values = config.safe_get(cfg_node) + if isinstance(values, dict): + param_map = [] + for key, value in values.items(): + if key in exclusions or not isinstance(value, + (str, int, float, bool)): + continue + value = add_quotes_for_str(value) + param_map.append(f'{key}={value}') + return ','.join(param_map) + else: + return values + + +def set_flatten_value(config: Config, values: Union[str, List[str]], + metadata: Dict): + cfg_node = metadata['cfg_node'] + if values is None: + return config + + pairs = values.split(',') if isinstance(values, str) else values + for kv in pairs: + if len(kv.strip()) == 0: + continue + key, value = kv.split('=') + value = parse_value(value) + config.merge_from_dict({cfg_node + '.' 
+ key: value}) + return config + + +def get_base_hook_args(config: Config, metadata: Dict): + cfg_node = metadata['cfg_node'] + hook_type = metadata['hook_type'] + key = metadata['key'] + value = config.safe_get(cfg_node) + if value is None: + return get_hook_param(config, hook_type, key) + else: + return True if key == 'type' else value + + +def set_base_hook_args(config: Config, value: Any, metadata: Dict): + cfg_node = metadata['cfg_node'] + hook_type = metadata['hook_type'] + key = metadata['key'] + if 'hooks' in config.train: + config.train.hooks = [ + hook for hook in config.train.hooks if hook['type'] != hook_type + ] + if key == 'type': + if value and config.safe_get(cfg_node) is None: + config.merge_from_dict({cfg_node: {}}) + else: + config.merge_from_dict({cfg_node: value}) + + +def get_strategy(config: Config, + metadata: Dict, + value_pair: Tuple[str] = ('by_epoch', 'by_step')): + flag = get_base_hook_args(config, metadata) + if flag is None: + return None + return value_pair[0] if flag else value_pair[1] + + +def set_strategy(config: Config, + value: Any, + metadata: Dict, + value_pair: Tuple[str] = ('by_epoch', 'by_step')): + set_base_hook_args(config, value == value_pair[0], metadata) + + +def get_hook_param(config, hook_type: str, key='type'): + hooks = config.safe_get('train.hooks', []) + _hooks = list(filter(lambda hook: hook['type'] == hook_type, hooks)) + if key == 'type': + return len(_hooks) > 0 + elif len(_hooks) > 0: + return getattr(_hooks[0], key, None) + return None + + +def add_quotes_for_str(value: Union[str, float, bool, None]) -> str: + if isinstance(value, str): + return f'"{value}"' + else: + return str(value) + + +def parse_value(value: str) -> Union[str, float, bool, None]: + const_map = { + 'True': True, + 'true': True, + 'False': False, + 'false': False, + 'None': None, + 'none': None, + 'null': None + } + if value in const_map: + return const_map[value] + elif '"' in value or "'" in value: + return value.replace('"', '').replace("'", '') + elif re.match(r'^\d+$', value): + return int(value) + elif re.match(r'[+-]?(?=\d*[.eE])(?=\.?\d)\d*\.?\d*(?:[eE][+-]?\d+)?', + value): + return float(value) + else: + return value + + +@dataclass +class TrainingArgs: + model: str = field(default=None, + metadata={ + 'help': 'A model id or model dir', + }) + + seed: int = field(default=42, metadata={ + 'help': 'The random seed', + }) + + task: str = field(default=None, + metadata={ + 'help': 'The task code to be used', + 'cfg_node': 'task' + }) + + dataset_name: str = field(default=None, + metadata={ + 'help': 'The dataset name', + }) + + subset_name: str = field(default=None, + metadata={ + 'help': 'The subset name of the dataset', + }) + + train_dataset_name: str = field(default=None, + metadata={ + 'help': 'The train dataset name', + }) + + val_dataset_name: str = field(default=None, + metadata={ + 'help': 'The validation dataset name', + }) + + per_device_train_batch_size: int = field( + default=None, + metadata={ + 'cfg_node': 'train.dataloader.batch_size_per_gpu', + 'help': 'The training batch size per GPU', + }) + + train_data_worker: int = field( + default=None, + metadata={ + 'cfg_node': 'train.dataloader.workers_per_gpu', + 'help': 'The number of data workers for train dataloader', + }) + + train_shuffle: bool = field(default=None, + metadata={ + 'cfg_node': 'train.dataloader.shuffle', + 'help': 'Shuffle the train dataset or not', + }) + + per_device_eval_batch_size: int = field( + default=None, + metadata={ + 'cfg_node': 
'evaluation.dataloader.batch_size_per_gpu', + 'help': 'The eval batch size per GPU', + }) + + eval_data_worker: int = field( + default=None, + metadata={ + 'cfg_node': 'evaluation.dataloader.workers_per_gpu', + 'help': 'The number of data workers for eval dataloader', + }) + + eval_shuffle: bool = field(default=None, + metadata={ + 'cfg_node': 'evaluation.dataloader.shuffle', + 'help': 'Shuffle the eval dataset or not', + }) + + max_epochs: int = field(default=None, + metadata={ + 'cfg_node': 'train.max_epochs', + 'help': 'The training epochs', + }) + + work_dir: str = field(default=None, + metadata={ + 'cfg_node': 'train.work_dir', + 'help': + 'The training dir to save models and logs', + }) + + lr: float = field(default=None, + metadata={ + 'cfg_node': 'train.optimizer.lr', + 'help': 'The learning rate of the optimizer', + }) + + optimizer: str = field(default=None, + metadata={ + 'cfg_node': 'train.optimizer.type', + 'help': 'The optimizer type', + }) + + optimizer_params: str = field( + default=None, + metadata={ + 'cfg_node': + 'train.optimizer', + 'cfg_getter': + partial(get_flatten_value, exclusions=['type', 'lr', 'options']), + 'cfg_setter': + set_flatten_value, + 'help': + 'The optimizer init params except `lr`', + }) + + lr_scheduler_params: str = field( + default=None, + metadata={ + 'cfg_node': + 'train.lr_scheduler', + 'cfg_getter': + partial(get_flatten_value, exclusions=['type', 'lr', 'options']), + 'cfg_setter': + set_flatten_value, + 'help': + 'The lr_scheduler init params', + }) + + local_rank: int = field(default=0, + metadata={ + 'help': 'The training local rank', + }) + + save_ckpt: bool = field( + default=True, + metadata={ + 'help': + 'Periodically save checkpoint when True, corresponding to CheckpointHook', + 'cfg_node': 'train.checkpoint.period', + 'hook_type': 'CheckpointHook', + 'key': 'type', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + save_ckpt_best: bool = field( + default=None, + metadata={ + 'help': + 'Save best checkpoint when True, corresponding to BestCkptSaverHook', + 'cfg_node': 'train.checkpoint.best', + 'hook_type': 'BestCkptSaverHook', + 'key': 'type', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + evaluate: bool = field( + default=True, + metadata={ + 'help': 'Evaluate when True, corresponding to EvaluationHook', + 'cfg_node': 'evaluation.period', + 'hook_type': 'EvaluationHook', + 'key': 'type', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + save_ckpt_strategy: str = field( + default=None, + metadata={ + 'help': 'Periodically save checkpoint by epoch or by step' + 'use with `CheckpointHook`, can be `by_epoch` or `by_step`', + 'cfg_node': 'train.checkpoint.period.by_epoch', + 'hook_type': 'CheckpointHook', + 'key': 'by_epoch', + 'choices': ['by_epoch', 'by_step'], + 'cfg_getter': get_strategy, + 'cfg_setter': set_strategy, + }) + + save_ckpt_best_strategy: str = field( + default=None, + metadata={ + 'help': 'Save best checkpoint by epoch or by step' + 'use with `BestCkptSaverHook`, can be `by_epoch` or `by_step`', + 'cfg_node': 'train.checkpoint.best.by_epoch', + 'hook_type': 'BestCkptSaverHook', + 'key': 'by_epoch', + 'choices': ['by_epoch', 'by_step'], + 'cfg_getter': get_strategy, + 'cfg_setter': set_strategy, + }) + + ckpt_period_interval: int = field( + default=1, + metadata={ + 'help': + 'The interval of epoch or iter of saving checkpoint period', + 'cfg_node': 'train.checkpoint.period.interval', + 'hook_type': 'CheckpointHook', + 'key': 
'interval', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + ckpt_best_interval: int = field( + default=None, + metadata={ + 'help': 'The interval of epoch or iter of saving checkpoint best', + 'cfg_node': 'train.checkpoint.best.interval', + 'hook_type': 'BestCkptSaverHook', + 'key': 'interval', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + metric_for_best_model: str = field( + default=None, + metadata={ + 'help': + 'Which metric key to judge the checkpoint is better or not, use with `BestCkptSaverHook`, ' + 'please make sure this key is returned by the `evaluation_metrics` classes', + 'cfg_node': + 'train.checkpoint.best.metric_key', + 'hook_type': + 'BestCkptSaverHook', + 'key': + 'metric_key', + 'cfg_getter': + get_base_hook_args, + 'cfg_setter': + set_base_hook_args, + }) + + metric_rule_for_best_model: str = field( + default=None, + metadata={ + 'help': + 'Which rule to compare the value of `checkpoint_saving_metric`, ' + 'use with `BestCkptSaverHook`, can be `max` or `min`', + 'cfg_node': + 'train.checkpoint.best.rule', + 'hook_type': + 'BestCkptSaverHook', + 'key': + 'rule', + 'cfg_getter': + get_base_hook_args, + 'cfg_setter': + set_base_hook_args, + }) + + save_ckpt_peroid_limit: int = field( + default=None, + metadata={ + 'help': + 'The max saving number of checkpoint, older checkpoints will be deleted.', + 'cfg_node': 'train.checkpoint.period.max_checkpoint_num', + 'hook_type': 'CheckpointHook', + 'key': 'max_checkpoint_num', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + save_ckpt_best_limit: int = field( + default=None, + metadata={ + 'help': + 'The max saving number of checkpoint, worse checkpoints will be deleted.', + 'cfg_node': 'train.checkpoint.best.max_checkpoint_num', + 'hook_type': 'BestCkptSaverHook', + 'key': 'max_checkpoint_num', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + logging_interval: int = field( + default=None, + metadata={ + 'help': 'The interval of iter of logging information', + 'cfg_node': 'train.logging.interval', + 'hook_type': 'TextLoggerHook', + 'key': 'interval', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + eval_strategy: str = field( + default=None, + metadata={ + 'help': 'Evaluate model by epoch or by step' + 'use with `EvaluationHook`, can be `by_epoch` or `by_step`', + 'cfg_node': 'evaluation.period.by_epoch', + 'hook_type': 'EvaluationHook', + 'key': 'by_epoch', + 'choices': ['by_epoch', 'by_step'], + 'cfg_getter': get_strategy, + 'cfg_setter': set_strategy, + }) + + eval_interval: int = field(default=1, + metadata={ + 'help': + 'Evaluation interval by epoch or iter', + 'cfg_node': 'evaluation.period.interval', + 'hook_type': 'EvaluationHook', + 'key': 'interval', + 'cfg_getter': get_base_hook_args, + 'cfg_setter': set_base_hook_args, + }) + + eval_metrics: str = field(default=None, + metadata={ + 'help': + 'The metric module name used in evaluation', + 'cfg_node': 'evaluation.metrics' + }) + + @classmethod + def from_cli(cls, parser_args=None, **extra_kwargs): + """Construct a TrainingArg class by the parameters of CLI. + + Args: + **extra_kwargs: Extra args which can be defined in code. + + Returns: + The output TrainingArg class with the parameters from CLI. 
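
from_cli ties these fields to command-line flags, and calling the resulting instance on a Config writes the values back into their cfg_node paths; a sketch of a small launcher (the model id, flag values and task name are placeholders):

# e.g.  python train.py --model damo/some-model-id --max_epochs 5 --lr 1e-4 \
#           --per_device_train_batch_size 16
from modelscope.utils.hub import read_config

args = TrainingArgs.from_cli(task='text-classification')
cfg = read_config(args.model)
cfg = args(cfg)    # __call__ merges the CLI values into the Config
# cfg can now be returned from a cfg_modify_fn passed to EpochBasedTrainer.
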
+ """ + self = cls(**extra_kwargs) + parser = CliArgumentParser(self) + args, unknown = parser.parse_known_args(parser_args) + unknown = [item for item in unknown if item not in ('\\', '\n')] + _unknown = {} + for i in range(0, len(unknown), 2): + _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1]) + cfg_dict = vars(args) + + if args.model is not None: + try: + cfg = read_config(args.model) + except Exception as e: + print('Read config failed with error:', e) + else: + cfg.merge_from_dict(_unknown) + self = cls.from_config(cfg, **extra_kwargs) + for key, value in cfg_dict.items(): + if key is not None and hasattr(self, + key) and key in parser.manual_args: + setattr(self, key, value) + return self + + def to_args(self): + """Convert the TrainingArg class to key-value pairs. + + Returns: The key-value pair. + + """ + _args = {} + for f in fields(self): + _args[f.name] = getattr(self, f.name) + return _args + + @classmethod + def from_config(cls, config=DEFAULT_CONFIG, **kwargs): + """Construct the TrainingArg class by a `Config` class. + + Args: + config: The Config class. By default, `DEFAULT_CONFIG` is used. + **kwargs: Extra args which can be defined in code. + + Returns: The output TrainingArg class with the parameters from the config. + + """ + + self = cls(**kwargs) + for f in fields(self): + if 'cfg_node' in f.metadata and getattr(self, f.name) is None: + self._to_field(f, config) + return self + + def _to_field(self, f, config): + assert 'cfg_node' in f.metadata + if 'cfg_getter' in f.metadata: + cfg_getter = f.metadata['cfg_getter'] + setattr(self, f.name, cfg_getter(config, f.metadata)) + else: + cfg_node = f.metadata['cfg_node'] + setattr(self, f.name, config.safe_get(cfg_node)) + + def _to_config(self, f, config: Config): + assert 'cfg_node' in f.metadata + value = getattr(self, f.name) + if 'cfg_setter' in f.metadata: + cfg_setter = f.metadata['cfg_setter'] + config = cfg_setter(config, value, f.metadata) + else: + cfg_node = f.metadata['cfg_node'] + if isinstance(cfg_node, str): + cfg_node = [cfg_node] + for _node in cfg_node: + config.merge_from_dict({_node: value}) + return config + + def __call__(self, cfg: Config): + for f in fields(self): + if 'cfg_node' not in f.metadata: + continue + + value = getattr(self, f.name) + if value is not None: + self._to_config(f, cfg) + else: + self._to_field(f, cfg) + return cfg + + +class CliArgumentParser(ArgumentParser): + """ Argument Parser to define and parse command-line args for training. + + Args: + training_args (TrainingArgs): dict or list of dict which defines different + paramters for training. 
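
optimizer_params and lr_scheduler_params use the flatten getter/setter defined at the top of this file, so nested optimizer settings travel on the command line as comma-separated key=value pairs, e.g. --optimizer_params "weight_decay=0.01,eps=1e-8" ends up as train.optimizer.weight_decay and train.optimizer.eps. The individual values are recovered by parse_value:

# parse_value() turns the string form back into typed Python values:
assert parse_value('0.01') == 0.01
assert parse_value('true') is True
assert parse_value('none') is None
assert parse_value('"adamw"') == 'adamw'
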
+ """ + def __init__(self, training_args: TrainingArgs = None, **kwargs): + if 'formatter_class' not in kwargs: + kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter + super().__init__(**kwargs) + self.training_args = training_args + self.define_args() + + def get_manual_args(self, args): + return [arg[2:] for arg in args if arg.startswith('--')] + + def _parse_known_args(self, args: List = None, namespace=None): + self.model_id = namespace.model if namespace is not None else None + if '--model' in args: + self.model_id = args[args.index('--model') + 1] + self.manual_args = self.get_manual_args(args) + return super()._parse_known_args(args, namespace) + + def print_help(self, file=None): + config = DEFAULT_CONFIG + if self.model_id is not None: + try: + config = read_config(self.model_id) + except Exception as e: + print('Read config failed with error:', e) + + if config is not None: + for action_group in self._optionals._group_actions: + if hasattr(self.training_args, action_group.dest): + value = getattr(self.training_args, action_group.dest) + f = {f.name: f + for f in fields(self.training_args) + }.get(action_group.dest) + if value is not None: + action_group.default = value + elif 'cfg_node' in f.metadata: + cfg_node = f.metadata['cfg_node'] + if isinstance(cfg_node, str): + cfg_node = [cfg_node] + + assert isinstance(cfg_node, (list, tuple)) + if isinstance(cfg_node[0], str): + action_group.default = config.safe_get(cfg_node[0]) + else: + action_group.default = cfg_node[0](config) + return super().print_help(file) + + def define_args(self): + if self.training_args is not None: + for f in fields(self.training_args): + arg_name = f.name + arg_attr = getattr(self.training_args, f.name) + name = f'--{arg_name}' + kwargs = dict(type=f.type, help=f.metadata['help']) + kwargs['default'] = arg_attr + + if 'choices' in f.metadata: + kwargs['choices'] = f.metadata['choices'] + + kwargs['action'] = SingleAction + self.add_argument(name, **kwargs) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + @staticmethod + def parse_int_float_bool_str(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return val.lower() == 'true' + if val == 'None': + return None + return val + + @staticmethod + def parse_iterable(val): + """Parse iterable values in the string. + All elements inside '()' or '[]' are treated as iterable values. + Args: + val (str): Value string. + Returns: + list | tuple: The expanded list or tuple from the string. + Examples: + >>> DictAction._parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction._parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + def find_next_comma(string): + """Find the position of next comma in the string. + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. 
+ """ + assert (string.count('(') == string.count(')')) and ( + string.count('[') + == string.count(']')), f'Imbalanced brackets exist in {string}' + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ((char == ',') and (pre.count('(') == pre.count(')')) + and (pre.count('[') == pre.count(']'))): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. + val = val.strip('\'\"').replace(' ', '') + is_tuple = False + if val.startswith('(') and val.endswith(')'): + is_tuple = True + val = val[1:-1] + elif val.startswith('[') and val.endswith(']'): + val = val[1:-1] + elif ',' not in val: + # val is a single value + return DictAction.parse_int_float_bool_str(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction.parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1:] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + options[key] = self.parse_iterable(val) + setattr(namespace, self.dest, options) + + +class SingleAction(DictAction): + """ Argparse action to convert value to tuple or list or nested structure of + list and tuple, i.e 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'. + It also support nested brackets to build list/tuple values. e.g. '[(V1,V2),(V3,V4)]' + """ + def __call__(self, parser, namespace, value, option_string): + if isinstance(value, str): + setattr(namespace, self.dest, self.parse_iterable(value)) + else: + setattr(namespace, self.dest, value) diff --git a/modelscope/trainers/utils/__init__.py b/modelscope/trainers/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/trainers/utils/inference.py b/modelscope/trainers/utils/inference.py new file mode 100644 index 0000000..6d764af --- /dev/null +++ b/modelscope/trainers/utils/inference.py @@ -0,0 +1,303 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging +import os +import pickle +import shutil +from collections.abc import Mapping + +import torch +from torch import distributed as dist +from tqdm import tqdm + +from modelscope.utils.data_utils import to_device +from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master, + make_tmp_dir) + + +def single_gpu_test(trainer, + data_loader, + device, + metric_classes=None, + vis_closure=None, + data_loader_iters=None): + """Test model in EpochBasedTrainer with a single gpu. + + Args: + trainer (modelscope.trainers.EpochBasedTrainer): Trainer to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + device (str | torch.device): The target device for the data. + metric_classes (List): List of Metric class that uses to collect metrics. + vis_closure (Callable): Collect data for TensorboardHook. + data_loader_iters (int): Used when dataset has no attribute __len__ or only load part of dataset. + + Returns: + list: The prediction results. 
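+
+    Examples:
+        >>> # Illustrative sketch only: ``trainer``, ``val_loader`` and the metric
+        >>> # instances are assumed to have been built elsewhere.
+        >>> metric_values = single_gpu_test(trainer, val_loader, device='cuda:0',
+        >>>                                 metric_classes=[accuracy_metric])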
+ """ + dataset = data_loader.dataset + progress_with_iters = False + if data_loader_iters is None: + try: + data_len = len(dataset) + except Exception as e: + logging.error(e) + raise ValueError( + 'Please implement ``__len__`` method for your dataset, or provide ``data_loader_iters``' + ) + desc = 'Total test samples' + else: + progress_with_iters = True + data_len = data_loader_iters + desc = 'Test iterations' + + with tqdm(total=data_len, desc=desc) as pbar: + for i, data in enumerate(data_loader): + data = to_device(data, device) + evaluate_batch(trainer, data, metric_classes, vis_closure) + + if progress_with_iters: + batch_size = 1 # iteration count + else: + if isinstance(data, Mapping): + if 'nsentences' in data: + batch_size = data['nsentences'] + else: + try: + batch_size = len(next(iter(data.values()))) + except Exception: + batch_size = data_loader.batch_size + else: + batch_size = len(data) + for _ in range(batch_size): + pbar.update() + + if progress_with_iters and (i + 1) >= data_len: + break + + return get_metric_values(metric_classes) + + +def multi_gpu_test(trainer, + data_loader, + device, + metric_classes=None, + vis_closure=None, + tmpdir=None, + gpu_collect=False, + data_loader_iters_per_gpu=None): + """Test model in EpochBasedTrainer with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting + ``gpu_collect=True``, it encodes results to gpu tensors and use gpu + communication for results collection. On cpu mode it saves the results on + different gpus to ``tmpdir`` and collects them by the rank 0 worker. + + Args: + trainer (modelscope.trainers.EpochBasedTrainer): Trainer to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + device: (str | torch.device): The target device for the data. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + data_loader_iters_per_gpu (int): Used when dataset has no attribute __len__ or only load part of dataset. + Returns: + list: The prediction results. 
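+
+    Examples:
+        >>> # Illustrative sketch only; assumes torch.distributed is initialized
+        >>> # and this function runs once per GPU process.
+        >>> metric_values = multi_gpu_test(trainer, val_loader, device='cuda',
+        >>>                                metric_classes=[accuracy_metric],
+        >>>                                gpu_collect=False, tmpdir='/tmp/eval')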
+ """ + dataset = data_loader.dataset + rank, world_size = get_dist_info() + + progress_with_iters = False + if data_loader_iters_per_gpu is None: + try: + data_len = len(dataset) + total_samples = data_len + except Exception as e: + logging.error(e) + raise ValueError( + 'Please implement ``__len__`` method for your dataset, or provide ``data_loader_iters_per_gpu``' + ) + desc = 'Total test samples with multi gpus' + else: + total_samples = 0 + progress_with_iters = True + data_len = data_loader_iters_per_gpu * world_size + desc = 'Total test iterations with multi gpus' + + count = 0 + with tqdm(total=data_len, desc=desc) as pbar: + for i, data in enumerate(data_loader): + data = to_device(data, device) + + evaluate_batch(trainer, data, metric_classes, vis_closure) + + if isinstance(data, Mapping): + if 'nsentences' in data: + batch_size = data['nsentences'] + else: + batch_size = len(next(iter(data.values()))) + else: + batch_size = len(data) + if i >= (data_len // world_size) - 1: + total_samples = torch.LongTensor([batch_size + ]).to(trainer.model.device) + dist.all_reduce(total_samples, op=dist.reduce_op.SUM) + total_samples = total_samples.item() + else: + total_samples = batch_size * world_size + if progress_with_iters: + iter_cnt_all = world_size + else: + iter_cnt_all = total_samples + count += iter_cnt_all + + if rank == 0: + if count > data_len: + iter_cnt_all = data_len - (count - iter_cnt_all) + for _ in range(iter_cnt_all): + pbar.update() + + if progress_with_iters and (i + 1) >= data_len: + break + + # collect results and data from all ranks + if gpu_collect: + metric_classes_list = collect_results_gpu(metric_classes) + else: + if tmpdir is None: + tmpdir = make_tmp_dir() + metric_classes_list = collect_results_cpu( + metric_classes, os.path.join(tmpdir, 'metrics')) + + metric_classes = merge_metrics(metric_classes_list) + + return get_metric_values(metric_classes) + + +def evaluate_batch(trainer, data, metric_classes, vis_closure): + batch_result = trainer.evaluation_step(data) + + if metric_classes is not None: + for metric_cls in metric_classes: + metric_cls.add(batch_result, data) + + if vis_closure is not None: + # trainer.visualization + vis_closure(batch_result) + + +def get_metric_values(metric_classes): + rank, world_size = get_dist_info() + metric_values = {} + if rank == 0: + for metric_cls in metric_classes: + metric_values.update(metric_cls.evaluate()) + if world_size > 1: + metric_values = broadcast(metric_values, 0) + return metric_values + + +def collect_results_cpu(result_part, tmpdir=None): + """Collect results under cpu mode. + + On cpu mode, this function will save the results on different gpus to + ``tmpdir`` and collect them by the rank 0 worker. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + tmpdir (str | None): temporal directory for collected results to + store. If set to None, it will create a random temporal directory + for it. + + Returns: + list: The collected results. 
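+
+    Example (illustrative; mirrors how ``multi_gpu_test`` uses this helper):
+        >>> parts = collect_results_cpu(metric_classes, tmpdir='/tmp/collect')
+        >>> # Only rank 0 receives the merged list; all other ranks get None.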
+ """ + rank, world_size = get_dist_info() + if tmpdir is None: + tmpdir = make_tmp_dir() + if not os.path.exists(tmpdir) and is_master(): + os.makedirs(tmpdir) + dist.barrier() + + # dump the part result to the dir + with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f: + pickle.dump(result_part, f) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = os.path.join(tmpdir, f'part_{i}.pkl') + with open(part_file, 'rb') as f: + part_result = pickle.load(f) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + + # remove tmp dir + shutil.rmtree(tmpdir) + return part_list + + +def collect_results_gpu(result_part): + """Collect results under gpu mode. + + On gpu mode, this function will encode results to gpu tensors and use gpu + communication for results collection. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), + dtype=torch.uint8, + device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + + return part_list + + +def merge_metrics(metric_classes_list): + if metric_classes_list is None: + return None + + metric_classes_0 = metric_classes_list[0] + for metric_classes_i in metric_classes_list[1:]: + for cls_0, cls_i in zip(metric_classes_0, metric_classes_i): + cls_0.merge(cls_i) + return metric_classes_0 diff --git a/modelscope/trainers/utils/log_buffer.py b/modelscope/trainers/utils/log_buffer.py new file mode 100644 index 0000000..d01168a --- /dev/null +++ b/modelscope/trainers/utils/log_buffer.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright (c) Alibaba, Inc. and its affiliates. 
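+
+# Example usage (illustrative only):
+#   buf = LogBuffer()
+#   buf.update({'loss': 0.9}, count=32)   # accumulate per-iteration values
+#   buf.update({'loss': 0.7}, count=32)
+#   buf.average()                         # average everything collected so far
+#   buf.output['loss']                    # -> 0.8, the count-weighted mean
+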
+from collections import OrderedDict + +import numpy as np + + +class LogBuffer: + def __init__(self): + self.val_history = OrderedDict() + self.n_history = OrderedDict() + self.output = OrderedDict() + self.ready = False + + def clear(self) -> None: + self.val_history.clear() + self.n_history.clear() + self.clear_output() + + def clear_output(self) -> None: + self.output.clear() + self.ready = False + + def update(self, vars: dict, count: int = 1) -> None: + assert isinstance(vars, dict) + for key, var in vars.items(): + if key not in self.val_history: + self.val_history[key] = [] + self.n_history[key] = [] + self.val_history[key].append(var) + self.n_history[key].append(count) + + def average(self, n: int = 0) -> None: + """Average latest n values or all values.""" + assert n >= 0 + for key in self.val_history: + values = np.array(self.val_history[key][-n:]) + nums = np.array(self.n_history[key][-n:]) + avg = np.sum(values * nums) / np.sum(nums) + self.output[key] = avg + self.ready = True diff --git a/modelscope/utils/__init__.py b/modelscope/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py new file mode 100644 index 0000000..c6b352a --- /dev/null +++ b/modelscope/utils/ast_utils.py @@ -0,0 +1,765 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import ast +import contextlib +import hashlib +import json +import os +import os.path as osp +import time +import traceback +from functools import reduce +from pathlib import Path +from typing import Generator, Union + +import gast + +from modelscope import __version__ +from modelscope.fileio.file import LocalStorage +from modelscope.metainfo import (Datasets, Heads, Hooks, LR_Schedulers, + Metrics, Models, Optimizers, Pipelines, + Preprocessors, TaskModels, Trainers) +from modelscope.utils.constant import Fields, Tasks +from modelscope.utils.file_utils import get_default_cache_dir +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import default_group + +logger = get_logger() +storage = LocalStorage() +p = Path(__file__) + +# get the path of package 'modelscope' +SKIP_FUNCTION_SCANNING = True +MODELSCOPE_PATH = p.resolve().parents[1] +INDEXER_FILE_DIR = get_default_cache_dir() +REGISTER_MODULE = 'register_module' +IGNORED_PACKAGES = ['modelscope', '.'] +SCAN_SUB_FOLDERS = [ + 'models', 'metrics', 'pipelines', 'preprocessors', 'trainers', 'msdatasets' +] +INDEXER_FILE = 'ast_indexer' +DECORATOR_KEY = 'decorators' +EXPRESS_KEY = 'express' +FROM_IMPORT_KEY = 'from_imports' +IMPORT_KEY = 'imports' +FILE_NAME_KEY = 'filepath' +MODELSCOPE_PATH_KEY = 'modelscope_path' +VERSION_KEY = 'version' +MD5_KEY = 'md5' +INDEX_KEY = 'index' +FILES_MTIME_KEY = 'files_mtime' +REQUIREMENT_KEY = 'requirements' +MODULE_KEY = 'module' +CLASS_NAME = 'class_name' +GROUP_KEY = 'group_key' +MODULE_NAME = 'module_name' +MODULE_CLS = 'module_cls' +TEMPLATE_PATH = 'TEMPLATE_PATH' +TEMPLATE_FILE = 'ast_index_file.py' + + +class AstScanning(object): + def __init__(self) -> None: + self.result_import = dict() + self.result_from_import = dict() + self.result_decorator = [] + self.express = [] + + def _is_sub_node(self, node: object) -> bool: + return isinstance(node, + ast.AST) and not isinstance(node, ast.expr_context) + + def _is_leaf(self, node: ast.AST) -> bool: + for field in node._fields: + attr = getattr(node, field) + if self._is_sub_node(attr): + return False + elif isinstance(attr, (list, tuple)): + for val in attr: + if self._is_sub_node(val): + 
return False + else: + return True + + def _skip_function(self, node: ast.AST) -> bool: + if type(node).__name__ == 'FunctionDef' and SKIP_FUNCTION_SCANNING: + return True + else: + return False + + def _fields(self, n: ast.AST, show_offsets: bool = True) -> tuple: + if show_offsets: + return n._attributes + n._fields + else: + return n._fields + + def _leaf(self, node: ast.AST, show_offsets: bool = True) -> str: + output = dict() + if isinstance(node, ast.AST): + local_dict = dict() + for field in self._fields(node, show_offsets=show_offsets): + field_output = self._leaf(getattr(node, field), + show_offsets=show_offsets) + local_dict[field] = field_output + output[type(node).__name__] = local_dict + return output + else: + return node + + def _refresh(self): + self.result_import = dict() + self.result_from_import = dict() + self.result_decorator = [] + self.result_express = [] + + def scan_ast(self, node: Union[ast.AST, None, str]): + self._setup_global() + self.scan_import(node, indent=' ', show_offsets=False) + + def scan_import( + self, + node: Union[ast.AST, None, str], + indent: Union[str, int] = ' ', + show_offsets: bool = True, + _indent: int = 0, + parent_node_name: str = '', + ) -> tuple: + if node is None: + return node + elif self._is_leaf(node): + return self._leaf(node, show_offsets=show_offsets) + else: + + class state: + indent = _indent + + @contextlib.contextmanager + def indented() -> Generator[None, None, None]: + state.indent += 1 + yield + state.indent -= 1 + + def _scan_import(el: Union[ast.AST, None, str], + _indent: int = 0, + parent_node_name: str = '') -> str: + return self.scan_import(el, + indent=indent, + show_offsets=show_offsets, + _indent=_indent, + parent_node_name=parent_node_name) + + outputs = dict() + # add relative path expression + if type(node).__name__ == 'ImportFrom': + level = getattr(node, 'level') + if level >= 1: + path_level = ''.join(['.'] * level) + setattr(node, 'level', 0) + module_name = getattr(node, 'module') + if module_name is None: + setattr(node, 'module', path_level) + else: + setattr(node, 'module', path_level + module_name) + with indented(): + for field in self._fields(node, show_offsets=show_offsets): + attr = getattr(node, field) + if attr == []: + outputs[field] = [] + elif (isinstance(attr, list) and len(attr) == 1 + and isinstance(attr[0], ast.AST) + and self._skip_function(attr[0])): + continue + elif (isinstance(attr, list) and len(attr) == 1 + and isinstance(attr[0], ast.AST) + and self._is_leaf(attr[0])): + local_out = _scan_import(attr[0]) + outputs[field] = local_out + elif isinstance(attr, list): + el_dict = dict() + with indented(): + for el in attr: + local_out = _scan_import( + el, state.indent, + type(el).__name__) + name = type(el).__name__ + if (name == 'Import' or name == 'ImportFrom' + or parent_node_name == 'ImportFrom' + or parent_node_name == 'Import'): + if name not in el_dict: + el_dict[name] = [] + el_dict[name].append(local_out) + outputs[field] = el_dict + elif isinstance(attr, ast.AST): + output = _scan_import(attr, state.indent) + outputs[field] = output + else: + outputs[field] = attr + + if (type(node).__name__ == 'Import' + or type(node).__name__ == 'ImportFrom'): + if type(node).__name__ == 'ImportFrom': + if field == 'module': + self.result_from_import[ + outputs[field]] = dict() + if field == 'names': + if isinstance(outputs[field]['alias'], list): + item_name = [] + for item in outputs[field]['alias']: + local_name = item['alias']['name'] + item_name.append(local_name) + 
self.result_from_import[ + outputs['module']] = item_name + else: + local_name = outputs[field]['alias'][ + 'name'] + self.result_from_import[ + outputs['module']] = [local_name] + + if type(node).__name__ == 'Import': + final_dict = outputs[field]['alias'] + if isinstance(final_dict, list): + for item in final_dict: + self.result_import[ + item['alias']['name']] = item['alias'] + else: + self.result_import[outputs[field]['alias'] + ['name']] = final_dict + + if 'decorator_list' == field and attr != []: + for item in attr: + setattr(item, CLASS_NAME, node.name) + self.result_decorator.extend(attr) + + if attr != [] and type( + attr + ).__name__ == 'Call' and parent_node_name == 'Expr': + self.result_express.append(attr) + + return { + IMPORT_KEY: self.result_import, + FROM_IMPORT_KEY: self.result_from_import, + DECORATOR_KEY: self.result_decorator, + EXPRESS_KEY: self.result_express + } + + def _parse_decorator(self, node: ast.AST) -> tuple: + def _get_attribute_item(node: ast.AST) -> tuple: + value, id, attr = None, None, None + if type(node).__name__ == 'Attribute': + value = getattr(node, 'value') + id = getattr(value, 'id') + attr = getattr(node, 'attr') + if type(node).__name__ == 'Name': + id = getattr(node, 'id') + return id, attr + + def _get_args_name(nodes: list) -> list: + result = [] + for node in nodes: + if type(node).__name__ == 'Str': + result.append((node.s, None)) + elif type(node).__name__ == 'Constant': + result.append((node.value, None)) + else: + result.append(_get_attribute_item(node)) + return result + + def _get_keyword_name(nodes: ast.AST) -> list: + result = [] + for node in nodes: + if type(node).__name__ == 'keyword': + attribute_node = getattr(node, 'value') + if type(attribute_node).__name__ == 'Str': + result.append((getattr(node, + 'arg'), attribute_node.s, None)) + elif type(attribute_node).__name__ == 'Constant': + result.append( + (getattr(node, 'arg'), attribute_node.value, None)) + else: + result.append((getattr(node, 'arg'), ) + + _get_attribute_item(attribute_node)) + return result + + functions = _get_attribute_item(node.func) + args_list = _get_args_name(node.args) + keyword_list = _get_keyword_name(node.keywords) + return functions, args_list, keyword_list + + def _get_registry_value(self, key_item): + if key_item is None: + return None + if key_item == 'default_group': + return default_group + split_list = key_item.split('.') + # in the case, the key_item is raw data, not registered + if len(split_list) == 1: + return key_item + else: + return getattr(eval(split_list[0]), split_list[1]) + + def _registry_indexer(self, parsed_input: tuple, class_name: str) -> tuple: + """format registry information to a tuple indexer + + Return: + tuple: (MODELS, Tasks.text-classification, Models.structbert) + """ + functions, args_list, keyword_list = parsed_input + + # ignore decorators other than register_module + if REGISTER_MODULE != functions[1]: + return None + output = [functions[0]] + + if len(args_list) == 0 and len(keyword_list) == 0: + args_list.append(default_group) + if len(keyword_list) == 0 and len(args_list) == 1: + args_list.append(class_name) + + if len(keyword_list) > 0 and len(args_list) == 0: + remove_group_item = None + for item in keyword_list: + key, name, attr = item + if key == GROUP_KEY: + args_list.append((name, attr)) + remove_group_item = item + if remove_group_item is not None: + keyword_list.remove(remove_group_item) + + if len(args_list) == 0: + args_list.append(default_group) + + for item in keyword_list: + key, name, attr = item 
+ if key == MODULE_CLS: + class_name = name + else: + args_list.append((name, attr)) + + for item in args_list: + # the case empty input + if item is None: + output.append(None) + # the case (default_group) + elif item[1] is None: + output.append(item[0]) + elif isinstance(item, str): + output.append(item) + else: + output.append('.'.join(item)) + return (output[0], self._get_registry_value(output[1]), + self._get_registry_value(output[2])) + + def parse_decorators(self, nodes: list) -> list: + """parse the AST nodes of decorators object to registry indexer + + Args: + nodes (list): list of AST decorator nodes + + Returns: + list: list of registry indexer + """ + results = [] + for node in nodes: + if type(node).__name__ != 'Call': + continue + class_name = getattr(node, CLASS_NAME, None) + func = getattr(node, 'func') + + if getattr(func, 'attr', None) != REGISTER_MODULE: + continue + + parse_output = self._parse_decorator(node) + index = self._registry_indexer(parse_output, class_name) + if None is not index: + results.append(index) + return results + + def generate_ast(self, file): + self._refresh() + with open(file, 'r', encoding='utf8') as code: + data = code.readlines() + data = ''.join(data) + + node = gast.parse(data) + output = self.scan_import(node, indent=' ', show_offsets=False) + output[DECORATOR_KEY] = self.parse_decorators(output[DECORATOR_KEY]) + output[EXPRESS_KEY] = self.parse_decorators(output[EXPRESS_KEY]) + output[DECORATOR_KEY].extend(output[EXPRESS_KEY]) + return output + + +class FilesAstScanning(object): + def __init__(self) -> None: + self.astScaner = AstScanning() + self.file_dirs = [] + + def _parse_import_path(self, + import_package: str, + current_path: str = None) -> str: + """ + Args: + import_package (str): relative import or abs import + current_path (str): path/to/current/file + """ + if import_package.startswith(IGNORED_PACKAGES[0]): + return MODELSCOPE_PATH + '/' + '/'.join( + import_package.split('.')[1:]) + '.py' + elif import_package.startswith(IGNORED_PACKAGES[1]): + current_path_list = current_path.split('/') + import_package_list = import_package.split('.') + level = 0 + for index, item in enumerate(import_package_list): + if item != '': + level = index + break + + abs_path_list = current_path_list[0:-level] + abs_path_list.extend(import_package_list[index:]) + return '/' + '/'.join(abs_path_list) + '.py' + else: + return current_path + + def _traversal_import( + self, + import_abs_path, + ): + pass + + def parse_import(self, scan_result: dict) -> list: + """parse import and from import dicts to a third party package list + + Args: + scan_result (dict): including the import and from import result + + Returns: + list: a list of package ignored 'modelscope' and relative path import + """ + output = [] + output.extend(list(scan_result[IMPORT_KEY].keys())) + output.extend(list(scan_result[FROM_IMPORT_KEY].keys())) + + # get the package name + for index, item in enumerate(output): + if '' == item.split('.')[0]: + output[index] = '.' 
+ else: + output[index] = item.split('.')[0] + + ignored = set() + for item in output: + for ignored_package in IGNORED_PACKAGES: + if item.startswith(ignored_package): + ignored.add(item) + return list(set(output) - set(ignored)) + + def traversal_files(self, path, check_sub_dir): + self.file_dirs = [] + if check_sub_dir is None or len(check_sub_dir) == 0: + self._traversal_files(path) + + for item in check_sub_dir: + sub_dir = os.path.join(path, item) + if os.path.isdir(sub_dir): + self._traversal_files(sub_dir) + + def _traversal_files(self, path): + dir_list = os.scandir(path) + for item in dir_list: + if item.name.startswith('__'): + continue + if item.is_dir(): + self._traversal_files(item.path) + elif item.is_file() and item.name.endswith('.py'): + self.file_dirs.append(item.path) + + def _get_single_file_scan_result(self, file): + try: + output = self.astScaner.generate_ast(file) + except Exception as e: + detail = traceback.extract_tb(e.__traceback__) + raise Exception( + f'During ast indexing the file {file}, a related error excepted ' + f'in the file {detail[-1].filename} at line: ' + f'{detail[-1].lineno}: "{detail[-1].line}" with error msg: ' + f'"{type(e).__name__}: {e}", please double check the origin file {file} ' + f'to see whether the file is correctly edited.') + + import_list = self.parse_import(output) + return output[DECORATOR_KEY], import_list + + def _inverted_index(self, forward_index): + inverted_index = dict() + for index in forward_index: + for item in forward_index[index][DECORATOR_KEY]: + inverted_index[item] = { + FILE_NAME_KEY: index, + IMPORT_KEY: forward_index[index][IMPORT_KEY], + MODULE_KEY: forward_index[index][MODULE_KEY], + } + return inverted_index + + def _module_import(self, forward_index): + module_import = dict() + for index, value_dict in forward_index.items(): + module_import[value_dict[MODULE_KEY]] = value_dict[IMPORT_KEY] + return module_import + + def _ignore_useless_keys(self, inverted_index): + if ('OPTIMIZERS', 'default', 'name') in inverted_index: + del inverted_index[('OPTIMIZERS', 'default', 'name')] + if ('LR_SCHEDULER', 'default', 'name') in inverted_index: + del inverted_index[('LR_SCHEDULER', 'default', 'name')] + return inverted_index + + def get_files_scan_results(self, + target_file_list=None, + target_dir=MODELSCOPE_PATH, + target_folders=SCAN_SUB_FOLDERS): + """the entry method of the ast scan method + + Args: + target_file_list can override the dir and folders combine + target_dir (str, optional): the absolute path of the target directory to be scanned. Defaults to None. + target_folder (list, optional): the list of + sub-folders to be scanned in the target folder. + Defaults to SCAN_SUB_FOLDERS. 
+ + Returns: + dict: indexer of registry + """ + start = time.time() + if target_file_list is not None: + self.file_dirs = target_file_list + else: + self.traversal_files(target_dir, target_folders) + logger.info( + f'AST-Scanning the path "{target_dir}" with the following sub folders {target_folders}' + ) + + result = dict() + for file in self.file_dirs: + filepath = file[file.rfind('modelscope'):] + module_name = filepath.replace(osp.sep, '.').replace('.py', '') + decorator_list, import_list = self._get_single_file_scan_result( + file) + result[file] = { + DECORATOR_KEY: decorator_list, + IMPORT_KEY: import_list, + MODULE_KEY: module_name + } + inverted_index_with_results = self._inverted_index(result) + inverted_index_with_results = self._ignore_useless_keys( + inverted_index_with_results) + module_import = self._module_import(result) + index = { + INDEX_KEY: inverted_index_with_results, + REQUIREMENT_KEY: module_import + } + logger.info( + f'Scanning done! A number of {len(inverted_index_with_results)} ' + f'components indexed or updated! Time consumed {time.time()-start}s' + ) + return index + + def files_mtime_md5(self, + target_path=MODELSCOPE_PATH, + target_subfolder=SCAN_SUB_FOLDERS, + file_list=None): + self.file_dirs = [] + if file_list and isinstance(file_list, list): + self.file_dirs = file_list + else: + self.traversal_files(target_path, target_subfolder) + files_mtime = [] + files_mtime_dict = dict() + for item in self.file_dirs: + mtime = os.path.getmtime(item) + files_mtime.append(mtime) + files_mtime_dict[item] = mtime + result_str = reduce(lambda x, y: str(x) + str(y), files_mtime, '') + md5 = hashlib.md5(result_str.encode()) + return md5.hexdigest(), files_mtime_dict + + +file_scanner = FilesAstScanning() + + +def _save_index(index, file_path, file_list=None, with_template=False): + # convert tuple key to str key + index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()} + index[VERSION_KEY] = __version__ + index[MD5_KEY], index[FILES_MTIME_KEY] = file_scanner.files_mtime_md5( + file_list=file_list) + index[MODELSCOPE_PATH_KEY] = MODELSCOPE_PATH.as_posix() + json_index = json.dumps(index) + if with_template: + json_index = json_index.replace(MODELSCOPE_PATH.as_posix(), + TEMPLATE_PATH) + storage.write(json_index.encode(), file_path) + index[INDEX_KEY] = { + ast.literal_eval(k): v + for k, v in index[INDEX_KEY].items() + } + + +def _load_index(file_path, with_template=False): + bytes_index = storage.read(file_path) + if with_template: + bytes_index = bytes_index.decode().replace(TEMPLATE_PATH, + MODELSCOPE_PATH.as_posix()) + wrapped_index = json.loads(bytes_index) + # convert str key to tuple key + wrapped_index[INDEX_KEY] = { + ast.literal_eval(k): v + for k, v in wrapped_index[INDEX_KEY].items() + } + return wrapped_index + + +def _update_index(index, files_mtime): + # inplace update index + origin_files_mtime = index[FILES_MTIME_KEY] + new_files = list(set(files_mtime) - set(origin_files_mtime)) + removed_files = list(set(origin_files_mtime) - set(files_mtime)) + updated_files = [] + for file in origin_files_mtime: + if file not in removed_files and \ + (origin_files_mtime[file] != files_mtime[file]): + updated_files.append(file) + updated_files.extend(new_files) + + # remove deleted index + if len(removed_files) > 0: + remove_index_keys = [] + remove_requirement_keys = [] + for key in index[INDEX_KEY]: + if index[INDEX_KEY][key][FILE_NAME_KEY] in removed_files: + remove_index_keys.append(key) + remove_requirement_keys.append( + 
index[INDEX_KEY][key][MODULE_KEY]) + for key in remove_index_keys: + del index[INDEX_KEY][key] + for key in remove_requirement_keys: + if key in index[REQUIREMENT_KEY]: + del index[REQUIREMENT_KEY][key] + + # add new index + updated_index = file_scanner.get_files_scan_results(updated_files) + index[INDEX_KEY].update(updated_index[INDEX_KEY]) + index[REQUIREMENT_KEY].update(updated_index[REQUIREMENT_KEY]) + + +def load_index( + file_list=None, + force_rebuild=False, + indexer_file_dir=INDEXER_FILE_DIR, + indexer_file=INDEXER_FILE, +): + """get the index from scan results or cache + + Args: + file_list: load indexer only from the file lists if provided, default as None + force_rebuild: If set true, rebuild and load index, default as False, + indexer_file_dir: The dir where the indexer file saved, default as INDEXER_FILE_DIR + indexer_file: The indexer file name, default as INDEXER_FILE + Returns: + dict: the index information for all registered modules, including key: + index, requirements, files last modified time, modelscope home path, + version and md5, the detail is shown below example: { + 'index': { + ('MODELS', 'nlp', 'bert'):{ + 'filepath' : 'path/to/the/registered/model', 'imports': + ['os', 'torch', 'typing'] 'module': + 'modelscope.models.nlp.bert' + }, + ... + }, 'requirements': { + 'modelscope.models.nlp.bert': ['os', 'torch', 'typing'], + 'modelscope.models.nlp.structbert': ['os', 'torch', 'typing'], + ... + }, 'files_mtime' : { + '/User/Path/To/Your/Modelscope/modelscope/preprocessors/nlp/text_generation_preprocessor.py': + 16554565445, ... + },'version': '0.2.3', 'md5': '8616924970fe6bc119d1562832625612', + 'modelscope_path': '/User/Path/To/Your/Modelscope' + } + """ + # env variable override + cache_dir = os.getenv('MODELSCOPE_CACHE', indexer_file_dir) + index_file = os.getenv('MODELSCOPE_INDEX_FILE', indexer_file) + file_path = os.path.join(cache_dir, index_file) + logger.info(f'Loading ast index from {file_path}') + index = None + local_changed = False + if not force_rebuild and os.path.exists(file_path): + wrapped_index = _load_index(file_path) + md5, files_mtime = file_scanner.files_mtime_md5(file_list=file_list) + if (wrapped_index[VERSION_KEY] == __version__): + index = wrapped_index + if (wrapped_index[MD5_KEY] != md5): + local_changed = True + full_index_flag = False + + if index is None: + full_index_flag = True + elif index and local_changed and FILES_MTIME_KEY not in index: + full_index_flag = True + elif index and local_changed and MODELSCOPE_PATH_KEY not in index: + full_index_flag = True + elif index and local_changed and index[ + MODELSCOPE_PATH_KEY] != MODELSCOPE_PATH.as_posix(): + full_index_flag = True + + if full_index_flag: + if force_rebuild: + logger.info('Force rebuilding ast index from scanning every file!') + index = file_scanner.get_files_scan_results(file_list) + else: + logger.info( + f'No valid ast index found from {file_path}, generating ast index from prebuilt!' + ) + index = load_from_prebuilt() + if index is None: + index = file_scanner.get_files_scan_results(file_list) + _save_index(index, file_path, file_list) + elif local_changed and not full_index_flag: + logger.info( + 'Updating the files for the changes of local files, ' + 'first time updating will take longer time! Please wait till updating done!' + ) + _update_index(index, files_mtime) + _save_index(index, file_path, file_list) + + logger.info( + f'Loading done! 
Current index file version is {index[VERSION_KEY]}, ' + f'with md5 {index[MD5_KEY]} and a total number of ' + f'{len(index[INDEX_KEY])} components indexed') + return index + + +def load_from_prebuilt(file_path=None): + if file_path is None: + local_path = p.resolve().parents[0] + file_path = os.path.join(local_path, TEMPLATE_FILE) + if os.path.exists(file_path): + index = _load_index(file_path, with_template=True) + else: + index = None + return index + + +def generate_ast_template(file_path=None, force_rebuild=True): + index = load_index(force_rebuild=force_rebuild) + if file_path is None: + local_path = p.resolve().parents[0] + file_path = os.path.join(local_path, TEMPLATE_FILE) + _save_index(index, file_path, with_template=True) + if not os.path.exists(file_path): + raise Exception( + 'The index file is not create correctly, please double check') + return index + + +if __name__ == '__main__': + index = load_index(force_rebuild=True) + print(index) diff --git a/modelscope/utils/audio/__init__.py b/modelscope/utils/audio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py new file mode 100644 index 0000000..e17e37d --- /dev/null +++ b/modelscope/utils/audio/audio_utils.py @@ -0,0 +1,256 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import re +import struct +import sys +import tempfile +from typing import Union +from urllib.parse import urlparse + +import numpy as np + +from modelscope.fileio.file import HTTPStorage + +SEGMENT_LENGTH_TRAIN = 16000 +SUPPORT_AUDIO_TYPE_SETS = ('.flac', '.mp3', '.ogg', '.opus', '.wav', '.pcm') + + +class TtsTrainType(object): + TRAIN_TYPE_SAMBERT = 'train-type-sambert' + TRAIN_TYPE_BERT = 'train-type-bert' + TRAIN_TYPE_VOC = 'train-type-voc' + + +def to_segment(batch, segment_length=SEGMENT_LENGTH_TRAIN): + """ + Dataset mapping function to split one audio into segments. + It only works in batch mode. 
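+
+    For example (illustrative):
+
+        >>> batch = {'noisy': [{'array': np.zeros(40000)}],
+        ...          'clean': [{'array': np.zeros(40000)}]}
+        >>> out = to_segment(batch)   # segment_length defaults to 16000
+        >>> len(out['noisy'])         # two full 16000-sample segments; remainder dropped
+        2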
+ """ + noisy_arrays = [] + clean_arrays = [] + for x, y in zip(batch['noisy'], batch['clean']): + length = min(len(x['array']), len(y['array'])) + noisy = x['array'] + clean = y['array'] + for offset in range(segment_length, length + 1, segment_length): + noisy_arrays.append(noisy[offset - segment_length:offset]) + clean_arrays.append(clean[offset - segment_length:offset]) + return {'noisy': noisy_arrays, 'clean': clean_arrays} + + +def audio_norm(x): + rms = (x**2).mean()**0.5 + scalar = 10**(-25 / 20) / rms + x = x * scalar + pow_x = x**2 + avg_pow_x = pow_x.mean() + rmsx = pow_x[pow_x > avg_pow_x].mean()**0.5 + scalarx = 10**(-25 / 20) / rmsx + x = x * scalarx + return x + + +def update_conf(origin_config_file, new_config_file, conf_item: [str, str]): + def repl(matched): + key = matched.group(1) + if key in conf_item: + value = conf_item[key] + if not isinstance(value, str): + value = str(value) + return value + else: + return None + + with open(origin_config_file, encoding='utf-8') as f: + lines = f.readlines() + with open(new_config_file, 'w') as f: + for line in lines: + line = re.sub(r'\$\{(.*)\}', repl, line) + f.write(line) + + +def extract_pcm_from_wav(wav: bytes) -> bytes: + data = wav + sample_rate = None + if len(data) > 44: + frame_len = 44 + file_len = len(data) + try: + header_fields = {} + header_fields['ChunkID'] = str(data[0:4], 'UTF-8') + header_fields['Format'] = str(data[8:12], 'UTF-8') + header_fields['Subchunk1ID'] = str(data[12:16], 'UTF-8') + if header_fields['ChunkID'] == 'RIFF' and header_fields[ + 'Format'] == 'WAVE' and header_fields[ + 'Subchunk1ID'] == 'fmt ': + header_fields['SubChunk1Size'] = struct.unpack( + ' bytes: + dkind = data.dtype.kind + if not (dkind == 'i' or dkind == 'f' or # noqa W504 + (dkind == 'u' and data.dtype.itemsize == 1)): + raise ValueError(f'Unsupported data type {data.dtype}') + + header_data = bytearray() + header_data += b'RIFF' + header_data += b'\x00\x00\x00\x00' + header_data += b'WAVE' + header_data += b'fmt ' + if dkind == 'f': + format_tag = 0x0003 + else: + format_tag = 0x0001 + if data.ndim == 1: + channels = 1 + else: + channels = data.shape[1] + bit_depth = data.dtype.itemsize * 8 + bytes_per_second = fs * (bit_depth // 8) * channels + block_align = channels * (bit_depth // 8) + + fmt_chunk_data = struct.pack(' 0xFFFFFFFF: + raise ValueError('Data exceeds wave file size limit') + + header_data += b'data' + header_data += struct.pack('' or (data.dtype.byteorder == '=' + and sys.byteorder == 'big'): + data = data.byteswap() + header_data += data.ravel().view('b').data + size = len(header_data) + header_data[4:8] = struct.pack(' Union[bytes, str]: + sample_rate = None + result = urlparse(url) + if result.scheme is not None and len(result.scheme) > 0: + storage = HTTPStorage() + data = storage.read(url) + data, sample_rate = extract_pcm_from_wav(data) + else: + data = url + + return data, sample_rate + + +def generate_scp_from_url(url: str, key: str = None): + wav_scp_path = None + raw_inputs = None + # for local wav.scp inputs + if os.path.exists(url) and url.lower().endswith('.scp'): + wav_scp_path = url + return wav_scp_path, raw_inputs + # for local wav file inputs + if os.path.exists(url) and (url.lower().endswith(SUPPORT_AUDIO_TYPE_SETS)): + wav_scp_path = url + return wav_scp_path, raw_inputs + # for wav url, download bytes data + result = urlparse(url) + if result.scheme is not None and len(result.scheme) > 0: + storage = HTTPStorage() + # bytes + wav_scp_path = storage.read(url) + + return wav_scp_path, raw_inputs 
+ + return wav_scp_path, raw_inputs + + +def generate_text_from_url(url: str): + text_file_path = None + raw_inputs = None + # for text str input + if not os.path.exists(url) and not url.startswith('http'): + raw_inputs = url + return text_file_path, raw_inputs + + # for local txt inputs + if os.path.exists(url) and (url.lower().endswith('.txt') + or url.lower().endswith('.scp')): + text_file_path = url + return text_file_path, raw_inputs + # for url, download and generate txt + result = urlparse(url) + if result.scheme is not None and len(result.scheme) > 0: + storage = HTTPStorage() + data = storage.read(url) + work_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(work_dir): + os.makedirs(work_dir) + text_file_path = os.path.join(work_dir, os.path.basename(url)) + with open(text_file_path, 'wb') as fp: + fp.write(data) + return text_file_path, raw_inputs + + return text_file_path, raw_inputs + + +def generate_scp_for_sv(url: str, key: str = None): + wav_scp_path = None + wav_name = key if key is not None else os.path.basename(url) + # for local wav.scp inputs + if os.path.exists(url) and url.lower().endswith('.scp'): + wav_scp_path = url + return wav_scp_path + # for local wav file inputs + if os.path.exists(url) and (url.lower().endswith(SUPPORT_AUDIO_TYPE_SETS)): + wav_path = url + work_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(work_dir): + os.makedirs(work_dir) + wav_scp_path = os.path.join(work_dir, 'wav.scp') + with open(wav_scp_path, 'w') as ft: + scp_content = '\t'.join([wav_name, wav_path]) + '\n' + ft.writelines(scp_content) + return wav_scp_path + # for wav url, download and generate wav.scp + result = urlparse(url) + if result.scheme is not None and len(result.scheme) > 0: + storage = HTTPStorage() + wav_scp_path = storage.read(url) + return wav_scp_path + + return wav_scp_path + + +def generate_sv_scp_from_url(url: tuple): + if len(url) != 2: + raise Exception('Speaker Verification needs 2 input wav file!') + audio_scp1 = generate_scp_for_sv(url[0], key='test1') + audio_scp2 = generate_scp_for_sv(url[1], key='test1') + return audio_scp1, audio_scp2 diff --git a/modelscope/utils/audio/tts_exceptions.py b/modelscope/utils/audio/tts_exceptions.py new file mode 100644 index 0000000..5937cd1 --- /dev/null +++ b/modelscope/utils/audio/tts_exceptions.py @@ -0,0 +1,118 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" +Define TTS exceptions +""" + + +class TtsException(Exception): + """ + TTS exception class. + """ + pass + + +class TtsModelConfigurationException(TtsException): + """ + TTS model configuration exceptions. + """ + pass + + +class TtsModelNotExistsException(TtsException): + """ + TTS model not exists exception. + """ + + +class TtsVoiceNotExistsException(TtsException): + """ + TTS voice not exists exception. + """ + pass + + +class TtsFrontendException(TtsException): + """ + TTS frontend module level exceptions. + """ + pass + + +class TtsFrontendInitializeFailedException(TtsFrontendException): + """ + If tts frontend resource is invalid or not exist, this exception will be raised. + """ + pass + + +class TtsFrontendLanguageTypeInvalidException(TtsFrontendException): + """ + If language type is invalid, this exception will be raised. + """ + + +class TtsVocoderException(TtsException): + """ + Vocoder exception + """ + + +class TtsVocoderMelspecShapeMismatchException(TtsVocoderException): + """ + If vocoder's input melspec shape mismatch, this exception will be raised. 
+    """
+
+
+class TtsDataPreprocessorException(TtsException):
+    """
+    TTS data preprocessing exception
+    """
+
+
+class TtsDataPreprocessorDirNotExistsException(TtsDataPreprocessorException):
+    """
+    If any dir does not exist, this exception will be raised.
+    """
+
+
+class TtsDataPreprocessorAudioConfigNotExistsException(
+        TtsDataPreprocessorException):
+    """
+    If the audio config does not exist, this exception will be raised.
+    """
+
+
+class TtsTrainingException(TtsException):
+    """
+    TTS training exception
+    """
+
+
+class TtsTrainingHparamsInvalidException(TtsException):
+    """
+    If the training hparams are invalid, this exception will be raised.
+    """
+
+
+class TtsTrainingWorkDirNotExistsException(TtsTrainingException):
+    """
+    If the training work dir does not exist, this exception will be raised.
+    """
+
+
+class TtsTrainingCfgNotExistsException(TtsTrainingException):
+    """
+    If the training cfg does not exist, this exception will be raised.
+    """
+
+
+class TtsTrainingDatasetInvalidException(TtsTrainingException):
+    """
+    If the dataset is invalid, this exception will be raised.
+    """
+
+
+class TtsTrainingInvalidModelException(TtsTrainingException):
+    """
+    If the model is invalid or does not exist, this exception will be raised.
+    """
diff --git a/modelscope/utils/checkpoint.py b/modelscope/utils/checkpoint.py
new file mode 100644
index 0000000..2c0536f
--- /dev/null
+++ b/modelscope/utils/checkpoint.py
@@ -0,0 +1,586 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+import json
+import os
+import re
+import time
+from collections import OrderedDict
+from shutil import copytree, ignore_patterns, rmtree
+from typing import Callable, Dict, Optional, Union
+
+import torch
+from torch import nn
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import _LRScheduler
+
+from modelscope import __version__
+from modelscope.fileio import File, LocalStorage
+from modelscope.utils.config import JSONIteratorEncoder
+from modelscope.utils.constant import ConfigFields, ModelFile
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+storage = LocalStorage()
+
+
+def weights_to_cpu(state_dict):
+    """Copy a model state_dict to cpu.
+
+    Args:
+        state_dict (OrderedDict): Model weights on GPU.
+
+    Returns:
+        OrderedDict: Model weights on CPU.
+    """
+    state_dict_cpu = OrderedDict()
+    for key, val in state_dict.items():
+        state_dict_cpu[key] = val.cpu()
+    # Keep metadata in state_dict
+    state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict())
+    return state_dict_cpu
+
+
+def save_checkpoint(model: torch.nn.Module,
+                    filename: str,
+                    optimizer: Optional[Optimizer] = None,
+                    lr_scheduler: Optional[_LRScheduler] = None,
+                    meta: Optional[dict] = None,
+                    with_meta: bool = True) -> None:
+    """Save checkpoint to file.
+
+    The checkpoint will have 3 fields: ``meta``, ``state_dict`` and
+    ``optimizer``. By default, ``meta`` will contain version and time info.
+
+    Args:
+        model (Module): Module whose params are to be saved.
+        filename (str): Checkpoint filename.
+        optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
+        lr_scheduler(:obj:`_LRScheduler`, optional): LRScheduler to be saved.
+        meta (dict, optional): Metadata to be saved in checkpoint.
+ with_meta (bool, optional): + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(modelscope=__version__, time=time.asctime()) + + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + if with_meta: + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(model.state_dict()) + } + + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + # save lr_scheduler state dict in the checkpoint + if lr_scheduler is not None and hasattr(lr_scheduler, 'state_dict'): + checkpoint['lr_scheduler'] = lr_scheduler.state_dict() + else: + checkpoint = weights_to_cpu(model.state_dict()) + + with io.BytesIO() as f: + torch.save(checkpoint, f) + File.write(f.getvalue(), filename) + + +def load_checkpoint(filename, + model, + optimizer: Optimizer = None, + lr_scheduler: _LRScheduler = None): + if not os.path.exists(filename): + raise ValueError(f'Checkpoint file {filename} does not exist!') + checkpoint = torch.load(filename, map_location='cpu') + + if optimizer is not None: + if 'optimizer' in checkpoint: + if isinstance(optimizer, Optimizer): + optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(optimizer, dict): + optimizer_dict = checkpoint['optimizer'] + for key, optimizer_ins in optimizer.items(): + if key in optimizer_dict: + optimizer_ins.load_state_dict(optimizer_dict[key]) + else: + logger.warning( + f'The state dict of optimizer {key} cannot be found in checkpoint file: {filename}' + ) + else: + logger.warning( + f'The state dict of optimizer cannot be found in checkpoint file: {filename}' + ) + + if lr_scheduler is not None: + if 'lr_scheduler' in checkpoint: + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + else: + logger.warning( + f'The state dict of lr_scheduler cannot be found in checkpoint file: {filename}' + ) + + state_dict = checkpoint if 'state_dict' not in checkpoint else checkpoint[ + 'state_dict'] + model.load_state_dict(state_dict) + return checkpoint.get('meta', {}) + + +def load_task_model_checkpoint(model_to_load, + model_local_dir, + default_dtype=None, + load_state_fn=None, + **kwargs): + """ + Load model checkpoint file and feed the parameters into the model. + Args: + model_to_load: The model to be load + model_local_dir: The actual checkpoint dir on local disk. + default_dtype: Set the default float type by 'torch.set_default_dtype' + load_state_fn: An optional load_state_fn used to load state_dict into the model. 
+ + Returns: + + """ + def _add_head_prefix_to_state_dict(state_dicts, head_prefix, + expected_keys_without_head_prefix, + missing_keys): + new_state_dict = OrderedDict() + for name, module in state_dicts.items(): + if name in expected_keys_without_head_prefix: + name_with_head = '.'.join([head_prefix, name]) + new_state_dict[name_with_head] = module + expected_keys_without_head_prefix.remove(name) + missing_keys = list(set(missing_keys) - set([name_with_head])) + else: + new_state_dict[name] = module + + missing_head_keys = [] + if len(expected_keys_without_head_prefix) > 0: + missing_head_keys = expected_keys_without_head_prefix.copy() + return new_state_dict, missing_head_keys, missing_keys + + def _find_mismatched_keys( + state_dicts, + model_state_dict, + loaded_keys, + prefix, + add_prefix_to_model, + remove_prefix_from_model, + ignore_mismatched_sizes, + ): + mismatched_key = [] + if ignore_mismatched_sizes: + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + if remove_prefix_from_model: + # The model key starts with `prefix` but `checkpoint_key` doesn't, so we add it. + model_key = f'{prefix}.{checkpoint_key}' + elif add_prefix_to_model: + # The model key doesn't start with `prefix` but `checkpoint_key` does, so we remove it. + model_key = '.'.join(checkpoint_key.split('.')[1:]) + + if model_key in model_state_dict: + model_shape = model_state_dict[model_key].shape + checkpoint_shape = state_dicts[checkpoint_key].shape + if checkpoint_shape != model_shape: + mismatched_key.append( + (checkpoint_key, state_dicts[checkpoint_key].shape, + model_state_dict[model_key].shape)) + del state_dicts[checkpoint_key] + return mismatched_key + + def _load_state_dict_into_model( + model, + state_dict, + start_prefix, + head_prefix_keys, + load_state_fn=None, + ): + # Convert old format to new format if needed from a PyTorch state_dict + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + error_msgs = [] + + if load_state_fn is not None: + load_state_fn(model, + state_dict, + prefix=start_prefix, + head_prefix_keys=head_prefix_keys, + local_metadata=None, + error_msgs=error_msgs) + else: + + def load(module: nn.Module, prefix=''): + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + args = (state_dict, prefix, local_metadata, True, [], [], + error_msgs) + module._load_from_state_dict(*args) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix=start_prefix) + + return error_msgs + + def _load_checkpoint( + model, + state_dict, + load_state_fn, + ignore_mismatched_sizes, + _fast_init, + ): + # Retrieve missing & unexpected_keys + model_state_dict = model.state_dict() + expected_keys = list(model_state_dict.keys()) + keys_from_pretrained = list(state_dict.keys()) + + prefix = model.base_model_prefix + + # during loading stage, base model prefix is complicated, should consider remove or add + if len(prefix) > 0: + # nlp: encoder, decoder + pretrained_has_prefix_module 
= any( + s.startswith(prefix) for s in keys_from_pretrained) + model_expects_prefix_module = any( + s.startswith(prefix) for s in expected_keys) + else: + # nlp:encoder-decoder, cv:backbone-head, + pretrained_has_prefix_module = False + model_expects_prefix_module = False + + remove_prefix_from_model = not pretrained_has_prefix_module and model_expects_prefix_module + add_prefix_to_model = pretrained_has_prefix_module and not model_expects_prefix_module + + if remove_prefix_from_model: + expected_keys_not_base_model_prefixed = [ + s for s in expected_keys if not s.startswith(prefix) + ] + expected_keys = [ + '.'.join(s.split('.')[1:]) if s.startswith(prefix) else s + for s in expected_keys + ] + elif add_prefix_to_model: + # backbone only + expected_keys = ['.'.join([prefix, s]) for s in expected_keys] + expected_keys_not_base_model_prefixed = [] + + missing_keys = list(set(expected_keys) - set(keys_from_pretrained)) + unexpected_keys = list(set(keys_from_pretrained) - set(expected_keys)) + + # during loading stage head prefix is simple, add or not add + prefix_heads = model.head_prefix + expected_head_keys_without_head_prefix = [] + missing_head_keys = [] + unexpected_head_keys = [] + pretrained_has_prefix_head = dict() + head_prefix_keys = dict() + + # only for case of head mismatched with state-dict + if len(prefix_heads) > 0 and len(unexpected_keys) > 0: + if isinstance(prefix_heads, str): + prefix_heads = [prefix_heads] + + # to double-check if head matched with state-dict + for prefix_head in prefix_heads: + pretrained_has_prefix_head[prefix_head] = any( + s.startswith(prefix_head) for s in keys_from_pretrained) + + for prefix_head in prefix_heads: + expected_keys_without_head_prefix = [ + '.'.join(s.split('.')[1:]) for s in expected_keys + if s.startswith(prefix_head) + ] + expected_head_keys_without_head_prefix.extend( + expected_keys_without_head_prefix) + head_prefix_keys[ + prefix_head] = expected_keys_without_head_prefix + unexpected_head_keys = list( + set(unexpected_keys) - + set(expected_head_keys_without_head_prefix)) + unexpected_keys = list( + set(unexpected_keys) - + set(expected_head_keys_without_head_prefix)) + + _keys_to_ignore_on_load_missing = kwargs.pop( + '_keys_to_ignore_on_load_missing', None) + _keys_to_ignore_on_load_unexpected = kwargs.pop( + '_keys_to_ignore_on_load_unexpected', None) + # Some models may have keys that are not in the state by design, removing them before needlessly warning + # the user. + if _keys_to_ignore_on_load_missing is not None: + for pat in _keys_to_ignore_on_load_missing: + missing_keys = [ + k for k in missing_keys if re.search(pat, k) is None + ] + + if _keys_to_ignore_on_load_unexpected is not None: + for pat in _keys_to_ignore_on_load_unexpected: + unexpected_keys = [ + k for k in unexpected_keys if re.search(pat, k) is None + ] + + # retrieve uninitialized modules and initialize before maybe overriding that with the pretrained weights. 
+ if _fast_init: + uninitialized_modules = retrieve_modules_from_names( + model, + missing_keys, + prefix=prefix, + add_prefix=add_prefix_to_model, + remove_prefix=remove_prefix_from_model) + for module in uninitialized_modules: + model._init_weights(module) + + # Make sure we are able to load head correctly by revise state-dict + missing_head_keys_by_head = dict() + if len(head_prefix_keys) > 0: + for head_prefix in head_prefix_keys: + if not pretrained_has_prefix_head[head_prefix]: + state_dict, missing_head_keys, missing_keys = _add_head_prefix_to_state_dict( + state_dict, head_prefix, head_prefix_keys[head_prefix], + missing_keys) + missing_head_keys_by_head[head_prefix] = missing_head_keys + + # Make sure we are able to load base models as well as derived models (with heads) + start_prefix = '' + model_to_load = model + heads_to_load = dict() + if len(model.base_model_prefix) > 0 and not hasattr( + model, + model.base_model_prefix) and pretrained_has_prefix_module: + start_prefix = model.base_model_prefix + '.' + if len(model.base_model_prefix) > 0 and hasattr( + model, + model.base_model_prefix) and not pretrained_has_prefix_module: + model_to_load = getattr(model, model.base_model_prefix) + for head_prefix in prefix_heads: + heads_to_load[head_prefix] = getattr(model, head_prefix) + if any(key in expected_keys_not_base_model_prefixed + for key in keys_from_pretrained): + raise ValueError( + 'The state dictionary of the model you are trying to load is corrupted. Are you sure it was ' + 'properly saved?') + + # Whole checkpoint + mismatched_keys = _find_mismatched_keys( + state_dict, + model_state_dict, + keys_from_pretrained, + prefix, + add_prefix_to_model, + remove_prefix_from_model, + ignore_mismatched_sizes, + ) + error_msgs = _load_state_dict_into_model(model_to_load, state_dict, + start_prefix, load_state_fn) + + if len(heads_to_load) > 0: + for head in heads_to_load: + local_error_msgs = _load_state_dict_into_model( + heads_to_load[head], state_dict, head + '.', load_state_fn) + error_msgs.extend(local_error_msgs) + + if len(error_msgs) > 0: + error_msg = '\n\t'.join(error_msgs) + raise RuntimeError( + f'Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}' + ) + + if len(unexpected_keys) > 0: + logger.warning( + f'Some weights of the model checkpoint were not used when' + f' initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are' + f' initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or' + ' with another architecture (e.g. initializing a BertForTokenClassification model from a' + ' BertForPreTraining model).\n- This IS NOT expected if you are initializing' + f' {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical' + ' (initializing a BertForTokenClassification model from a BertForTokenClassification model).' + ) + elif len(unexpected_head_keys) > 0: + logger.warning( + f'Some weights of the model checkpoint were not used when' + f' initializing {model.__class__.__name__}: {unexpected_head_keys}\n- This IS Not expected if you are' + f' initializing {model.__class__.__name__} from the checkpoint of a model with a same task while the' + ' structure is different (e.g. 
initializing a BertForTokenClassification model from a' + ' BertForTokenClassification model).') + else: + logger.info( + f'All model checkpoint weights were used when initializing {model.__class__.__name__}.\n' + ) + if len(missing_keys) > 0: + logger.warning( + f'Some weights of {model.__class__.__name__} were not initialized from the model checkpoint' + f' and are newly initialized: {missing_keys}\nYou should probably' + ' TRAIN this model on a down-stream task to be able to use it for predictions and inference.' + ) + elif len(mismatched_keys) == 0: + logger.info( + f'All the weights of {model.__class__.__name__} were initialized from the model checkpoint ' + f'If your task is similar to the task the model of the checkpoint' + f' was trained on, you can already use {model.__class__.__name__} for predictions without further' + ' training.') + if len(mismatched_keys) > 0: + mismatched_warning = '\n'.join([ + f'- {key}: found shape {shape1} in the checkpoint and {shape2} in the model instantiated' + for key, shape1, shape2 in mismatched_keys + ]) + logger.warning( + f'Some weights of {model.__class__.__name__} were not initialized from the model checkpoint' + f' and are newly initialized because the shapes did not' + f' match:\n{mismatched_warning}\nYou should probably TRAIN this model on a down-stream task to be able' + ' to use it for predictions and inference.') + + return missing_keys, unexpected_keys, mismatched_keys, error_msgs + + def retrieve_modules_from_names(model, + names, + prefix=None, + add_prefix=False, + remove_prefix=False): + module_keys = set(['.'.join(key.split('.')[:-1]) for key in names]) + + # torch.nn.ParameterList is a special case where two parameter keywords + # are appended to the module name, *e.g.* bert.special_embeddings.0 + module_keys = module_keys.union( + set([ + '.'.join(key.split('.')[:-2]) for key in names + if key[-1].isdigit() + ])) + + retrieved_modules = [] + # retrieve all modules that has at least one missing weight name + for name, module in model.named_modules(): + if remove_prefix: + name = '.'.join( + name.split('.')[1:]) if name.startswith(prefix) else name + elif add_prefix: + name = '.'.join([prefix, name]) if len(name) > 0 else prefix + + if name in module_keys: + retrieved_modules.append(module) + + return retrieved_modules + + # TODO Sharded ckpt + ckpt_file = os.path.join(model_local_dir, ModelFile.TORCH_MODEL_BIN_FILE) + state_dict = torch.load(ckpt_file, map_location='cpu') + if default_dtype is not None: + torch.set_default_dtype(default_dtype) + + missing_keys, unexpected_keys, mismatched_keys, error_msgs = _load_checkpoint( + model_to_load, + state_dict, + load_state_fn=load_state_fn, + ignore_mismatched_sizes=True, + _fast_init=True, + ) + + return { + 'model': model_to_load, + 'missing_keys': missing_keys, + 'unexpected_keys': unexpected_keys, + 'mismatched_keys': mismatched_keys, + 'error_msgs': error_msgs, + } + + +def save_configuration(target_folder, config: Dict): + from modelscope.utils.config import Config + if isinstance(config, Config): + config = config.to_dict() + if ConfigFields.pipeline not in config: + config[ConfigFields.pipeline] = {'type': config[ConfigFields.task]} + cfg_str = json.dumps(config, indent=4, cls=JSONIteratorEncoder) + config_file = os.path.join(target_folder, ModelFile.CONFIGURATION) + storage.write(cfg_str.encode(), config_file) + + +def save_pretrained(model, + target_folder: Union[str, os.PathLike], + save_checkpoint_name: str = None, + save_function: Callable = None, + **kwargs): + """save 
the pretrained model, its configuration and other related files to a directory, so that it can be re-loaded + + Args: + model (Model): Model whose params are to be saved. + + target_folder (Union[str, os.PathLike]): + Directory to which to save. Will be created if it doesn't exist. + + save_checkpoint_name (str): + The checkpoint name to be saved in the target_folder + + save_function (Callable): + The function to use to save the state dictionary. + """ + + if save_function is None or not isinstance(save_function, Callable): + raise Exception('A valid save function must be passed in') + + if target_folder is None or os.path.isfile(target_folder): + raise ValueError( + f'Provided path ({target_folder}) should be a directory, not a file' + ) + + if save_checkpoint_name is None: + raise Exception( + 'At least pass in one checkpoint name for saving method') + + # Clean the folder from a previous save + if os.path.exists(target_folder): + rmtree(target_folder) + + # Single ckpt path, sharded ckpt logic will be added later + output_ckpt_path = os.path.join(target_folder, save_checkpoint_name) + + # Save the files to be copied to the save directory, ignore the original ckpts and configuration + origin_file_to_be_ignored = [save_checkpoint_name] + ignore_file_set = set(origin_file_to_be_ignored) + ignore_file_set.add(ModelFile.CONFIGURATION) + ignore_file_set.add('.*') + if hasattr(model, 'model_dir') and model.model_dir is not None: + copytree(model.model_dir, + target_folder, + ignore=ignore_patterns(*ignore_file_set)) + + # Save the ckpt to the save directory + try: + save_function(model, output_ckpt_path, **kwargs) + except Exception as e: + raise Exception( + f'During saving checkpoints, the error of "{type(e).__name__} ' + f'with msg {e} thrown') diff --git a/modelscope/utils/chinese_utils.py b/modelscope/utils/chinese_utils.py new file mode 100644 index 0000000..86cf91a --- /dev/null +++ b/modelscope/utils/chinese_utils.py @@ -0,0 +1,82 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
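+#
+# Helpers for Chinese text handling: removing the spaces a decoder leaves
+# between consecutive CJK characters, re-inserting per-character spacing,
+# and simple number/punctuation normalization.
+#
+# Rough usage sketch (illustrative only):
+#   remove_space_between_chinese_chars('今天 天气 不错')  # -> '今天天气不错'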
+ +import re +import string + +from zhconv import convert + +CHINESE_PUNCTUATION = '"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、\u3000、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·!?。。' +ENGLISH_PUNCTUATION = string.punctuation + + +def remove_space_between_chinese_chars(decoded_str: str): + old_word_list = decoded_str.split(' ') + new_word_list = [] + start = -1 + for i, word in enumerate(old_word_list): + if _is_chinese_str(word): + if start == -1: + start = i + else: + if start != -1: + new_word_list.append(''.join(old_word_list[start:i])) + start = -1 + new_word_list.append(word) + if start != -1: + new_word_list.append(''.join(old_word_list[start:])) + return ' '.join(new_word_list).strip() + + +# add space for each chinese char +def rebuild_chinese_str(string: str): + return ' '.join(''.join([ + f' {char} ' + if _is_chinese_char(char) or char in CHINESE_PUNCTUATION else char + for char in string + ]).split()) + + +def _is_chinese_str(string: str) -> bool: + return all( + _is_chinese_char(cp) or cp in CHINESE_PUNCTUATION + or cp in ENGLISH_PUNCTUATION or cp for cp in string) + + +def _is_chinese_char(cp: str) -> bool: + """Checks whether CP is the codepoint of a CJK character.""" + cp = ord(cp) + if ((cp >= 0x4E00 and cp <= 0x9FFF) or (cp >= 0x3400 and cp <= 0x4DBF) + or (cp >= 0x20000 and cp <= 0x2A6DF) + or (cp >= 0x2A700 and cp <= 0x2B73F) + or (cp >= 0x2B740 and cp <= 0x2B81F) + or (cp >= 0x2B820 and cp <= 0x2CEAF) + or (cp >= 0xF900 and cp <= 0xFAFF) + or (cp >= 0x2F800 and cp <= 0x2FA1F)): + return True + + return False + + +def normalize_chinese_number(text): + chinese_number = ['零', '一', '二', '三', '四', '五', '六', '七', '八', '九'] + new_text = '' + for x in text: + if x in '0123456789': + x = chinese_number[0] + new_text += x + new_text = convert(new_text, 'zh-hans') + return new_text + + +def pre_chinese(text, max_words): + + text = text.lower().replace(CHINESE_PUNCTUATION, + ' ').replace(ENGLISH_PUNCTUATION, ' ') + text = re.sub( + r'\s{2,}', + ' ', + text, + ) + text = text.rstrip('\n') + text = text.strip(' ')[:max_words] + return text diff --git a/modelscope/utils/config.py b/modelscope/utils/config.py new file mode 100644 index 0000000..e0e6b6b --- /dev/null +++ b/modelscope/utils/config.py @@ -0,0 +1,695 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+# Major implementation is borrowed and modified from +# https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py + +import copy +import json +import os +import os.path as osp +import platform +import shutil +import sys +import tempfile +import types +from pathlib import Path +from types import FunctionType +from typing import Dict, Union + +import addict +from yapf.yapflib.yapf_api import FormatCode + +from modelscope.utils.constant import ConfigFields, ModelFile +from modelscope.utils.import_utils import import_modules_from_file +from modelscope.utils.logger import get_logger + +logger = get_logger() + +BASE_KEY = '_base_' +DELETE_KEY = '_delete_' +DEPRECATION_KEY = '_deprecation_' +RESERVED_KEYS = ['filename', 'text', 'pretty_text'] + + +class ConfigDict(addict.Dict): + """ Dict which support get value through getattr + + Examples: + >>> cdict = ConfigDict({'a':1232}) + >>> print(cdict.a) + >>> # 1232 + """ + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError(f"'{self.__class__.__name__}' object has no " + f"attribute '{name}'") + except Exception as e: + ex = e + else: + return value + raise ex + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. + + Example: + >>> cfg = Config(dict(a=1, b=dict(c=[1,2,3], d='dd'))) + >>> cfg.a + 1 + >>> cfg.b + {'c': [1, 2, 3], 'd': 'dd'} + >>> cfg.b.d + 'dd' + >>> cfg = Config.from_file('configs/examples/configuration.json') + >>> cfg.filename + 'configs/examples/configuration.json' + >>> cfg.b + {'c': [1, 2, 3], 'd': 'dd'} + >>> cfg = Config.from_file('configs/examples/configuration.py') + >>> cfg.filename + "configs/examples/configuration.py" + >>> cfg = Config.from_file('configs/examples/configuration.yaml') + >>> cfg.filename + "configs/examples/configuration.yaml" + """ + @staticmethod + def _file2dict(filename): + filename = osp.abspath(osp.expanduser(filename)) + if not osp.exists(filename): + raise ValueError(f'File does not exists {filename}') + fileExtname = osp.splitext(filename)[1] + if fileExtname not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + + with tempfile.TemporaryDirectory() as tmp_cfg_dir: + tmp_cfg_file = tempfile.NamedTemporaryFile(dir=tmp_cfg_dir, + suffix=fileExtname) + if platform.system() == 'Windows': + tmp_cfg_file.close() + tmp_cfg_name = osp.basename(tmp_cfg_file.name) + shutil.copyfile(filename, tmp_cfg_file.name) + + if filename.endswith('.py'): + module_nanme, mod = import_modules_from_file( + osp.join(tmp_cfg_dir, tmp_cfg_name)) + cfg_dict = {} + for name, value in mod.__dict__.items(): + if not name.startswith('__') and \ + not isinstance(value, types.ModuleType) and \ + not isinstance(value, types.FunctionType): + cfg_dict[name] = value + + # delete imported module + del sys.modules[module_nanme] + elif filename.endswith(('.yml', '.yaml', '.json')): + from modelscope.fileio import load + cfg_dict = load(tmp_cfg_file.name) + # close temp file + tmp_cfg_file.close() + + cfg_text = filename + '\n' + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + return cfg_dict, cfg_text + + @staticmethod + def from_file(filename): + if isinstance(filename, Path): + 
filename = str(filename) + cfg_dict, cfg_text = Config._file2dict(filename) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def from_string(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + :obj:`Config`: Config obj. + """ + if file_format not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + if file_format != '.py' and 'dict(' in cfg_str: + # check if users specify a wrong suffix for python + logger.warning( + 'Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile('w', + encoding='utf-8', + suffix=file_format, + delete=False) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.from_file(temp_file.name) + os.remove(temp_file.name) + return cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError('cfg_dict must be a dict, but ' + f'got {type(cfg_dict)}') + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f'{key} is reserved for config file') + + if isinstance(filename, Path): + filename = str(filename) + + super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) + super(Config, self).__setattr__('_filename', filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, 'r', encoding='utf-8') as f: + text = f.read() + else: + text = '' + super(Config, self).__setattr__('_text', text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split('\n') + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * ' ') + line for line in s] + s = '\n'.join(s) + s = first + '\n' + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = '[\n' + v_str += '\n'.join( + f'dict({_indent(_format_dict(v_), indent)}),' + for v_ in v).rstrip(',') + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + ']' + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= \ + (not str(key_name).isidentifier()) + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = '' + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += '{' + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = '' if outest_level or is_last else ',' + if isinstance(v, dict): + v_str = '\n' + _format_dict(v) + if 
use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: dict({v_str}' + else: + attr_str = f'{str(k)}=dict({v_str}' + attr_str = _indent(attr_str, indent) + ')' + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += '\n'.join(s) + if use_mapping: + r += '}' + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict(based_on_style='pep8', + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __copy__(self): + cls = self.__class__ + other = cls.__new__(cls) + other.__dict__.update(self.__dict__) + + return other + + def __deepcopy__(self, memo): + cls = self.__class__ + other = cls.__new__(cls) + memo[id(self)] = other + + for key, value in self.__dict__.items(): + super(Config, other).__setattr__(key, copy.deepcopy(value, memo)) + + return other + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__('_cfg_dict', _cfg_dict) + super(Config, self).__setattr__('_filename', _filename) + super(Config, self).__setattr__('_text', _text) + + def safe_get(self, key_chain: str, default=None, type_field='type'): + """Get a value with a key-chain in str format, if key does not exist, the default value will be returned. + + This method is safe to call, and will not edit any value. + + Args: + key_chain: The input key chain, for example: 'train.hooks[0].type' + default: The default value returned when any key does not exist, default None. + type_field: Get an object from a list or tuple for example by 'train.hooks.CheckPointHook', in which + 'hooks' is a list, and 'CheckPointHook' is a value of the content of key `type_field`. + If there are multiple matched objects, the first element will be returned. + Returns: + The value, or the default value. + """ + try: + keys = key_chain.split('.') + _cfg_dict = self._cfg_dict + for key in keys: + val = None + if '[' in key: + key, val = key.split('[') + val, _ = val.split(']') + + if isinstance(_cfg_dict, (list, tuple)): + assert type_field is not None, 'Getting object without an index from a list or tuple ' \ + 'needs an valid `type_field` param.' 
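+                    # the current node is a list/tuple: match `key` against each
+                    # element's `type_field` value, e.g. 'train.hooks.CheckPointHook'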
+ _sub_cfg_dict = list( + filter(lambda sub: sub[type_field] == key, _cfg_dict)) + _cfg_dict = _sub_cfg_dict[0] + else: + _cfg_dict = _cfg_dict[key] + if val is not None: + _cfg_dict = _cfg_dict[int(val)] + return _cfg_dict + except Exception as e: + logger.debug( + f'Key not valid in Config: {key_chain}, return the default value: {default}' + ) + logger.debug(e) + return default + + def dump(self, file: str = None): + """Dumps config into a file or returns a string representation of the + config. + + If a file argument is given, saves the config to that file using the + format defined by the file argument extension. + + Otherwise, returns a string representing the config. The formatting of + this returned string is defined by the extension of `self.filename`. If + `self.filename` is not defined, returns a string representation of a + dict (lowercased and using ' for strings). + + Examples: + >>> cfg_dict = dict(item1=[1, 2], item2=dict(a=0), + ... item3=True, item4='test') + >>> cfg = Config(cfg_dict=cfg_dict) + >>> dump_file = "a.py" + >>> cfg.dump(dump_file) + + Args: + file (str, optional): Path of the output file where the config + will be dumped. Defaults to None. + """ + from modelscope.fileio import dump + cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() + if file is None: + if self.filename is None or self.filename.endswith('.py'): + return self.pretty_text + else: + file_format = self.filename.split('.')[-1] + return dump(cfg_dict, file_format=file_format) + elif file.endswith('.py'): + with open(file, 'w', encoding='utf-8') as f: + f.write(self.pretty_text) + else: + file_format = file.split('.')[-1] + return dump(cfg_dict, file=file, file_format=file_format) + + def merge_from_dict(self, options, allow_list_keys=True, force=True): + """Merge dict into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'model.backbone.depth': 50, + ... 'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... model=dict(backbone=dict(type='ResNet', depth=50, with_cp=True))) + + >>> # Merge list element for replace target index + >>> cfg = Config(dict(pipeline=[ + ... dict(type='Resize'), dict(type='RandomDistortion')])) + >>> options = dict(pipeline={'0': dict(type='MyResize')}) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='MyResize'), dict(type='RandomDistortion')]) + + >>> # Merge list element for replace args and add to list, only support list of type dict with key ``type``, + >>> # if you add new list element, the list does not guarantee the order, + >>> # it is only suitable for the case where the order of the list is not concerned. + >>> cfg = Config(dict(pipeline=[ + ... dict(type='Resize', size=224), dict(type='RandomDistortion')])) + >>> options = dict(pipeline=[dict(type='Resize', size=256), dict(type='RandomFlip')]) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='Resize', size=256), dict(type='RandomDistortion'), dict(type='RandomFlip')]) + + >>> # force usage + >>> options = {'model.backbone.depth': 18, + ... 
'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet', depth=50)))) + >>> cfg.merge_from_dict(options, force=False) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... model=dict(backbone=dict(type='ResNet', depth=50, with_cp=True))) + + Args: + options (dict): dict of configs to merge from. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in ``options`` and will replace the element of the + corresponding index in the config if the config is a list. + Or you can directly replace args for list or add new list element, + only support list of type dict with key ``type``, + but if you add new list element, the list does not guarantee the order, + It is only suitable for the case where the order of the list is not concerned. + Default: True. + force (bool): If True, existing key-value will be replaced by new given. + If False, existing key-value will not be updated. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split('.') + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + super(Config, self).__setattr__( + '_cfg_dict', + Config._merge_a_into_b(option_cfg_dict, + cfg_dict, + allow_list_keys=allow_list_keys, + force=force)) + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False, force=True): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + force (bool): If True, existing key-value will be replaced by new given. + If False, existing key-value will not be updated. + + Returns: + dict: The modified dict of ``b`` using ``a``. + + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + + # value of a and b are both list, only support list of type dict with key ``type``, + # You can directly replace args for list or add new list element, + # but if you add new list element, the list does not guarantee the order, + # it is only suitable for the case where the order of the list is not concerned. + >>> Config._merge_a_into_b( + ... {'k': [dict(a=2), dict(c=3)]}, {'k': [dict(a=1), dict(b=2)]}, True) + {'k': [dict(a=2), dict(b=2), dict(c=3)]} + + # force is False + >>> Config._merge_a_into_b( + ... 
dict(obj=dict(a=2, b=2)), dict(obj=dict(a=1))), True, force=False) + {'obj': {'a': 1, b=2}} + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f'Index {k} exceeds the length of list {b}') + b[k] = Config._merge_a_into_b(v, + b[k], + allow_list_keys, + force=force) + elif allow_list_keys and isinstance(v, list) and k in b: + if not isinstance(b[k], list): + raise ValueError( + f'type mismatch {type(v)} and {type(b[k])} between a and b for key {k}' + ) + _is_dict_with_type = True + for list_i in b[k] + v: + if not isinstance(list_i, dict) or 'type' not in list_i: + if k not in b or force: + b[k] = v + _is_dict_with_type = False + if _is_dict_with_type: + res_list = [] + added_index_bk, added_index_v = [], [] + for i, b_li in enumerate(b[k]): + for j, a_lj in enumerate(v): + if a_lj['type'] == b_li['type']: + res_list.append( + Config._merge_a_into_b(a_lj, + b_li, + allow_list_keys, + force=force)) + added_index_v.append(j) + added_index_bk.append(i) + break + rest_bk = [ + b[k][i] for i in range(len(b[k])) + if i not in added_index_bk + ] + rest_v = [ + v[i] for i in range(len(v)) if i not in added_index_v + ] + rest = rest_bk + rest_v + res_list += [ + Config._merge_a_into_b(rest[i], {}, + allow_list_keys, + force=force) + for i in range(len(rest)) + ] + b[k] = res_list + elif isinstance(v, + dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f'{k}={v} in child config cannot inherit from base ' + f'because {k} is a dict in the child config but is of ' + f'type {type(b[k])} in base config. You may set ' + f'`{DELETE_KEY}=True` to ignore the base config') + b[k] = Config._merge_a_into_b(v, + b[k], + allow_list_keys, + force=force) + else: + if k not in b or force: + b[k] = v + return b + + def to_dict(self) -> Dict: + """ Convert Config object to python dict + """ + return self._cfg_dict.to_dict() + + def to_args(self, parse_fn, use_hyphen=True): + """ Convert config obj to args using parse_fn + + Args: + parse_fn: a function object, which takes args as input, + such as ['--foo', 'FOO'] and return parsed args, an + example is given as follows + including literal blocks:: + def parse_fn(args): + parser = argparse.ArgumentParser(prog='PROG') + parser.add_argument('-x') + parser.add_argument('--foo') + return parser.parse_args(args) + use_hyphen (bool, optional): if set true, hyphen in keyname + will be converted to underscore + Return: + args: arg object parsed by argparse.ArgumentParser + """ + args = [] + for k, v in self._cfg_dict.items(): + arg_name = f'--{k}' + if use_hyphen: + arg_name = arg_name.replace('_', '-') + if isinstance(v, bool) and v: + args.append(arg_name) + elif isinstance(v, (int, str, float)): + args.append(arg_name) + args.append(str(v)) + elif isinstance(v, list): + args.append(arg_name) + assert isinstance(v, (int, str, float, bool)), 'Element type in list ' \ + f'is expected to be either int,str,float, but got type {v[0]}' + args.append(str(v)) + else: + raise ValueError( + 'type in config file which supported to be ' + 'converted to args should be either bool, ' + f'int, str, float or list of them but got type {v}') + + return parse_fn(args) + + +def check_config(cfg: Union[str, ConfigDict], is_training=False): + """ Check whether configuration file is valid, If anything wrong, exception will be raised. 
+ + Args: + cfg (str or ConfigDict): Config file path or config object. + is_training: indicate if checking training related elements + """ + + if isinstance(cfg, str): + cfg = Config.from_file(cfg) + + def check_attr(attr_name, msg=''): + assert hasattr(cfg, attr_name), f'Attribute {attr_name} is missing from ' \ + f'{ModelFile.CONFIGURATION}. {msg}' + + check_attr(ConfigFields.framework) + check_attr(ConfigFields.task) + check_attr(ConfigFields.pipeline) + + if is_training: + check_attr(ConfigFields.model) + check_attr(ConfigFields.train) + check_attr(ConfigFields.preprocessor) + check_attr(ConfigFields.evaluation) + + +class JSONIteratorEncoder(json.JSONEncoder): + """Implement this method in order that supporting arbitrary iterators, it returns + a serializable object for ``obj``, or calls the base implementation + (to raise a ``TypeError``). + + """ + def default(self, obj): + if isinstance(obj, FunctionType): + return None + try: + iterable = iter(obj) + except TypeError: + pass + else: + return list(iterable) + return json.JSONEncoder.default(self, obj) diff --git a/modelscope/utils/config_ds.py b/modelscope/utils/config_ds.py new file mode 100644 index 0000000..fce823c --- /dev/null +++ b/modelscope/utils/config_ds.py @@ -0,0 +1,26 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from pathlib import Path + +# Cache location +from modelscope.hub.constants import DEFAULT_MODELSCOPE_DATA_ENDPOINT + +DEFAULT_CACHE_HOME = Path.home().joinpath('.cache') +CACHE_HOME = os.getenv('CACHE_HOME', DEFAULT_CACHE_HOME) +DEFAULT_MS_CACHE_HOME = os.path.join(CACHE_HOME, 'modelscope', 'hub') +MS_CACHE_HOME = os.path.expanduser( + os.getenv('MS_CACHE_HOME', DEFAULT_MS_CACHE_HOME)) + +DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'datasets') +MS_DATASETS_CACHE = Path( + os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE)) + +DOWNLOADED_DATASETS_DIR = 'downloads' +DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(MS_DATASETS_CACHE, + DOWNLOADED_DATASETS_DIR) +DOWNLOADED_DATASETS_PATH = Path( + os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH)) + +HUB_DATASET_ENDPOINT = os.environ.get('HUB_DATASET_ENDPOINT', + DEFAULT_MODELSCOPE_DATA_ENDPOINT) diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py new file mode 100644 index 0000000..f289ea5 --- /dev/null +++ b/modelscope/utils/constant.py @@ -0,0 +1,503 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
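+#
+# Central registry of constant names used across modelscope: application
+# fields, task names, hub/download enums, well-known model file names and
+# first-level configuration keys.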
+import enum + + +class Fields(object): + """ Names for different application fields + """ + cv = 'cv' + nlp = 'nlp' + audio = 'audio' + multi_modal = 'multi-modal' + science = 'science' + + +class CVTasks(object): + # ocr + ocr_detection = 'ocr-detection' + ocr_recognition = 'ocr-recognition' + table_recognition = 'table-recognition' + license_plate_detection = 'license-plate-detection' + + # human face body related + animal_recognition = 'animal-recognition' + face_detection = 'face-detection' + face_liveness = 'face-liveness' + face_quality_assessment = 'face-quality-assessment' + card_detection = 'card-detection' + face_recognition = 'face-recognition' + facial_expression_recognition = 'facial-expression-recognition' + face_processing_base = 'face-processing-base' + face_attribute_recognition = 'face-attribute-recognition' + face_2d_keypoints = 'face-2d-keypoints' + human_detection = 'human-detection' + human_object_interaction = 'human-object-interaction' + face_image_generation = 'face-image-generation' + body_2d_keypoints = 'body-2d-keypoints' + body_3d_keypoints = 'body-3d-keypoints' + hand_2d_keypoints = 'hand-2d-keypoints' + general_recognition = 'general-recognition' + human_wholebody_keypoint = 'human-wholebody-keypoint' + + image_classification = 'image-classification' + image_multilabel_classification = 'image-multilabel-classification' + image_classification_imagenet = 'image-classification-imagenet' + image_classification_dailylife = 'image-classification-dailylife' + + image_object_detection = 'image-object-detection' + video_object_detection = 'video-object-detection' + image_fewshot_detection = 'image-fewshot-detection' + open_vocabulary_detection = 'open-vocabulary-detection' + object_detection_3d = 'object-detection-3d' + + image_segmentation = 'image-segmentation' + semantic_segmentation = 'semantic-segmentation' + image_driving_perception = 'image-driving-perception' + image_depth_estimation = 'image-depth-estimation' + indoor_layout_estimation = 'indoor-layout-estimation' + video_depth_estimation = 'video-depth-estimation' + panorama_depth_estimation = 'panorama-depth-estimation' + portrait_matting = 'portrait-matting' + universal_matting = 'universal-matting' + text_driven_segmentation = 'text-driven-segmentation' + shop_segmentation = 'shop-segmentation' + hand_static = 'hand-static' + face_human_hand_detection = 'face-human-hand-detection' + face_emotion = 'face-emotion' + product_segmentation = 'product-segmentation' + image_matching = 'image-matching' + image_quality_assessment_degradation = 'image-quality-assessment-degradation' + + crowd_counting = 'crowd-counting' + + # image editing + skin_retouching = 'skin-retouching' + image_super_resolution = 'image-super-resolution' + image_debanding = 'image-debanding' + image_colorization = 'image-colorization' + image_color_enhancement = 'image-color-enhancement' + image_denoising = 'image-denoising' + image_deblurring = 'image-deblurring' + image_portrait_enhancement = 'image-portrait-enhancement' + image_inpainting = 'image-inpainting' + image_paintbyexample = 'image-paintbyexample' + image_skychange = 'image-skychange' + image_demoireing = 'image-demoireing' + + # image generation + image_to_image_translation = 'image-to-image-translation' + image_to_image_generation = 'image-to-image-generation' + image_style_transfer = 'image-style-transfer' + image_portrait_stylization = 'image-portrait-stylization' + image_body_reshaping = 'image-body-reshaping' + image_embedding = 'image-embedding' + image_face_fusion = 
'image-face-fusion' + product_retrieval_embedding = 'product-retrieval-embedding' + + # video recognition + live_category = 'live-category' + action_recognition = 'action-recognition' + action_detection = 'action-detection' + video_category = 'video-category' + video_embedding = 'video-embedding' + virtual_try_on = 'virtual-try-on' + movie_scene_segmentation = 'movie-scene-segmentation' + language_guided_video_summarization = 'language-guided-video-summarization' + vop_retrieval = 'video-text-retrieval' + + # video segmentation + video_object_segmentation = 'video-object-segmentation' + referring_video_object_segmentation = 'referring-video-object-segmentation' + video_human_matting = 'video-human-matting' + video_panoptic_segmentation = 'video-panoptic-segmentation' + + # video editing + video_inpainting = 'video-inpainting' + video_frame_interpolation = 'video-frame-interpolation' + video_stabilization = 'video-stabilization' + video_super_resolution = 'video-super-resolution' + video_deinterlace = 'video-deinterlace' + video_colorization = 'video-colorization' + + # reid and tracking + video_single_object_tracking = 'video-single-object-tracking' + video_multi_object_tracking = 'video-multi-object-tracking' + video_summarization = 'video-summarization' + image_reid_person = 'image-reid-person' + + # pointcloud task + pointcloud_sceneflow_estimation = 'pointcloud-sceneflow-estimation' + # image multi-view depth estimation + image_multi_view_depth_estimation = 'image-multi-view-depth-estimation' + + # domain specific object detection + domain_specific_object_detection = 'domain-specific-object-detection' + + # content check + content_check = 'content-check' + + # 3d face reconstruction + face_reconstruction = 'face-reconstruction' + + # image quality assessment mos + image_quality_assessment_mos = 'image-quality-assessment-mos' + # motion generation + motion_generation = 'motion-generation' + # 3d reconstruction + nerf_recon_acc = 'nerf-recon-acc' + + # vision efficient tuning + vision_efficient_tuning = 'vision-efficient-tuning' + + # bad image detecting + bad_image_detecting = 'bad-image-detecting' + + +class NLPTasks(object): + # nlp tasks + word_segmentation = 'word-segmentation' + part_of_speech = 'part-of-speech' + named_entity_recognition = 'named-entity-recognition' + nli = 'nli' + sentiment_classification = 'sentiment-classification' + sentiment_analysis = 'sentiment-analysis' + sentence_similarity = 'sentence-similarity' + text_classification = 'text-classification' + sentence_embedding = 'sentence-embedding' + text_ranking = 'text-ranking' + relation_extraction = 'relation-extraction' + zero_shot = 'zero-shot' + translation = 'translation' + token_classification = 'token-classification' + transformer_crf = 'transformer-crf' + conversational = 'conversational' + text_generation = 'text-generation' + fid_dialogue = 'fid-dialogue' + text2text_generation = 'text2text-generation' + task_oriented_conversation = 'task-oriented-conversation' + dialog_intent_prediction = 'dialog-intent-prediction' + dialog_state_tracking = 'dialog-state-tracking' + table_question_answering = 'table-question-answering' + fill_mask = 'fill-mask' + text_summarization = 'text-summarization' + question_answering = 'question-answering' + code_translation = 'code-translation' + code_generation = 'code-generation' + zero_shot_classification = 'zero-shot-classification' + backbone = 'backbone' + text_error_correction = 'text-error-correction' + word_alignment = 'word-alignment' + faq_question_answering = 
'faq-question-answering' + information_extraction = 'information-extraction' + document_segmentation = 'document-segmentation' + extractive_summarization = 'extractive-summarization' + feature_extraction = 'feature-extraction' + translation_evaluation = 'translation-evaluation' + sudoku = 'sudoku' + text2sql = 'text2sql' + siamese_uie = 'siamese-uie' + document_grounded_dialog_retrieval = 'document-grounded-dialog-retrieval' + document_grounded_dialog_rerank = 'document-grounded-dialog-rerank' + document_grounded_dialog_generate = 'document-grounded-dialog-generate' + + +class AudioTasks(object): + # audio tasks + auto_speech_recognition = 'auto-speech-recognition' + text_to_speech = 'text-to-speech' + speech_signal_process = 'speech-signal-process' + speech_separation = 'speech-separation' + acoustic_echo_cancellation = 'acoustic-echo-cancellation' + acoustic_noise_suppression = 'acoustic-noise-suppression' + keyword_spotting = 'keyword-spotting' + inverse_text_processing = 'inverse-text-processing' + punctuation = 'punctuation' + speaker_verification = 'speaker-verification' + voice_activity_detection = 'voice-activity-detection' + language_model = 'language-model' + + +class MultiModalTasks(object): + # multi-modal tasks + image_captioning = 'image-captioning' + visual_grounding = 'visual-grounding' + text_to_image_synthesis = 'text-to-image-synthesis' + multi_modal_embedding = 'multi-modal-embedding' + generative_multi_modal_embedding = 'generative-multi-modal-embedding' + multi_modal_similarity = 'multi-modal-similarity' + visual_question_answering = 'visual-question-answering' + visual_entailment = 'visual-entailment' + video_multi_modal_embedding = 'video-multi-modal-embedding' + image_text_retrieval = 'image-text-retrieval' + document_vl_embedding = 'document-vl-embedding' + video_captioning = 'video-captioning' + video_question_answering = 'video-question-answering' + + +class ScienceTasks(object): + protein_structure = 'protein-structure' + + +class TasksIODescriptions(object): + image_to_image = 'image_to_image', + images_to_image = 'images_to_image', + image_to_text = 'image_to_text', + seed_to_image = 'seed_to_image', + text_to_speech = 'text_to_speech', + text_to_text = 'text_to_text', + speech_to_text = 'speech_to_text', + speech_to_speech = 'speech_to_speech' + speeches_to_speech = 'speeches_to_speech', + visual_grounding = 'visual_grounding', + visual_question_answering = 'visual_question_answering', + visual_entailment = 'visual_entailment', + generative_multi_modal_embedding = 'generative_multi_modal_embedding' + + +class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks): + """ Names for tasks supported by modelscope. + + Holds the standard task name to use for identifying different tasks. + This should be used to register models, pipelines, trainers. 
+ """ + reverse_field_index = {} + + @staticmethod + def find_field_by_task(task_name): + if len(Tasks.reverse_field_index) == 0: + # Lazy init, not thread safe + field_dict = { + Fields.cv: [ + getattr(Tasks, attr) for attr in dir(CVTasks) + if not attr.startswith('__') + ], + Fields.nlp: [ + getattr(Tasks, attr) for attr in dir(NLPTasks) + if not attr.startswith('__') + ], + Fields.audio: [ + getattr(Tasks, attr) for attr in dir(AudioTasks) + if not attr.startswith('__') + ], + Fields.multi_modal: [ + getattr(Tasks, attr) for attr in dir(MultiModalTasks) + if not attr.startswith('__') + ], + Fields.science: [ + getattr(Tasks, attr) for attr in dir(ScienceTasks) + if not attr.startswith('__') + ], + } + + for field, tasks in field_dict.items(): + for task in tasks: + if task in Tasks.reverse_field_index: + raise ValueError(f'Duplicate task: {task}') + Tasks.reverse_field_index[task] = field + + return Tasks.reverse_field_index.get(task_name) + + +class InputFields(object): + """ Names for input data fields in the input data for pipelines + """ + img = 'img' + text = 'text' + audio = 'audio' + + +class Hubs(enum.Enum): + """ Source from which an entity (such as a Dataset or Model) is stored + """ + modelscope = 'modelscope' + huggingface = 'huggingface' + + +class DownloadMode(enum.Enum): + """ How to treat existing datasets + """ + REUSE_DATASET_IF_EXISTS = 'reuse_dataset_if_exists' + FORCE_REDOWNLOAD = 'force_redownload' + + +class DownloadChannel(enum.Enum): + """ Channels of datasets downloading for uv/pv counting. + """ + LOCAL = 'local' + DSW = 'dsw' + EAIS = 'eais' + + +class UploadMode(enum.Enum): + """ How to upload object to remote. + """ + # Upload all objects from local, existing remote objects may be overwritten. (Default) + OVERWRITE = 'overwrite' + # Upload local objects in append mode, skipping all existing remote objects. + APPEND = 'append' + + +class DatasetFormations(enum.Enum): + """ How a dataset is organized and interpreted + """ + # formation that is compatible with official huggingface dataset, which + # organizes whole dataset into one single (zip) file. 
+ hf_compatible = 1 + # native modelscope formation that supports, among other things, + # multiple files in a dataset + native = 2 + # for local meta cache mark + formation_mark_ext = '.formation_mark' + + +DatasetMetaFormats = { + DatasetFormations.native: ['.json'], + DatasetFormations.hf_compatible: ['.py'], +} + + +class ModelFile(object): + CONFIGURATION = 'configuration.json' + README = 'README.md' + TF_SAVED_MODEL_FILE = 'saved_model.pb' + TF_GRAPH_FILE = 'tf_graph.pb' + TF_CHECKPOINT_FOLDER = 'tf_ckpts' + TF_CKPT_PREFIX = 'ckpt-' + TORCH_MODEL_FILE = 'pytorch_model.pt' + TORCH_MODEL_BIN_FILE = 'pytorch_model.bin' + VOCAB_FILE = 'vocab.txt' + ONNX_MODEL_FILE = 'model.onnx' + LABEL_MAPPING = 'label_mapping.json' + TRAIN_OUTPUT_DIR = 'output' + TRAIN_BEST_OUTPUT_DIR = 'output_best' + TS_MODEL_FILE = 'model.ts' + YAML_FILE = 'model.yaml' + TOKENIZER_FOLDER = 'tokenizer' + CONFIG = 'config.json' + + +class Invoke(object): + KEY = 'invoked_by' + PRETRAINED = 'from_pretrained' + PIPELINE = 'pipeline' + TRAINER = 'trainer' + LOCAL_TRAINER = 'local_trainer' + PREPROCESSOR = 'preprocessor' + + +class ConfigFields(object): + """ First level keyword in configuration file + """ + framework = 'framework' + task = 'task' + pipeline = 'pipeline' + model = 'model' + dataset = 'dataset' + preprocessor = 'preprocessor' + train = 'train' + evaluation = 'evaluation' + postprocessor = 'postprocessor' + + +class ConfigKeys(object): + """Fixed keywords in configuration file""" + train = 'train' + val = 'val' + test = 'test' + + +class Requirements(object): + """Requirement names for each module + """ + protobuf = 'protobuf' + sentencepiece = 'sentencepiece' + sklearn = 'sklearn' + scipy = 'scipy' + timm = 'timm' + tokenizers = 'tokenizers' + tf = 'tf' + torch = 'torch' + + +class Frameworks(object): + tf = 'tensorflow' + torch = 'pytorch' + kaldi = 'kaldi' + + +DEFAULT_MODEL_REVISION = None +MASTER_MODEL_BRANCH = 'master' +DEFAULT_REPOSITORY_REVISION = 'master' +DEFAULT_DATASET_REVISION = 'master' +DEFAULT_DATASET_NAMESPACE = 'modelscope' +DEFAULT_DATA_ACCELERATION_ENDPOINT = 'https://oss-accelerate.aliyuncs.com' + + +class ModeKeys: + TRAIN = 'train' + EVAL = 'eval' + INFERENCE = 'inference' + + +class LogKeys: + ITER = 'iter' + ITER_TIME = 'iter_time' + EPOCH = 'epoch' + LR = 'lr' # learning rate + MODE = 'mode' + DATA_LOAD_TIME = 'data_load_time' + ETA = 'eta' # estimated time of arrival + MEMORY = 'memory' + LOSS = 'loss' + + +class TrainerStages: + before_run = 'before_run' + before_train_epoch = 'before_train_epoch' + before_train_iter = 'before_train_iter' + after_train_iter = 'after_train_iter' + after_train_epoch = 'after_train_epoch' + before_val_epoch = 'before_val_epoch' + before_val_iter = 'before_val_iter' + after_val_iter = 'after_val_iter' + after_val_epoch = 'after_val_epoch' + after_run = 'after_run' + + +class ColorCodes: + MAGENTA = '\033[95m' + YELLOW = '\033[93m' + GREEN = '\033[92m' + RED = '\033[91m' + END = '\033[0m' + + +class Devices: + """device used for training and inference""" + cpu = 'cpu' + gpu = 'gpu' + + +# Supported extensions for text datasets. 
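+# The mapping is file extension -> loader/builder name.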
+EXTENSIONS_TO_LOAD = { + 'csv': 'csv', + 'tsv': 'csv', + 'json': 'json', + 'jsonl': 'json', + 'parquet': 'parquet', + 'txt': 'text' +} + + +class DatasetPathName: + META_NAME = 'meta' + DATA_FILES_NAME = 'data_files' + LOCK_FILE_NAME_ANY = 'any' + LOCK_FILE_NAME_DELIMITER = '-' + + +class MetaDataFields: + ARGS_BIG_DATA = 'big_data' + + +DatasetVisibilityMap = {1: 'private', 3: 'internal', 5: 'public'} diff --git a/modelscope/utils/cv/__init__.py b/modelscope/utils/cv/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/utils/cv/image_utils.py b/modelscope/utils/cv/image_utils.py new file mode 100644 index 0000000..2472e5a --- /dev/null +++ b/modelscope/utils/cv/image_utils.py @@ -0,0 +1,730 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os + +import cv2 +import matplotlib +import matplotlib.cm as cm +import matplotlib.pyplot as plt +import numpy as np +from PIL import Image + +from modelscope.outputs import OutputKeys +from modelscope.preprocessors.image import load_image +from modelscope.utils import logger as logging + +logger = logging.get_logger() + + +def voc_ap(rec, prec): + + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + +def image_eval(pred, gt, iou_thresh): + """ single image evaluation + pred: Nx5 + gt: Nx4 + ignore: + """ + _pred = pred.copy() + _gt = gt.copy() + pred_recall = np.zeros(_pred.shape[0]) + recall_list = np.zeros(_gt.shape[0]) + proposal_list = np.ones(_pred.shape[0]) + + #_pred[:, 2] = _pred[:, 2] + _pred[:, 0] + #_pred[:, 3] = _pred[:, 3] + _pred[:, 1] + _gt[:, 2] = _gt[:, 2] + _gt[:, 0] + _gt[:, 3] = _gt[:, 3] + _gt[:, 1] + + + for h in range(_pred.shape[0]): + gt_overlap = bbox_overlap(_gt, _pred[h]) + #gt_overlap = gt_overlap_list[h] + max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax() + + if max_overlap >= iou_thresh: + if recall_list[max_idx] == 0: + recall_list[max_idx] = 1 + + r_keep_index = np.where(recall_list == 1)[0] + pred_recall[h] = len(r_keep_index) + + return pred_recall, proposal_list + +def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall): + pr_info = np.zeros((thresh_num, 2)).astype('float') + fp = np.zeros((pred_info.shape[0],), dtype=np.int32) + last_info = [-1, -1] + for t in range(thresh_num): + + thresh = 1 - (t+1)/thresh_num + r_index = np.where(pred_info[:, 4] >= thresh)[0] + if len(r_index) == 0: + pr_info[t, 0] = 0 + pr_info[t, 1] = 0 + else: + r_index = r_index[-1] + p_index = np.where(proposal_list[:r_index+1] == 1)[0] + pr_info[t, 0] = len(p_index) #valid pred number + pr_info[t, 1] = pred_recall[r_index] # valid gt number + + if t>0 and pr_info[t, 0] > pr_info[t-1,0] and pr_info[t, 1]==pr_info[t-1,1]: + fp[r_index] = 1 + return pr_info, fp + +def gen_gt_info(img_gt): + gt_info = {} + fo = open(img_gt) + for line in fo: + if 'jpg' in line: + img_name = line.strip() + gt_info[img_name] = [] + continue + gt_info[img_name].append([float(item) for item in line.strip().split(' ')[:4]]) + return gt_info + +def dataset_pr_info(thresh_num, pr_curve, count_face): + _pr_curve = np.zeros((thresh_num, 2)) + 
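+    # column 0 becomes precision (valid gt matches / valid predictions at the
+    # threshold), column 1 becomes recall (valid gt matches / total faces)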
for i in range(thresh_num): + _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] + _pr_curve[i, 1] = pr_curve[i, 1] / count_face + return _pr_curve + +def bbox_overlap(a, b): + x1 = np.maximum(a[:,0], b[0]) + y1 = np.maximum(a[:,1], b[1]) + x2 = np.minimum(a[:,2], b[2]) + y2 = np.minimum(a[:,3], b[3]) + w = x2-x1+1 + h = y2-y1+1 + inter = w*h + aarea = (a[:,2]-a[:,0]+1) * (a[:,3]-a[:,1]+1) + barea = (b[2]-b[0]+1) * (b[3]-b[1]+1) + o = inter / (aarea+barea-inter) + o[w<=0] = 0 + o[h<=0] = 0 + return o + + +def numpy_to_cv2img(img_array): + """to convert a np.array with shape(h, w) to cv2 img + + Args: + img_array (np.array): input data + + Returns: + cv2 img + """ + img_array = (img_array - img_array.min()) / (img_array.max() - + img_array.min() + 1e-5) + img_array = (img_array * 255).astype(np.uint8) + img_array = cv2.applyColorMap(img_array, cv2.COLORMAP_JET) + return img_array + + +def draw_joints(image, np_kps, score, threshold=0.2): + lst_parent_ids_17 = [0, 0, 0, 1, 2, 0, 0, 5, 6, 7, 8, 5, 6, 11, 12, 13, 14] + lst_left_ids_17 = [1, 3, 5, 7, 9, 11, 13, 15] + lst_right_ids_17 = [2, 4, 6, 8, 10, 12, 14, 16] + + lst_parent_ids_15 = [0, 0, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 1] + lst_left_ids_15 = [2, 3, 4, 8, 9, 10] + lst_right_ids_15 = [5, 6, 7, 11, 12, 13] + + if np_kps.shape[0] == 17: + lst_parent_ids = lst_parent_ids_17 + lst_left_ids = lst_left_ids_17 + lst_right_ids = lst_right_ids_17 + + elif np_kps.shape[0] == 15: + lst_parent_ids = lst_parent_ids_15 + lst_left_ids = lst_left_ids_15 + lst_right_ids = lst_right_ids_15 + + for i in range(len(lst_parent_ids)): + pid = lst_parent_ids[i] + if i == pid: + continue + + if (score[i] < threshold or score[1] < threshold): + continue + + if i in lst_left_ids and pid in lst_left_ids: + color = (0, 255, 0) + elif i in lst_right_ids and pid in lst_right_ids: + color = (255, 0, 0) + else: + color = (0, 255, 255) + + cv2.line(image, (int(np_kps[i, 0]), int(np_kps[i, 1])), + (int(np_kps[pid][0]), int(np_kps[pid, 1])), color, 3) + + for i in range(np_kps.shape[0]): + if score[i] < threshold: + continue + cv2.circle(image, (int(np_kps[i, 0]), int(np_kps[i, 1])), 5, + (0, 0, 255), -1) + + +def draw_box(image, box): + cv2.rectangle(image, (int(box[0]), int(box[1])), + (int(box[2]), int(box[3])), (0, 0, 255), 2) + + +def realtime_object_detection_bbox_vis(image, bboxes): + for bbox in bboxes: + cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), + (255, 0, 0), 2) + return image + + +def draw_keypoints(output, original_image): + poses = np.array(output[OutputKeys.KEYPOINTS]) + scores = np.array(output[OutputKeys.SCORES]) + boxes = np.array(output[OutputKeys.BOXES]) + assert len(poses) == len(scores) and len(poses) == len(boxes) + image = cv2.imread(original_image, -1) + for i in range(len(poses)): + draw_box(image, np.array(boxes[i])) + draw_joints(image, np.array(poses[i]), np.array(scores[i])) + return image + + +def draw_106face_keypoints(in_path, + keypoints, + boxes, + scale=4.0, + save_path=None): + face_contour_point_index = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 + ] + left_eye_brow_point_index = [33, 34, 35, 36, 37, 38, 39, 40, 41, 33] + right_eye_brow_point_index = [42, 43, 44, 45, 46, 47, 48, 49, 50, 42] + left_eye_point_index = [66, 67, 68, 69, 70, 71, 72, 73, 66] + right_eye_point_index = [75, 76, 77, 78, 79, 80, 81, 82, 75] + nose_bridge_point_index = [51, 52, 53, 54] + nose_contour_point_index = [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] + 
mouth_outer_point_index = [ + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84 + ] + mouth_inter_point_index = [96, 97, 98, 99, 100, 101, 102, 103, 96] + + img = cv2.imread(in_path) + + for i in range(len(boxes)): + draw_box(img, np.array(boxes[i])) + + image = cv2.resize(img, dsize=None, fx=scale, fy=scale) + + def draw_line(point_index, image, point): + for i in range(len(point_index) - 1): + cur_index = point_index[i] + next_index = point_index[i + 1] + cur_pt = (int(point[cur_index][0] * scale), + int(point[cur_index][1] * scale)) + next_pt = (int(point[next_index][0] * scale), + int(point[next_index][1] * scale)) + cv2.line(image, cur_pt, next_pt, (0, 0, 255), thickness=2) + + for i in range(len(keypoints)): + points = keypoints[i] + + draw_line(face_contour_point_index, image, points) + draw_line(left_eye_brow_point_index, image, points) + draw_line(right_eye_brow_point_index, image, points) + draw_line(left_eye_point_index, image, points) + draw_line(right_eye_point_index, image, points) + draw_line(nose_bridge_point_index, image, points) + draw_line(nose_contour_point_index, image, points) + draw_line(mouth_outer_point_index, image, points) + draw_line(mouth_inter_point_index, image, points) + + size = len(points) + for i in range(size): + x = int(points[i][0]) + y = int(points[i][1]) + cv2.putText(image, str(i), (int(x * scale), int(y * scale)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + cv2.circle(image, (int(x * scale), int(y * scale)), 2, (0, 255, 0), + cv2.FILLED) + + if save_path is not None: + cv2.imwrite(save_path, image) + + return image + + +def draw_face_detection_no_lm_result(img_path, detection_result): + bboxes = np.array(detection_result[OutputKeys.BOXES]) + scores = np.array(detection_result[OutputKeys.SCORES]) + img = cv2.imread(img_path) + assert img is not None, f"Can't read img: {img_path}" + for i in range(len(scores)): + bbox = bboxes[i].astype(np.int32) + x1, y1, x2, y2 = bbox + score = scores[i] + cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2) + cv2.putText(img, + f'{score:.2f}', (x1, y2), + 1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + print(f'Found {len(scores)} faces') + return img + + +def draw_facial_expression_result(img_path, facial_expression_result): + scores = facial_expression_result[OutputKeys.SCORES] + labels = facial_expression_result[OutputKeys.LABELS] + label = labels[np.argmax(scores)] + img = cv2.imread(img_path) + assert img is not None, f"Can't read img: {img_path}" + cv2.putText(img, + 'facial expression: {}'.format(label), (10, 10), + 1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + print('facial expression: {}'.format(label)) + return img + + +def draw_face_attribute_result(img_path, face_attribute_result): + scores = face_attribute_result[OutputKeys.SCORES] + labels = face_attribute_result[OutputKeys.LABELS] + label_gender = labels[0][np.argmax(scores[0])] + label_age = labels[1][np.argmax(scores[1])] + img = cv2.imread(img_path) + assert img is not None, f"Can't read img: {img_path}" + cv2.putText(img, + 'face gender: {}'.format(label_gender), (10, 10), + 1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + + cv2.putText(img, + 'face age interval: {}'.format(label_age), (10, 40), + 1, + 1.0, (255, 0, 0), + thickness=1, + lineType=8) + logger.info('face gender: {}'.format(label_gender)) + logger.info('face age interval: {}'.format(label_age)) + return img + + +def draw_face_detection_result(img_path, detection_result): + bboxes = np.array(detection_result[OutputKeys.BOXES]) + kpss = 
np.array(detection_result[OutputKeys.KEYPOINTS]) + scores = np.array(detection_result[OutputKeys.SCORES]) + img = cv2.imread(img_path) + assert img is not None, f"Can't read img: {img_path}" + for i in range(len(scores)): + bbox = bboxes[i].astype(np.int32) + kps = kpss[i].reshape(-1, 2).astype(np.int32) + score = scores[i] + x1, y1, x2, y2 = bbox + cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2) + for kp in kps: + cv2.circle(img, tuple(kp), 1, (0, 0, 255), 1) + cv2.putText(img, + f'{score:.2f}', (x1, y2), + 1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + print(f'Found {len(scores)} faces') + return img + + +def draw_card_detection_result(img_path, detection_result): + def warp_img(src_img, kps, ratio): + short_size = 500 + if ratio > 1: + obj_h = short_size + obj_w = int(obj_h * ratio) + else: + obj_w = short_size + obj_h = int(obj_w / ratio) + input_pts = np.float32([kps[0], kps[1], kps[2], kps[3]]) + output_pts = np.float32([[0, obj_h - 1], [0, 0], [obj_w - 1, 0], + [obj_w - 1, obj_h - 1]]) + M = cv2.getPerspectiveTransform(input_pts, output_pts) + obj_img = cv2.warpPerspective(src_img, M, (obj_w, obj_h)) + return obj_img + + bboxes = np.array(detection_result[OutputKeys.BOXES]) + kpss = np.array(detection_result[OutputKeys.KEYPOINTS]) + scores = np.array(detection_result[OutputKeys.SCORES]) + img_list = [] + ver_col = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (0, 255, 255)] + img = cv2.imread(img_path) + img_list += [img] + assert img is not None, f"Can't read img: {img_path}" + for i in range(len(scores)): + bbox = bboxes[i].astype(np.int32) + kps = kpss[i].reshape(-1, 2).astype(np.int32) + _w = (kps[0][0] - kps[3][0])**2 + (kps[0][1] - kps[3][1])**2 + _h = (kps[0][0] - kps[1][0])**2 + (kps[0][1] - kps[1][1])**2 + ratio = 1.59 if _w >= _h else 1 / 1.59 + card_img = warp_img(img, kps, ratio) + img_list += [card_img] + score = scores[i] + x1, y1, x2, y2 = bbox + cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 4) + for k, kp in enumerate(kps): + cv2.circle(img, tuple(kp), 1, color=ver_col[k], thickness=10) + cv2.putText(img, + f'{score:.2f}', (x1, y2), + 1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + return img_list + + +def created_boxed_image(image_in, box): + image = load_image(image_in) + img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR) + cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), + (0, 255, 0), 3) + return img + + +def show_video_tracking_result(video_in_path, bboxes, video_save_path): + cap = cv2.VideoCapture(video_in_path) + for i in range(len(bboxes)): + box = bboxes[i] + success, frame = cap.read() + if success is False: + raise Exception(video_in_path, + ' can not be correctly decoded by OpenCV.') + if i == 0: + size = (frame.shape[1], frame.shape[0]) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + video_writer = cv2.VideoWriter(video_save_path, fourcc, + cap.get(cv2.CAP_PROP_FPS), size, + True) + cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), + 5) + video_writer.write(frame) + video_writer.release + cap.release() + + +def show_video_object_detection_result(video_in_path, bboxes_list, labels_list, + video_save_path): + + PALETTE = { + 'person': [128, 0, 0], + 'bicycle': [128, 128, 0], + 'car': [64, 0, 0], + 'motorcycle': [0, 128, 128], + 'bus': [64, 128, 0], + 'truck': [192, 128, 0], + 'traffic light': [64, 0, 128], + 'stop sign': [192, 0, 128], + } + from tqdm import tqdm + import math + cap = cv2.VideoCapture(video_in_path) + with tqdm(total=len(bboxes_list)) as pbar: + 
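+        # draw the per-frame boxes and labels with the class palette and
+        # re-encode the annotated frames at the source video's fps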
pbar.set_description( + 'Writing results to video: {}'.format(video_save_path)) + for i in range(len(bboxes_list)): + bboxes = bboxes_list[i].astype(int) + labels = labels_list[i] + success, frame = cap.read() + if success is False: + raise Exception(video_in_path, + ' can not be correctly decoded by OpenCV.') + if i == 0: + size = (frame.shape[1], frame.shape[0]) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + video_writer = cv2.VideoWriter(video_save_path, fourcc, + cap.get(cv2.CAP_PROP_FPS), size, + True) + + FONT_SCALE = 1e-3 # Adjust for larger font size in all images + THICKNESS_SCALE = 1e-3 # Adjust for larger thickness in all images + TEXT_Y_OFFSET_SCALE = 1e-2 # Adjust for larger Y-offset of text and bounding box + H, W, _ = frame.shape + zeros_mask = np.zeros((frame.shape)).astype(np.uint8) + for bbox, l in zip(bboxes, labels): + cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), + PALETTE[l], 1) + cv2.putText(frame, + l, + (bbox[0], bbox[1] - int(TEXT_Y_OFFSET_SCALE * H)), + fontFace=cv2.FONT_HERSHEY_TRIPLEX, + fontScale=min(H, W) * FONT_SCALE, + thickness=math.ceil(min(H, W) * THICKNESS_SCALE), + color=PALETTE[l]) + zeros_mask = cv2.rectangle(zeros_mask, (bbox[0], bbox[1]), + (bbox[2], bbox[3]), + color=PALETTE[l], + thickness=-1) + + frame = cv2.addWeighted(frame, 1., zeros_mask, .65, 0) + video_writer.write(frame) + pbar.update(1) + video_writer.release + cap.release() + + +def panoptic_seg_masks_to_image(masks): + draw_img = np.zeros([masks[0].shape[0], masks[0].shape[1], 3]) + from mmdet.core.visualization.palette import get_palette + mask_palette = get_palette('coco', 133) + + from mmdet.core.visualization.image import _get_bias_color + taken_colors = set([0, 0, 0]) + for i, mask in enumerate(masks): + color_mask = mask_palette[i] + while tuple(color_mask) in taken_colors: + color_mask = _get_bias_color(color_mask) + taken_colors.add(tuple(color_mask)) + + mask = mask.astype(bool) + draw_img[mask] = color_mask + + return draw_img + + +def semantic_seg_masks_to_image(masks): + from mmdet.core.visualization.palette import get_palette + mask_palette = get_palette('coco', 133) + + draw_img = np.zeros([masks[0].shape[0], masks[0].shape[1], 3]) + + for i, mask in enumerate(masks): + color_mask = mask_palette[i] + mask = mask.astype(bool) + draw_img[mask] = color_mask + return draw_img + + +def show_video_summarization_result(video_in_path, result, video_save_path): + frame_indexes = result[OutputKeys.OUTPUT] + cap = cv2.VideoCapture(video_in_path) + for i in range(len(frame_indexes)): + idx = frame_indexes[i] + success, frame = cap.read() + if success is False: + raise Exception(video_in_path, + ' can not be correctly decoded by OpenCV.') + if i == 0: + size = (frame.shape[1], frame.shape[0]) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + video_writer = cv2.VideoWriter(video_save_path, fourcc, + cap.get(cv2.CAP_PROP_FPS), size, + True) + if idx == 1: + video_writer.write(frame) + video_writer.release() + cap.release() + + +def show_image_object_detection_auto_result(img_path, + detection_result, + save_path=None): + scores = detection_result[OutputKeys.SCORES] + labels = detection_result[OutputKeys.LABELS] + bboxes = detection_result[OutputKeys.BOXES] + img = cv2.imread(img_path) + assert img is not None, f"Can't read img: {img_path}" + + for (score, label, box) in zip(scores, labels, bboxes): + cv2.rectangle(img, (int(box[0]), int(box[1])), + (int(box[2]), int(box[3])), (0, 0, 255), 2) + cv2.putText(img, + f'{score:.2f}', (int(box[0]), int(box[1])), + 
1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + cv2.putText(img, + label, (int(box[0]), int(box[3])), + 1, + 1.0, (0, 255, 0), + thickness=1, + lineType=8) + + if save_path is not None: + cv2.imwrite(save_path, img) + return img + + +def depth_to_color(depth): + colormap = plt.get_cmap('plasma') + depth_color = (colormap( + (depth.max() - depth) / depth.max()) * 2**8).astype(np.uint8)[:, :, :3] + depth_color = cv2.cvtColor(depth_color, cv2.COLOR_RGB2BGR) + return depth_color + + +def show_video_depth_estimation_result(depths, video_save_path): + height, width, layers = depths[0].shape + out = cv2.VideoWriter(video_save_path, cv2.VideoWriter_fourcc(*'MP4V'), 25, + (width, height)) + for (i, img) in enumerate(depths): + out.write(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR)) + out.release() + + +def show_image_driving_perception_result(img, + results, + out_file='result.jpg', + if_draw=[1, 1, 1]): + assert img.shape == (720, 1280, + 3), 'input image shape need fix to (720, 1280, 3)' + bboxes = results.get(OutputKeys.BOXES)[0] + if if_draw[0]: + for x in bboxes: + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, + c1, + c2, [255, 255, 0], + thickness=2, + lineType=cv2.LINE_AA) + + result = results.get(OutputKeys.MASKS) + + color_area = np.zeros((result[0].shape[0], result[0].shape[1], 3), + dtype=np.uint8) + + if if_draw[1]: + color_area[result[0] == 1] = [0, 255, 0] + if if_draw[2]: + color_area[result[1] == 1] = [255, 0, 0] + color_seg = color_area + + color_mask = np.mean(color_seg, 2) + msk_idx = color_mask != 0 + img[msk_idx] = img[msk_idx] * 0.5 + color_seg[msk_idx] * 0.5 + if out_file is not None: + cv2.imwrite(out_file, img[:, :, ::-1]) + return img + + +def masks_visualization(masks, palette): + vis_masks = [] + for f in range(masks.shape[0]): + img_E = Image.fromarray(masks[f]) + img_E.putpalette(palette) + vis_masks.append(img_E) + return vis_masks + + +# This implementation is adopted from LoFTR, +# made public available under the Apache License, Version 2.0, +# at https://github.com/zju3dv/LoFTR + + +def make_matching_figure(img0, + img1, + mkpts0, + mkpts1, + color, + kpts0=None, + kpts1=None, + text=[], + dpi=75, + path=None): + # draw image pair + assert mkpts0.shape[0] == mkpts1.shape[ + 0], f'mkpts0: {mkpts0.shape[0]} v.s. 
mkpts1: {mkpts1.shape[0]}' + fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi) + axes[0].imshow(img0, cmap='gray') + axes[1].imshow(img1, cmap='gray') + for i in range(2): # clear all frames + axes[i].get_yaxis().set_ticks([]) + axes[i].get_xaxis().set_ticks([]) + for spine in axes[i].spines.values(): + spine.set_visible(False) + plt.tight_layout(pad=1) + + if kpts0 is not None: + assert kpts1 is not None + axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c='w', s=2) + axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c='w', s=2) + + # draw matches + if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0: + fig.canvas.draw() + transFigure = fig.transFigure.inverted() + fkpts0 = transFigure.transform(axes[0].transData.transform(mkpts0)) + fkpts1 = transFigure.transform(axes[1].transData.transform(mkpts1)) + fig.lines = [ + matplotlib.lines.Line2D((fkpts0[i, 0], fkpts1[i, 0]), + (fkpts0[i, 1], fkpts1[i, 1]), + transform=fig.transFigure, + c=color[i], + linewidth=1) for i in range(len(mkpts0)) + ] + + axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color, s=4) + axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color, s=4) + + # put txts + txt_color = 'k' if img0[:100, :200].mean() > 200 else 'w' + fig.text(0.01, + 0.99, + '\n'.join(text), + transform=fig.axes[0].transAxes, + fontsize=15, + va='top', + ha='left', + color=txt_color) + + # save or return figure + if path: + plt.savefig(str(path), bbox_inches='tight', pad_inches=0) + plt.close() + else: + return fig + + +def match_pair_visualization(img_name0, + img_name1, + kpts0, + kpts1, + conf, + output_filename='quadtree_match.png', + method='QuadTreeAttention'): + + print(f'Found {len(kpts0)} matches') + + # visualize the matches + img0 = cv2.imread(str(img_name0)) + img1 = cv2.imread(str(img_name1)) + + # Draw + color = cm.jet(conf) + text = [ + method, + 'Matches: {}'.format(len(kpts0)), + ] + fig = make_matching_figure(img0, img1, kpts0, kpts1, color, text=text) + + # save the figure + fig.savefig(str(output_filename), dpi=300, bbox_inches='tight') diff --git a/modelscope/utils/cv/motion_utils/__init__.py b/modelscope/utils/cv/motion_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/utils/cv/motion_utils/motion_process.py b/modelscope/utils/cv/motion_utils/motion_process.py new file mode 100644 index 0000000..30c37ac --- /dev/null +++ b/modelscope/utils/cv/motion_utils/motion_process.py @@ -0,0 +1,72 @@ +# This code is borrowed and modified from Actor, +# made publicly available under MIT license at https://github.com/Mathux/ACTOR + +import torch + + +def qinv(q): + assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)' + mask = torch.ones_like(q) + mask[..., 1:] = -mask[..., 1:] + return q * mask + + +def qrot(q, v): + """ + Rotate vector(s) v about the rotation described by quaternion(s) q. + Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v, + where * denotes any number of dimensions. + Returns a tensor of shape (*, 3). 
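+
+    Example (a sketch, real part first): a unit quaternion for a 90 degree
+    rotation about the Z axis maps the X axis onto the Y axis:
+        >>> q = torch.tensor([[0.7071, 0.0, 0.0, 0.7071]])  # (w, x, y, z)
+        >>> v = torch.tensor([[1.0, 0.0, 0.0]])
+        >>> qrot(q, v)  # approximately tensor([[0., 1., 0.]])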
+ """ + assert q.shape[-1] == 4 + assert v.shape[-1] == 3 + assert q.shape[:-1] == v.shape[:-1] + + original_shape = list(v.shape) + # print(q.shape) + q = q.contiguous().view(-1, 4) + v = v.contiguous().view(-1, 3) + + qvec = q[:, 1:] + uv = torch.cross(qvec, v, dim=1) + uuv = torch.cross(qvec, uv, dim=1) + return (v + 2 * (q[:, :1] * uv + uuv)).view(original_shape) + + +def recover_root_rot_pos(data): + rot_vel = data[..., 0] + r_rot_ang = torch.zeros_like(rot_vel).to(data.device) + '''Get Y-axis rotation from rotation velocity''' + r_rot_ang[..., 1:] = rot_vel[..., :-1] + r_rot_ang = torch.cumsum(r_rot_ang, dim=-1) + + r_rot_quat = torch.zeros(data.shape[:-1] + (4, )).to(data.device) + r_rot_quat[..., 0] = torch.cos(r_rot_ang) + r_rot_quat[..., 2] = torch.sin(r_rot_ang) + + r_pos = torch.zeros(data.shape[:-1] + (3, )).to(data.device) + r_pos[..., 1:, [0, 2]] = data[..., :-1, 1:3] + '''Add Y-axis rotation to root position''' + r_pos = qrot(qinv(r_rot_quat), r_pos) + + r_pos = torch.cumsum(r_pos, dim=-2) + + r_pos[..., 1] = data[..., 3] + return r_rot_quat, r_pos + + +def recover_from_ric(data, joints_num): + r_rot_quat, r_pos = recover_root_rot_pos(data) + positions = data[..., 4:(joints_num - 1) * 3 + 4] + positions = positions.view(positions.shape[:-1] + (-1, 3)) + '''Add Y-axis rotation to local joints''' + positions = qrot( + qinv(r_rot_quat[..., None, :]).expand(positions.shape[:-1] + (4, )), + positions) + '''Add root XZ to joints''' + positions[..., 0] += r_pos[..., 0:1] + positions[..., 2] += r_pos[..., 2:3] + '''Concate root and joints''' + positions = torch.cat([r_pos.unsqueeze(-2), positions], dim=-2) + + return positions diff --git a/modelscope/utils/cv/motion_utils/plot_script.py b/modelscope/utils/cv/motion_utils/plot_script.py new file mode 100644 index 0000000..addf1ef --- /dev/null +++ b/modelscope/utils/cv/motion_utils/plot_script.py @@ -0,0 +1,124 @@ +# This code is borrowed and modified from Actor, +# made publicly available under MIT license at https://github.com/Mathux/ACTOR + +import math +from textwrap import wrap + +import matplotlib +import matplotlib.pyplot as plt +import mpl_toolkits.mplot3d.axes3d as p3 +import numpy as np +from matplotlib.animation import FuncAnimation +from mpl_toolkits.mplot3d.art3d import Poly3DCollection + + +def list_cut_average(ll, intervals): + if intervals == 1: + return ll + + bins = math.ceil(len(ll) * 1.0 / intervals) + ll_new = [] + for i in range(bins): + l_low = intervals * i + l_high = l_low + intervals + l_high = l_high if l_high < len(ll) else len(ll) + ll_new.append(np.mean(ll[l_low:l_high])) + return ll_new + + +def plot_3d_motion(save_path, + kinematic_tree, + joints, + title, + dataset, + figsize=(3, 3), + fps=120, + radius=3, + vis_mode='default', + gt_frames=[]): + matplotlib.use('Agg') + + title = '\n'.join(wrap(title, 30)) + + def init(): + ax.set_xlim3d([-radius / 2, radius / 2]) + ax.set_ylim3d([0, radius]) + ax.set_zlim3d([-radius / 3., radius * 2 / 3.]) + fig.suptitle(title, fontsize=10) + ax.grid(b=False) + + def plot_xzPlane(minx, maxx, miny, minz, maxz): + verts = [[minx, miny, minz], [minx, miny, maxz], [maxx, miny, maxz], + [maxx, miny, minz]] + xz_plane = Poly3DCollection([verts]) + xz_plane.set_facecolor((0.5, 0.5, 0.5, 0.5)) + ax.add_collection3d(xz_plane) + + data = joints.copy().reshape(len(joints), -1, 3) + + if dataset == 'kit': + data *= 0.003 # scale for visualization + elif dataset == 'humanml': + data *= 1.3 # scale for visualization + elif dataset in ['humanact12', 'uestc']: + data *= -1.5 # 
reverse axes, scale for visualization + + fig = plt.figure(figsize=figsize) + plt.tight_layout() + ax = p3.Axes3D(fig) + init() + MINS = data.min(axis=0).min(axis=0) + MAXS = data.max(axis=0).max(axis=0) + colors_blue = ['#4D84AA', '#5B9965', '#61CEB9', '#34C1E2', + '#80B79A'] # GT color + colors_orange = ['#DD5A37', '#D69E00', '#B75A39', '#FF6D00', + '#DDB50E'] # Generation color + colors = colors_orange + if vis_mode == 'upper_body': # lower body taken fixed to input motion + colors[0] = colors_blue[0] + colors[1] = colors_blue[1] + elif vis_mode == 'gt': + colors = colors_blue + + frame_number = data.shape[0] + # print(dataset.shape) + + height_offset = MINS[1] + data[:, :, 1] -= height_offset + trajec = data[:, 0, [0, 2]] + + data[..., 0] -= data[:, 0:1, 0] + data[..., 2] -= data[:, 0:1, 2] + + def update(index): + ax.lines.clear() + ax.collections.clear() + ax.view_init(elev=120, azim=-90) + ax.dist = 7.5 + plot_xzPlane(MINS[0] - trajec[index, 0], MAXS[0] - trajec[index, 0], 0, + MINS[2] - trajec[index, 1], MAXS[2] - trajec[index, 1]) + + used_colors = colors_blue if index in gt_frames else colors + for i, (chain, color) in enumerate(zip(kinematic_tree, used_colors)): + if i < 5: + linewidth = 4.0 + else: + linewidth = 2.0 + ax.plot3D(data[index, chain, 0], + data[index, chain, 1], + data[index, chain, 2], + linewidth=linewidth, + color=color) + plt.axis('off') + ax.set_xticklabels([]) + ax.set_yticklabels([]) + ax.set_zticklabels([]) + + ani = FuncAnimation(fig, + update, + frames=frame_number, + interval=1000 / fps, + repeat=False) + ani.save(save_path, fps=fps) + + plt.close() diff --git a/modelscope/utils/cv/motion_utils/rotation_conversions.py b/modelscope/utils/cv/motion_utils/rotation_conversions.py new file mode 100644 index 0000000..5f0ee94 --- /dev/null +++ b/modelscope/utils/cv/motion_utils/rotation_conversions.py @@ -0,0 +1,132 @@ +# This code is borrowed and modified from Actor, +# made publicly available under MIT license at https://github.com/Mathux/ACTOR + +import functools + +import torch +import torch.nn.functional as F + + +def quaternion_to_matrix(quaternions): + """ + Convert rotations given as quaternions to rotation matrices. + + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + r, i, j, k = torch.unbind(quaternions, -1) + two_s = 2.0 / (quaternions * quaternions).sum(-1) + + o = torch.stack( + ( + 1 - two_s * (j * j + k * k), + two_s * (i * j - k * r), + two_s * (i * k + j * r), + two_s * (i * j + k * r), + 1 - two_s * (i * i + k * k), + two_s * (j * k - i * r), + two_s * (i * k - j * r), + two_s * (j * k + i * r), + 1 - two_s * (i * i + j * j), + ), + -1, + ) + return o.reshape(quaternions.shape[:-1] + (3, 3)) + + +def _axis_angle_rotation(axis: str, angle): + """ + Return the rotation matrices for one of the rotations about an axis + of which Euler angles describe, for each value of the angle given. + + Args: + axis: Axis label "X" or "Y or "Z". + angle: any shape tensor of Euler angles in radians + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). 
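+
+    Example (a sketch): for axis 'Z' and an angle of pi / 2, the returned matrix is
+    approximately [[0, -1, 0], [1, 0, 0], [0, 0, 1]], i.e. a quarter turn about Z.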
+ """ + + cos = torch.cos(angle) + sin = torch.sin(angle) + one = torch.ones_like(angle) + zero = torch.zeros_like(angle) + + if axis == 'X': + R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos) + if axis == 'Y': + R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos) + if axis == 'Z': + R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one) + + return torch.stack(R_flat, -1).reshape(angle.shape + (3, 3)) + + +def euler_angles_to_matrix(euler_angles, convention: str): + """ + Convert rotations given as Euler angles in radians to rotation matrices. + + Args: + euler_angles: Euler angles in radians as tensor of shape (..., 3). + convention: Convention string of three uppercase letters from + {"X", "Y", and "Z"}. + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3: + raise ValueError('Invalid input euler angles.') + if len(convention) != 3: + raise ValueError('Convention must have 3 letters.') + if convention[1] in (convention[0], convention[2]): + raise ValueError(f'Invalid convention {convention}.') + for letter in convention: + if letter not in ('X', 'Y', 'Z'): + raise ValueError(f'Invalid letter {letter} in convention string.') + matrices = map(_axis_angle_rotation, convention, + torch.unbind(euler_angles, -1)) + return functools.reduce(torch.matmul, matrices) + + +def axis_angle_to_matrix(axis_angle): + """ + Convert rotations given as axis/angle to rotation matrices. + + Args: + axis_angle: Rotations given as a vector in axis angle form, + as a tensor of shape (..., 3), where the magnitude is + the angle turned anticlockwise in radians around the + vector's direction. + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + return quaternion_to_matrix(axis_angle_to_quaternion(axis_angle)) + + +def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor: + """ + Converts 6D rotation representation by Zhou et al. [1] to rotation matrix + using Gram--Schmidt orthogonalisation per Section B of [1]. + Args: + d6: 6D rotation representation, of size (*, 6) + + Returns: + batch of rotation matrices of size (*, 3, 3) + + [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. + On the Continuity of Rotation Representations in Neural Networks. + IEEE Conference on Computer Vision and Pattern Recognition, 2019. + Retrieved from http://arxiv.org/abs/1812.07035 + """ + + a1, a2 = d6[..., :3], d6[..., 3:] + b1 = F.normalize(a1, dim=-1) + b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1 + b2 = F.normalize(b2, dim=-1) + b3 = torch.cross(b1, b2, dim=-1) + return torch.stack((b1, b2, b3), dim=-2) diff --git a/modelscope/utils/data_collators.py b/modelscope/utils/data_collators.py new file mode 100644 index 0000000..3077d3e --- /dev/null +++ b/modelscope/utils/data_collators.py @@ -0,0 +1,75 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Part of the implementation is borrowed from huggingface/transformers. + +from collections import OrderedDict +from collections.abc import Mapping +from typing import Any, List, Optional, Tuple + +from .logger import get_logger + +logger = get_logger(__name__) + + +class RemoveColumnsCollator: + """Remove specified columns from the input mini-batch, and convert them to attributes. + + For example: if columns_to_remove = ['id'], then user should call batch.id instead of batch['id']. 
+ + Args: + data_collator: An inner data collator to collate the mini-batch + columns_to_remove(`List[str]`): The redundant columns to be removed from the mini-batch + model_name(`Optional[str]`): An optional model name to print into log + description(`Optional[str]`): An optional description to print into log + """ + def __init__( + self, + data_collator, + columns_to_remove: List[str], + model_name: Optional[str] = None, + description: Optional[str] = None, + ): + self.data_collator = data_collator + self.columns_to_remove = columns_to_remove + self.description = description + self.model_name = model_name + self.message_logged = False + + def _remove_columns(self, feature: Mapping) -> Tuple[Mapping, Any]: + if not isinstance(feature, Mapping): + return feature, None + if not self.message_logged and self.model_name: + ignored_columns = list( + set(feature.keys()) - set(self.columns_to_remove)) + if len(ignored_columns) > 0: + dset_description = '' if self.description is None else f'in the {self.description} set' + logger.info( + f"The following columns {dset_description} don't have a corresponding argument in " + f"`{self.model_name}.forward` and have been ignored: {', '.join(ignored_columns)}." + f"Legal columns: {', '.join(self.columns_to_remove)}." + f" If {', '.join(ignored_columns)} are not expected by `{self.model_name}.forward`, " + ' you can safely ignore this message.') + self.message_logged = True + feature_clean = { + k: v + for k, v in feature.items() if k in self.columns_to_remove + } + feature_unused = { + k: v + for k, v in feature.items() if k not in self.columns_to_remove + } + return feature_clean, feature_unused + + def __call__(self, features: List[Mapping]): + features_clean = [] + features_unused = [] + for feature in features: + feature, feature_unused = self._remove_columns(feature) + features_clean.append(feature) + features_unused.append(feature_unused) + data = OrderedDict(self.data_collator(features_clean)) + if features_unused[0] is not None: + for key in features_unused[0].keys(): + setattr(data, key, [ + feature_unused[key] for feature_unused in features_unused + ]) + return data diff --git a/modelscope/utils/data_utils.py b/modelscope/utils/data_utils.py new file mode 100644 index 0000000..424fb53 --- /dev/null +++ b/modelscope/utils/data_utils.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from collections.abc import Mapping + +import torch + +from modelscope.outputs import ModelOutputBase + + +def to_device(batch, device, non_blocking=False): + """Put the data to the target cuda device just before the forward function. + Args: + batch: The batch data out of the dataloader. + device: (str | torch.device): The target device for the data. + + Returns: The data to the target device. + + """ + if isinstance(batch, ModelOutputBase): + for idx in range(len(batch)): + batch[idx] = to_device(batch[idx], device) + return batch + elif isinstance(batch, dict) or isinstance(batch, Mapping): + if hasattr(batch, '__setitem__'): + # Reuse mini-batch to keep attributes for prediction. 
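+            # Updating the existing mapping in place (rather than building a new dict)
+            # keeps any extra attributes that a collator such as RemoveColumnsCollator
+            # may have attached to the batch via setattr.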
+ for k, v in batch.items(): + batch[k] = to_device(v, device) + return batch + else: + return type(batch)( + {k: to_device(v, device) + for k, v in batch.items()}) + elif isinstance(batch, (tuple, list)): + return type(batch)(to_device(v, device) for v in batch) + elif isinstance(batch, torch.Tensor): + return batch.to(device, non_blocking=non_blocking) + else: + return batch diff --git a/modelscope/utils/demo_utils.py b/modelscope/utils/demo_utils.py new file mode 100644 index 0000000..ff395e1 --- /dev/null +++ b/modelscope/utils/demo_utils.py @@ -0,0 +1,272 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import io +import json + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks, TasksIODescriptions +from modelscope.utils.service_utils import NumpyEncoder + +TASKS_INPUT_TEMPLATES = { + # vision tasks + Tasks.image_portrait_stylization: TasksIODescriptions.image_to_image, + Tasks.portrait_matting: TasksIODescriptions.image_to_image, + Tasks.skin_retouching: TasksIODescriptions.image_to_image, + Tasks.image_captioning: TasksIODescriptions.image_to_text, + Tasks.image_denoising: TasksIODescriptions.image_to_image, + Tasks.image_portrait_enhancement: TasksIODescriptions.image_to_image, + Tasks.image_super_resolution: TasksIODescriptions.image_to_image, + Tasks.image_colorization: TasksIODescriptions.image_to_image, + Tasks.image_color_enhancement: TasksIODescriptions.image_to_image, + Tasks.face_image_generation: TasksIODescriptions.seed_to_image, + Tasks.image_style_transfer: TasksIODescriptions.images_to_image, + Tasks.image_segmentation: TasksIODescriptions.image_to_text, + Tasks.image_object_detection: TasksIODescriptions.image_to_text, + + # not tested + Tasks.image_classification: TasksIODescriptions.image_to_text, + Tasks.ocr_detection: TasksIODescriptions.image_to_text, + Tasks.ocr_recognition: TasksIODescriptions.image_to_text, + Tasks.body_2d_keypoints: TasksIODescriptions.image_to_text, + + # nlp tasks + Tasks.text_classification: TasksIODescriptions.text_to_text, + Tasks.text_generation: TasksIODescriptions.text_to_text, + Tasks.word_segmentation: TasksIODescriptions.text_to_text, + Tasks.text_error_correction: TasksIODescriptions.text_to_text, + Tasks.named_entity_recognition: TasksIODescriptions.text_to_text, + Tasks.sentiment_classification: TasksIODescriptions.text_to_text, + + # audio tasks + Tasks.text_to_speech: TasksIODescriptions.text_to_speech, + Tasks.auto_speech_recognition: TasksIODescriptions.speech_to_text, + Tasks.keyword_spotting: TasksIODescriptions.speech_to_text, + Tasks.acoustic_noise_suppression: TasksIODescriptions.speech_to_speech, + Tasks.acoustic_echo_cancellation: TasksIODescriptions.speeches_to_speech, + + # multi-modal + Tasks.visual_grounding: TasksIODescriptions.visual_grounding, + Tasks.visual_question_answering: + TasksIODescriptions.visual_question_answering, + Tasks.visual_entailment: TasksIODescriptions.visual_entailment, + Tasks.generative_multi_modal_embedding: + TasksIODescriptions.generative_multi_modal_embedding, + + # new tasks + Tasks.virtual_try_on: TasksIODescriptions.images_to_image, + + # TODO(lingcai.wl): support more tasks and implement corresponding example +} + +INPUT_EXAMPLES = { + # Must align with task schema defined in the Widget section of model card= + # cv + TasksIODescriptions.image_to_image: { + 'inputs': [ + 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png' + ], + 'urlPaths': { + 'outUrls': [{ + 
'outputKey': OutputKeys.OUTPUT_IMG, + 'fileType': 'png' + }] + } + }, + TasksIODescriptions.images_to_image: { + 'inputs': [ + 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_content.jpg', + 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_style.jpg' + ], + 'urlPaths': { + 'outUrls': [{ + 'outputKey': OutputKeys.OUTPUT_IMG, + 'fileType': 'png' + }] + } + }, + TasksIODescriptions.image_to_text: { + 'inputs': [ + 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png' + ], + 'urlPaths': {} + }, + # nlp + TasksIODescriptions.text_to_text: { + 'inputs': ['test'], + 'urlPaths': {} + }, + + # audio + TasksIODescriptions.speech_to_text: { + 'inputs': [ + 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' + ], + 'urlPaths': {} + }, + TasksIODescriptions.text_to_speech: { + 'inputs': ['北京今天天气怎么样'], + 'urlPaths': { + 'outUrls': [{ + 'outputKey': OutputKeys.OUTPUT_PCM, + 'fileType': 'pcm' + }] + } + }, + TasksIODescriptions.speeches_to_speech: { + 'inputs': [ + 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_mic.wav', + 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_speech.wav' + ], + 'urlPaths': { + 'outUrls': [{ + 'outputKey': OutputKeys.OUTPUT_PCM, + 'fileType': 'pcm' + }] + } + }, + TasksIODescriptions.speech_to_speech: { + 'inputs': [ + 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/speech_with_noise.wav' + ], + 'urlPaths': { + 'outUrls': [{ + 'outputKey': OutputKeys.OUTPUT_PCM, + 'fileType': 'pcm' + }] + } + }, + + # multi modal + TasksIODescriptions.visual_grounding: { + 'task': + Tasks.visual_grounding, + 'inputs': [ + 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png', + 'a blue turtle-like pokemon with round head' + ], + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }] + } + }, + TasksIODescriptions.visual_question_answering: { + 'task': + Tasks.visual_question_answering, + 'inputs': [ + 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png', + 'what is grown on the plant?' 
+ ], + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }], + 'outUrls': [{ + 'outputKey': 'text' + }] + } + }, + TasksIODescriptions.visual_entailment: { + 'task': + Tasks.visual_entailment, + 'inputs': [ + 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg', + 'there are two birds.', 'test' + ], + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }], + 'outUrls': [{}] + } + }, + TasksIODescriptions.generative_multi_modal_embedding: { + 'task': + Tasks.generative_multi_modal_embedding, + 'inputs': [ + 'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg', + 'dogs playing in the grass' + ], + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }], + 'outUrls': [{}] + } + }, +} + + +class DemoCompatibilityCheck(object): + def compatibility_check(self): + if self.task not in TASKS_INPUT_TEMPLATES: + print('task is not supported in demo service so far') + return False + if TASKS_INPUT_TEMPLATES[self.task] not in INPUT_EXAMPLES: + print('no example input for this task') + return False + + print('testing demo: ', self.task, self.model_id) + test_pipline = pipeline(self.task, self.model_id) + req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]] + inputs = preprocess(req) + params = req.get('parameters', {}) + # modelscope inference + if params != {}: + output = test_pipline(inputs, **params) + else: + output = test_pipline(inputs) + json.dumps(output, cls=NumpyEncoder) + result = postprocess(req, output) + print(result) + return True + + +def preprocess(req): + in_urls = req.get('urlPaths').get('inUrls') + if len(req['inputs']) == 1: + inputs = req['inputs'][0] + else: + inputs = tuple(req['inputs']) + if in_urls is None or len(in_urls) == 0: + return inputs + + inputs_dict = {} + for i, in_url in enumerate(in_urls): + input_name = in_url.get('name') + if input_name is None or input_name == '': + return inputs + inputs_dict[input_name] = req['inputs'][i] + return inputs_dict + + +def postprocess(req, resp): + out_urls = req.get('urlPaths').get('outUrls') + if out_urls is None or len(out_urls) == 0: + return resp + new_resp = resp + if isinstance(resp, str): + new_resp = json.loads(resp) + for out_url in out_urls: + output_key = out_url['outputKey'] + file_type = out_url['fileType'] + new_resp.get(output_key) + if file_type == 'png' or file_type == 'jpg': + content = new_resp.get(output_key) + import cv2 + _, img_encode = cv2.imencode('.' + file_type, content) + img_bytes = img_encode.tobytes() + return type(img_bytes) + else: + out_mem_file = io.BytesIO() + out_mem_file.write(new_resp.get(output_key)) + return type(out_mem_file) diff --git a/modelscope/utils/device.py b/modelscope/utils/device.py new file mode 100644 index 0000000..47e0878 --- /dev/null +++ b/modelscope/utils/device.py @@ -0,0 +1,119 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +from contextlib import contextmanager + +from modelscope.utils.constant import Devices, Frameworks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def verify_device(device_name): + """ Verify device is valid, device should be either cpu, cuda, gpu, cuda:X or gpu:X. + + Args: + device (str): device str, should be either cpu, cuda, gpu, gpu:X or cuda:X + where X is the ordinal for gpu device. + + Return: + device info (tuple): device_type and device_id, if device_id is not set, will use 0 as default. 
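+
+    Example (a sketch, assuming Devices.gpu == 'gpu'):
+        >>> verify_device('cuda:1')  # ('gpu', 1)
+        >>> verify_device('gpu')     # ('gpu', 0)
+        >>> verify_device('cpu')     # ('cpu', None)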
+ """ + err_msg = 'device should be either cpu, cuda, gpu, gpu:X or cuda:X where X is the ordinal for gpu device.' + assert device_name is not None and device_name != '', err_msg + device_name = device_name.lower() + eles = device_name.split(':') + assert len(eles) <= 2, err_msg + assert device_name is not None + assert eles[0] in ['cpu', 'cuda', 'gpu'], err_msg + device_type = eles[0] + device_id = None + if len(eles) > 1: + device_id = int(eles[1]) + if device_type == 'cuda': + device_type = Devices.gpu + if device_type == Devices.gpu and device_id is None: + device_id = 0 + return device_type, device_id + + +@contextmanager +def device_placement(framework, device_name='gpu:0'): + """ Device placement function, allow user to specify which device to place model or tensor + Args: + framework (str): tensorflow or pytorch. + device (str): gpu or cpu to use, if you want to specify certain gpu, + use gpu:$gpu_id or cuda:$gpu_id. + + Returns: + Context manager + + Examples: + + >>> # Requests for using model on cuda:0 for gpu + >>> with device_placement('pytorch', device='gpu:0'): + >>> model = Model.from_pretrained(...) + """ + device_type, device_id = verify_device(device_name) + + if framework == Frameworks.tf: + import tensorflow as tf + if device_type == Devices.gpu and not tf.test.is_gpu_available(): + logger.debug( + 'tensorflow: cuda is not available, using cpu instead.') + device_type = Devices.cpu + if device_type == Devices.cpu: + with tf.device('/CPU:0'): + yield + else: + if device_type == Devices.gpu: + with tf.device(f'/device:gpu:{device_id}'): + yield + + elif framework == Frameworks.torch: + import torch + if device_type == Devices.gpu: + if torch.cuda.is_available(): + torch.cuda.set_device(f'cuda:{device_id}') + else: + logger.debug( + 'pytorch: cuda is not available, using cpu instead.') + yield + else: + yield + + +def create_device(device_name): + """ create torch device + + Args: + device_name (str): cpu, gpu, gpu:0, cuda:0 etc. + """ + import torch + device_type, device_id = verify_device(device_name) + use_cuda = False + if device_type == Devices.gpu: + use_cuda = True + if not torch.cuda.is_available(): + logger.info('cuda is not available, using cpu instead.') + use_cuda = False + + if use_cuda: + device = torch.device(f'cuda:{device_id}') + else: + device = torch.device('cpu') + + return device + + +def get_device(): + import torch + from torch import distributed as dist + if torch.cuda.is_available(): + if dist.is_available() and dist.is_initialized( + ) and 'LOCAL_RANK' in os.environ: + device_id = f"cuda:{os.environ['LOCAL_RANK']}" + else: + device_id = 'cuda:0' + else: + device_id = 'cpu' + return torch.device(device_id) diff --git a/modelscope/utils/error.py b/modelscope/utils/error.py new file mode 100644 index 0000000..44e6b23 --- /dev/null +++ b/modelscope/utils/error.py @@ -0,0 +1,155 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +# docstyle-ignore +AUDIO_IMPORT_ERROR = """ +Audio model import failed: {0}, if you want to use audio related function, please execute +`pip install modelscope[audio] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html` +""" + +# docstyle-ignore +PROTOBUF_IMPORT_ERROR = """ +{0} requires the protobuf library but it was not found in your environment. Checkout the instructions on the +installation page of its repo: https://github.com/protocolbuffers/protobuf/tree/master/python#installation and +follow the ones that match your environment. 
+""" + +# docstyle-ignore +SENTENCEPIECE_IMPORT_ERROR = """ +{0} requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the +installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones +that match your environment. +""" + +# docstyle-ignore +SKLEARN_IMPORT_ERROR = """ +{0} requires the scikit-learn library but it was not found in your environment. You can install it with: +``` +pip install -U scikit-learn +``` +In a notebook or a colab, you can install it by executing a cell with +``` +!pip install -U scikit-learn +``` +""" + +# docstyle-ignore +TENSORFLOW_IMPORT_ERROR = """ +{0} requires the TensorFlow library but it was not found in your environment. Checkout the instructions on the +installation page: https://www.tensorflow.org/install and follow the ones that match your environment. +""" + +# docstyle-ignore +TENSORFLOW_IMPORT_WARNING = """ +{0} requires the TensorFlow library but it was not found in your environment. +If you don't want to use them, please ignore this message +If you want to use them, please Checkout the instructions on the +installation page: https://www.tensorflow.org/install and follow the ones that match your environment. +""" + +# docstyle-ignore +TIMM_IMPORT_ERROR = """ +{0} requires the timm library but it was not found in your environment. You can install it with pip: +`pip install timm` +""" + +# docstyle-ignore +TOKENIZERS_IMPORT_ERROR = """ +{0} requires the 🤗 Tokenizers library but it was not found in your environment. You can install it with: +``` +pip install tokenizers +``` +In a notebook or a colab, you can install it by executing a cell with +``` +!pip install tokenizers +``` +""" + +# docstyle-ignore +PYTORCH_IMPORT_ERROR = """ +{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the +installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment. +""" + +WENETRUNTIME_IMPORT_ERROR = """ +{0} requires the wenetruntime library but it was not found in your environment. You can install it with pip: +`pip install wenetruntime==TORCH_VER` +""" + +# docstyle-ignore +SCIPY_IMPORT_ERROR = """ +{0} requires the scipy library but it was not found in your environment. You can install it with pip: +`pip install scipy` +""" + +# docstyle-ignore +OPENCV_IMPORT_ERROR = """ +{0} requires the opencv library but it was not found in your environment. You can install it with pip: +`pip install opencv-python` +""" + +PILLOW_IMPORT_ERROR = """ +{0} requires the Pillow library but it was not found in your environment. You can install it with pip: +`pip install Pillow` +""" + +GENERAL_IMPORT_ERROR = """ +{0} requires the REQ library but it was not found in your environment. You can install it with pip: +`pip install REQ` +""" + +DECORD_IMPORT_ERROR = """ +{0} requires the decord library but it was not found in your environment. You can install it with pip: +`pip install decord>=0.6.0` +""" + +# docstyle-ignore +DEEPSPEED_IMPORT_ERROR = """ +{0} requires the Deepspeed library but it was not found in your environment. Checkout the instructions on the +installation page: https://www.deepspeed.ai/tutorials/advanced-install/ and follow the ones that match your environment. +""" + +# docstyle-ignore +FAIRSEQ_IMPORT_ERROR = """ +{0} requires the fairseq library but it was not found in your environment. 
+You can install it with pip on linux: +`pip install fairseq` +On windows, please checkout the instructions on the +installation page: https://github.com/facebookresearch/fairseq and follow the ones that match your environment. +""" + +# docstyle-ignore +FASTTEXT_IMPORT_ERROR = """ +{0} requires the fasttext library but it was not found in your environment. +You can install it with pip on linux or mac: +`pip install fasttext` +Or you can checkout the instructions on the +installation page: https://github.com/facebookresearch/fastText and follow the ones that match your environment. +""" + +# docstyle-ignore +EASYNLP_IMPORT_ERROR = """ +{0} requires the easynlp library but it was not found in your environment. +You can install it with pip on linux or mac: +`pip install pai-easynlp -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html` +Or you can checkout the instructions on the +installation page: https://github.com/alibaba/EasyNLP and follow the ones that match your environment. +""" + +# docstyle-ignore +MEGATRON_UTIL_IMPORT_ERROR = """ +{0} requires the megatron_util library but it was not found in your environment. You can install it with pip: +`pip install megatron_util -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html` +""" + +# docstyle-ignore +TEXT2SQL_LGESQL_IMPORT_ERROR = """ +{0} requires the text2sql_lgesql library but it was not found in your environment. You can install it with pip: +`pip install text2sql_lgesql -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html` +""" + +# docstyle-ignore +MPI4PY_IMPORT_ERROR = """ +{0} requires the mpi4py library but it was not found in your environment. You can install it with pip: +`pip install mpi4py' and with following the instruction to install openmpi, +https://docs.open-mpi.org/en/v5.0.x/installing-open-mpi/quickstart.html` +""" diff --git a/modelscope/utils/file_utils.py b/modelscope/utils/file_utils.py new file mode 100644 index 0000000..09e3364 --- /dev/null +++ b/modelscope/utils/file_utils.py @@ -0,0 +1,44 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import inspect +from pathlib import Path + + +# TODO: remove this api, unify to flattened args +def func_receive_dict_inputs(func): + """to decide if a func could recieve dict inputs or not + + Args: + func (class): the target function to be inspected + + Returns: + bool: if func only has one arg ``input`` or ``inputs``, return True, else return False + """ + full_args_spec = inspect.getfullargspec(func) + varargs = full_args_spec.varargs + varkw = full_args_spec.varkw + if not (varargs is None and varkw is None): + return False + + args = [] if not full_args_spec.args else full_args_spec.args + args.pop(0) if (args and args[0] in ['self', 'cls']) else args + + if len(args) == 1 and args[0] in ['input', 'inputs']: + return True + + return False + + +def get_default_cache_dir(): + """ + default base dir: '~/.cache/modelscope' + """ + default_cache_dir = Path.home().joinpath('.cache', 'modelscope') + return default_cache_dir + + +def read_file(path): + + with open(path, 'r') as f: + text = f.read() + return text diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py new file mode 100644 index 0000000..9e5089b --- /dev/null +++ b/modelscope/utils/hub.py @@ -0,0 +1,163 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
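+# Helper utilities for working with the ModelScope hub and local model directories:
+# creating models on the hub, reading configuration files, and resolving model types
+# and label mappings.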
+ +import os +import os.path as osp +from typing import List, Optional, Union + +from requests import HTTPError + +from modelscope.hub.constants import Licenses, ModelVisibility +from modelscope.hub.file_download import model_file_download +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.utils.config import Config +from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigFields, + ModelFile) + +from .logger import get_logger + +logger = get_logger() + + +def create_model_if_not_exist( + api, + model_id: str, + chinese_name: str, + visibility: Optional[int] = ModelVisibility.PUBLIC, + license: Optional[str] = Licenses.APACHE_V2, + revision: Optional[str] = DEFAULT_MODEL_REVISION): + exists = True + try: + api.get_model(model_id=model_id, revision=revision) + except HTTPError: + exists = False + if exists: + print(f'model {model_id} already exists, skip creation.') + return False + else: + api.create_model( + model_id=model_id, + visibility=visibility, + license=license, + chinese_name=chinese_name, + ) + print(f'model {model_id} successfully created.') + return True + + +def read_config(model_id_or_path: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION): + """ Read config from hub or local path + + Args: + model_id_or_path (str): Model repo name or local directory path. + revision: revision of the model when getting from the hub + Return: + config (:obj:`Config`): config object + """ + if not os.path.exists(model_id_or_path): + local_path = model_file_download(model_id_or_path, + ModelFile.CONFIGURATION, + revision=revision) + elif os.path.isdir(model_id_or_path): + local_path = os.path.join(model_id_or_path, ModelFile.CONFIGURATION) + elif os.path.isfile(model_id_or_path): + local_path = model_id_or_path + + return Config.from_file(local_path) + + +def auto_load(model: Union[str, List[str]]): + if isinstance(model, str): + if not osp.exists(model): + model = snapshot_download(model) + else: + model = [ + snapshot_download(m) if not osp.exists(m) else m for m in model + ] + + return model + + +def get_model_type(model_dir): + """Get the model type from the configuration. + + This method will try to get the model type from 'model.backbone.type', + 'model.type' or 'model.model_type' field in the configuration.json file. If + this file does not exist, the method will try to get the 'model_type' field + from the config.json. + + Args: + model_dir: The local model dir to use. @return: The model type + string, returns None if nothing is found. + """ + try: + configuration_file = osp.join(model_dir, ModelFile.CONFIGURATION) + config_file = osp.join(model_dir, 'config.json') + if osp.isfile(configuration_file): + cfg = Config.from_file(configuration_file) + if hasattr(cfg.model, 'backbone'): + return cfg.model.backbone.type + elif hasattr(cfg.model, + 'model_type') and not hasattr(cfg.model, 'type'): + return cfg.model.model_type + else: + return cfg.model.type + elif osp.isfile(config_file): + cfg = Config.from_file(config_file) + return cfg.model_type if hasattr(cfg, 'model_type') else None + except Exception as e: + logger.error(f'parse config file failed with error: {e}') + + +def parse_label_mapping(model_dir): + """Get the label mapping from the model dir. + + This method will do: + 1. Try to read label-id mapping from the label_mapping.json + 2. Try to read label-id mapping from the configuration.json + 3. Try to read label-id mapping from the config.json + + Args: + model_dir: The local model dir to use. 
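+
+    Example (hypothetical): a configuration.json containing
+    ``{"model": {"id2label": {"0": "negative", "1": "positive"}}}`` would yield
+    ``{'negative': 0, 'positive': 1}``.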
+ + Returns: + The label2id mapping if found. + """ + import json + import os + label2id = None + label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING) + if os.path.exists(label_path): + with open(label_path, encoding='utf-8') as f: + label_mapping = json.load(f) + label2id = {name: idx for name, idx in label_mapping.items()} + + if label2id is None: + config_path = os.path.join(model_dir, ModelFile.CONFIGURATION) + config = Config.from_file(config_path) + if hasattr(config, ConfigFields.model) and hasattr( + config[ConfigFields.model], 'label2id'): + label2id = config[ConfigFields.model].label2id + elif hasattr(config, ConfigFields.model) and hasattr( + config[ConfigFields.model], 'id2label'): + id2label = config[ConfigFields.model].id2label + label2id = {label: id for id, label in id2label.items()} + elif hasattr(config, ConfigFields.preprocessor) and hasattr( + config[ConfigFields.preprocessor], 'label2id'): + label2id = config[ConfigFields.preprocessor].label2id + elif hasattr(config, ConfigFields.preprocessor) and hasattr( + config[ConfigFields.preprocessor], 'id2label'): + id2label = config[ConfigFields.preprocessor].id2label + label2id = {label: id for id, label in id2label.items()} + + config_path = os.path.join(model_dir, 'config.json') + if label2id is None and os.path.exists(config_path): + config = Config.from_file(config_path) + if hasattr(config, 'label2id'): + label2id = config.label2id + elif hasattr(config, 'id2label'): + id2label = config.id2label + label2id = {label: id for id, label in id2label.items()} + if label2id is not None: + label2id = {label: int(id) for label, id in label2id.items()} + return label2id diff --git a/modelscope/utils/import_utils.py b/modelscope/utils/import_utils.py new file mode 100644 index 0000000..3517ea3 --- /dev/null +++ b/modelscope/utils/import_utils.py @@ -0,0 +1,458 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Part of the implementation is borrowed from huggingface/transformers. +import ast +import functools +import importlib +import os +import os.path as osp +import sys +from collections import OrderedDict +from importlib import import_module +from itertools import chain +from pathlib import Path +from types import ModuleType +from typing import Any + +from packaging import version + +from modelscope.utils.ast_utils import (INDEX_KEY, MODULE_KEY, REQUIREMENT_KEY, + load_index) +from modelscope.utils.error import * # noqa +from modelscope.utils.logger import get_logger + +if sys.version_info < (3, 8): + import importlib_metadata +else: + import importlib.metadata as importlib_metadata + +logger = get_logger() + +AST_INDEX = None + + +def import_modules_from_file(py_file: str): + """ Import module from a certrain file + + Args: + py_file: path to a python file to be imported + + Return: + + """ + dirname, basefile = os.path.split(py_file) + if dirname == '': + dirname = Path.cwd() + module_name = osp.splitext(basefile)[0] + sys.path.insert(0, dirname) + validate_py_syntax(py_file) + mod = import_module(module_name) + sys.path.pop(0) + return module_name, mod + + +def is_method_overridden(method, base_class, derived_class): + """Check if a method of base class is overridden in derived class. + + Args: + method (str): the method name to check. + base_class (type): the class of the base class. + derived_class (type | Any): the class or instance of the derived class. + """ + assert isinstance(base_class, type), \ + "base_class doesn't accept instance, Please pass class instead." 
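+    # Accept either a class or an instance for the derived side; an instance is
+    # normalised to its class before the two bound methods are compared.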
+ + if not isinstance(derived_class, type): + derived_class = derived_class.__class__ + + base_method = getattr(base_class, method) + derived_method = getattr(derived_class, method) + return derived_method != base_method + + +def has_method(obj: object, method: str) -> bool: + """Check whether the object has a method. + + Args: + method (str): The method name to check. + obj (object): The object to check. + + Returns: + bool: True if the object has the method else False. + """ + return hasattr(obj, method) and callable(getattr(obj, method)) + + +def import_modules(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. + + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError( + f'custom_imports must be a list but got type {type(imports)}') + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError( + f'{imp} is of type {type(imp)} and cannot be imported.') + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + logger.warning(f'{imp} failed to import and is ignored.') + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +def validate_py_syntax(filename): + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError('There are syntax errors in config ' + f'file {filename}: {e}') + + +# following code borrows implementation from huggingface/transformers +ENV_VARS_TRUE_VALUES = {'1', 'ON', 'YES', 'TRUE'} +ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({'AUTO'}) +USE_TF = os.environ.get('USE_TF', 'AUTO').upper() +USE_TORCH = os.environ.get('USE_TORCH', 'AUTO').upper() + +_torch_version = 'N/A' +if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES: + _torch_available = importlib.util.find_spec('torch') is not None + if _torch_available: + try: + _torch_version = importlib_metadata.version('torch') + logger.info(f'PyTorch version {_torch_version} Found.') + except importlib_metadata.PackageNotFoundError: + _torch_available = False +else: + logger.info('Disabling PyTorch because USE_TF is set') + _torch_available = False + +_timm_available = importlib.util.find_spec('timm') is not None +try: + _timm_version = importlib_metadata.version('timm') + logger.debug(f'Successfully imported timm version {_timm_version}') +except importlib_metadata.PackageNotFoundError: + _timm_available = False + +_tf_version = 'N/A' +if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES: + _tf_available = importlib.util.find_spec('tensorflow') is not None + if _tf_available: + candidates = ( + 'tensorflow', + 'tensorflow-cpu', + 'tensorflow-gpu', + 'tf-nightly', + 'tf-nightly-cpu', + 'tf-nightly-gpu', + 'intel-tensorflow', + 
'intel-tensorflow-avx512', + 'tensorflow-rocm', + 'tensorflow-macos', + ) + _tf_version = None + # For the metadata, we have to look for both tensorflow and tensorflow-cpu + for pkg in candidates: + try: + _tf_version = importlib_metadata.version(pkg) + break + except importlib_metadata.PackageNotFoundError: + pass + _tf_available = _tf_version is not None + if _tf_available: + if version.parse(_tf_version) < version.parse('2'): + pass + else: + logger.info(f'TensorFlow version {_tf_version} Found.') +else: + logger.info('Disabling Tensorflow because USE_TORCH is set') + _tf_available = False + + +def is_scipy_available(): + return importlib.util.find_spec('scipy') is not None + + +def is_sklearn_available(): + if importlib.util.find_spec('sklearn') is None: + return False + return is_scipy_available() and importlib.util.find_spec('sklearn.metrics') + + +def is_sentencepiece_available(): + return importlib.util.find_spec('sentencepiece') is not None + + +def is_protobuf_available(): + if importlib.util.find_spec('google') is None: + return False + return importlib.util.find_spec('google.protobuf') is not None + + +def is_tokenizers_available(): + return importlib.util.find_spec('tokenizers') is not None + + +def is_timm_available(): + return _timm_available + + +def is_torch_available(): + return _torch_available + + +def is_torch_cuda_available(): + if is_torch_available(): + import torch + + return torch.cuda.is_available() + else: + return False + + +def is_wenetruntime_available(): + return importlib.util.find_spec('wenetruntime') is not None + + +def is_tf_available(): + return _tf_available + + +def is_opencv_available(): + return importlib.util.find_spec('cv2') is not None + + +def is_pillow_available(): + return importlib.util.find_spec('PIL.Image') is not None + + +def _is_package_available_fn(pkg_name): + return importlib.util.find_spec(pkg_name) is not None + + +def is_package_available(pkg_name): + return functools.partial(_is_package_available_fn, pkg_name) + + +def is_espnet_available(pkg_name): + return importlib.util.find_spec('espnet2') is not None \ + and importlib.util.find_spec('espnet') + + +REQUIREMENTS_MAAPING = OrderedDict([ + ('protobuf', (is_protobuf_available, PROTOBUF_IMPORT_ERROR)), + ('sentencepiece', (is_sentencepiece_available, + SENTENCEPIECE_IMPORT_ERROR)), + ('sklearn', (is_sklearn_available, SKLEARN_IMPORT_ERROR)), + ('tf', (is_tf_available, TENSORFLOW_IMPORT_ERROR)), + ('tensorflow', (is_tf_available, TENSORFLOW_IMPORT_ERROR)), + ('timm', (is_timm_available, TIMM_IMPORT_ERROR)), + ('tokenizers', (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)), + ('torch', (is_torch_available, PYTORCH_IMPORT_ERROR)), + ('wenetruntime', + (is_wenetruntime_available, + WENETRUNTIME_IMPORT_ERROR.replace('TORCH_VER', _torch_version))), + ('scipy', (is_scipy_available, SCIPY_IMPORT_ERROR)), + ('cv2', (is_opencv_available, OPENCV_IMPORT_ERROR)), + ('PIL', (is_pillow_available, PILLOW_IMPORT_ERROR)), + ('pai-easynlp', (is_package_available('easynlp'), EASYNLP_IMPORT_ERROR)), + ('espnet2', (is_espnet_available, + GENERAL_IMPORT_ERROR.replace('REQ', 'espnet'))), + ('espnet', (is_espnet_available, + GENERAL_IMPORT_ERROR.replace('REQ', 'espnet'))), + ('easyasr', (is_package_available('easyasr'), AUDIO_IMPORT_ERROR)), + ('funasr', (is_package_available('funasr'), AUDIO_IMPORT_ERROR)), + ('kwsbp', (is_package_available('kwsbp'), AUDIO_IMPORT_ERROR)), + ('decord', (is_package_available('decord'), DECORD_IMPORT_ERROR)), + ('deepspeed', (is_package_available('deepspeed'), 
DEEPSPEED_IMPORT_ERROR)), + ('fairseq', (is_package_available('fairseq'), FAIRSEQ_IMPORT_ERROR)), + ('fasttext', (is_package_available('fasttext'), FASTTEXT_IMPORT_ERROR)), + ('megatron_util', (is_package_available('megatron_util'), + MEGATRON_UTIL_IMPORT_ERROR)), + ('text2sql_lgesql', (is_package_available('text2sql_lgesql'), + TEXT2SQL_LGESQL_IMPORT_ERROR)), + ('mpi4py', (is_package_available('mpi4py'), MPI4PY_IMPORT_ERROR)), +]) + +SYSTEM_PACKAGE = set(['os', 'sys', 'typing']) + + +def requires(obj, requirements): + if not isinstance(requirements, (list, tuple)): + requirements = [requirements] + if isinstance(obj, str): + name = obj + else: + name = obj.__name__ if hasattr(obj, + '__name__') else obj.__class__.__name__ + checks = [] + for req in requirements: + if req == '' or req in SYSTEM_PACKAGE: + continue + if req in REQUIREMENTS_MAAPING: + check = REQUIREMENTS_MAAPING[req] + else: + check_fn = is_package_available(req) + err_msg = GENERAL_IMPORT_ERROR.replace('REQ', req) + check = (check_fn, err_msg) + checks.append(check) + + failed = [msg.format(name) for available, msg in checks if not available()] + if failed: + raise ImportError(''.join(failed)) + + +def torch_required(func): + # Chose a different decorator name than in tests so it's clear they are not the same. + @functools.wraps(func) + def wrapper(*args, **kwargs): + if is_torch_available(): + return func(*args, **kwargs) + else: + raise ImportError(f'Method `{func.__name__}` requires PyTorch.') + + return wrapper + + +def tf_required(func): + # Chose a different decorator name than in tests so it's clear they are not the same. + @functools.wraps(func) + def wrapper(*args, **kwargs): + if is_tf_available(): + return func(*args, **kwargs) + else: + raise ImportError(f'Method `{func.__name__}` requires TF.') + + return wrapper + + +class LazyImportModule(ModuleType): + AST_INDEX = None + if AST_INDEX is None: + AST_INDEX = load_index() + + def __init__(self, + name, + module_file, + import_structure, + module_spec=None, + extra_objects=None, + try_to_pre_import=False): + super().__init__(name) + self._modules = set(import_structure.keys()) + self._class_to_module = {} + for key, values in import_structure.items(): + for value in values: + self._class_to_module[value] = key + # Needed for autocompletion in an IDE + self.__all__ = list(import_structure.keys()) + list( + chain(*import_structure.values())) + self.__file__ = module_file + self.__spec__ = module_spec + self.__path__ = [os.path.dirname(module_file)] + self._objects = {} if extra_objects is None else extra_objects + self._name = name + self._import_structure = import_structure + if try_to_pre_import: + self._try_to_import() + + def _try_to_import(self): + for sub_module in self._class_to_module.keys(): + try: + getattr(self, sub_module) + except Exception as e: + logger.warning( + f'pre load module {sub_module} error, please check {e}') + + # Needed for autocompletion in an IDE + def __dir__(self): + result = super().__dir__() + # The elements of self.__all__ that are submodules may or may not be in the dir already, depending on whether + # they have been accessed or not. So we only add the elements of self.__all__ that are not already in the dir. 
+ for attr in self.__all__: + if attr not in result: + result.append(attr) + return result + + def __getattr__(self, name: str) -> Any: + if name in self._objects: + return self._objects[name] + if name in self._modules: + value = self._get_module(name) + elif name in self._class_to_module.keys(): + module = self._get_module(self._class_to_module[name]) + value = getattr(module, name) + else: + raise AttributeError( + f'module {self.__name__} has no attribute {name}') + + setattr(self, name, value) + return value + + def _get_module(self, module_name: str): + try: + # check requirements before module import + module_name_full = self.__name__ + '.' + module_name + if module_name_full in LazyImportModule.AST_INDEX[REQUIREMENT_KEY]: + requirements = LazyImportModule.AST_INDEX[REQUIREMENT_KEY][ + module_name_full] + requires(module_name_full, requirements) + return importlib.import_module('.' + module_name, self.__name__) + except Exception as e: + raise RuntimeError( + f'Failed to import {self.__name__}.{module_name} because of the following error ' + f'(look up to see its traceback):\n{e}') from e + + def __reduce__(self): + return self.__class__, (self._name, self.__file__, + self._import_structure) + + @staticmethod + def import_module(signature): + """ import a lazy import module using signature + + Args: + signature (tuple): a tuple of str, (registry_name, registry_group_name, module_name) + """ + if signature in LazyImportModule.AST_INDEX[INDEX_KEY]: + mod_index = LazyImportModule.AST_INDEX[INDEX_KEY][signature] + module_name = mod_index[MODULE_KEY] + if module_name in LazyImportModule.AST_INDEX[REQUIREMENT_KEY]: + requirements = LazyImportModule.AST_INDEX[REQUIREMENT_KEY][ + module_name] + requires(module_name, requirements) + importlib.import_module(module_name) + else: + logger.warning(f'{signature} not found in ast index file') diff --git a/modelscope/utils/json_utils.py b/modelscope/utils/json_utils.py new file mode 100644 index 0000000..ea8fde1 --- /dev/null +++ b/modelscope/utils/json_utils.py @@ -0,0 +1,17 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import json + +import numpy as np + + +class EnhancedEncoder(json.JSONEncoder): + """ Enhanced json encoder for not supported types """ + def default(self, obj): + if isinstance(obj, np.integer): + return int(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + return json.JSONEncoder.default(self, obj) diff --git a/modelscope/utils/logger.py b/modelscope/utils/logger.py new file mode 100644 index 0000000..17923a6 --- /dev/null +++ b/modelscope/utils/logger.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import importlib +import logging +from typing import Optional + +init_loggers = {} + +formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + +def get_logger(log_file: Optional[str] = None, + log_level: int = logging.INFO, + file_mode: str = 'w'): + """ Get logging logger + + Args: + log_file: Log filename, if specified, file handler will be added to + logger + log_level: Logging level. + file_mode: Specifies the mode to open the file, if filename is + specified (if filemode is unspecified, it defaults to 'w'). 
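+    Example (illustrative sketch; the log file path is hypothetical):
+        >>> logger = get_logger(log_file='./train.log', log_level=logging.INFO)
+        >>> logger.info('training started')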
+ """ + + logger_name = __name__.split('.')[0] + logger = logging.getLogger(logger_name) + + if logger_name in init_loggers: + add_file_handler_if_needed(logger, log_file, file_mode, log_level) + return logger + + # handle duplicate logs to the console + # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) + # to the root logger. As logger.propagate is True by default, this root + # level handler causes logging messages from rank>0 processes to + # unexpectedly show up on the console, creating much unwanted clutter. + # To fix this issue, we set the root logger's StreamHandler, if any, to log + # at the ERROR level. + for handler in logger.root.handlers: + if type(handler) is logging.StreamHandler: + handler.setLevel(logging.ERROR) + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if importlib.util.find_spec('torch') is not None: + from modelscope.utils.torch_utils import is_master + is_worker0 = is_master() + else: + is_worker0 = True + + if is_worker0 and log_file is not None: + file_handler = logging.FileHandler(log_file, file_mode) + handlers.append(file_handler) + + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + if is_worker0: + logger.setLevel(log_level) + else: + logger.setLevel(logging.ERROR) + + init_loggers[logger_name] = True + + return logger + + +def add_file_handler_if_needed(logger, log_file, file_mode, log_level): + for handler in logger.handlers: + if isinstance(handler, logging.FileHandler): + return + + if importlib.util.find_spec('torch') is not None: + from modelscope.utils.torch_utils import is_master + is_worker0 = is_master() + else: + is_worker0 = True + + if is_worker0 and log_file is not None: + file_handler = logging.FileHandler(log_file, file_mode) + file_handler.setFormatter(formatter) + file_handler.setLevel(log_level) + logger.addHandler(file_handler) diff --git a/modelscope/utils/megatron_utils.py b/modelscope/utils/megatron_utils.py new file mode 100644 index 0000000..8a543ab --- /dev/null +++ b/modelscope/utils/megatron_utils.py @@ -0,0 +1,45 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Optional + +from megatron_util import initialize_megatron +from modelscope.utils.config import Config +from modelscope.utils.hub import read_config + +_DEFAULT_CFG_WITH_MODEL_TYPE = { + 'gpt-moe': { + 'version': 'moe', + 'world_size': 8 + }, + 'plug': { + 'version': 'v1', + 'world_size': 8, + 'tensor_model_parallel_size': 8, + 'seed': 1234 + }, + 'mglm-text-summarization': { + 'version': 'v1', + 'seed': 1234 + }, +} + + +def init_megatron_util(cfg: Optional[Config] = None, + model_dir: Optional[str] = None, + **kwargs): + assert not (cfg is None and model_dir is None), \ + 'cfg and model_dir cannot both be None when initializing megatron_util' + if cfg is None: + cfg = read_config(model_dir) + try: + megatron_cfg = cfg.megatron + except AttributeError: + try: + model_type = cfg.model.type + except AttributeError: + # Fit models without model type, such as mglm + model_type = cfg.pipeline.type + megatron_cfg = _DEFAULT_CFG_WITH_MODEL_TYPE[model_type] \ + if model_type in _DEFAULT_CFG_WITH_MODEL_TYPE else {} + megatron_cfg.update(kwargs) + initialize_megatron(megatron_cfg) diff --git a/modelscope/utils/metric.py b/modelscope/utils/metric.py new file mode 100644 index 0000000..7717216 --- /dev/null +++ b/modelscope/utils/metric.py @@ -0,0 +1,96 @@ +# Copyright (c) Megvii Inc. All rights reserved. +# Copyright © Alibaba, Inc. 
and its affiliates. + +import functools +import os +from collections import defaultdict, deque + +import numpy as np +import torch + +__all__ = [ + 'AverageMeter', + 'MeterBuffer', + 'gpu_mem_usage', +] + + +def gpu_mem_usage(): + """ + Compute the GPU memory usage for the current device (MB). + """ + mem_usage_bytes = torch.cuda.max_memory_allocated() + return mem_usage_bytes / (1024 * 1024) + + +class AverageMeter: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + def __init__(self, window_size=50): + self._deque = deque(maxlen=window_size) + self._total = 0.0 + self._count = 0 + + def update(self, value): + self._deque.append(value) + self._count += 1 + self._total += value + + @property + def median(self): + d = np.array(list(self._deque)) + return np.median(d) + + @property + def avg(self): + # if deque is empty, nan will be returned. + d = np.array(list(self._deque)) + return d.mean() + + @property + def global_avg(self): + return self._total / max(self._count, 1e-5) + + @property + def latest(self): + return self._deque[-1] if len(self._deque) > 0 else None + + @property + def total(self): + return self._total + + def reset(self): + self._deque.clear() + self._total = 0.0 + self._count = 0 + + def clear(self): + self._deque.clear() + + +class MeterBuffer(defaultdict): + """Computes and stores the average and current value""" + def __init__(self, window_size=20): + factory = functools.partial(AverageMeter, window_size=window_size) + super().__init__(factory) + + def reset(self): + for v in self.values(): + v.reset() + + def get_filtered_meter(self, filter_key='time'): + return {k: v for k, v in self.items() if filter_key in k} + + def update(self, values=None, **kwargs): + if values is None: + values = {} + values.update(kwargs) + for k, v in values.items(): + if isinstance(v, torch.Tensor): + v = v.detach() + self[k].update(v) + + def clear_meters(self): + for v in self.values(): + v.clear() diff --git a/modelscope/utils/model_tag.py b/modelscope/utils/model_tag.py new file mode 100644 index 0000000..e68c81f --- /dev/null +++ b/modelscope/utils/model_tag.py @@ -0,0 +1,183 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
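+# Utilities for reporting model test results to an internal model-tagging
+# service; the endpoint is read from the MODEL_TAG_URL environment variable.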
+ +import json +import logging +import os + +import requests + +from modelscope.version import __version__ + + +# 打标 +class ModelTag(object): + _URL = os.environ.get('MODEL_TAG_URL', None) + + # 模型测试结果 + BATCH_COMMIT_RESULT_URL = f'{_URL}/batchCommitResult' + # 测试阶段完成 + BATCH_REFRESH_STAGE_URL = f'{_URL}/batchRefreshStage' + # query_model_stage + QUERY_MODEL_STAGE_URL = f'{_URL}/queryModelStage' + + HEADER = {'Content-Type': 'application/json'} + + # 检测结果 + MODEL_SKIP = 0 + MODEL_FAIL = 1 + MODEL_PASS = 2 + + class ItemResult(object): + def __init__(self): + self.result = 0 + self.name = '' + self.info = '' + + def to_json(self): + return { + 'name': self.name, + 'result': self.result, + 'info': self.info + } + + def __init__(self): + self.job_name = '' + self.job_id = '' + self.model = '' + self.sdk_version = '' + self.image_version = '' + self.domain = '' + self.task = '' + self.source = '' + self.stage = '' + # ItemResult list + self.item_result = [] + + # 发送请求 + def _post_request(self, url, param): + try: + logging.info(url + ' query: ' + + str(json.dumps(param, ensure_ascii=False))) + res = requests.post(url=url, + headers=self.HEADER, + data=json.dumps( + param, ensure_ascii=False).encode('utf8')) + if res.status_code == 200: + logging.info(f'{url} post结果: ' + res.text) + res_json = json.loads(res.text) + if int(res_json['errorCode']) == 200: + return res_json['content'] + else: + logging.error(res.text) + else: + logging.error(res.text) + except Exception as e: + logging.error(e) + + return None + + # 提交模型测试结果 + def batch_commit_result(self): + try: + param = { + 'sdkVersion': + self.sdk_version, + 'imageVersion': + self.image_version, + 'source': + self.source, + 'jobName': + self.job_name, + 'jobId': + self.job_id, + 'modelList': [{ + 'model': self.model, + 'domain': self.domain, + 'task': self.task, + 'itemResult': self.item_result + }] + } + return self._post_request(self.BATCH_COMMIT_RESULT_URL, param) + + except Exception as e: + logging.error(e) + + return + + # 测试阶段完成 + def batch_refresh_stage(self): + try: + param = { + 'sdkVersion': + self.sdk_version, + 'imageVersion': + self.image_version, + 'source': + self.source, + 'stage': + self.stage, + 'modelList': [{ + 'model': self.model, + 'domain': self.domain, + 'task': self.task + }] + } + return self._post_request(self.BATCH_REFRESH_STAGE_URL, param) + + except Exception as e: + logging.error(e) + + return + + # 查询模型某个阶段的最新测试结果(只返回单个结果 + def query_model_stage(self): + try: + param = { + 'sdkVersion': self.sdk_version, + 'model': self.model, + 'stage': self.stage, + 'imageVersion': self.image_version + } + return self._post_request(self.QUERY_MODEL_STAGE_URL, param) + + except Exception as e: + logging.error(e) + + return None + + # 提交模型UT测试结果 + """ + model_tag = ModelTag() + model_tag.model = "XXX" + model_tag.sdk_version = "0.3.7" + model_tag.domain = "nlp" + model_tag.task = "word-segmentation" + item = model_tag.ItemResult() + item.result = model_tag.MODEL_PASS + item.name = "ALL" + item.info = "" + model_tag.item_result.append(item.to_json()) + """ + + def commit_ut_result(self): + if self._URL is not None and self._URL != '': + self.job_name = 'UT' + self.source = 'dev' + self.stage = 'integration' + + self.batch_commit_result() + self.batch_refresh_stage() + + +def commit_model_ut_result(model_name, ut_result): + model_tag = ModelTag() + model_tag.model = model_name.replace('damo/', '') + model_tag.sdk_version = __version__ + # model_tag.domain = "" + # model_tag.task = "" + item = model_tag.ItemResult() + item.result 
= ut_result + item.name = 'ALL' + item.info = '' + model_tag.item_result.append(item.to_json()) + model_tag.commit_ut_result() diff --git a/modelscope/utils/nlp/__init__.py b/modelscope/utils/nlp/__init__.py new file mode 100644 index 0000000..62c0b88 --- /dev/null +++ b/modelscope/utils/nlp/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .utils import import_external_nltk_data + +else: + _import_structure = { + 'utils': ['import_external_nltk_data'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/utils/nlp/distributed.py b/modelscope/utils/nlp/distributed.py new file mode 100755 index 0000000..5cb32ac --- /dev/null +++ b/modelscope/utils/nlp/distributed.py @@ -0,0 +1,112 @@ +# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors. +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch +import torch.distributed as dist +from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors +from torch.autograd import Variable +from torch.nn.modules import Module + +from megatron_util import mpu + + +def normal_init_method(mean, std): + def init_(tensor): + return torch.nn.init.normal_(tensor, mean=mean, std=std) + + return init_ + + +def scaled_init_method(mean, std, num_layers): + """Init method based on N(0, sigma/sqrt(2*num_layers).""" + std = std / math.sqrt(2.0 * num_layers) + + def init_(tensor): + return torch.nn.init.normal_(tensor, mean=mean, std=std) + + return init_ + + +class DistributedDataParallel(Module): + def __init__(self, module): + super(DistributedDataParallel, self).__init__() + self.warn_on_half = True if dist._backend == dist.dist_backend.GLOO else False + + self.module = module + self.data_parallel_group = mpu.get_data_parallel_group() + src_rank = mpu.get_tensor_model_parallel_rank() + for p in self.module.parameters(): + if torch.is_tensor(p): + dist.broadcast(p, src_rank, group=self.data_parallel_group) + + def allreduce_params(reduce_after=True, + no_scale=False, + fp32_allreduce=False): + if (self.needs_reduction): + self.needs_reduction = False + buckets = {} + for name, param in self.module.named_parameters(): + if param.requires_grad and param.grad is not None: + tp = (param.data.type()) + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(param) + if self.warn_on_half: + if torch.cuda.HalfTensor in buckets: + print( + 'WARNING: gloo dist backend for half parameters may be extremely slow.', + 'It is recommended to use the NCCL backend in this case.' 
+ ) + self.warn_on_half = False + for tp in buckets: + bucket = buckets[tp] + grads = [param.grad.data for param in bucket] + coalesced = _flatten_dense_tensors(grads) + if fp32_allreduce: + coalesced = coalesced.float() + if not no_scale and not reduce_after: + coalesced /= dist.get_world_size( + group=self.data_parallel_group) + dist.all_reduce(coalesced, group=self.data_parallel_group) + torch.cuda.synchronize() + if not no_scale and reduce_after: + coalesced /= dist.get_world_size( + group=self.data_parallel_group) + for buf, synced in zip( + grads, _unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + self.hook_handles = [] + self.hooks = [] + for param in list(self.module.parameters()): + + def allreduce_hook(*unused): + Variable._execution_engine.queue_callback(allreduce_params) + + self.allreduce_params = allreduce_params + + def forward(self, *inputs, **kwargs): + self.needs_reduction = True + return self.module(*inputs, **kwargs) + + def state_dict(self, destination=None, prefix='', keep_vars=False): + sd = self.module.state_dict(destination, prefix, keep_vars) + + return sd + + def load_state_dict(self, state_dict, strict=True): + self.module.load_state_dict(state_dict, strict=strict) diff --git a/modelscope/utils/nlp/load_checkpoint.py b/modelscope/utils/nlp/load_checkpoint.py new file mode 100755 index 0000000..0e1274c --- /dev/null +++ b/modelscope/utils/nlp/load_checkpoint.py @@ -0,0 +1,116 @@ +# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors. +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch + + +def load_checkpoint(model, + load_dir, + tag, + load_module_strict=True, + load_optimizer_states=True, + load_lr_scheduler_states=True): + r"""Load training checkpoint + + Arguments: + load_dir: Required. Directory to load the checkpoint from + tag: Required. Checkpoint tag used as a unique identifier for the checkpoint. Ex. Global Step. + load_module_strict: Optional. Boolean to strictly enforce that the keys in state_dict of module and + checkpoint match. + load_optimizer_states: Optional. Boolean to load the training optimizer states from Checkpoint. + Ex. ADAM's momentum and variance + load_lr_scheduler_states: Optional. Boolean to add the learning rate scheduler states from Checkpoint. + Return: + load_path: Path of the loaded checkpoint. None if loading the checkpoint failed + client_state: State dictionary used for loading required training states in the client code. 
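+    Example (illustrative sketch; `engine` stands for a DeepSpeed-style model
+        engine and the checkpoint directory/tag are hypothetical):
+        >>> load_path, client_state = load_checkpoint(
+        >>>     engine, './checkpoints', tag='global_step1000',
+        >>>     load_optimizer_states=False)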
+ """ + + load_path, client_states = _load_checkpoint( + model, + load_dir, + tag, + load_module_strict=load_module_strict, + load_optimizer_states=load_optimizer_states, + load_lr_scheduler_states=load_lr_scheduler_states) + + if load_optimizer_states: + if model.zero_optimization() and load_path is not None: + model._load_zero_checkpoint( + load_dir, tag, load_optimizer_states=load_optimizer_states) + + return load_path, client_states + + +def _get_ckpt_name(mp_rank, checkpoints_path, tag): + ckpt_name = os.path.join( + checkpoints_path, str(tag), + 'mp_rank_{:02d}'.format(mp_rank) + '_model_states.pt') + return ckpt_name + + +def pre_load(mp_rank, load_dir, tag=''): + load_path = _get_ckpt_name(mp_rank, load_dir, tag) + checkpoint = torch.load(load_path, + map_location=lambda storage, loc: storage) + return checkpoint['module'] + + +def _load_checkpoint(model, + load_dir, + tag, + load_module_strict=True, + load_optimizer_states=True, + load_lr_scheduler_states=True): + + load_path = model._get_ckpt_name(load_dir, tag) + + if not os.path.exists(load_path): + return None, None + + checkpoint = torch.load(load_path, + map_location=lambda storage, loc: storage) + + model.load_module_state_dict(state_dict=checkpoint['module'], + strict=load_module_strict) + if not model.zero_optimization() and load_optimizer_states: + if model.fp16_enabled(): + model.optimizer.load_state_dict( + checkpoint['optimizer'], + load_optimizer_states=load_optimizer_states) + elif load_optimizer_states: + model.optimizer.load_state_dict(checkpoint['optimizer']) + + if load_lr_scheduler_states and model.lr_scheduler is not None: + model.lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + + model.csr_tensor_module_names = checkpoint['csr_tensor_module_names'] + model.global_steps = checkpoint['global_steps'] + model.global_samples = checkpoint.get( + 'global_samples', model.global_steps * model.train_batch_size()) + model.skipped_steps = checkpoint['skipped_steps'] + model.loaded_checkpoint_mp_world_size = checkpoint['mp_world_size'] + model.loaded_checkpoint_dp_world_size = checkpoint['dp_world_size'] + deepspeed_states = [ + 'module', 'optimizer', 'lr_scheduler', 'csr_tensor_module_names', + 'skipped_steps', 'global_steps', 'dp_world_size', 'mp_world_size' + ] + client_state = { + key: value + for key, value in checkpoint.items() if key not in deepspeed_states + } + + return load_path, client_state diff --git a/modelscope/utils/nlp/space/__init__.py b/modelscope/utils/nlp/space/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/utils/nlp/space/args.py b/modelscope/utils/nlp/space/args.py new file mode 100644 index 0000000..b4eb17a --- /dev/null +++ b/modelscope/utils/nlp/space/args.py @@ -0,0 +1,62 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import argparse +import json + + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value encountered.') + + +class HParams(dict): + """ Hyper-parameters class + + Store hyper-parameters in training / infer / ... scripts. 
+ """ + def __getattr__(self, name): + if name in self.keys(): + return self[name] + for v in self.values(): + if isinstance(v, HParams): + if name in v: + return v[name] + raise AttributeError(f"'HParams' object has no attribute '{name}'") + + def __setattr__(self, name, value): + self[name] = value + + def save(self, filename): + with open(filename, 'w', encoding='utf-8') as fp: + json.dump(self, fp, ensure_ascii=False, indent=4, sort_keys=False) + + def load(self, filename): + with open(filename, 'r', encoding='utf-8') as fp: + params_dict = json.load(fp) + for k, v in params_dict.items(): + if isinstance(v, dict): + self[k].update(HParams(v)) + else: + self[k] = v + + +def parse_args(parser): + """ Parse hyper-parameters from cmdline. """ + parsed = parser.parse_args() + args = HParams() + optional_args = parser._action_groups[1] + for action in optional_args._group_actions[1:]: + arg_name = action.dest + args[arg_name] = getattr(parsed, arg_name) + for group in parser._action_groups[2:]: + group_args = HParams() + for action in group._group_actions: + arg_name = action.dest + group_args[arg_name] = getattr(parsed, arg_name) + if len(group_args) > 0: + args[group.title] = group_args + return args diff --git a/modelscope/utils/nlp/space/clean_dataset.py b/modelscope/utils/nlp/space/clean_dataset.py new file mode 100644 index 0000000..cd4fe9f --- /dev/null +++ b/modelscope/utils/nlp/space/clean_dataset.py @@ -0,0 +1,336 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import re + +from . import ontology + + +def clean_text_split_dot(text): + text = re.sub(r'([a-zT]+)\.([a-z])', r'\1 . \2', + text) # 'abc.xyz' -> 'abc . xyz' + text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text) # if 'abc. ' -> 'abc . ' + return text + + +def clean_text(data_dir, text): + text = text.strip() + text = text.lower() + text = text.replace(u'’', "'") + text = text.replace(u'‘', "'") + text = text.replace(';', ',') + text = text.replace('"', ' ') + text = text.replace('/', ' and ') + text = text.replace("don't", "do n't") + text = clean_time(text) + baddata = { + r'c\.b (\d), (\d) ([a-z])\.([a-z])': r'cb\1\2\3\4', + 'c.b. 1 7 d.y': 'cb17dy', + 'c.b.1 7 d.y': 'cb17dy', + 'c.b 25, 9 a.q': 'cb259aq', + 'isc.b 25, 9 a.q': 'is cb259aq', + 'c.b2, 1 u.f': 'cb21uf', + 'c.b 1,2 q.a': 'cb12qa', + '0-122-336-5664': '01223365664', + 'postcodecb21rs': 'postcode cb21rs', + r'i\.d': 'id', + ' i d ': 'id', + 'Telephone:01223358966': 'Telephone: 01223358966', + 'depature': 'departure', + 'depearting': 'departing', + '-type': ' type', + r'b[\s]?&[\s]?b': 'bed and breakfast', + 'b and b': 'bed and breakfast', + r'guesthouse[s]?': 'guest house', + r'swimmingpool[s]?': 'swimming pool', + "wo n\'t": 'will not', + " \'d ": ' would ', + " \'m ": ' am ', + " \'re' ": ' are ', + " \'ll' ": ' will ', + " \'ve ": ' have ', + r'^\'': '', + r'\'$': '', + } + for tmpl, good in baddata.items(): + text = re.sub(tmpl, good, text) + + text = re.sub(r'([a-zT]+)\.([a-z])', r'\1 . \2', + text) # 'abc.xyz' -> 'abc . xyz' + text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text) # if 'abc. ' -> 'abc . 
' + + with open(os.path.join(data_dir, 'mapping.pair'), 'r', + encoding='utf-8') as fin: + for line in fin.readlines(): + fromx, tox = line.replace('\n', '').split('\t') + text = ' ' + text + ' ' + text = text.replace(' ' + fromx + ' ', ' ' + tox + ' ')[1:-1] + + return text + + +def clean_time(utter): + utter = re.sub(r'(\d+) ([ap]\.?m)', lambda x: x.group(1) + x.group(2), + utter) # 9 am -> 9am + utter = re.sub(r'((?3'} + else: + nummap = {0: '0', 1: '1-5', 2: '6-10', 3: '>10'} + if vector[:4] == [0, 0, 0, 0]: + report = '' + else: + num = vector.index(1) + report = domain + ': ' + nummap[num] + '; ' + + if vector[-2] == 0 and vector[-1] == 1: + report += 'booking: ok' + if vector[-2] == 1 and vector[-1] == 0: + report += 'booking: unable' + + return report + + def queryJsons(self, + domain, + constraints, + exactly_match=True, + return_name=False): + """Returns the list of entities for a given domain + based on the annotation of the belief state + constraints: dict e.g. {'pricerange': 'cheap', 'area': 'west'} + """ + # query the db + if domain == 'taxi': + return [{ + 'taxi_colors': + random.choice(self.dbs[domain]['taxi_colors']), + 'taxi_types': + random.choice(self.dbs[domain]['taxi_types']), + 'taxi_phone': [random.randint(1, 9) for _ in range(10)] + }] + if domain == 'police': + return self.dbs['police'] + if domain == 'hospital': + if constraints.get('department'): + for entry in self.dbs['hospital']: + if entry.get('department') == constraints.get( + 'department'): + return [entry] + else: + return [] + + valid_cons = False + for v in constraints.values(): + if v not in ['not mentioned', '']: + valid_cons = True + if not valid_cons: + return [] + + match_result = [] + + if 'name' in constraints: + for db_ent in self.dbs[domain]: + if 'name' in db_ent: + cons = constraints['name'] + dbn = db_ent['name'] + if cons == dbn: + db_ent = db_ent if not return_name else db_ent['name'] + match_result.append(db_ent) + return match_result + + for db_ent in self.dbs[domain]: + match = True + for s, v in constraints.items(): + if s == 'name': + continue + if s in ['people', 'stay'] or (domain == 'hotel' and s == 'day') or \ + (domain == 'restaurant' and s in ['day', 'time']): + # These inform slots belong to "book info",which do not exist in DB + # "book" is according to the user goal,not DB + continue + + skip_case = { + "don't care": 1, + "do n't care": 1, + 'dont care': 1, + 'not mentioned': 1, + 'dontcare': 1, + '': 1 + } + if skip_case.get(v): + continue + + if s not in db_ent: + # logging.warning('Searching warning: slot %s not in %s db'%(s, domain)) + match = False + break + + # v = 'guesthouse' if v == 'guest house' else v + # v = 'swimmingpool' if v == 'swimming pool' else v + v = 'yes' if v == 'free' else v + + if s in ['arrive', 'leave']: + try: + h, m = v.split( + ':' + ) # raise error if time value is not xx:xx format + v = int(h) * 60 + int(m) + except Exception: + match = False + break + time = int(db_ent[s].split(':')[0]) * 60 + int( + db_ent[s].split(':')[1]) + if s == 'arrive' and v > time: + match = False + if s == 'leave' and v < time: + match = False + else: + if exactly_match and v != db_ent[s]: + match = False + break + elif v not in db_ent[s]: + match = False + break + + if match: + match_result.append(db_ent) + + if not return_name: + return match_result + else: + if domain == 'train': + match_result = [e['id'] for e in match_result] + else: + match_result = [e['name'] for e in match_result] + return match_result + + def querySQL(self, domain, constraints): + if not 
self.sql_dbs: + for dom in db_domains: + db = 'db/{}-dbase.db'.format(dom) + conn = sqlite3.connect(db) + c = conn.cursor() + self.sql_dbs[dom] = c + + sql_query = 'select * from {}'.format(domain) + + flag = True + for key, val in constraints.items(): + if val == '' \ + or val == 'dontcare' \ + or val == 'not mentioned' \ + or val == "don't care" \ + or val == 'dont care' \ + or val == "do n't care": + pass + else: + if flag: + sql_query += ' where ' + val2 = val.replace("'", "''") + # val2 = normalize(val2) + if key == 'leaveAt': + sql_query += r' ' + key + ' > ' + r"'" + val2 + r"'" + elif key == 'arriveBy': + sql_query += r' ' + key + ' < ' + r"'" + val2 + r"'" + else: + sql_query += r' ' + key + '=' + r"'" + val2 + r"'" + flag = False + else: + val2 = val.replace("'", "''") + # val2 = normalize(val2) + if key == 'leaveAt': + sql_query += r' and ' + key + ' > ' + r"'" + val2 + r"'" + elif key == 'arriveBy': + sql_query += r' and ' + key + ' < ' + r"'" + val2 + r"'" + else: + sql_query += r' and ' + key + '=' + r"'" + val2 + r"'" + + try: # "select * from attraction where name = 'queens college'" + print(sql_query) + return self.sql_dbs[domain].execute(sql_query).fetchall() + except Exception: + return [] # TODO test it + + +if __name__ == '__main__': + dbPATHs = { + 'attraction': 'db/attraction_db_processed.json', + 'hospital': 'db/hospital_db_processed.json', + 'hotel': 'db/hotel_db_processed.json', + 'police': 'db/police_db_processed.json', + 'restaurant': 'db/restaurant_db_processed.json', + 'taxi': 'db/taxi_db_processed.json', + 'train': 'db/train_db_processed.json', + } + db = MultiWozDB(dbPATHs) + while True: + constraints = {} + inp = input( + 'input belief state in fomat: domain-slot1=value1;slot2=value2...\n' + ) + domain, cons = inp.split('-') + for sv in cons.split(';'): + s, v = sv.split('=') + constraints[s] = v + # res = db.querySQL(domain, constraints) + res = db.queryJsons(domain, constraints, return_name=True) + report = [] + reidx = { + 'hotel': 8, + 'restaurant': 6, + 'attraction': 5, + 'train': 1, + } + print(constraints) + print(res) + print('count:', len(res), '\nnames:', report) diff --git a/modelscope/utils/nlp/space/ontology.py b/modelscope/utils/nlp/space/ontology.py new file mode 100644 index 0000000..c55d12e --- /dev/null +++ b/modelscope/utils/nlp/space/ontology.py @@ -0,0 +1,206 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
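+# Ontology for MultiWOZ-style task-oriented dialog: domain, slot, dialog-act
+# and special-token definitions shared by the SPACE data-processing utilities.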
+ +all_domains = [ + 'restaurant', 'hotel', 'attraction', 'train', 'taxi', 'police', 'hospital' +] +all_domains_with_bracket = ['[{}]'.format(item) for item in all_domains] +db_domains = ['restaurant', 'hotel', 'attraction', 'train'] +placeholder_tokens = [ + '', '', '', '', '', '', '', + '', '', '', '', '', '', + '', '', '' +] + +normlize_slot_names = { + 'car type': 'car', + 'entrance fee': 'price', + 'duration': 'time', + 'leaveat': 'leave', + 'arriveby': 'arrive', + 'trainid': 'id' +} + +requestable_slots = { + 'taxi': ['car', 'phone'], + 'police': ['postcode', 'address', 'phone'], + 'hospital': ['address', 'phone', 'postcode'], + 'hotel': [ + 'address', 'postcode', 'internet', 'phone', 'parking', 'type', + 'pricerange', 'stars', 'area', 'reference' + ], + 'attraction': + ['price', 'type', 'address', 'postcode', 'phone', 'area', 'reference'], + 'train': ['time', 'leave', 'price', 'arrive', 'id', 'reference'], + 'restaurant': [ + 'phone', 'postcode', 'address', 'pricerange', 'food', 'area', + 'reference' + ] +} +all_reqslot = [ + 'car', 'address', 'postcode', 'phone', 'internet', 'parking', 'type', + 'pricerange', 'food', 'stars', 'area', 'reference', 'time', 'leave', + 'price', 'arrive', 'id' +] + +informable_slots = { + 'taxi': ['leave', 'destination', 'departure', 'arrive'], + 'police': [], + 'hospital': ['department'], + 'hotel': [ + 'type', 'parking', 'pricerange', 'internet', 'stay', 'day', 'people', + 'area', 'stars', 'name' + ], + 'attraction': ['area', 'type', 'name'], + 'train': ['destination', 'day', 'arrive', 'departure', 'people', 'leave'], + 'restaurant': + ['food', 'pricerange', 'area', 'name', 'time', 'day', 'people'] +} +all_infslot = [ + 'type', 'parking', 'pricerange', 'internet', 'stay', 'day', 'people', + 'area', 'stars', 'name', 'leave', 'destination', 'departure', 'arrive', + 'department', 'food', 'time' +] + +all_slots = all_reqslot + [ + 'stay', 'day', 'people', 'name', 'destination', 'departure', 'department' +] +get_slot = {} +for s in all_slots: + get_slot[s] = 1 + +# mapping slots in dialogue act to original goal slot names +da_abbr_to_slot_name = { + 'addr': 'address', + 'fee': 'price', + 'post': 'postcode', + 'ref': 'reference', + 'ticket': 'price', + 'depart': 'departure', + 'dest': 'destination', +} + +dialog_acts = { + 'restaurant': [ + 'inform', 'request', 'nooffer', 'recommend', 'select', 'offerbook', + 'offerbooked', 'nobook' + ], + 'hotel': [ + 'inform', 'request', 'nooffer', 'recommend', 'select', 'offerbook', + 'offerbooked', 'nobook' + ], + 'attraction': ['inform', 'request', 'nooffer', 'recommend', 'select'], + 'train': + ['inform', 'request', 'nooffer', 'offerbook', 'offerbooked', 'select'], + 'taxi': ['inform', 'request'], + 'police': ['inform', 'request'], + 'hospital': ['inform', 'request'], + # 'booking': ['book', 'inform', 'nobook', 'request'], + 'general': ['bye', 'greet', 'reqmore', 'welcome'], +} +all_acts = [] +for acts in dialog_acts.values(): + for act in acts: + if act not in all_acts: + all_acts.append(act) + +dialog_act_params = { + 'inform': all_slots + ['choice', 'open'], + 'request': all_infslot + ['choice', 'price'], + 'nooffer': all_slots + ['choice'], + 'recommend': all_reqslot + ['choice', 'open'], + 'select': all_slots + ['choice'], + # 'book': ['time', 'people', 'stay', 'reference', 'day', 'name', 'choice'], + 'nobook': ['time', 'people', 'stay', 'reference', 'day', 'name', 'choice'], + 'offerbook': all_slots + ['choice'], + 'offerbooked': all_slots + ['choice'], + 'reqmore': [], + 'welcome': [], + 'bye': [], + 'greet': 
[], +} + +dialog_act_all_slots = all_slots + ['choice', 'open'] + +# special slot tokens in belief span +# no need of this, just covert slot to [slot] e.g. pricerange -> [pricerange] +slot_name_to_slot_token = {} + +# eos tokens definition +eos_tokens = { + 'user': '', + 'user_delex': '', + 'resp': '', + 'resp_gen': '', + 'pv_resp': '', + 'bspn': '', + 'bspn_gen': '', + 'pv_bspn': '', + 'bsdx': '', + 'bsdx_gen': '', + 'pv_bsdx': '', + 'qspn': '', + 'qspn_gen': '', + 'pv_qspn': '', + 'aspn': '', + 'aspn_gen': '', + 'pv_aspn': '', + 'dspn': '', + 'dspn_gen': '', + 'pv_dspn': '' +} + +# sos tokens definition +sos_tokens = { + 'user': '', + 'user_delex': '', + 'resp': '', + 'resp_gen': '', + 'pv_resp': '', + 'bspn': '', + 'bspn_gen': '', + 'pv_bspn': '', + 'bsdx': '', + 'bsdx_gen': '', + 'pv_bsdx': '', + 'qspn': '', + 'qspn_gen': '', + 'pv_qspn': '', + 'aspn': '', + 'aspn_gen': '', + 'pv_aspn': '', + 'dspn': '', + 'dspn_gen': '', + 'pv_dspn': '' +} + +# db tokens definition +db_tokens = [ + '', '', '[book_nores]', '[book_fail]', '[book_success]', + '[db_nores]', '[db_0]', '[db_1]', '[db_2]', '[db_3]' +] + + +# understand tokens definition +def get_understand_tokens(prompt_num_for_understand): + understand_tokens = [] + for i in range(prompt_num_for_understand): + understand_tokens.append(f'') + return understand_tokens + + +# policy tokens definition +def get_policy_tokens(prompt_num_for_policy): + policy_tokens = [] + for i in range(prompt_num_for_policy): + policy_tokens.append(f'') + return policy_tokens + + +# all special tokens definition +def get_special_tokens(other_tokens): + special_tokens = [ + '', '', '', '', '', '', + '', '', '', '', '', '', + '', '', '', '' + ] + db_tokens + other_tokens + return special_tokens diff --git a/modelscope/utils/nlp/space/scores.py b/modelscope/utils/nlp/space/scores.py new file mode 100644 index 0000000..eb6dd41 --- /dev/null +++ b/modelscope/utils/nlp/space/scores.py @@ -0,0 +1,9 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + + +def hierarchical_set_score(frame1, frame2): + # deal with empty frame + if not (frame1 and frame2): + return 0. + pass + return 0. diff --git a/modelscope/utils/nlp/space/utils.py b/modelscope/utils/nlp/space/utils.py new file mode 100644 index 0000000..56192d8 --- /dev/null +++ b/modelscope/utils/nlp/space/utils.py @@ -0,0 +1,193 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import json +import logging +from collections import OrderedDict + +import numpy as np + +from modelscope.utils.logger import get_logger + +from . 
import ontology + +logger = get_logger() + + +def max_lens(X): + lens = [len(X)] + while isinstance(X[0], list): + lens.append(max(map(len, X))) + X = [x for xs in X for x in xs] + return lens + + +def list2np(X: object, padding: object = 0, dtype: object = 'int64') -> object: + shape = max_lens(X) + ret = np.full(shape, padding, dtype=np.int32) + + if len(shape) == 1: + ret = np.array(X) + elif len(shape) == 2: + for i, x in enumerate(X): + ret[i, :len(x)] = np.array(x) + elif len(shape) == 3: + for i, xs in enumerate(X): + for j, x in enumerate(xs): + ret[i, j, :len(x)] = np.array(x) + return ret.astype(dtype) + + +def clean_replace(s, r, t, forward=True, backward=False): + def clean_replace_single(s, r, t, forward, backward, sidx=0): + # idx = s[sidx:].find(r) + idx = s.find(r) + if idx == -1: + return s, -1 + idx_r = idx + len(r) + if backward: + while idx > 0 and s[idx - 1]: + idx -= 1 + elif idx > 0 and s[idx - 1] != ' ': + return s, -1 + + if forward: + while \ + idx_r < len(s) and (s[idx_r].isalpha() or s[idx_r].isdigit()): + idx_r += 1 + elif idx_r != len(s) and (s[idx_r].isalpha() or s[idx_r].isdigit()): + return s, -1 + return s[:idx] + t + s[idx_r:], idx_r + + sidx = 0 + while sidx != -1: + s, sidx = clean_replace_single(s, r, t, forward, backward, sidx) + return s + + +def py2np(list): + return np.array(list) + + +def write_dict(fn, dic): + with open(fn, 'w') as f: + json.dump(dic, f, indent=2) + + +def f1_score(label_list, pred_list): + tp = len([t for t in pred_list if t in label_list]) + fp = max(0, len(pred_list) - tp) + fn = max(0, len(label_list) - tp) + precision = tp / (tp + fp + 1e-10) + recall = tp / (tp + fn + 1e-10) + f1 = 2 * precision * recall / (precision + recall + 1e-10) + return f1 + + +class MultiWOZVocab(object): + def __init__(self, vocab_size=0): + """ + vocab for multiwoz dataset + """ + self.vocab_size = vocab_size + self.vocab_size_oov = 0 # get after construction + self._idx2word = {} # word + oov + self._word2idx = {} # word + self._freq_dict = {} # word + oov + for w in [ + '[PAD]', '', '[UNK]', '', '', '', + '', '', '', '', '' + ]: + self._absolute_add_word(w) + + def _absolute_add_word(self, w): + idx = len(self._idx2word) + self._idx2word[idx] = w + self._word2idx[w] = idx + + def add_word(self, word): + if word not in self._freq_dict: + self._freq_dict[word] = 0 + self._freq_dict[word] += 1 + + def has_word(self, word): + return self._freq_dict.get(word) + + def _add_to_vocab(self, word): + if word not in self._word2idx: + idx = len(self._idx2word) + self._idx2word[idx] = word + self._word2idx[word] = idx + + def construct(self): + freq_dict_sorted = sorted(self._freq_dict.keys(), + key=lambda x: -self._freq_dict[x]) + logger.info('Vocabulary size including oov: %d' % + (len(freq_dict_sorted) + len(self._idx2word))) + if len(freq_dict_sorted) + len(self._idx2word) < self.vocab_size: + logging.warning( + 'actual label set smaller than that configured: {}/{}'.format( + len(freq_dict_sorted) + len(self._idx2word), + self.vocab_size)) + for word in ontology.all_domains + ['general']: + word = '[' + word + ']' + self._add_to_vocab(word) + for word in ontology.all_acts: + word = '[' + word + ']' + self._add_to_vocab(word) + for word in ontology.all_slots: + self._add_to_vocab(word) + for word in freq_dict_sorted: + if word.startswith('[value_') and word.endswith(']'): + self._add_to_vocab(word) + for word in freq_dict_sorted: + self._add_to_vocab(word) + self.vocab_size_oov = len(self._idx2word) + + def load_vocab(self, vocab_path): + self._freq_dict 
= json.loads( + open(vocab_path + '.freq.json', 'r', encoding='utf-8').read()) + self._word2idx = json.loads( + open(vocab_path + '.word2idx.json', 'r', encoding='utf-8').read()) + self._idx2word = {} + for w, idx in self._word2idx.items(): + self._idx2word[idx] = w + self.vocab_size_oov = len(self._idx2word) + logger.info('vocab file loaded from "' + vocab_path + '"') + logger.info('Vocabulary size including oov: %d' % + (self.vocab_size_oov)) + + def save_vocab(self, vocab_path): + _freq_dict = OrderedDict( + sorted(self._freq_dict.items(), key=lambda kv: kv[1], + reverse=True)) + write_dict(vocab_path + '.word2idx.json', self._word2idx) + write_dict(vocab_path + '.freq.json', _freq_dict) + + def encode(self, word, include_oov=True): + if include_oov: + if self._word2idx.get(word, None) is None: + raise ValueError( + 'Unknown word: %s. Vocabulary should include oovs here.' % + word) + return self._word2idx[word] + else: + word = '' if word not in self._word2idx else word + return self._word2idx[word] + + def sentence_encode(self, word_list): + return [self.encode(_) for _ in word_list] + + def oov_idx_map(self, idx): + return 2 if idx > self.vocab_size else idx + + def sentence_oov_map(self, index_list): + return [self.oov_idx_map(_) for _ in index_list] + + def decode(self, idx, indicate_oov=False): + if not self._idx2word.get(idx): + raise ValueError( + 'Error idx: %d. Vocabulary should include oovs here.' % idx) + if not indicate_oov or idx < self.vocab_size: + return self._idx2word[idx] + else: + return self._idx2word[idx] + '(o)' diff --git a/modelscope/utils/nlp/space/utils_dst.py b/modelscope/utils/nlp/space/utils_dst.py new file mode 100644 index 0000000..6277172 --- /dev/null +++ b/modelscope/utils/nlp/space/utils_dst.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import List + +from modelscope.outputs import OutputKeys +from modelscope.pipelines.nlp import DialogStateTrackingPipeline + + +def tracking_and_print_dialog_states( + test_case, pipelines: List[DialogStateTrackingPipeline]): + import json + pipelines_len = len(pipelines) + history_states = [{}] + utter = {} + for step, item in enumerate(test_case): + utter.update(item) + result = pipelines[step % pipelines_len]({ + 'utter': + utter, + 'history_states': + history_states + }) + print(json.dumps(result)) + + history_states.extend([result[OutputKeys.OUTPUT], {}]) + + +def batch_to_device(batch, device): + batch_on_device = [] + for element in batch: + if isinstance(element, dict): + batch_on_device.append( + {k: v.to(device) + for k, v in element.items()}) + else: + batch_on_device.append(element.to(device)) + return tuple(batch_on_device) diff --git a/modelscope/utils/nlp/space_T_en/__init__.py b/modelscope/utils/nlp/space_T_en/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelscope/utils/nlp/space_T_en/utils.py b/modelscope/utils/nlp/space_T_en/utils.py new file mode 100644 index 0000000..d884c24 --- /dev/null +++ b/modelscope/utils/nlp/space_T_en/utils.py @@ -0,0 +1,25 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
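+# Helper for exercising ConversationalTextToSqlPipeline test cases: each
+# utterance is fed with the accumulated history and the predicted SQL is printed.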
+ +from typing import List + +from modelscope.outputs import OutputKeys +from modelscope.pipelines.nlp import ConversationalTextToSqlPipeline + + +def text2sql_tracking_and_print_results( + test_case, pipelines: List[ConversationalTextToSqlPipeline]): + for p in pipelines: + last_sql, history = '', [] + for item in test_case['utterance']: + case = { + 'utterance': item, + 'history': history, + 'last_sql': last_sql, + 'database_id': test_case['database_id'], + 'local_db_path': test_case['local_db_path'] + } + results = p(case) + print({'question': item}) + print(results) + last_sql = results[OutputKeys.OUTPUT][OutputKeys.TEXT] + history.append(item) diff --git a/modelscope/utils/nlp/utils.py b/modelscope/utils/nlp/utils.py new file mode 100644 index 0000000..3295b5d --- /dev/null +++ b/modelscope/utils/nlp/utils.py @@ -0,0 +1,68 @@ +import os.path as osp + +from modelscope.utils.hub import parse_label_mapping + + +def import_external_nltk_data(nltk_data_dir, package_name): + """import external nltk_data, and extract nltk zip package. + + Args: + nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data + package_name (str): nltk package name, eg. tokenizers/punkt + """ + import nltk + nltk.data.path.append(nltk_data_dir) + + filepath = osp.join(nltk_data_dir, package_name + '.zip') + zippath = osp.join(nltk_data_dir, package_name) + packagepath = osp.dirname(zippath) + if not osp.exists(zippath): + import zipfile + with zipfile.ZipFile(filepath) as zf: + zf.extractall(osp.join(packagepath)) + + +def parse_labels_in_order(model_dir=None, cfg=None, **kwargs): + """Parse labels information in order. + + This is a helper function, used to get labels information in the correct order. + 1. The kw arguments listed in the method will in the first priority. + 2. Information in the cfg.dataset.train.labels will be used in the second priority (Compatible with old logic). + 3. Information in other files will be used then. + + Args: + model_dir: The model_dir used to call `parse_label_mapping`. + cfg: An optional cfg parsed and modified from the configuration.json. + **kwargs: The user inputs into the method. + + Returns: + The modified kwargs. + """ + label2id = kwargs.pop('label2id', None) + id2label = kwargs.pop('id2label', None) + num_labels = kwargs.pop('num_labels', None) + if label2id is None and id2label is not None: + label2id = {label: id for id, label in id2label.items()} + if label2id is None: + if cfg is not None and cfg.safe_get( + 'dataset.train.labels') is not None: + # An extra logic to parse labels from the dataset area. + label2id = { + label: idx + for idx, label in enumerate( + cfg.safe_get('dataset.train.labels')) + } + elif model_dir is not None: + label2id = parse_label_mapping(model_dir) + + if num_labels is None and label2id is not None: + num_labels = len(label2id) + if id2label is None and label2id is not None: + id2label = {id: label for label, id in label2id.items()} + if num_labels is not None: + kwargs['num_labels'] = num_labels + if label2id is not None: + kwargs['label2id'] = label2id + if id2label is not None: + kwargs['id2label'] = id2label + return kwargs diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py new file mode 100644 index 0000000..ad6d9fc --- /dev/null +++ b/modelscope/utils/plugins.py @@ -0,0 +1,214 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
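+# Plugin discovery and import helpers: module names listed in a local
+# .modelscope_plugins file or in ~/.modelscope/plugins can be imported
+# via import_all_plugins() / import_file_plugins().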
+# This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp +import importlib +import os +import pkgutil +import sys +from contextlib import contextmanager +from fnmatch import fnmatch +from pathlib import Path +from typing import Iterable, List, Optional, Set + +from modelscope.utils.logger import get_logger + +logger = get_logger() + +LOCAL_PLUGINS_FILENAME = '.modelscope_plugins' +GLOBAL_PLUGINS_FILENAME = os.path.join(Path.home(), '.modelscope', 'plugins') +DEFAULT_PLUGINS = [] + + +@contextmanager +def pushd(new_dir: str, verbose: bool = False): + """ + Changes the current directory to the given path and prepends it to `sys.path`. + This method is intended to use with `with`, so after its usage, the current + directory will be set to the previous value. + """ + previous_dir = os.getcwd() + if verbose: + logger.info(f'Changing directory to {new_dir}') # type: ignore + os.chdir(new_dir) + try: + yield + finally: + if verbose: + logger.info(f'Changing directory back to {previous_dir}') + os.chdir(previous_dir) + + +@contextmanager +def push_python_path(path: str): + """ + Prepends the given path to `sys.path`. + This method is intended to use with `with`, so after its usage, its value + will be removed from `sys.path`. + """ + path = Path(path).resolve() + path = str(path) + sys.path.insert(0, path) + try: + yield + finally: + sys.path.remove(path) + + +def discover_file_plugins( + filename: str = LOCAL_PLUGINS_FILENAME) -> Iterable[str]: + """ + Discover plugins from file + """ + with open(filename) as f: + for module_name in f: + module_name = module_name.strip() + if module_name: + yield module_name + + +def discover_plugins() -> Iterable[str]: + """ + Discover plugins + """ + plugins: Set[str] = set() + if os.path.isfile(LOCAL_PLUGINS_FILENAME): + with push_python_path('.'): + for plugin in discover_file_plugins(LOCAL_PLUGINS_FILENAME): + if plugin in plugins: + continue + yield plugin + plugins.add(plugin) + if os.path.isfile(GLOBAL_PLUGINS_FILENAME): + for plugin in discover_file_plugins(GLOBAL_PLUGINS_FILENAME): + if plugin in plugins: + continue + yield plugin + plugins.add(plugin) + + +def import_all_plugins(plugins: List[str] = None) -> List[str]: + """ + Imports default plugins, input plugins and file discovered plugins. + """ + import_module_and_submodules('modelscope', + include={ + 'modelscope.metrics.builder', + 'modelscope.models.builder', + 'modelscope.pipelines.builder', + 'modelscope.preprocessors.builder', + 'modelscope.trainers.builder', + }, + exclude={ + 'modelscope.metrics.*', + 'modelscope.models.*', + 'modelscope.pipelines.*', + 'modelscope.preprocessors.*', + 'modelscope.trainers.*', + 'modelscope.msdatasets', + 'modelscope.utils', + 'modelscope.exporters', + }) + + imported_plugins: List[str] = [] + + imported_plugins.extend(import_plugins(DEFAULT_PLUGINS)) + imported_plugins.extend(import_plugins(plugins)) + imported_plugins.extend(import_file_plugins()) + + return imported_plugins + + +def import_plugins(plugins: List[str] = None) -> List[str]: + """ + Imports the plugins listed in the arguments. + """ + imported_plugins: List[str] = [] + if plugins is None or len(plugins) == 0: + return imported_plugins + + # Workaround for a presumed Python issue where spawned processes can't find modules in the current directory. 
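+    # The working directory is appended (not prepended), so already installed
+    # packages still take precedence over same-named local modules.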
+ cwd = os.getcwd() + if cwd not in sys.path: + sys.path.append(cwd) + + for module_name in plugins: + try: + import_module_and_submodules(module_name) + logger.info('Plugin %s available', module_name) + imported_plugins.append(module_name) + except ModuleNotFoundError as e: + logger.error(f'Plugin {module_name} could not be loaded: {e}') + + return imported_plugins + + +def import_file_plugins() -> List[str]: + """ + Imports the plugins found with `discover_plugins()`. + """ + imported_plugins: List[str] = [] + + # Workaround for a presumed Python issue where spawned processes can't find modules in the current directory. + cwd = os.getcwd() + if cwd not in sys.path: + sys.path.append(cwd) + + for module_name in discover_plugins(): + try: + importlib.import_module(module_name) + logger.info('Plugin %s available', module_name) + imported_plugins.append(module_name) + except ModuleNotFoundError as e: + logger.error(f'Plugin {module_name} could not be loaded: {e}') + + return imported_plugins + + +def import_module_and_submodules(package_name: str, + include: Optional[Set[str]] = None, + exclude: Optional[Set[str]] = None) -> None: + """ + Import all public submodules under the given package. + """ + # take care of None + include = include if include else set() + exclude = exclude if exclude else set() + + def fn_in(packge_name: str, pattern_set: Set[str]) -> bool: + for pattern in pattern_set: + if fnmatch(package_name, pattern): + return True + return False + + if not fn_in(package_name, include) and fn_in(package_name, exclude): + return + + importlib.invalidate_caches() + + # For some reason, python doesn't always add this by default to your path, but you pretty much + # always want it when using `--include-package`. And if it's already there, adding it again at + # the end won't hurt anything. + with push_python_path('.'): + # Import at top level + try: + module = importlib.import_module(package_name) + path = getattr(module, '__path__', []) + path_string = '' if not path else path[0] + + # walk_packages only finds immediate children, so need to recurse. + for module_finder, name, _ in pkgutil.walk_packages(path): + # Sometimes when you import third-party libraries that are on your path, + # `pkgutil.walk_packages` returns those too, so we need to skip them. + if path_string and module_finder.path != path_string: # type: ignore[union-attr] + continue + if name.startswith('_'): + # skip directly importing private subpackages + continue + if name.startswith('test'): + # skip tests + continue + subpackage = f'{package_name}.{name}' + import_module_and_submodules(subpackage, exclude=exclude) + except Exception as e: + logger.warning(f'{package_name} not imported: {str(e)}') + if len(package_name.split('.')) == 1: + raise ModuleNotFoundError('Package not installed') diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py new file mode 100644 index 0000000..9c386ab --- /dev/null +++ b/modelscope/utils/registry.py @@ -0,0 +1,212 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import importlib +import inspect +from typing import List, Tuple, Union + +from modelscope.utils.logger import get_logger + +TYPE_NAME = 'type' +default_group = 'default' +logger = get_logger() +AST_INDEX = None + + +class Registry(object): + """ Registry which support registering modules and group them by a keyname + + If group name is not provided, modules will be registered to default group. 
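+    Example (illustrative sketch; the group and module names are hypothetical):
+        >>> MODELS = Registry('models')
+        >>> @MODELS.register_module('image-classification', 'MyNet')
+        >>> class MyNet:
+        >>>     pass
+        >>> MODELS.get('MyNet', group_key='image-classification')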
+ """ + def __init__(self, name: str): + self._name = name + self._modules = {default_group: {}} + + def __repr__(self): + format_str = self.__class__.__name__ + f' ({self._name})\n' + for group_name, group in self._modules.items(): + format_str += f'group_name={group_name}, '\ + f'modules={list(group.keys())}\n' + + return format_str + + @property + def name(self): + return self._name + + @property + def modules(self): + return self._modules + + def list(self): + """ logging the list of module in current registry + """ + for group_name, group in self._modules.items(): + logger.info(f'group_name={group_name}') + for m in group.keys(): + logger.info(f'\t{m}') + logger.info('') + + def get(self, module_key, group_key=default_group): + if group_key not in self._modules: + return None + else: + return self._modules[group_key].get(module_key, None) + + def _register_module(self, + group_key=default_group, + module_name=None, + module_cls=None, + force=False): + assert isinstance(group_key, + str), 'group_key is required and must be str' + + if group_key not in self._modules: + self._modules[group_key] = dict() + + # Some registered module_cls can be function type. + # if not inspect.isclass(module_cls): + # raise TypeError(f'module is not a class type: {type(module_cls)}') + + if module_name is None: + module_name = module_cls.__name__ + + if module_name in self._modules[group_key] and not force: + raise KeyError(f'{module_name} is already registered in ' + f'{self._name}[{group_key}]') + self._modules[group_key][module_name] = module_cls + module_cls.group_key = group_key + + def register_module(self, + group_key: str = default_group, + module_name: str = None, + module_cls: type = None, + force=False): + """ Register module + + Example: + >>> models = Registry('models') + >>> @models.register_module('image-classification', 'SwinT') + >>> class SwinTransformer: + >>> pass + + >>> @models.register_module('SwinDefault') + >>> class SwinTransformerDefaultGroup: + >>> pass + + >>> class SwinTransformer2: + >>> pass + >>> MODELS.register_module('image-classification', + module_name='SwinT2', + module_cls=SwinTransformer2) + + Args: + group_key: Group name of which module will be registered, + default group name is 'default' + module_name: Module name + module_cls: Module class object + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + + """ + if not (module_name is None or isinstance(module_name, str)): + raise TypeError(f'module_name must be either of None, str,' + f'got {type(module_name)}') + if module_cls is not None: + self._register_module(group_key=group_key, + module_name=module_name, + module_cls=module_cls, + force=force) + return module_cls + + # if module_cls is None, should return a decorator function + def _register(module_cls): + self._register_module(group_key=group_key, + module_name=module_name, + module_cls=module_cls, + force=force) + return module_cls + + return _register + + +def build_from_cfg(cfg, + registry: Registry, + group_key: str = default_group, + default_args: dict = None) -> object: + """Build a module from config dict when it is a class configuration, or + call a function from config dict when it is a function configuration. 
+ + Example: + >>> models = Registry('models') + >>> @models.register_module('image-classification', 'SwinT') + >>> class SwinTransformer: + >>> pass + >>> swint = build_from_cfg(dict(type='SwinT'), MODELS, + >>> 'image-classification') + >>> # Returns an instantiated object + >>> + >>> @MODELS.register_module() + >>> def swin_transformer(): + >>> pass + >>> = build_from_cfg(dict(type='swin_transformer'), MODELS) + >>> # Return a result of the calling function + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + group_key (str, optional): The name of registry group from which + module should be searched. + default_args (dict, optional): Default initialization arguments. + type_name (str, optional): The name of the type in the config. + Returns: + object: The constructed object. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if TYPE_NAME not in cfg: + if default_args is None or TYPE_NAME not in default_args: + raise KeyError( + f'`cfg` or `default_args` must contain the key "{TYPE_NAME}", ' + f'but got {cfg}\n{default_args}') + if not isinstance(registry, Registry): + raise TypeError('registry must be an modelscope.Registry object, ' + f'but got {type(registry)}') + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError('default_args must be a dict or None, ' + f'but got {type(default_args)}') + + # dynamic load installation requirements for this module + from modelscope.utils.import_utils import LazyImportModule + sig = (registry.name.upper(), group_key, cfg['type']) + LazyImportModule.import_module(sig) + + args = cfg.copy() + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + if group_key is None: + group_key = default_group + + obj_type = args.pop(TYPE_NAME) + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type, group_key=group_key) + if obj_cls is None: + raise KeyError( + f'{obj_type} is not in the {registry.name}' + f' registry group {group_key}. Please make' + f' sure the correct version of ModelScope library is used.') + obj_cls.group_key = group_key + elif inspect.isclass(obj_type) or inspect.isfunction(obj_type): + obj_cls = obj_type + else: + raise TypeError( + f'type must be a str or valid type, but got {type(obj_type)}') + try: + if hasattr(obj_cls, '_instantiate'): + return obj_cls._instantiate(**args) + else: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f'{obj_cls.__name__}: {e}') diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py new file mode 100644 index 0000000..85be721 --- /dev/null +++ b/modelscope/utils/regress_test_utils.py @@ -0,0 +1,795 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import contextlib +import hashlib +import json +import os +import pickle +import random +import re +import shutil +import tempfile +from collections import OrderedDict +from collections.abc import Mapping +from pathlib import Path +from types import FunctionType +from typing import Any, Dict, Union + +import numpy as np +import torch +import torch.optim +from torch import nn + +from .test_utils import compare_arguments_nested + + +class RegressTool: + """This class is used to stop inference/training results from changing by some unaware affections by unittests. 
+ + Firstly, run a baseline test to create a result file, then changes can be observed between + the latest version and the baseline file. + """ + def __init__(self, + baseline: bool = None, + store_func: FunctionType = None, + load_func: FunctionType = None): + """A func to store the baseline file and a func to load the baseline file. + """ + self.baseline = baseline + self.store_func = store_func + self.load_func = load_func + print(f'Current working dir is: {Path.cwd()}') + + def store(self, local, remote): + if self.store_func is not None: + self.store_func(local, remote) + else: + path = os.path.abspath( + os.path.join(Path.cwd(), 'data', 'test', 'regression')) + os.makedirs(path, exist_ok=True) + shutil.copy(local, os.path.join(path, remote)) + + def load(self, local, remote): + if self.load_func is not None: + self.load_func(local, remote) + else: + path = os.path.abspath( + os.path.join(Path.cwd(), 'data', 'test', 'regression')) + baseline = os.path.join(path, remote) + if not os.path.exists(baseline): + raise ValueError(f'base line file {baseline} not exist') + print( + f'local file found:{baseline}, md5:{hashlib.md5(open(baseline,"rb").read()).hexdigest()}' + ) + if os.path.exists(local): + os.remove(local) + os.symlink(baseline, local, target_is_directory=False) + + @contextlib.contextmanager + def monitor_module_single_forward(self, + module: nn.Module, + file_name: str, + compare_fn=None, + compare_model_output=True, + **kwargs): + """Monitor a pytorch module in a single forward. + + Args: + module: A torch module + file_name: The file_name to store or load file + compare_fn: A custom fn used to compare the results manually. + compare_model_output: Only compare the input module's output, skip all other tensors + + >>> def compare_fn(v1, v2, key, type): + >>> return None + + v1 is the baseline value + v2 is the value of current version + key is the key of submodules + type is in one of 'input', 'output' + + kwargs: + atol: The absolute gap between two np arrays. + rtol: The relative gap between two np arrays. + """ + baseline = os.getenv('REGRESSION_BASELINE') + if baseline is None or self.baseline is None: + yield + return + + baseline = self.baseline + io_json = {} + absolute_path = f'./{file_name}.bin' + if not isinstance(module, nn.Module): + assert hasattr(module, 'model') + module = module.model + + hack_forward(module, file_name, io_json) + intercept_module(module, io_json) + yield + hack_forward(module, None, None, restore=True) + intercept_module(module, None, restore=True) + if baseline: + with open(absolute_path, 'wb') as f: + pickle.dump(io_json, f) + self.store(absolute_path, f'{file_name}.bin') + os.remove(absolute_path) + else: + name = os.path.basename(absolute_path) + baseline = os.path.join(tempfile.gettempdir(), name) + self.load(baseline, name) + with open(baseline, 'rb') as f: + base = pickle.load(f) + + class SafeNumpyEncoder(json.JSONEncoder): + def parse_default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + + if isinstance(obj, np.floating): + return float(obj) + + if isinstance(obj, np.integer): + return int(obj) + + return json.JSONEncoder.default(self, obj) + + def default(self, obj): + try: + return self.default(obj) + except Exception: + print( + f'Type {obj.__class__} cannot be serialized and printed' + ) + return None + + if compare_model_output: + print( + 'Ignore inner modules, only the output of the model will be verified.' 
+ ) + base = { + key: value + for key, value in base.items() if key == file_name + } + for key, value in base.items(): + value['input'] = {'args': None, 'kwargs': None} + io_json = { + key: value + for key, value in io_json.items() if key == file_name + } + for key, value in io_json.items(): + value['input'] = {'args': None, 'kwargs': None} + + print(f'baseline: {json.dumps(base, cls=SafeNumpyEncoder)}') + print(f'latest : {json.dumps(io_json, cls=SafeNumpyEncoder)}') + if not compare_io_and_print(base, io_json, compare_fn, **kwargs): + raise ValueError('Result not match!') + + @contextlib.contextmanager + def monitor_module_train(self, + trainer: Union[Dict, Any], + file_name, + level='config', + compare_fn=None, + ignore_keys=None, + compare_random=True, + reset_dropout=True, + lazy_stop_callback=None, + **kwargs): + """Monitor a pytorch module's backward data and cfg data within a step of the optimizer. + + This is usually useful when you try to change some dangerous code + which has the risk of affecting the training loop. + + Args: + trainer: A dict or an object contains the model/optimizer/lr_scheduler + file_name: The file_name to store or load file + level: The regression level. + 'strict' for matching every single tensor. + Please make sure the parameters of head are fixed + and the drop-out rate is zero. + 'config' for matching the initial config, like cfg file, optimizer param_groups, + lr_scheduler params and the random seed. + 'metric' for compare the best metrics in the evaluation loop. + compare_fn: A custom fn used to compare the results manually. + ignore_keys: The keys to ignore of the named_parameters. + compare_random: If to compare random setttings, default True. + reset_dropout: Reset all dropout modules to 0.0. + lazy_stop_callback: A callback passed in, when the moniting is over, this callback will be called. + kwargs: + atol: The absolute gap between two np arrays. + rtol: The relative gap between two np arrays. 
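        A minimal usage sketch (hedged: it assumes a trainer object exposing
        model, optimizer and lr_scheduler, and that the REGRESSION_BASELINE
        environment variable is set so the monitor is active; pass
        baseline=True once to record the baseline file):

        >>> tool = RegressTool(baseline=False)
        >>> with tool.monitor_module_train(trainer, 'my_trainer_step', level='config'):
        >>>     trainer.train()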
+ + >>> def compare_fn(v1, v2, key, type): + >>> return None + + v1 is the baseline value + v2 is the value of current version + key is the key of modules/parameters + type is in one of 'input', 'output', 'backward', 'optimizer', 'lr_scheduler', 'cfg', 'state' + """ + baseline = os.getenv('REGRESSION_BASELINE') + if baseline is None or self.baseline is None: + yield + return + + baseline = self.baseline + + io_json = {} + bw_json = {} + absolute_path = f'./{file_name}.bin' + + if level == 'strict': + print( + "[Important] The level of regression is 'strict', please make sure your model's parameters are " + 'fixed and all drop-out rates have been set to zero.') + + assert hasattr( + trainer, 'model') or 'model' in trainer, 'model must be in trainer' + module = trainer['model'] if isinstance(trainer, + dict) else trainer.model + if not isinstance(module, nn.Module): + assert hasattr(module, 'model') + module = module.model + + assert hasattr( + trainer, 'optimizer' + ) or 'optimizer' in trainer, 'optimizer must be in trainer' + assert hasattr( + trainer, 'lr_scheduler' + ) or 'lr_scheduler' in trainer, 'lr_scheduler must be in trainer' + optimizer: torch.optim.Optimizer = trainer['optimizer'] if isinstance( + trainer, dict) else trainer.optimizer + lr_scheduler: torch.optim.lr_scheduler._LRScheduler = trainer['lr_scheduler'] if isinstance(trainer, dict) \ + else trainer.lr_scheduler + torch_state = numpify_tensor_nested(torch.get_rng_state()) + np_state = np.random.get_state() + random_seed = random.getstate() + seed = trainer._seed if hasattr( + trainer, + '_seed') else trainer.seed if hasattr(trainer, 'seed') else None + + if reset_dropout: + with torch.no_grad(): + + def reinit_dropout(_module): + for name, submodule in _module.named_children(): + if isinstance(submodule, torch.nn.Dropout): + setattr(_module, name, torch.nn.Dropout(0.)) + else: + reinit_dropout(submodule) + + reinit_dropout(module) + + if level == 'strict': + hack_forward(module, file_name, io_json) + intercept_module(module, io_json) + hack_backward(module, + optimizer, + bw_json, + lazy_stop_callback=lazy_stop_callback) + yield + hack_backward(module, optimizer, None, restore=True) + if level == 'strict': + hack_forward(module, None, None, restore=True) + intercept_module(module, None, restore=True) + + optimizer_dict = optimizer.state_dict() + optimizer_dict.pop('state', None) + summary = { + 'forward': io_json, + 'backward': bw_json, + 'optimizer': { + 'type': optimizer.__class__.__name__, + 'defaults': optimizer.defaults, + 'state_dict': optimizer_dict + }, + 'lr_scheduler': { + 'type': lr_scheduler.__class__.__name__, + 'state_dict': lr_scheduler.state_dict() + }, + 'cfg': trainer.cfg.to_dict() if hasattr(trainer, 'cfg') else None, + 'state': { + 'torch_state': torch_state, + 'np_state': np_state, + 'random_seed': random_seed, + 'seed': seed, + } + } + + if baseline: + with open(absolute_path, 'wb') as f: + pickle.dump(summary, f) + self.store(absolute_path, f'{file_name}.bin') + os.remove(absolute_path) + else: + name = os.path.basename(absolute_path) + baseline = os.path.join(tempfile.gettempdir(), name) + self.load(baseline, name) + with open(baseline, 'rb') as f: + baseline_json = pickle.load(f) + + if level == 'strict' and not compare_io_and_print( + baseline_json['forward'], io_json, compare_fn, **kwargs): + raise RuntimeError('Forward not match!') + if not compare_backward_and_print(baseline_json['backward'], + bw_json, + compare_fn=compare_fn, + ignore_keys=ignore_keys, + level=level, + **kwargs): + raise 
RuntimeError('Backward not match!') + cfg_opt1 = { + 'optimizer': baseline_json['optimizer'], + 'lr_scheduler': baseline_json['lr_scheduler'], + 'cfg': baseline_json['cfg'], + 'state': None if not compare_random else baseline_json['state'] + } + cfg_opt2 = { + 'optimizer': summary['optimizer'], + 'lr_scheduler': summary['lr_scheduler'], + 'cfg': summary['cfg'], + 'state': None if not compare_random else summary['state'] + } + if not compare_cfg_and_optimizers(cfg_opt1, cfg_opt2, compare_fn, + **kwargs): + raise RuntimeError('Cfg or optimizers not match!') + + +class MsRegressTool(RegressTool): + class EarlyStopError(Exception): + pass + + @contextlib.contextmanager + def monitor_ms_train(self, + trainer, + file_name, + level='config', + compare_fn=None, + ignore_keys=None, + compare_random=True, + lazy_stop_callback=None, + **kwargs): + + if lazy_stop_callback is None: + + def lazy_stop_callback(): + class EarlyStopHook: + PRIORITY = 90 + + def before_run(self, trainer): + pass + + def after_run(self, trainer): + pass + + def before_epoch(self, trainer): + pass + + def after_epoch(self, trainer): + pass + + def before_iter(self, trainer): + pass + + def before_train_epoch(self, trainer): + self.before_epoch(trainer) + + def before_val_epoch(self, trainer): + self.before_epoch(trainer) + + def after_train_epoch(self, trainer): + self.after_epoch(trainer) + + def after_val_epoch(self, trainer): + self.after_epoch(trainer) + + def before_train_iter(self, trainer): + self.before_iter(trainer) + + def before_val_iter(self, trainer): + self.before_iter(trainer) + + def after_train_iter(self, trainer): + self.after_iter(trainer) + + def after_val_iter(self, trainer): + self.after_iter(trainer) + + def every_n_epochs(self, trainer, n): + return (trainer.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + return (runner.inner_iter + + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, trainer, n): + return (trainer.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, trainer): + return trainer.inner_iter + 1 == trainer.iters_per_epoch + + def is_last_epoch(self, trainer): + return trainer.epoch + 1 == trainer.max_epochs + + def is_last_iter(self, trainer): + return trainer.iter + 1 == trainer.max_iters + + def get_triggered_stages(self): + return [] + + def state_dict(self): + return {} + + def load_state_dict(self, state_dict): + pass + + def after_iter(self, trainer): + raise MsRegressTool.EarlyStopError('Test finished.') + + trainer.register_hook(EarlyStopHook()) + + def _train_loop(trainer, *args_train, **kwargs_train): + with self.monitor_module_train( + trainer, + file_name, + level, + compare_fn=compare_fn, + ignore_keys=ignore_keys, + compare_random=compare_random, + lazy_stop_callback=lazy_stop_callback, + **kwargs): + try: + return trainer.train_loop_origin(*args_train, + **kwargs_train) + except MsRegressTool.EarlyStopError: + pass + + trainer.train_loop_origin, trainer.train_loop = \ + trainer.train_loop, type(trainer.train_loop)(_train_loop, trainer) + yield + + +def compare_module(module1: nn.Module, module2: nn.Module): + for p1, p2 in zip(module1.parameters(), module2.parameters()): + if p1.data.ne(p2.data).sum() > 0: + return False + return True + + +def numpify_tensor_nested(tensors, reduction=None, clip_value=10000): + try: + from modelscope.outputs import ModelOutputBase + except ImportError: + ModelOutputBase = dict + "Numpify `tensors` (even if it's a nested list/tuple of tensors)." 
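    # For instance, numpify_tensor_nested({'loss': torch.tensor(2.5)}, reduction='mean')
    # yields {'loss': 2.5}: values are clipped to +/-clip_value first, then the
    # optional sum/mean reduction is applied per tensor.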
+ if isinstance(tensors, (Mapping, ModelOutputBase)): + return OrderedDict({ + k: numpify_tensor_nested(t, reduction, clip_value) + for k, t in tensors.items() + }) + if isinstance(tensors, list): + return list( + numpify_tensor_nested(t, reduction, clip_value) for t in tensors) + if isinstance(tensors, tuple): + return tuple( + numpify_tensor_nested(t, reduction, clip_value) for t in tensors) + if isinstance(tensors, torch.Tensor): + t: np.ndarray = tensors.cpu().numpy() + if clip_value is not None: + t = np.where(t > clip_value, clip_value, t) + t = np.where(t < -clip_value, -clip_value, t) + if reduction == 'sum': + return t.sum(dtype=np.float) + elif reduction == 'mean': + return t.mean(dtype=np.float) + return t + return tensors + + +def detach_tensor_nested(tensors): + try: + from modelscope.outputs import ModelOutputBase + except ImportError: + ModelOutputBase = dict + "Detach `tensors` (even if it's a nested list/tuple of tensors)." + if isinstance(tensors, (Mapping, ModelOutputBase)): + return OrderedDict( + {k: detach_tensor_nested(t) + for k, t in tensors.items()}) + if isinstance(tensors, list): + return list(detach_tensor_nested(t) for t in tensors) + if isinstance(tensors, tuple): + return tuple(detach_tensor_nested(t) for t in tensors) + if isinstance(tensors, torch.Tensor): + return tensors.detach() + return tensors + + +def hack_forward(module: nn.Module, + name, + io_json, + restore=False, + keep_tensors=False): + def _forward(self, *args, **kwargs): + ret = self.forward_origin(*args, **kwargs) + if keep_tensors: + args = numpify_tensor_nested(detach_tensor_nested(args)) + kwargs = numpify_tensor_nested(detach_tensor_nested(kwargs)) + output = numpify_tensor_nested(detach_tensor_nested(ret)) + else: + args = { + 'sum': + numpify_tensor_nested(detach_tensor_nested(args), + reduction='sum'), + 'mean': + numpify_tensor_nested(detach_tensor_nested(args), + reduction='mean'), + } + kwargs = { + 'sum': + numpify_tensor_nested(detach_tensor_nested(kwargs), + reduction='sum'), + 'mean': + numpify_tensor_nested(detach_tensor_nested(kwargs), + reduction='mean'), + } + output = { + 'sum': + numpify_tensor_nested(detach_tensor_nested(ret), + reduction='sum'), + 'mean': + numpify_tensor_nested(detach_tensor_nested(ret), + reduction='mean'), + } + + io_json[name] = { + 'input': { + 'args': args, + 'kwargs': kwargs, + }, + 'output': output, + } + return ret + + if not restore and not hasattr(module, 'forward_origin'): + module.forward_origin, module.forward = module.forward, type( + module.forward)(_forward, module) + if restore and hasattr(module, 'forward_origin'): + module.forward = module.forward_origin + del module.forward_origin + + +def hack_backward(module: nn.Module, + optimizer, + io_json, + restore=False, + lazy_stop_callback=None): + def _step(self, *args, **kwargs): + for name, param in module.named_parameters(): + io_json[name] = { + 'data': { + 'sum': + numpify_tensor_nested(detach_tensor_nested(param.data), + reduction='sum'), + 'mean': + numpify_tensor_nested(detach_tensor_nested(param.data), + reduction='mean'), + }, + 'grad': { + 'sum': + numpify_tensor_nested(detach_tensor_nested(param.grad), + reduction='sum'), + 'mean': + numpify_tensor_nested(detach_tensor_nested(param.grad), + reduction='mean'), + } + } + ret = self.step_origin(*args, **kwargs) + for name, param in module.named_parameters(): + io_json[name]['data_after'] = { + 'sum': + numpify_tensor_nested(detach_tensor_nested(param.data), + reduction='sum'), + 'mean': + 
numpify_tensor_nested(detach_tensor_nested(param.data), + reduction='mean'), + } + if lazy_stop_callback is not None: + lazy_stop_callback() + return ret + + if not restore and not hasattr(optimizer, 'step_origin'): + optimizer.step_origin, optimizer.step = optimizer.step, type( + optimizer.state_dict)(_step, optimizer) + if restore and hasattr(optimizer, 'step_origin'): + optimizer.step = optimizer.step_origin + del optimizer.step_origin + + +def intercept_module(module: nn.Module, + io_json, + parent_name=None, + restore=False): + for name, module in module.named_children(): + full_name = parent_name + '.' + name if parent_name is not None else name + hack_forward(module, full_name, io_json, restore) + intercept_module(module, io_json, full_name, restore) + + +def compare_io_and_print(baseline_json, io_json, compare_fn=None, **kwargs): + if compare_fn is None: + + def compare_fn(*args, **kwargs): + return None + + keys1 = set(baseline_json.keys()) + keys2 = set(io_json.keys()) + added = keys1 - keys2 + removed = keys2 - keys1 + print(f'unmatched keys: {added}, {removed}') + shared_keys = keys1.intersection(keys2) + match = True + for key in shared_keys: + v1 = baseline_json[key] + v2 = io_json[key] + + v1input = numpify_tensor_nested(v1['input']) + v2input = numpify_tensor_nested(v2['input']) + res = compare_fn(v1input, v2input, key, 'input') + if res is not None: + print( + f'input of {key} compared with user compare_fn with result:{res}\n' + ) + match = match and res + else: + match = compare_arguments_nested( + f'unmatched module {key} input args', v1input['args'], + v2input['args'], **kwargs) and match + match = compare_arguments_nested( + f'unmatched module {key} input kwargs', v1input['kwargs'], + v2input['kwargs'], **kwargs) and match + v1output = numpify_tensor_nested(v1['output']) + v2output = numpify_tensor_nested(v2['output']) + res = compare_fn(v1output, v2output, key, 'output') + if res is not None: + print( + f'output of {key} compared with user compare_fn with result:{res}\n' + ) + match = match and res + else: + match = compare_arguments_nested(f'unmatched module {key} outputs', + arg1=v1output, + arg2=v2output, + **kwargs) and match + return match + + +def compare_backward_and_print(baseline_json, + bw_json, + level, + ignore_keys=None, + compare_fn=None, + **kwargs): + if compare_fn is None: + + def compare_fn(*args, **kwargs): + return None + + keys1 = set(baseline_json.keys()) + keys2 = set(bw_json.keys()) + added = keys1 - keys2 + removed = keys2 - keys1 + print(f'unmatched backward keys: {added}, {removed}') + shared_keys = keys1.intersection(keys2) + match = True + for key in shared_keys: + if ignore_keys is not None and key in ignore_keys: + continue + + res = compare_fn(baseline_json[key], bw_json[key], key, 'backward') + if res is not None: + print(f'backward data of {key} compared with ' + f'user compare_fn with result:{res}\n') + match = match and res + else: + data1, grad1, data_after1 = baseline_json[key][ + 'data'], baseline_json[key]['grad'], baseline_json[key][ + 'data_after'] + data2, grad2, data_after2 = bw_json[key]['data'], bw_json[key][ + 'grad'], bw_json[key]['data_after'] + match = compare_arguments_nested( + f'unmatched module {key} tensor data', + arg1=data1, + arg2=data2, + **kwargs) and match + if level == 'strict': + match = compare_arguments_nested( + f'unmatched module {key} grad data', + arg1=grad1, + arg2=grad2, + **kwargs) and match + match = compare_arguments_nested( + f'unmatched module {key} data after step', data_after1, + 
data_after2, **kwargs) and match + return match + + +def compare_cfg_and_optimizers(baseline_json, + cfg_json, + compare_fn=None, + **kwargs): + if compare_fn is None: + + def compare_fn(*args, **kwargs): + return None + + optimizer1, lr_scheduler1, cfg1, state1 = baseline_json[ + 'optimizer'], baseline_json['lr_scheduler'], baseline_json[ + 'cfg'], baseline_json['state'] + optimizer2, lr_scheduler2, cfg2, state2 = cfg_json['optimizer'], cfg_json[ + 'lr_scheduler'], cfg_json['cfg'], baseline_json['state'] + + match = True + res = compare_fn(optimizer1, optimizer2, None, 'optimizer') + if res is not None: + print(f'optimizer compared with user compare_fn with result:{res}\n') + match = match and res + else: + if optimizer1['type'] != optimizer2['type']: + print( + f"Optimizer type not equal:{optimizer1['type']} and {optimizer2['type']}" + ) + match = compare_arguments_nested( + 'unmatched optimizer defaults', optimizer1['defaults'], + optimizer2['defaults'], **kwargs) and match + match = compare_arguments_nested( + 'unmatched optimizer state_dict', optimizer1['state_dict'], + optimizer2['state_dict'], **kwargs) and match + + res = compare_fn(lr_scheduler1, lr_scheduler2, None, 'lr_scheduler') + if res is not None: + print( + f'lr_scheduler compared with user compare_fn with result:{res}\n') + match = match and res + else: + if lr_scheduler1['type'] != lr_scheduler2['type']: + print( + f"Optimizer type not equal:{lr_scheduler1['type']} and {lr_scheduler2['type']}" + ) + match = compare_arguments_nested( + 'unmatched lr_scheduler state_dict', lr_scheduler1['state_dict'], + lr_scheduler2['state_dict'], **kwargs) and match + + res = compare_fn(cfg1, cfg2, None, 'cfg') + if res is not None: + print(f'cfg compared with user compare_fn with result:{res}\n') + match = match and res + else: + match = compare_arguments_nested( + 'unmatched cfg', arg1=cfg1, arg2=cfg2, **kwargs) and match + + res = compare_fn(state1, state2, None, 'state') + if res is not None: + print( + f'random state compared with user compare_fn with result:{res}\n') + match = match and res + else: + match = compare_arguments_nested('unmatched random state', state1, + state2, **kwargs) and match + + return match + + +class IgnoreKeyFn: + def __init__(self, keys): + if isinstance(keys, str): + keys = [keys] + self.keys = keys if isinstance(keys, list) else [] + + def __call__(self, v1output, v2output, key, type): + for _key in self.keys: + pattern = re.compile(_key) + if key is not None and pattern.fullmatch(key): + return True + return None diff --git a/modelscope/utils/service_utils.py b/modelscope/utils/service_utils.py new file mode 100644 index 0000000..bb0711b --- /dev/null +++ b/modelscope/utils/service_utils.py @@ -0,0 +1,177 @@ +import base64 +import json +import mimetypes +from io import BytesIO + +import numpy as np +import requests + +from modelscope.outputs import TASK_OUTPUTS, OutputKeys +from modelscope.pipeline_inputs import TASK_INPUTS, InputType + + +# service data decoder func decodes data from network and convert it to pipeline's input +# for example +def ExampleDecoder(data): + # Assuming the pipeline inputs is a dict contains an image and a text, + # to decode the data from network we decode the image as base64 + data_json = json.loads(data) + # data: {"image": "xxxxxxxx=="(base64 str), "text": "a question"} + # pipeline(inputs) as follows: + # pipeline({'image': image, 'text': text}) + inputs = { + 'image': decode_base64_to_image(data_json.get('image')), + 'text': data_json.get('text') + } + return inputs + + 
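# A hedged round-trip sketch for the decoder above (the file name and question
# are hypothetical; the payload shape mirrors what ExampleDecoder expects):
#
#   payload = json.dumps({'image': encode_file_to_base64('cat.png'),
#                         'text': 'what animal is this?'})
#   inputs = ExampleDecoder(payload)  # -> {'image': <PIL.Image.Image>, 'text': str}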
+# service data encoder func encodes data from pipeline outputs and convert to network response (such as json) +# for example +def ExampleEncoder(data): + # Assuming the pipeline outputs is a dict contains an image and a text, + # and transmit it through network, this func encode image to base64 and dumps into json + # data (for e.g. python dict): + # {"image": a numpy array represents a image, "text": "output"} + image = data['image'] + text = data['text'] + data = {'image': encode_array_to_img_base64(image), 'text': text} + return json.dumps(data, cls=NumpyEncoder) + + +CustomEncoder = { + # Tasks.visual_question_answering: ExampleEncoder +} + +CustomDecoder = { + # Tasks.visual_question_answering: ExampleDecoder +} + + +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + + if isinstance(obj, np.floating): + return float(obj) + + if isinstance(obj, np.integer): + return int(obj) + + return json.JSONEncoder.default(self, obj) + + +def get_extension(encoding): + encoding = encoding.replace('audio/wav', 'audio/x-wav') + tp = mimetypes.guess_type(encoding)[0] + if tp == 'audio/flac': # flac is not supported by mimetypes + return 'flac' + extension = mimetypes.guess_extension(tp) + if extension is not None and extension.startswith('.'): + extension = extension[1:] + return extension + + +def get_mimetype(filename): + mimetype = mimetypes.guess_type(filename)[0] + if mimetype is not None: + mimetype = mimetype.replace('x-wav', 'wav').replace('x-flac', 'flac') + return mimetype + + +def decode_base64_to_binary(encoding): + extension = get_extension(encoding) + data = encoding.split(',')[1] + return base64.b64decode(data), extension + + +def decode_base64_to_image(encoding): + from PIL import Image + content = encoding.split(';')[1] + image_encoded = content.split(',')[1] + return Image.open(BytesIO(base64.b64decode(image_encoded))) + + +def encode_array_to_img_base64(image_array): + from PIL import Image + with BytesIO() as output_bytes: + pil_image = Image.fromarray(image_array.astype(np.uint8)) + pil_image.save(output_bytes, 'PNG') + bytes_data = output_bytes.getvalue() + base64_str = str(base64.b64encode(bytes_data), 'utf-8') + return 'data:image/png;base64,' + base64_str + + +def encode_pcm_to_base64(bytes_data): + from scipy.io.wavfile import write + with BytesIO() as out_mem_file: + write(out_mem_file, 16000, bytes_data) + base64_str = str(base64.b64encode(out_mem_file.getvalue()), 'utf-8') + return 'data:audio/pcm;base64,' + base64_str + + +def encode_url_to_base64(url): + encoded_string = base64.b64encode(requests.get(url).content) + base64_str = str(encoded_string, 'utf-8') + mimetype = get_mimetype(url) + return ('data:' + (mimetype if mimetype is not None else '') + ';base64,' + + base64_str) + + +def encode_file_to_base64(f): + with open(f, 'rb') as file: + encoded_string = base64.b64encode(file.read()) + base64_str = str(encoded_string, 'utf-8') + mimetype = get_mimetype(f) + return ('data:' + (mimetype if mimetype is not None else '') + + ';base64,' + base64_str) + + +def encode_url_or_file_to_base64(path): + try: + requests.get(path) + return encode_url_to_base64(path) + except (requests.exceptions.MissingSchema, + requests.exceptions.InvalidSchema): + return encode_file_to_base64(path) + + +def service_data_decoder(task, data): + if CustomDecoder.get(task) is not None: + return CustomDecoder[task](data) + input_type = TASK_INPUTS[task] + input_data = data.decode('utf-8') + if input_type == InputType.IMAGE: + 
return decode_base64_to_image(input_data) + elif input_type == InputType.AUDIO: + return decode_base64_to_binary(input_data)[0] + elif input_type == InputType.TEXT: + return input_data + elif isinstance(input_type, dict): + input_data = {} + for key, val in input_type.items(): + if val == InputType.IMAGE: + input_data[key] = decode_base64_to_image(data[key]) + elif val == InputType.AUDIO: + input_data[key] = decode_base64_to_binary(data[key])[0] + elif val == InputType.TEXT: + input_data[key] = data[key] + + return input_data + + +def service_data_encoder(task, data): + if CustomEncoder.get(task) is not None: + return CustomEncoder[task](data) + output_keys = TASK_OUTPUTS[task] + result = data + for output_key in output_keys: + if output_key == OutputKeys.OUTPUT_IMG: + result[OutputKeys.OUTPUT_IMG] = encode_array_to_img_base64( + data[OutputKeys.OUTPUT_IMG][..., ::-1]) + elif output_key == OutputKeys.OUTPUT_PCM: + result[OutputKeys.OUTPUT_PCM] = encode_pcm_to_base64( + data[OutputKeys.OUTPUT_PCM]) + result = bytes(json.dumps(result, cls=NumpyEncoder), encoding='utf8') + return result diff --git a/modelscope/utils/task_utils.py b/modelscope/utils/task_utils.py new file mode 100644 index 0000000..f862718 --- /dev/null +++ b/modelscope/utils/task_utils.py @@ -0,0 +1,31 @@ +from modelscope.metainfo import TaskModels +from modelscope.utils import registry +from modelscope.utils.constant import Tasks + +SUB_TASKS = 'sub_tasks' +PARENT_TASK = 'parent_task' +TASK_MODEL = 'task_model' + +DEFAULT_TASKS_LEVEL = {} + + +def _inverted_index(forward_index): + inverted_index = dict() + for index in forward_index: + for item in forward_index[index][SUB_TASKS]: + inverted_index[item] = { + PARENT_TASK: index, + TASK_MODEL: forward_index[index][TASK_MODEL], + } + return inverted_index + + +INVERTED_TASKS_LEVEL = _inverted_index(DEFAULT_TASKS_LEVEL) + + +def get_task_by_subtask_name(group_key): + if group_key in INVERTED_TASKS_LEVEL: + return INVERTED_TASKS_LEVEL[group_key][ + PARENT_TASK], INVERTED_TASKS_LEVEL[group_key][TASK_MODEL] + else: + return group_key, None diff --git a/modelscope/utils/tensor_utils.py b/modelscope/utils/tensor_utils.py new file mode 100644 index 0000000..8f580d1 --- /dev/null +++ b/modelscope/utils/tensor_utils.py @@ -0,0 +1,51 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Part of the implementation is borrowed from huggingface/transformers. +from collections.abc import Mapping + + +def torch_nested_numpify(tensors): + """ Numpify nested torch tensors. + + NOTE: If the type of input tensors is dict-like(Mapping, dict, OrderedDict, etc.), the return type will be dict. + + Args: + tensors: Nested torch tensors. + + Returns: + The numpify tensors. + """ + + import torch + "Numpify `tensors` (even if it's a nested list/tuple of tensors)." + if isinstance(tensors, (list, tuple)): + return type(tensors)(torch_nested_numpify(t) for t in tensors) + if isinstance(tensors, Mapping): + # return dict + return {k: torch_nested_numpify(t) for k, t in tensors.items()} + if isinstance(tensors, torch.Tensor): + t = tensors.cpu() + return t.numpy() + return tensors + + +def torch_nested_detach(tensors): + """ Detach nested torch tensors. + + NOTE: If the type of input tensors is dict-like(Mapping, dict, OrderedDict, etc.), the return type will be dict. + + Args: + tensors: Nested torch tensors. + + Returns: + The detached tensors. + """ + + import torch + "Detach `tensors` (even if it's a nested list/tuple of tensors)." 
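    # e.g. torch_nested_detach({'logits': logits}) returns {'logits': logits.detach()};
    # as the NOTE above says, Mapping inputs always come back as plain dicts.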
+ if isinstance(tensors, (list, tuple)): + return type(tensors)(torch_nested_detach(t) for t in tensors) + if isinstance(tensors, Mapping): + return {k: torch_nested_detach(t) for k, t in tensors.items()} + if isinstance(tensors, torch.Tensor): + return tensors.detach() + return tensors diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py new file mode 100644 index 0000000..22843d8 --- /dev/null +++ b/modelscope/utils/test_utils.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python +# Copyright (c) Alibaba, Inc. and its affiliates. + +import copy +import os +import pickle +import shutil +import socket +import subprocess +import sys +import tarfile +import tempfile +import unittest +from collections import OrderedDict +from collections.abc import Mapping + +import numpy as np +import requests + +from modelscope.utils.import_utils import is_tf_available, is_torch_available + +TEST_LEVEL = 2 +TEST_LEVEL_STR = 'TEST_LEVEL' + + +def test_level(): + global TEST_LEVEL + if TEST_LEVEL_STR in os.environ: + TEST_LEVEL = int(os.environ[TEST_LEVEL_STR]) + + return TEST_LEVEL + + +def require_tf(test_case): + if not is_tf_available(): + test_case = unittest.skip('test requires TensorFlow')(test_case) + return test_case + + +def require_torch(test_case): + if not is_torch_available(): + test_case = unittest.skip('test requires PyTorch')(test_case) + return test_case + + +def set_test_level(level: int): + global TEST_LEVEL + TEST_LEVEL = level + + +class DummyTorchDataset: + def __init__(self, feat, label, num) -> None: + self.feat = feat + self.label = label + self.num = num + + def __getitem__(self, index): + import torch + return { + 'feat': torch.Tensor(self.feat), + 'labels': torch.Tensor(self.label) + } + + def __len__(self): + return self.num + + +def create_dummy_test_dataset(feat, label, num): + return DummyTorchDataset(feat, label, num) + + +def download_and_untar(fpath, furl, dst) -> str: + if not os.path.exists(fpath): + r = requests.get(furl) + with open(fpath, 'wb') as f: + f.write(r.content) + + file_name = os.path.basename(fpath) + root_dir = os.path.dirname(fpath) + target_dir_name = os.path.splitext(os.path.splitext(file_name)[0])[0] + target_dir_path = os.path.join(root_dir, target_dir_name) + + # untar the file + t = tarfile.open(fpath) + t.extractall(path=dst) + + return target_dir_path + + +def get_case_model_info(): + status_code, result = subprocess.getstatusoutput( + 'grep -rn "damo/" tests/ | grep -v ".pyc" | grep -v "Binary file" | grep -v run.py ' + ) + lines = result.split('\n') + test_cases = OrderedDict() + model_cases = OrderedDict() + for line in lines: + # "tests/msdatasets/test_ms_dataset.py:92: model_id = 'damo/bert-base-sst2'" + line = line.strip() + elements = line.split(':') + test_file = elements[0] + model_pos = line.find('damo') + left_quote = line[model_pos - 1] + rquote_idx = line.rfind(left_quote) + model_name = line[model_pos:rquote_idx] + if test_file not in test_cases: + test_cases[test_file] = set() + model_info = test_cases[test_file] + model_info.add(model_name) + + if model_name not in model_cases: + model_cases[model_name] = set() + case_info = model_cases[model_name] + case_info.add( + test_file.replace('tests/', '').replace('.py', + '').replace('/', '.')) + + return model_cases + + +def compare_arguments_nested(print_content, + arg1, + arg2, + rtol=1.e-3, + atol=1.e-8, + ignore_unknown_type=True): + type1 = type(arg1) + type2 = type(arg2) + if type1.__name__ != type2.__name__: + if print_content is not None: + print( + 
f'{print_content}, type not equal:{type1.__name__} and {type2.__name__}' + ) + return False + + if arg1 is None: + return True + elif isinstance(arg1, (int, str, bool, np.bool, np.integer, np.str)): + if arg1 != arg2: + if print_content is not None: + print(f'{print_content}, arg1:{arg1}, arg2:{arg2}') + return False + return True + elif isinstance(arg1, (float, np.floating)): + if not np.isclose(arg1, arg2, rtol=rtol, atol=atol, equal_nan=True): + if print_content is not None: + print(f'{print_content}, arg1:{arg1}, arg2:{arg2}') + return False + return True + elif isinstance(arg1, (tuple, list)): + if len(arg1) != len(arg2): + if print_content is not None: + print( + f'{print_content}, length is not equal:{len(arg1)}, {len(arg2)}' + ) + return False + if not all([ + compare_arguments_nested( + None, sub_arg1, sub_arg2, rtol=rtol, atol=atol) + for sub_arg1, sub_arg2 in zip(arg1, arg2) + ]): + if print_content is not None: + print(f'{print_content}') + return False + return True + elif isinstance(arg1, Mapping): + keys1 = arg1.keys() + keys2 = arg2.keys() + if len(keys1) != len(keys2): + if print_content is not None: + print( + f'{print_content}, key length is not equal:{len(keys1)}, {len(keys2)}' + ) + return False + if len(set(keys1) - set(keys2)) > 0: + if print_content is not None: + print(f'{print_content}, key diff:{set(keys1) - set(keys2)}') + return False + if not all([ + compare_arguments_nested( + None, arg1[key], arg2[key], rtol=rtol, atol=atol) + for key in keys1 + ]): + if print_content is not None: + print(f'{print_content}') + return False + return True + elif isinstance(arg1, np.ndarray): + arg1 = np.where(np.equal(arg1, None), np.NaN, + arg1).astype(dtype=np.float) + arg2 = np.where(np.equal(arg2, None), np.NaN, + arg2).astype(dtype=np.float) + if not all( + np.isclose(arg1, arg2, rtol=rtol, atol=atol, + equal_nan=True).flatten()): + if print_content is not None: + print(f'{print_content}') + return False + return True + else: + if ignore_unknown_type: + return True + else: + raise ValueError(f'type not supported: {type1}') + + +_DIST_SCRIPT_TEMPLATE = """ +import ast +import argparse +import pickle +import torch +from torch import distributed as dist +from modelscope.utils.torch_utils import get_dist_info +import {} + +parser = argparse.ArgumentParser() +parser.add_argument('--save_all_ranks', type=ast.literal_eval, help='save all ranks results') +parser.add_argument('--save_file', type=str, help='save file') +parser.add_argument('--local_rank', type=int, default=0) +args = parser.parse_args() + + +def main(): + results = {}.{}({}) # module.func(params) + if args.save_all_ranks: + save_file = args.save_file + str(dist.get_rank()) + with open(save_file, 'wb') as f: + pickle.dump(results, f) + else: + rank, _ = get_dist_info() + if rank == 0: + with open(args.save_file, 'wb') as f: + pickle.dump(results, f) + + +if __name__ == '__main__': + main() +""" + + +class DistributedTestCase(unittest.TestCase): + """Distributed TestCase for test function with distributed mode. 
+ Examples: + >>> import torch + >>> from torch import distributed as dist + >>> from modelscope.utils.torch_utils import init_dist + + >>> def _test_func(*args, **kwargs): + >>> init_dist(launcher='pytorch') + >>> rank = dist.get_rank() + >>> if rank == 0: + >>> value = torch.tensor(1.0).cuda() + >>> else: + >>> value = torch.tensor(2.0).cuda() + >>> dist.all_reduce(value) + >>> return value.cpu().numpy() + + >>> class DistTest(DistributedTestCase): + >>> def test_function_dist(self): + >>> args = () # args should be python builtin type + >>> kwargs = {} # kwargs should be python builtin type + >>> self.start( + >>> _test_func, + >>> num_gpus=2, + >>> assert_callback=lambda x: self.assertEqual(x, 3.0), + >>> *args, + >>> **kwargs, + >>> ) + """ + def _start(self, + dist_start_cmd, + func, + num_gpus, + assert_callback=None, + save_all_ranks=False, + *args, + **kwargs): + script_path = func.__code__.co_filename + script_dir, script_name = os.path.split(script_path) + script_name = os.path.splitext(script_name)[0] + func_name = func.__qualname__ + + func_params = [] + for arg in args: + if isinstance(arg, str): + arg = ('\'{}\''.format(arg)) + func_params.append(str(arg)) + + for k, v in kwargs.items(): + if isinstance(v, str): + v = ('\'{}\''.format(v)) + func_params.append('{}={}'.format(k, v)) + + func_params = ','.join(func_params).strip(',') + + tmp_run_file = tempfile.NamedTemporaryFile(suffix='.py').name + tmp_res_file = tempfile.NamedTemporaryFile(suffix='.pkl').name + + with open(tmp_run_file, 'w') as f: + print('save temporary run file to : {}'.format(tmp_run_file)) + print('save results to : {}'.format(tmp_res_file)) + run_file_content = _DIST_SCRIPT_TEMPLATE.format( + script_name, script_name, func_name, func_params) + f.write(run_file_content) + + tmp_res_files = [] + if save_all_ranks: + for i in range(num_gpus): + tmp_res_files.append(tmp_res_file + str(i)) + else: + tmp_res_files = [tmp_res_file] + self.addCleanup(self.clean_tmp, [tmp_run_file] + tmp_res_files) + + tmp_env = copy.deepcopy(os.environ) + tmp_env['PYTHONPATH'] = ':'.join( + (tmp_env.get('PYTHONPATH', ''), script_dir)).lstrip(':') + # avoid distributed test hang + tmp_env['NCCL_P2P_DISABLE'] = '1' + script_params = '--save_all_ranks=%s --save_file=%s' % (save_all_ranks, + tmp_res_file) + script_cmd = '%s %s %s' % (dist_start_cmd, tmp_run_file, script_params) + print('script command: %s' % script_cmd) + res = subprocess.call(script_cmd, shell=True, env=tmp_env) + + script_res = [] + for res_file in tmp_res_files: + with open(res_file, 'rb') as f: + script_res.append(pickle.load(f)) + if not save_all_ranks: + script_res = script_res[0] + + if assert_callback: + assert_callback(script_res) + + self.assertEqual( + res, + 0, + msg='The test function ``{}`` in ``{}`` run failed!'.format( + func_name, script_name)) + + return script_res + + def start(self, + func, + num_gpus, + assert_callback=None, + save_all_ranks=False, + *args, + **kwargs): + from .torch_utils import _find_free_port + ip = socket.gethostbyname(socket.gethostname()) + dist_start_cmd = '%s -m torch.distributed.launch --nproc_per_node=%d --master_addr=\'%s\' --master_port=%s' % ( + sys.executable, num_gpus, ip, _find_free_port()) + + return self._start(dist_start_cmd=dist_start_cmd, + func=func, + num_gpus=num_gpus, + assert_callback=assert_callback, + save_all_ranks=save_all_ranks, + *args, + **kwargs) + + def clean_tmp(self, tmp_file_list): + for file in tmp_file_list: + if os.path.exists(file): + if os.path.isdir(file): + shutil.rmtree(file) + 
else: + os.remove(file) diff --git a/modelscope/utils/timer.py b/modelscope/utils/timer.py new file mode 100644 index 0000000..8d3c8a4 --- /dev/null +++ b/modelscope/utils/timer.py @@ -0,0 +1,49 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright © Alibaba, Inc. and its affiliates. + +import datetime +import time + + +class Timer(object): + def __init__(self): + """Recorder of time consumption. + + """ + self.reset() + + @property + def average_time(self): + return self.total_time / self.calls if self.calls > 0 else 0.0 + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.add(time.time() - self.start_time) + if average: + return self.average_time + else: + return self.diff + + def add(self, time_diff): + self.diff = time_diff + self.total_time += self.diff + self.calls += 1 + + def reset(self): + self.total_time = 0.0 + self.calls = 0 + self.start_time = 0.0 + self.diff = 0.0 + + def avg_time_str(self): + time_str = str(datetime.timedelta(seconds=self.average_time)) + return time_str + + +def get_time_str(time_diff): + time_str = str(datetime.timedelta(seconds=time_diff)) + return time_str diff --git a/modelscope/utils/torch_utils.py b/modelscope/utils/torch_utils.py new file mode 100644 index 0000000..4e17572 --- /dev/null +++ b/modelscope/utils/torch_utils.py @@ -0,0 +1,331 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Following code is partialy borrowed from openmmlab/mmcv +import functools +import os +import pickle +import random +import socket +import subprocess +import tempfile +from typing import Callable, List, Optional, Tuple + +import numpy as np +import torch +import torch.multiprocessing as mp +from torch import distributed as dist + + +def _find_free_port() -> str: + # Copied from https://github.com/facebookresearch/detectron2/blob/main/detectron2/engine/launch.py # noqa: E501 + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(('', 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. 
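    # The socket API yields the port as an int (despite the -> str annotation
    # above); existing callers such as _init_dist_slurm wrap it in str() anyway.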
+ return port + + +def _is_free_port(port: int) -> bool: + ips = socket.gethostbyname_ex(socket.gethostname())[-1] + ips.append('localhost') + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return all(s.connect_ex((ip, port)) != 0 for ip in ips) + + +def init_dist(launcher: str, backend: str = 'nccl', **kwargs) -> None: + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend: str, **kwargs) -> None: + # rank = int(os.environ['RANK']) + local_rank = int(os.environ['LOCAL_RANK']) + torch.cuda.set_device(local_rank) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend: str, **kwargs) -> None: + local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK']) + torch.cuda.set_device(local_rank) + if 'MASTER_PORT' not in os.environ: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + if 'MASTER_ADDR' not in os.environ: + raise KeyError('The environment variable MASTER_ADDR is not set') + os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE'] + os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK'] + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend: str, port: Optional[int] = None) -> None: + """Initialize slurm distributed training environment. + + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. 
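    Example (illustrative numbers): with SLURM_PROCID=5, SLURM_NTASKS=8 and
    4 visible GPUs per node, this process binds to GPU 5 % 4 == 1, and the
    environment ends up with RANK='5', LOCAL_RANK='1' and WORLD_SIZE='8'.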
+ """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # if torch.distributed default port(29500) is available + # then use it, else find a free port + if _is_free_port(29500): + os.environ['MASTER_PORT'] = '29500' + else: + os.environ['MASTER_PORT'] = str(_find_free_port()) + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info() -> Tuple[int, int]: + if is_dist(): + try: + from megatron_util import mpu + assert mpu.model_parallel_is_initialized() + rank = mpu.get_data_parallel_rank() + world_size = mpu.get_data_parallel_world_size() + except (ImportError, AssertionError): + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def get_local_rank(): + return int(os.environ.get('LOCAL_RANK', 0)) + + +def get_rank(): + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_world_size(): + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def synchronize(): + """ + Helper function to synchronize (barrier) + among all processes when using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +def is_dist(): + return dist.is_available() and dist.is_initialized() + + +def is_master(): + return dist.get_rank() == 0 if is_dist() else True + + +def master_only(func: Callable) -> Callable: + @functools.wraps(func) + def wrapper(*args, **kwargs): + if is_master(): + return func(*args, **kwargs) + + return wrapper + + +def make_tmp_dir(): + """Make sure each rank has the same temporary directory on the distributed mode. + """ + if not is_dist(): + return tempfile.mkdtemp() + + tmpdir = None + if is_master(): + tmpdir = tempfile.mkdtemp() + + dist.barrier() + tmpdir = broadcast(tmpdir, 0) + + return tmpdir + + +def broadcast(inputs, src): + """ + Broadcasts the inputs to all ranks. + + Arguments: + inputs : Any objects that can be serialized by pickle. + src (int): Source rank. + Returns: + Each rank returns the same value as src. 
+ """ + rank = dist.get_rank() + shape_tensor = torch.tensor([0], device='cuda') + + if rank == src: + inputs_tensor = torch.tensor(bytearray(pickle.dumps(inputs)), + dtype=torch.uint8, + device='cuda') + shape_tensor = torch.tensor(inputs_tensor.shape, device='cuda') + + dist.barrier() + dist.broadcast(shape_tensor, src) + + if rank != src: + inputs_tensor = torch.full((shape_tensor.item(), ), + 0, + dtype=torch.uint8, + device='cuda') + + dist.barrier() + dist.broadcast(inputs_tensor, src) + + return pickle.loads(inputs_tensor.cpu().numpy().tobytes()) + + +def set_random_seed(seed): + if seed is not None and seed >= 0: + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + else: + raise ValueError( + f'Random seed should be positive, current seed is {seed}') + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == 'nccl': + return dist.new_group(backend='gloo') + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ['gloo', 'nccl'] + device = torch.device('cpu' if backend == 'gloo' else 'cuda') + + buffer = pickle.dumps(data) + if len(buffer) > 1024**3: + logger.warning( + 'Rank {} trying to all-gather {:.2f} GB of data on device {}'. + format(get_rank(), + len(buffer) / (1024**3), device)) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), 'comm.gather/all_gather must be called from ranks within the group!' + local_size = torch.tensor([tensor.numel()], + dtype=torch.int64, + device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) + for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size, ), + dtype=torch.uint8, + device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. 
+ Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size, ), dtype=torch.uint8, device=tensor.device) + for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list diff --git a/modelscope/utils/trie.py b/modelscope/utils/trie.py new file mode 100644 index 0000000..bcae767 --- /dev/null +++ b/modelscope/utils/trie.py @@ -0,0 +1,27 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from collections import defaultdict + + +class TreeNode: + def __init__(self): + self.child = defaultdict(TreeNode) + + +class Trie: + def __init__(self, eos): + self.root = TreeNode() + self.eos = eos + + def insert(self, word): + cur = self.root + for c in word: + cur = cur.child[c] + + def get_next_layer(self, word): + cur = self.root + for c in word: + cur = cur.child.get(c) + if cur is None: + return [self.eos] + return list(cur.child.keys()) diff --git a/modelscope/utils/type_assert.py b/modelscope/utils/type_assert.py new file mode 100644 index 0000000..be84256 --- /dev/null +++ b/modelscope/utils/type_assert.py @@ -0,0 +1,51 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from functools import wraps +from inspect import signature + + +def type_assert(*ty_args, **ty_kwargs): + """a decorator which is used to check the types of arguments in a function or class + Examples: + >>> @type_assert(str) + ... def main(a: str, b: list): + ... print(a, b) + >>> main(1) + Argument a must be a str + + >>> @type_assert(str, (int, str)) + ... def main(a: str, b: int | str): + ... print(a, b) + >>> main('1', [1]) + Argument b must be (, ) + + >>> @type_assert(str, (int, str)) + ... class A: + ... def __init__(self, a: str, b: int | str) + ... print(a, b) + >>> a = A('1', [1]) + Argument b must be (, ) + """ + def decorate(func): + # If in optimized mode, disable type checking + if not __debug__: + return func + + # Map function argument names to supplied types + sig = signature(func) + bound_types = sig.bind_partial(*ty_args, **ty_kwargs).arguments + + @wraps(func) + def wrapper(*args, **kwargs): + bound_values = sig.bind(*args, **kwargs) + # Enforce type assertions across supplied arguments + for name, value in bound_values.arguments.items(): + if name in bound_types: + if not isinstance(value, bound_types[name]): + raise TypeError('Argument {} must be {}'.format( + name, bound_types[name])) + return func(*args, **kwargs) + + return wrapper + + return decorate diff --git a/modelscope/utils/typing.py b/modelscope/utils/typing.py new file mode 100644 index 0000000..a010a5f --- /dev/null +++ b/modelscope/utils/typing.py @@ -0,0 +1,10 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
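# The aliases below quote heavyweight third-party types (PIL.Image, numpy, cv2,
# torch, tf) as forward-reference strings, so importing this module does not by
# itself pull in any of those optional dependencies.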
+ +from typing import Dict, List, Tuple, Union + +Image = Union[str, 'Image.Image', 'numpy.ndarray'] +Text = str +Audio = Union[str, bytes, 'np.ndarray'] +Video = Union[str, 'np.ndarray', 'cv2.VideoCapture'] + +Tensor = Union['torch.Tensor', 'tf.Tensor'] diff --git a/modelscope/version.py b/modelscope/version.py new file mode 100644 index 0000000..4fa90b9 --- /dev/null +++ b/modelscope/version.py @@ -0,0 +1,5 @@ +# Make sure to modify __release_datetime__ to release time when making official release. +__version__ = '1.3.0' +# default release datetime for branches under active development is set +# to be a time far-far-away-into-the-future +__release_datetime__ = '2099-10-13 08:56:12' diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e498455 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +torch +torchvision +-r requirements/framework.txt +-r requirements/cv.txt +-r requirements/tests.txt diff --git a/requirements/cv.txt b/requirements/cv.txt new file mode 100644 index 0000000..3d80419 --- /dev/null +++ b/requirements/cv.txt @@ -0,0 +1,6 @@ +opencv-python +easydict +easyrobust +datasets +mmdet +openmim diff --git a/requirements/framework.txt b/requirements/framework.txt new file mode 100644 index 0000000..9a6a899 --- /dev/null +++ b/requirements/framework.txt @@ -0,0 +1,18 @@ +addict +attrs +datasets>=2.7.0,<=2.8.0 +einops +filelock>=3.3.0 +gast>=0.2.2 +jsonplus +numpy +oss2 +Pillow>=6.2.0 +# pyarrow 9.0.0 introduced event_loop core dump +pyarrow>=6.0.0,!=9.0.0 +pyyaml +requests +scipy +setuptools +tqdm>=4.64.0 +yapf diff --git a/requirements/tests.txt b/requirements/tests.txt new file mode 100644 index 0000000..6988c70 --- /dev/null +++ b/requirements/tests.txt @@ -0,0 +1,4 @@ +expecttest +flake8 +isort>=4.3.21 +yapf==0.30.0 # use fix version to ensure consistent auto-styling diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/msdatasets/__init__.py b/tests/msdatasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/msdatasets/test_dataset_delete.py b/tests/msdatasets/test_dataset_delete.py new file mode 100644 index 0000000..ec1e156 --- /dev/null +++ b/tests/msdatasets/test_dataset_delete.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
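The aliases in modelscope/utils/typing.py above quote the heavyweight types ('Image.Image', 'np.ndarray', 'torch.Tensor', ...) as strings, so the module can be imported without pulling in PIL, numpy, cv2, torch or tensorflow. A rough sketch of how such an alias might appear in an annotation follows; the `caption` function is hypothetical and only the import is taken from the commit.

# Hypothetical consumer of the typing aliases; illustrative only.
from modelscope.utils.typing import Image, Text


def caption(image: Image) -> Text:
    # `Image` covers a file path, a PIL image or a numpy array, so the
    # annotation documents accepted inputs without importing PIL/numpy here.
    return 'a face' if image is not None else ''


print(caption('data/test/images/face_recognition_1.png'))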
+import os +import shutil +import tempfile +import unittest +import zipfile + +from modelscope.msdatasets import MsDataset +from modelscope.utils import logger as logging +from modelscope.utils.test_utils import test_level + +logger = logging.get_logger() + +KEY_EXTRACTED = 'extracted' +EXPECTED_MSG = 'success' + + +class DatasetDeleteTest(unittest.TestCase): + def setUp(self): + self.old_dir = os.getcwd() + self.dataset_name = 'small_coco_for_test' + self.dataset_file_name = self.dataset_name + self.prepared_dataset_name = 'pets_small' + self.token = os.getenv('TEST_UPLOAD_MS_TOKEN') + error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN' + self.assertIsNotNone(self.token, msg=error_msg) + from modelscope.hub.api import HubApi + from modelscope.hub.api import ModelScopeConfig + self.api = HubApi() + self.api.login(self.token) + + # get user info + self.namespace, _ = ModelScopeConfig.get_user_info() + + self.temp_dir = tempfile.mkdtemp() + self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name) + if not os.path.exists(self.test_work_dir): + os.makedirs(self.test_work_dir) + + def tearDown(self): + os.chdir(self.old_dir) + shutil.rmtree(self.temp_dir, ignore_errors=True) + logger.info( + f'Temporary directory {self.temp_dir} successfully removed!') + + @staticmethod + def get_raw_downloaded_file_path(extracted_path): + raw_downloaded_file_path = '' + raw_data_dir = os.path.abspath(os.path.join(extracted_path, + '../../..')) + for root, dirs, files in os.walk(raw_data_dir): + if KEY_EXTRACTED in dirs: + for file in files: + curr_file_path = os.path.join(root, file) + if zipfile.is_zipfile(curr_file_path): + raw_downloaded_file_path = curr_file_path + return raw_downloaded_file_path + + def upload_test_file(self): + # Get the prepared data from hub, using default modelscope namespace + ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') + config_res = ms_ds_train._hf_ds.config_kwargs + extracted_path = config_res.get('split_config').get('train') + raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path) + + object_name = self.dataset_file_name + '_for_del.zip' + MsDataset.upload(object_name=object_name, + local_file_path=raw_zipfile_path, + dataset_name=self.dataset_name, + namespace=self.namespace) + + return object_name + + def upload_test_dir(self): + ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') + config_train = ms_ds_train._hf_ds.config_kwargs + extracted_path_train = config_train.get('split_config').get('train') + + object_name = 'train_for_del' + MsDataset.upload(object_name=object_name, + local_file_path=os.path.join(extracted_path_train, + 'Pets/images/train'), + dataset_name=self.dataset_name, + namespace=self.namespace) + + return object_name + '/' + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_delete_object(self): + + # upload prepared data + file_name = self.upload_test_file() + dir_name = self.upload_test_dir() + + # delete object + del_file_msg = MsDataset.delete(object_name=file_name, + dataset_name=self.dataset_name, + namespace=self.namespace) + del_dir_msg = MsDataset.delete(object_name=dir_name, + dataset_name=self.dataset_name, + namespace=self.namespace) + + assert all([del_file_msg == EXPECTED_MSG, del_dir_msg == EXPECTED_MSG]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py new file mode 100644 index 
0000000..0640486 --- /dev/null +++ b/tests/msdatasets/test_dataset_upload.py @@ -0,0 +1,130 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest +import zipfile + +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects +from modelscope.utils import logger as logging +from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, + ModelFile) +from modelscope.utils.test_utils import test_level + +logger = logging.get_logger() + +KEY_EXTRACTED = 'extracted' + + +class DatasetUploadTest(unittest.TestCase): + def setUp(self): + self.old_dir = os.getcwd() + self.dataset_name = 'small_coco_for_test' + self.dataset_file_name = self.dataset_name + self.prepared_dataset_name = 'pets_small' + self.token = os.getenv('TEST_UPLOAD_MS_TOKEN') + error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN' + self.assertIsNotNone(self.token, msg=error_msg) + from modelscope.hub.api import HubApi + from modelscope.hub.api import ModelScopeConfig + self.api = HubApi() + self.api.login(self.token) + + # get user info + self.namespace, _ = ModelScopeConfig.get_user_info() + + self.temp_dir = tempfile.mkdtemp() + self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name) + self.test_meta_dir = os.path.join(self.test_work_dir, 'meta') + if not os.path.exists(self.test_work_dir): + os.makedirs(self.test_work_dir) + + def tearDown(self): + os.chdir(self.old_dir) + shutil.rmtree(self.temp_dir, ignore_errors=True) + logger.info( + f'Temporary directory {self.temp_dir} successfully removed!') + + @staticmethod + def get_raw_downloaded_file_path(extracted_path): + raw_downloaded_file_path = '' + raw_data_dir = os.path.abspath(os.path.join(extracted_path, + '../../..')) + for root, dirs, files in os.walk(raw_data_dir): + if KEY_EXTRACTED in dirs: + for file in files: + curr_file_path = os.path.join(root, file) + if zipfile.is_zipfile(curr_file_path): + raw_downloaded_file_path = curr_file_path + return raw_downloaded_file_path + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_upload(self): + # Get the prepared data from hub, using default modelscope namespace + ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') + config_res = ms_ds_train._hf_ds.config_kwargs + extracted_path = config_res.get('split_config').get('train') + raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path) + + MsDataset.upload(object_name=self.dataset_file_name + '.zip', + local_file_path=raw_zipfile_path, + dataset_name=self.dataset_name, + namespace=self.namespace) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_upload_dir(self): + ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') + config_train = ms_ds_train._hf_ds.config_kwargs + extracted_path_train = config_train.get('split_config').get('train') + + MsDataset.upload(object_name='train', + local_file_path=os.path.join(extracted_path_train, + 'Pets/images/train'), + dataset_name=self.dataset_name, + namespace=self.namespace) + MsDataset.upload(object_name='val', + local_file_path=os.path.join(extracted_path_train, + 'Pets/images/val'), + dataset_name=self.dataset_name, + namespace=self.namespace) + + objects = list_dataset_objects(hub_api=self.api, + max_limit=-1, + is_recursive=True, + dataset_name=self.dataset_name, + namespace=self.namespace, + 
version=DEFAULT_DATASET_REVISION) + + logger.info(f'{len(objects)} objects have been uploaded: {objects}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_download_dir(self): + test_ds = MsDataset.load(self.dataset_name, + namespace=self.namespace, + download_mode=DownloadMode.FORCE_REDOWNLOAD) + assert test_ds.config_kwargs['split_config'].values() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_clone_meta(self): + MsDataset.clone_meta(dataset_work_dir=self.test_meta_dir, + dataset_id=os.path.join(self.namespace, + self.dataset_name)) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_upload_meta(self): + # Clone dataset meta repo first. + MsDataset.clone_meta(dataset_work_dir=self.test_meta_dir, + dataset_id=os.path.join(self.namespace, + self.dataset_name)) + + with open(os.path.join(self.test_meta_dir, ModelFile.README), + 'a') as f: + f.write('\nThis is a line for unit test.') + + MsDataset.upload_meta(dataset_work_dir=self.test_meta_dir, + commit_message='Update for unit test.') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py new file mode 100644 index 0000000..acf898a --- /dev/null +++ b/tests/msdatasets/test_ms_dataset.py @@ -0,0 +1,214 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + +from modelscope.models import Model +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.audio.asr_dataset import ASRDataset +from modelscope.preprocessors import TextClassificationTransformersPreprocessor +from modelscope.preprocessors.base import Preprocessor +from modelscope.utils.constant import DEFAULT_DATASET_NAMESPACE, DownloadMode +from modelscope.utils.test_utils import require_tf, require_torch, test_level + + +class ImgPreprocessor(Preprocessor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.path_field = kwargs.pop('image_path', 'image_path') + self.width = kwargs.pop('width', 'width') + self.height = kwargs.pop('height', 'width') + + def __call__(self, data): + import cv2 + image_path = data.get(self.path_field) + if not image_path: + return None + img = cv2.imread(image_path) + return { + 'image': + cv2.resize(img, + (data.get(self.height, 128), data.get(self.width, 128))) + } + + +class MsDatasetTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_movie_scene_seg_toydata(self): + ms_ds_train = MsDataset.load('movie_scene_seg_toydata', split='train') + print(ms_ds_train._hf_ds.config_kwargs) + assert next(iter(ms_ds_train.config_kwargs['split_config'].values())) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_coco(self): + ms_ds_train = MsDataset.load( + 'pets_small', + namespace=DEFAULT_DATASET_NAMESPACE, + download_mode=DownloadMode.FORCE_REDOWNLOAD, + split='train') + print(ms_ds_train.config_kwargs) + assert next(iter(ms_ds_train.config_kwargs['split_config'].values())) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ms_csv_basic(self): + ms_ds_train = MsDataset.load('clue', + subset_name='afqmc', + split='train').to_hf_dataset().select( + range(5)) + print(next(iter(ms_ds_train))) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ds_basic(self): + ms_ds_full = MsDataset.load('xcopa', + 
subset_name='translation-et', + namespace='damotest') + ms_ds = MsDataset.load('xcopa', + subset_name='translation-et', + namespace='damotest', + split='test') + print(next(iter(ms_ds_full['test']))) + print(next(iter(ms_ds))) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @require_torch + def test_to_torch_dataset_text(self): + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + nlp_model = Model.from_pretrained(model_id) + preprocessor = TextClassificationTransformersPreprocessor( + nlp_model.model_dir, + first_sequence='premise', + second_sequence=None, + padding='max_length') + ms_ds_train = MsDataset.load('xcopa', + subset_name='translation-et', + namespace='damotest', + split='test') + pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor) + import torch + dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) + print(next(iter(dataloader))) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @require_tf + def test_to_tf_dataset_text(self): + import tensorflow as tf + tf.compat.v1.enable_eager_execution() + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + nlp_model = Model.from_pretrained(model_id) + preprocessor = TextClassificationTransformersPreprocessor( + nlp_model.model_dir, + first_sequence='premise', + second_sequence=None) + ms_ds_train = MsDataset.load('xcopa', + subset_name='translation-et', + namespace='damotest', + split='test') + tf_dataset = ms_ds_train.to_tf_dataset(batch_size=5, + shuffle=True, + preprocessors=preprocessor, + drop_remainder=True) + print(next(iter(tf_dataset))) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_to_dataset_asr(self): + ms_ds_asr = ASRDataset.load('speech_asr_aishell1_trainsets', + namespace='speech_asr') + print(next(iter(ms_ds_asr['train']))) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @require_torch + def test_to_torch_dataset_img(self): + ms_image_train = MsDataset.load('fixtures_image_utils', + namespace='damotest', + split='test') + pt_dataset = ms_image_train.to_torch_dataset( + preprocessors=ImgPreprocessor(image_path='file')) + import torch + dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) + print(next(iter(dataloader))) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @require_tf + def test_to_tf_dataset_img(self): + import tensorflow as tf + tf.compat.v1.enable_eager_execution() + ms_image_train = MsDataset.load('fixtures_image_utils', + namespace='damotest', + split='test') + tf_dataset = ms_image_train.to_tf_dataset( + batch_size=5, + shuffle=True, + preprocessors=ImgPreprocessor(image_path='file'), + drop_remainder=True, + ) + print(next(iter(tf_dataset))) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_streaming_load_coco(self): + small_coco_for_test = MsDataset.load( + dataset_name='EasyCV/small_coco_for_test', + split='train', + use_streaming=True, + download_mode=DownloadMode.FORCE_REDOWNLOAD) + dataset_sample_dict = next(iter(small_coco_for_test)) + print(dataset_sample_dict) + assert dataset_sample_dict.values() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_streaming_load_uni_fold(self): + """Test case for loading large scale datasets.""" + dataset = MsDataset.load(dataset_name='Uni-Fold-Data', + split='train', + use_streaming=True, + namespace='DPTech') + data_example = 
next(iter(dataset)) + print(data_example) + assert data_example.values() + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_streaming_load_afqmc(self): + """To streaming-load afqmc dataset, which contains train/dev/validation data in meta-files.""" + dataset = MsDataset.load('afqmc', split='test', use_streaming=True) + data_example = next(iter(dataset)) + print(data_example) + assert data_example.values() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_streaming_load_from_hf(self): + """Use stream mode to load dataset from huggingface hub.""" + from modelscope.utils.constant import Hubs + ds_train = MsDataset.load('glue', + subset_name='sst2', + split='train', + hub=Hubs.huggingface, + use_streaming=True) + data_example = next(iter(ds_train)) + print(data_example) + assert data_example.values() + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_streaming_load_img_object(self): + """Test case for iterating PIL object.""" + from PIL.PngImagePlugin import PngImageFile + dataset = MsDataset.load(dataset_name='SIDD', + subset_name='default', + namespace='huizheng', + split='train', + use_streaming=True) + data_example = next(iter(dataset)) + print(data_example) + assert isinstance(data_example['Noisy Image:FILE:Object'], + PngImageFile) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_to_ms_dataset(self): + """Test case for converting huggingface dataset to `MsDataset` instance.""" + from datasets.load import load_dataset + hf_dataset = load_dataset('beans', split='train', streaming=True) + ms_dataset = MsDataset.to_ms_dataset(hf_dataset) + data_example = next(iter(ms_dataset)) + print(data_example) + assert data_example.values() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_arc_face_recognition.py b/tests/pipelines/test_arc_face_recognition.py new file mode 100644 index 0000000..ba1708c --- /dev/null +++ b/tests/pipelines/test_arc_face_recognition.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_ir50_face-recognition_arcface' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/face_recognition_1.png' + img2 = 'data/test/images/face_recognition_2.png' + + face_recognition = pipeline(Tasks.face_recognition, + model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_damofd_face_detection.py b/tests/pipelines/test_damofd_face_detection.py new file mode 100644 index 0000000..d3b406d --- /dev/null +++ b/tests/pipelines/test_damofd_face_detection.py @@ -0,0 +1,184 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +import os +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level +from modelscope.utils.constant import DownloadMode + + +class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + def voc_ap(self, rec, prec): + + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + def image_eval(self, pred, gt, iou_thresh): + """ single image evaluation + pred: Nx5 + gt: Nx4 + ignore: + """ + _pred = pred.copy() + _gt = gt.copy() + pred_recall = np.zeros(_pred.shape[0]) + recall_list = np.zeros(_gt.shape[0]) + proposal_list = np.ones(_pred.shape[0]) + + #_pred[:, 2] = _pred[:, 2] + _pred[:, 0] + #_pred[:, 3] = _pred[:, 3] + _pred[:, 1] + _gt[:, 2] = _gt[:, 2] + _gt[:, 0] + _gt[:, 3] = _gt[:, 3] + _gt[:, 1] + + + for h in range(_pred.shape[0]): + gt_overlap = self.bbox_overlap(_gt, _pred[h]) + #gt_overlap = gt_overlap_list[h] + max_overlap, max_idx = 
gt_overlap.max(), gt_overlap.argmax() + + if max_overlap >= iou_thresh: + if recall_list[max_idx] == 0: + recall_list[max_idx] = 1 + + r_keep_index = np.where(recall_list == 1)[0] + pred_recall[h] = len(r_keep_index) + + return pred_recall, proposal_list + + + def img_pr_info(self, thresh_num, pred_info, proposal_list, pred_recall): + pr_info = np.zeros((thresh_num, 2)).astype('float') + fp = np.zeros((pred_info.shape[0],), dtype=np.int32) + last_info = [-1, -1] + for t in range(thresh_num): + + thresh = 1 - (t+1)/thresh_num + r_index = np.where(pred_info[:, 4] >= thresh)[0] + if len(r_index) == 0: + pr_info[t, 0] = 0 + pr_info[t, 1] = 0 + else: + r_index = r_index[-1] + p_index = np.where(proposal_list[:r_index+1] == 1)[0] + pr_info[t, 0] = len(p_index) #valid pred number + pr_info[t, 1] = pred_recall[r_index] # valid gt number + + if t>0 and pr_info[t, 0] > pr_info[t-1,0] and pr_info[t, 1]==pr_info[t-1,1]: + fp[r_index] = 1 + return pr_info, fp + + def gen_gt_info(self, img_gt): + gt_info = {} + fo = open(img_gt) + for line in fo: + if 'jpg' in line: + img_name = line.strip() + gt_info[img_name] = [] + continue + gt_info[img_name].append([float(item) for item in line.strip().split(' ')[:4]]) + return gt_info + + def dataset_pr_info(self, thresh_num, pr_curve, count_face): + _pr_curve = np.zeros((thresh_num, 2)) + for i in range(thresh_num): + _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] + _pr_curve[i, 1] = pr_curve[i, 1] / count_face + return _pr_curve + + def bbox_overlap(self, a, b): + x1 = np.maximum(a[:,0], b[0]) + y1 = np.maximum(a[:,1], b[1]) + x2 = np.minimum(a[:,2], b[2]) + y2 = np.minimum(a[:,3], b[3]) + w = x2-x1+1 + h = y2-y1+1 + inter = w*h + aarea = (a[:,2]-a[:,0]+1) * (a[:,3]-a[:,1]+1) + barea = (b[2]-b[0]+1) * (b[3]-b[1]+1) + o = inter / (aarea+barea-inter) + o[w<=0] = 0 + o[h<=0] = 0 + return o + + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + val_set = MsDataset.load('widerface_mini_train_val', namespace='ly261666', split='validation')#, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = self.gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, model=self.model_id) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate([result['boxes'], np.array(result['scores'])[:,np.newaxis]], axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = self.image_eval(pred_info, gt_box, iou_th) + _img_pr_info, fp = self.img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = self.dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall<=srecall)[0])-1 + rthresh = 1.0 - float(rindex)/thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], rthresh) + ap = self.voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f'%(ap, iou_th)) + self.show_result(abs_img_name, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + 
face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_attribute_recognition.py b/tests/pipelines/test_face_attribute_recognition.py new file mode 100644 index 0000000..6d80024 --- /dev/null +++ b/tests/pipelines/test_face_attribute_recognition.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_attribute_result +from modelscope.utils.test_utils import test_level + + +class FaceAttributeRecognitionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_resnet34_face-attribute-recognition_fairface' + + def show_result(self, img_path, facial_expression_result): + img = draw_face_attribute_result(img_path, facial_expression_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + fair_face = pipeline(Tasks.face_attribute_recognition, + model=self.model_id) + img_path = 'data/test/images/face_recognition_1.png' + result = fair_face(img_path) + self.show_result(img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_detection.py b/tests/pipelines/test_face_detection.py new file mode 100644 index 0000000..230bb46 --- /dev/null +++ b/tests/pipelines/test_face_detection.py @@ -0,0 +1,50 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_resnet_facedetection_scrfd10gkps' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + input_location = ['data/test/images/face_detection2.jpeg'] + + dataset = MsDataset.load(input_location, target='image') + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + # note that for dataset output, the inference-output is a Generator that can be iterated. 
+ result = face_detection(dataset) + result = next(result) + self.show_result(input_location[0], result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + img_path = 'data/test/images/face_detection2.jpeg' + + result = face_detection(img_path) + self.show_result(img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_liveness_ir.py b/tests/pipelines/test_face_liveness_ir.py new file mode 100644 index 0000000..214a0df --- /dev/null +++ b/tests/pipelines/test_face_liveness_ir.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + + +class FaceLivenessIrTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-liveness_flir' + self.img_path = 'data/test/images/face_liveness_ir.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_liveness, model=self.model_id) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_default_model(self): + face_detection = pipeline(Tasks.face_liveness) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_liveness_rgb.py b/tests/pipelines/test_face_liveness_rgb.py new file mode 100644 index 0000000..31b5fd2 --- /dev/null +++ b/tests/pipelines/test_face_liveness_rgb.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os.path as osp +import unittest + +import cv2 + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + + +class FaceLivenessRgbTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-liveness_flrgb' + self.img_path = 'data/test/images/face_liveness_rgb.png' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_liveness, model=self.model_id) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_default_model(self): + face_detection = pipeline(Tasks.face_liveness) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_liveness_xc.py b/tests/pipelines/test_face_liveness_xc.py new file mode 100644 index 0000000..29c605c --- /dev/null +++ b/tests/pipelines/test_face_liveness_xc.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + + +class FaceLivenessXcTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-liveness_flxc' + self.img_path = 'data/test/images/face_liveness_rgb.png' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_liveness, model=self.model_id) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_default_model(self): + face_detection = pipeline(Tasks.face_liveness) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_quality_assessment.py b/tests/pipelines/test_face_quality_assessment.py new file mode 100644 index 0000000..248cb3a --- /dev/null +++ b/tests/pipelines/test_face_quality_assessment.py @@ -0,0 +1,38 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os.path as osp +import unittest + +import cv2 + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + + +class FaceQualityAssessmentTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-quality-assessment_fqa' + self.img_path = 'data/test/images/face_recognition_1.png' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_quality_assessment, + model=self.model_id) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_default_model(self): + face_detection = pipeline(Tasks.face_quality_assessment) + result = face_detection(self.img_path) + self.show_result(self.img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_recognition.py b/tests/pipelines/test_face_recognition.py new file mode 100644 index 0000000..3fafbcc --- /dev/null +++ b/tests/pipelines/test_face_recognition.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_ir101_facerecognition_cfglint' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/face_recognition_1.png' + img2 = 'data/test/images/face_recognition_2.png' + + face_recognition = pipeline(Tasks.face_recognition, + model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_fm.py b/tests/pipelines/test_face_recognition_onnx_fm.py new file mode 100644 index 0000000..5eedb41 --- /dev/null +++ b/tests/pipelines/test_face_recognition_onnx_fm.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
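The face-recognition tests above (and the ONNX variants that follow) report np.dot(emb1[0], emb2[0]) as a cosine similarity, which holds only if the pipelines return L2-normalised embeddings. Below is a small illustrative sketch that makes the normalisation explicit, using toy vectors rather than real pipeline outputs.

import numpy as np


def cosine_similarity(a, b):
    # Normalise defensively so the dot product is a true cosine similarity
    # even when the embeddings are not unit-length.
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    return float(np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b)))


emb1 = [0.6, 0.8]   # toy stand-ins for OutputKeys.IMG_EMBEDDING vectors
emb2 = [0.8, 0.6]
print(f'Cos similarity={cosine_similarity(emb1, emb2):.3f}')  # 0.960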
+import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_manual_face-recognition_frfm' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/face_recognition_1.png' + img2 = 'data/test/images/face_recognition_2.png' + + face_recognition = pipeline(Tasks.face_recognition, + model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_ir.py b/tests/pipelines/test_face_recognition_onnx_ir.py new file mode 100644 index 0000000..9961241 --- /dev/null +++ b/tests/pipelines/test_face_recognition_onnx_ir.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_manual_face-recognition_frir' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/ir_face_recognition_1.png' + img2 = 'data/test/images/ir_face_recognition_2.png' + + face_recognition = pipeline(Tasks.face_recognition, + model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_recognition_ood.py b/tests/pipelines/test_face_recognition_ood.py new file mode 100644 index 0000000..42ee521 --- /dev/null +++ b/tests/pipelines/test_face_recognition_ood.py @@ -0,0 +1,42 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_ir_face-recognition-ood_rts' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/face_recognition_1.png' + img2 = 'data/test/images/face_recognition_2.png' + + face_recognition = pipeline(self.task, model=self.model_id) + result1 = face_recognition(img1) + emb1 = result1[OutputKeys.IMG_EMBEDDING] + score1 = result1[OutputKeys.SCORES][0][0] + + result2 = face_recognition(img2) + emb2 = result2[OutputKeys.IMG_EMBEDDING] + score2 = result2[OutputKeys.SCORES][0][0] + + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + print(f'OOD score: img1:{score1:.3f} img2:{score2:.3f}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_facial_expression_recognition.py b/tests/pipelines/test_facial_expression_recognition.py new file mode 100644 index 0000000..178c234 --- /dev/null +++ b/tests/pipelines/test_facial_expression_recognition.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_facial_expression_result +from modelscope.utils.test_utils import test_level + + +class FacialExpressionRecognitionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_vgg19_facial-expression-recognition_fer' + + def show_result(self, img_path, facial_expression_result): + img = draw_facial_expression_result(img_path, facial_expression_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skip('skip since the model is set to private for now') + def test_run_modelhub(self): + fer = pipeline(Tasks.facial_expression_recognition, + model=self.model_id) + img_path = 'data/test/images/facial_expression_recognition.jpg' + result = fer(img_path) + self.show_result(img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_facial_landmark_confidence.py b/tests/pipelines/test_facial_landmark_confidence.py new file mode 100644 index 0000000..db93bff --- /dev/null +++ b/tests/pipelines/test_facial_landmark_confidence.py @@ -0,0 +1,34 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os.path as osp +import unittest + +import cv2 +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.test_utils import test_level + + +class FacialLandmarkConfidenceTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_facial-landmark-confidence_flcm' + + def show_result(self, img_path, facial_expression_result): + img = draw_face_detection_result(img_path, facial_expression_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + flcm = pipeline(Tasks.face_2d_keypoints, model=self.model_id) + img_path = 'data/test/images/face_recognition_1.png' + result = flcm(img_path) + self.show_result(img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_mask_face_recognition.py b/tests/pipelines/test_mask_face_recognition.py new file mode 100644 index 0000000..07f0ae3 --- /dev/null +++ b/tests/pipelines/test_mask_face_recognition.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_resnet_face-recognition_facemask' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/mask_face_recognition_1.jpg' + img2 = 'data/test/images/mask_face_recognition_2.jpg' + + face_recognition = pipeline(Tasks.face_recognition, + model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_mog_face_detection.py b/tests/pipelines/test_mog_face_detection.py new file mode 100644 index 0000000..ecbc5c4 --- /dev/null +++ b/tests/pipelines/test_mog_face_detection.py @@ -0,0 +1,40 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os.path as osp +import unittest + +import cv2 + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + + +class MogFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_resnet101_face-detection_cvpr22papermogface' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + img_path = 'data/test/images/mog_face_detection.jpg' + + result = face_detection(img_path) + self.show_result(img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_default_model(self): + face_detection = pipeline(Tasks.face_detection) + img_path = 'data/test/images/mog_face_detection.jpg' + + result = face_detection(img_path) + self.show_result(img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_mtcnn_face_detection.py b/tests/pipelines/test_mtcnn_face_detection.py new file mode 100644 index 0000000..4b15f2b --- /dev/null +++ b/tests/pipelines/test_mtcnn_face_detection.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +from PIL import Image + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.test_utils import test_level + + +class MtcnnFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-detection_mtcnn' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + img_path = 'data/test/images/mtcnn_face_detection.jpg' + img = Image.open(img_path) + + result_1 = face_detection(img_path) + self.show_result(img_path, result_1) + + result_2 = face_detection(img) + self.show_result(img_path, result_2) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_retina_face_detection.py b/tests/pipelines/test_retina_face_detection.py new file mode 100644 index 0000000..b7eae74 --- /dev/null +++ b/tests/pipelines/test_retina_face_detection.py @@ -0,0 +1,32 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os.path as osp +import unittest + +import cv2 + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.test_utils import test_level + + +class RetinaFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_resnet50_face-detection_retinaface' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + img_path = 'data/test/images/retina_face_detection.jpg' + + result = face_detection(img_path) + self.show_result(img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_tinymog_face_detection.py b/tests/pipelines/test_tinymog_face_detection.py new file mode 100644 index 0000000..977267d --- /dev/null +++ b/tests/pipelines/test_tinymog_face_detection.py @@ -0,0 +1,200 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import os.path as osp +import unittest + +import cv2 +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.utils.constant import DownloadMode, Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_result +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + def setUp(self) -> None: + self.task = Tasks.face_detection + self.model_id = 'damo/cv_manual_face-detection_tinymog' + self.img_path = 'data/test/images/mog_face_detection.jpg' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + def voc_ap(self, rec, prec): + + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + def image_eval(self, pred, gt, iou_thresh): + """ single image evaluation + pred: Nx5 + gt: Nx4 + ignore: + """ + _pred = pred.copy() + _gt = gt.copy() + pred_recall = np.zeros(_pred.shape[0]) + recall_list = np.zeros(_gt.shape[0]) + proposal_list = np.ones(_pred.shape[0]) + + #_pred[:, 2] = _pred[:, 2] + _pred[:, 0] + #_pred[:, 3] = _pred[:, 3] + _pred[:, 1] + _gt[:, 2] = _gt[:, 2] + _gt[:, 0] + _gt[:, 3] = _gt[:, 3] + _gt[:, 1] + + for h in range(_pred.shape[0]): + gt_overlap = self.bbox_overlap(_gt, _pred[h]) + #gt_overlap = gt_overlap_list[h] + max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax() + + if max_overlap >= iou_thresh: + if recall_list[max_idx] == 0: + recall_list[max_idx] = 1 + + r_keep_index = np.where(recall_list == 1)[0] + pred_recall[h] = len(r_keep_index) + + return pred_recall, 
proposal_list + + def img_pr_info(self, thresh_num, pred_info, proposal_list, pred_recall): + pr_info = np.zeros((thresh_num, 2)).astype('float') + fp = np.zeros((pred_info.shape[0], ), dtype=np.int32) + last_info = [-1, -1] + for t in range(thresh_num): + + thresh = 1 - (t + 1) / thresh_num + r_index = np.where(pred_info[:, 4] >= thresh)[0] + if len(r_index) == 0: + pr_info[t, 0] = 0 + pr_info[t, 1] = 0 + else: + r_index = r_index[-1] + p_index = np.where(proposal_list[:r_index + 1] == 1)[0] + pr_info[t, 0] = len(p_index) #valid pred number + pr_info[t, 1] = pred_recall[r_index] # valid gt number + + if t > 0 and pr_info[t, 0] > pr_info[t - 1, 0] and pr_info[ + t, 1] == pr_info[t - 1, 1]: + fp[r_index] = 1 + return pr_info, fp + + def gen_gt_info(self, img_gt): + gt_info = {} + fo = open(img_gt) + for line in fo: + if 'jpg' in line: + img_name = line.strip() + gt_info[img_name] = [] + continue + gt_info[img_name].append( + [float(item) for item in line.strip().split(' ')[:4]]) + return gt_info + + def dataset_pr_info(self, thresh_num, pr_curve, count_face): + _pr_curve = np.zeros((thresh_num, 2)) + for i in range(thresh_num): + _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] + _pr_curve[i, 1] = pr_curve[i, 1] / count_face + return _pr_curve + + def bbox_overlap(self, a, b): + x1 = np.maximum(a[:, 0], b[0]) + y1 = np.maximum(a[:, 1], b[1]) + x2 = np.minimum(a[:, 2], b[2]) + y2 = np.minimum(a[:, 3], b[3]) + w = x2 - x1 + 1 + h = y2 - y1 + 1 + inter = w * h + aarea = (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1) + barea = (b[2] - b[0] + 1) * (b[3] - b[1] + 1) + o = inter / (aarea + barea - inter) + o[w <= 0] = 0 + o[h <= 0] = 0 + return o + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_dataset(self): + input_location = [ + 'data/test/images/mog_face_detection.jpg', + 'data/test/images/mog_face_detection.jpg' + ] + + dataset = MsDataset.load(input_location, target='image') + val_set = MsDataset.load( + 'widerface_mini_train_val', + namespace='ly261666', + split='validation' + ) #, download_mode=DownloadMode.FORCE_REDOWNLOAD) + img_base_path = next(iter(val_set))[1] + img_dir = osp.join(img_base_path, 'val_data') + img_gt = osp.join(img_base_path, 'val_label.txt') + gt_info = self.gen_gt_info(img_gt) + pred_info = {} + iou_th = 0.5 + thresh_num = 1000 + face_detection_func = pipeline(Tasks.face_detection, + model=self.model_id) + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + for img_name in os.listdir(img_dir): + abs_img_name = osp.join(img_dir, img_name) + result = face_detection_func(abs_img_name) + pred_info = np.concatenate( + [result['boxes'], + np.array(result['scores'])[:, np.newaxis]], + axis=1) + gt_box = np.array(gt_info[img_name]) + pred_recall, proposal_list = self.image_eval( + pred_info, gt_box, iou_th) + _img_pr_info, fp = self.img_pr_info(thresh_num, pred_info, + proposal_list, pred_recall) + pr_curve += _img_pr_info + count_face += gt_box.shape[0] + + pr_curve = self.dataset_pr_info(thresh_num, pr_curve, count_face) + propose = pr_curve[:, 0] + recall = pr_curve[:, 1] + for srecall in np.arange(0.1, 1.0001, 0.1): + rindex = len(np.where(recall <= srecall)[0]) - 1 + rthresh = 1.0 - float(rindex) / thresh_num + print('Recall-Precision-Thresh:', recall[rindex], propose[rindex], + rthresh) + ap = self.voc_ap(recall, propose) + print('ap: %.5f, iou_th: %.2f' % (ap, iou_th)) + self.show_result(abs_img_name, result) + import pdb + pdb.set_trace() + + #@unittest.skipUnless(test_level() >= 0, 'skip test 
in current test level') + #def test_run_modelhub(self): + # face_detection = pipeline(Tasks.face_detection, model=self.model_id) + + # result = face_detection(self.img_path) + # self.show_result(self.img_path, result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_ulfd_face_detection.py b/tests/pipelines/test_ulfd_face_detection.py new file mode 100644 index 0000000..45ed440 --- /dev/null +++ b/tests/pipelines/test_ulfd_face_detection.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +import unittest + +import cv2 +import numpy as np + +from modelscope.msdatasets import MsDataset +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import draw_face_detection_no_lm_result +from modelscope.utils.test_utils import test_level + + +class UlfdFaceDetectionTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'damo/cv_manual_face-detection_ulfd' + + def show_result(self, img_path, detection_result): + img = draw_face_detection_no_lm_result(img_path, detection_result) + cv2.imwrite('result.png', img) + print(f'output written to {osp.abspath("result.png")}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + face_detection = pipeline(Tasks.face_detection, model=self.model_id) + img_path = 'data/test/images/ulfd_face_detection.jpg' + + result = face_detection(img_path) + self.show_result(img_path, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/__init__.py b/tests/trainers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/audio/__init__.py b/tests/trainers/audio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/audio/test_ans_trainer.py b/tests/trainers/audio/test_ans_trainer.py new file mode 100644 index 0000000..b7bc29c --- /dev/null +++ b/tests/trainers/audio/test_ans_trainer.py @@ -0,0 +1,65 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
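The voc_ap helper duplicated in the WIDER FACE evaluation tests above (test_damofd_face_detection.py and test_tinymog_face_detection.py) computes average precision with a precision envelope. The standalone sketch below restates that arithmetic and checks it on toy recall/precision arrays, purely for illustration.

import numpy as np


def voc_ap(rec, prec):
    # Same envelope-based AP computation as the test helper above.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    # Walk backwards so precision is monotonically non-increasing.
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # Sum precision over the points where recall changes.
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])


# Half of the faces recalled at precision 1.0, the rest at precision 0.5:
# AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75
print(voc_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5])))  # -> 0.75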
+ +import os +import shutil +import tempfile +import unittest +from functools import partial + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.audio.audio_utils import to_segment +from modelscope.utils.constant import DownloadMode +from modelscope.utils.hub import read_config +from modelscope.utils.test_utils import test_level + +SEGMENT_LENGTH_TEST = 640 + + +class TestANSTrainer(unittest.TestCase): + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/speech_frcrn_ans_cirm_16k' + cfg = read_config(self.model_id) + cfg.train.max_epochs = 2 + cfg.train.dataloader.batch_size_per_gpu = 1 + self.cfg_file = os.path.join(self.tmp_dir, 'train_config.json') + cfg.dump(self.cfg_file) + + hf_ds = MsDataset.load( + 'ICASSP_2021_DNS_Challenge', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD).to_hf_dataset() + mapped_ds = hf_ds.map(partial(to_segment, + segment_length=SEGMENT_LENGTH_TEST), + remove_columns=['duration'], + batched=True, + batch_size=2) + self.dataset = MsDataset.from_hf_dataset(mapped_ds) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset, + eval_dataset=self.dataset, + max_epochs=2, + train_iters_per_epoch=2, + val_iters_per_epoch=1, + cfg_file=self.cfg_file, + work_dir=self.tmp_dir) + + trainer = build_trainer(Trainers.speech_frcrn_ans_cirm_16k, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i + 1}.pth', results_files) diff --git a/tests/trainers/audio/test_asr_trainer.py b/tests/trainers/audio/test_asr_trainer.py new file mode 100644 index 0000000..dcd3977 --- /dev/null +++ b/tests/trainers/audio/test_asr_trainer.py @@ -0,0 +1,48 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
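+# Trainer test for the Paraformer ASR model: fine-tunes on the
+# speech_asr_aishell1_trainsets dataset and asserts that the best
+# validation checkpoint (valid.acc.best.pth) is produced.
+# Runs only at test level >= 2.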
+ +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.audio.audio_utils import TtsTrainType +from modelscope.utils.constant import DownloadMode, Fields, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class TestASRTrainer(unittest.TestCase): + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' + self.dataset_id = 'speech_asr_aishell1_trainsets' + self.dataset_namespace = 'speech_asr' + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer(self): + ds_dict = MsDataset.load(self.dataset_id, + namespace=self.dataset_namespace) + kwargs = dict(model=self.model_id, + work_dir=self.tmp_dir, + data_dir=ds_dict) + trainer = build_trainer(Trainers.speech_asr_trainer, + default_args=kwargs) + trainer.train() + result_model = os.path.join(self.tmp_dir, 'valid.acc.best.pth') + assert os.path.exists(result_model) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/audio/test_kws_farfield_trainer.py b/tests/trainers/audio/test_kws_farfield_trainer.py new file mode 100644 index 0000000..efc148a --- /dev/null +++ b/tests/trainers/audio/test_kws_farfield_trainer.py @@ -0,0 +1,78 @@ +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.utils.test_utils import test_level + +POS_FILE = 'data/test/audios/wake_word_with_label_xyxy.wav' +NEG_FILE = 'data/test/audios/speech_with_noise.wav' +NOISE_FILE = 'data/test/audios/speech_with_noise.wav' +INTERF_FILE = 'data/test/audios/speech_with_noise.wav' +REF_FILE = 'data/test/audios/farend_speech.wav' +NOISE_2CH_FILE = 'data/test/audios/noise_2ch.wav' + + +class TestKwsFarfieldTrainer(unittest.TestCase): + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + print(f'tmp dir: {self.tmp_dir}') + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya' + + train_pos_list = self.create_list('pos.list', POS_FILE) + train_neg_list = self.create_list('neg.list', NEG_FILE) + train_noise1_list = self.create_list('noise.list', NOISE_FILE) + train_noise2_list = self.create_list('noise_2ch.list', NOISE_2CH_FILE) + train_interf_list = self.create_list('interf.list', INTERF_FILE) + train_ref_list = self.create_list('ref.list', REF_FILE) + + base_dict = dict(train_pos_list=train_pos_list, + train_neg_list=train_neg_list, + train_noise1_list=train_noise1_list) + fintune_dict = dict(train_pos_list=train_pos_list, + train_neg_list=train_neg_list, + train_noise1_list=train_noise1_list, + train_noise2_type='1', + train_noise1_ratio='0.2', + train_noise2_list=train_noise2_list, + train_interf_list=train_interf_list, + train_ref_list=train_ref_list) + self.custom_conf = dict(basetrain_easy=base_dict, + basetrain_normal=base_dict, + basetrain_hard=base_dict, + finetune_easy=fintune_dict, + finetune_normal=fintune_dict, + finetune_hard=fintune_dict) + + def create_list(self, list_name, audio_file): + 
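+        # Write a list file containing ten copies of the given audio path and
+        # return it as '<list_path>, 1.0' for use in the custom training config.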
pos_list_file = os.path.join(self.tmp_dir, list_name) + with open(pos_list_file, 'w') as f: + for i in range(10): + f.write(f'{os.path.join(os.getcwd(), audio_file)}\n') + train_pos_list = f'{pos_list_file}, 1.0' + return train_pos_list + + def tearDown(self) -> None: + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_normal(self): + kwargs = dict(model=self.model_id, + work_dir=self.tmp_dir, + workers=2, + max_epochs=2, + train_iters_per_epoch=2, + val_iters_per_epoch=1, + custom_conf=self.custom_conf) + + trainer = build_trainer(Trainers.speech_dfsmn_kws_char_farfield, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files, + f'work_dir:{self.tmp_dir}') diff --git a/tests/trainers/audio/test_kws_nearfield_trainer.py b/tests/trainers/audio/test_kws_nearfield_trainer.py new file mode 100644 index 0000000..fd678bd --- /dev/null +++ b/tests/trainers/audio/test_kws_nearfield_trainer.py @@ -0,0 +1,116 @@ +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.utils.hub import read_config, snapshot_download +from modelscope.utils.test_utils import test_level +from modelscope.utils.torch_utils import get_dist_info + +POS_FILE = 'data/test/audios/kws_xiaoyunxiaoyun.wav' +NEG_FILE = 'data/test/audios/kws_bofangyinyue.wav' + + +class TestKwsNearfieldTrainer(unittest.TestCase): + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + print(f'tmp dir: {self.tmp_dir}') + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + self.model_id = 'damo/speech_charctc_kws_phone-xiaoyun' + + model_dir = snapshot_download(self.model_id) + print(model_dir) + self.configs = read_config(self.model_id) + + # update some configs + self.configs.train.max_epochs = 10 + self.configs.train.batch_size_per_gpu = 4 + self.configs.train.dataloader.workers_per_gpu = 1 + self.configs.evaluation.batch_size_per_gpu = 4 + self.configs.evaluation.dataloader.workers_per_gpu = 1 + + self.config_file = os.path.join(self.tmp_dir, 'config.json') + self.configs.dump(self.config_file) + + self.train_scp, self.cv_scp, self.trans_file = self.create_list() + + print(f'test level is {test_level()}') + + def create_list(self): + train_scp_file = os.path.join(self.tmp_dir, 'train.scp') + cv_scp_file = os.path.join(self.tmp_dir, 'cv.scp') + trans_file = os.path.join(self.tmp_dir, 'merged.trans') + + with open(trans_file, 'w') as fp_trans: + with open(train_scp_file, 'w') as fp_scp: + for i in range(8): + fp_scp.write( + f'train_pos_wav_{i}\t{os.path.join(os.getcwd(), POS_FILE)}\n' + ) + fp_trans.write(f'train_pos_wav_{i}\t小云小云\n') + + for i in range(16): + fp_scp.write( + f'train_neg_wav_{i}\t{os.path.join(os.getcwd(), NEG_FILE)}\n' + ) + fp_trans.write(f'train_neg_wav_{i}\t播放音乐\n') + + with open(cv_scp_file, 'w') as fp_scp: + for i in range(2): + fp_scp.write( + f'cv_pos_wav_{i}\t{os.path.join(os.getcwd(), POS_FILE)}\n' + ) + fp_trans.write(f'cv_pos_wav_{i}\t小云小云\n') + + for i in range(2): + fp_scp.write( + f'cv_neg_wav_{i}\t{os.path.join(os.getcwd(), NEG_FILE)}\n' + ) + fp_trans.write(f'cv_neg_wav_{i}\t播放音乐\n') + + return train_scp_file, cv_scp_file, trans_file + + def tearDown(self) -> None: + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 
'skip test in current test level') + def test_normal(self): + print('test start ...') + kwargs = dict(model=self.model_id, + work_dir=self.tmp_dir, + cfg_file=self.config_file) + + trainer = build_trainer(Trainers.speech_kws_fsmn_char_ctc_nearfield, + default_args=kwargs) + + kwargs = dict(train_data=self.train_scp, + cv_data=self.cv_scp, + trans_data=self.trans_file) + trainer.train(**kwargs) + + rank, _ = get_dist_info() + if rank == 0: + results_files = os.listdir(self.tmp_dir) + for i in range(self.configs.train.max_epochs): + self.assertIn(f'{i}.pt', results_files) + + kwargs = dict( + test_dir=self.tmp_dir, + gpu=-1, + keywords='小云小云', + batch_size=4, + ) + trainer.evaluate(None, None, **kwargs) + + results_files = os.listdir(self.tmp_dir) + self.assertIn('convert.kaldi.txt', results_files) + + print('test finished ...') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/audio/test_separation_trainer.py b/tests/trainers/audio/test_separation_trainer.py new file mode 100644 index 0000000..278c3be --- /dev/null +++ b/tests/trainers/audio/test_separation_trainer.py @@ -0,0 +1,82 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.preprocessors.audio import AudioBrainPreprocessor +from modelscope.trainers import build_trainer +from modelscope.utils.test_utils import test_level + +MIX_SPEECH_FILE = 'data/test/audios/mix_speech.wav' +S1_SPEECH_FILE = 'data/test/audios/s1_speech.wav' +S2_SPEECH_FILE = 'data/test/audios/s2_speech.wav' + + +class TestSeparationTrainer(unittest.TestCase): + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/speech_mossformer_separation_temporal_8k' + + csv_path = os.path.join(self.tmp_dir, 'test.csv') + mix_path = os.path.join(os.getcwd(), MIX_SPEECH_FILE) + s1_path = os.path.join(os.getcwd(), S1_SPEECH_FILE) + s2_path = os.path.join(os.getcwd(), S2_SPEECH_FILE) + with open(csv_path, 'w') as w: + w.write(f'id,mix_wav:FILE,s1_wav:FILE,s2_wav:FILE\n' + f'0,{mix_path},{s1_path},{s2_path}\n') + self.dataset = MsDataset.load('csv', data_files={ + 'test': [csv_path] + }).to_torch_dataset(preprocessors=[ + AudioBrainPreprocessor(takes='mix_wav:FILE', provides='mix_sig'), + AudioBrainPreprocessor(takes='s1_wav:FILE', provides='s1_sig'), + AudioBrainPreprocessor(takes='s2_wav:FILE', provides='s2_sig') + ], + to_tensor=False) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset, + eval_dataset=self.dataset, + max_epochs=2, + work_dir=self.tmp_dir) + trainer = build_trainer(Trainers.speech_separation, + default_args=kwargs) + # model placement + trainer.model.load_check_point(device=trainer.device) + trainer.train() + + logging_path = os.path.join(self.tmp_dir, 'train_log.txt') + self.assertTrue(os.path.exists(logging_path), + f'Cannot find logging file {logging_path}') + save_dir = os.path.join(self.tmp_dir, 'save') + checkpoint_dirs = os.listdir(save_dir) + self.assertEqual(len(checkpoint_dirs), 2, + f'Cannot find checkpoint in {save_dir}!') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_eval(self): + kwargs = 
dict(model=self.model_id, + train_dataset=None, + eval_dataset=self.dataset, + max_epochs=2, + work_dir=self.tmp_dir) + trainer = build_trainer(Trainers.speech_separation, + default_args=kwargs) + result = trainer.evaluate(None) + self.assertTrue('si-snr' in result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/audio/test_tts_trainer.py b/tests/trainers/audio/test_tts_trainer.py new file mode 100644 index 0000000..7a3b65b --- /dev/null +++ b/tests/trainers/audio/test_tts_trainer.py @@ -0,0 +1,64 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.audio.audio_utils import TtsTrainType +from modelscope.utils.constant import DownloadMode, Fields, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class TestTtsTrainer(unittest.TestCase): + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'speech_tts/speech_sambert-hifigan_tts_zh-cn_multisp_pretrain_16k' + self.dataset_id = 'speech_kantts_opendata' + self.dataset_namespace = 'speech_tts' + self.train_info = { + TtsTrainType.TRAIN_TYPE_SAMBERT: { + 'train_steps': 2, + 'save_interval_steps': 1, + 'eval_interval_steps': 1, + 'log_interval': 1 + }, + TtsTrainType.TRAIN_TYPE_VOC: { + 'train_steps': 2, + 'save_interval_steps': 1, + 'eval_interval_steps': 1, + 'log_interval': 1 + } + } + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + work_dir=self.tmp_dir, + train_dataset=self.dataset_id, + train_dataset_namespace=self.dataset_namespace, + train_type=self.train_info) + trainer = build_trainer(Trainers.speech_kantts_trainer, + default_args=kwargs) + trainer.train() + tmp_am = os.path.join(self.tmp_dir, 'tmp_am', 'ckpt') + tmp_voc = os.path.join(self.tmp_dir, 'tmp_voc', 'ckpt') + assert os.path.exists(tmp_am) + assert os.path.exists(tmp_voc) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/__init__.py b/tests/trainers/easycv/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/easycv/test_easycv_trainer.py b/tests/trainers/easycv/test_easycv_trainer.py new file mode 100644 index 0000000..90d79e7 --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer.py @@ -0,0 +1,233 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
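+# End-to-end tests for the generic EasyCV trainer: train_func converts the
+# EasyCV YOLOX-S config into a ModelScope config via to_ms_config, trains two
+# epochs on the small_coco_for_test dataset, and the single-GPU / multi-GPU
+# test cases then verify the epoch checkpoints and the *.log.json contents.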
+import glob +import json +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Models, Pipelines, Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import LogKeys, ModeKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import DistributedTestCase, test_level +from modelscope.utils.torch_utils import is_master + + +def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4): + import easycv + config_path = os.path.join( + os.path.dirname(easycv.__file__), + 'configs/detection/yolox/yolox_s_8xb16_300e_coco.py') + + cfg = Config.from_file(config_path) + + cfg.log_config.update( + dict(hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ])) # not support TensorboardLoggerHookV2 + + ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json') + from easycv.utils.ms_utils import to_ms_config + + if is_master(): + to_ms_config(cfg, + dump=True, + task=Tasks.image_object_detection, + ms_model_name=Models.yolox, + pipeline_name=Pipelines.easycv_detection, + save_path=ms_cfg_file) + + trainer_name = Trainers.easycv + train_dataset = MsDataset.load(dataset_name='small_coco_for_test', + namespace='EasyCV', + split='train') + eval_dataset = MsDataset.load(dataset_name='small_coco_for_test', + namespace='EasyCV', + split='validation') + + cfg_options = { + 'train.max_epochs': + 2, + 'train.dataloader.batch_size_per_gpu': + imgs_per_gpu, + 'evaluation.dataloader.batch_size_per_gpu': + 2, + 'train.hooks': [ + { + 'type': 'CheckpointHook', + 'interval': 1 + }, + { + 'type': 'EvaluationHook', + 'interval': 1 + }, + { + 'type': 'TextLoggerHook', + 'ignore_rounding_keys': None, + 'interval': log_interval + }, + ] + } + kwargs = dict(cfg_file=ms_cfg_file, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=work_dir, + cfg_options=cfg_options, + launcher='pytorch' if dist else None) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestSingleGpu(unittest.TestCase): + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_single_gpu(self): + train_func(self.tmp_dir) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + + with open(json_files[0], 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 3, + LogKeys.LR: 0.00013 + }, json.loads(lines[0])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10 + }, json.loads(lines[1])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 3, + LogKeys.LR: 0.00157 + }, json.loads(lines[2])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 2, + 
LogKeys.ITER: 10 + }, json.loads(lines[3])) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + for i in [0, 2]: + self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) + self.assertIn(LogKeys.ITER_TIME, lines[i]) + self.assertIn(LogKeys.MEMORY, lines[i]) + self.assertIn('total_loss', lines[i]) + for i in [1, 3]: + self.assertIn( + 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP', + lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP', lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i]) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class EasyCVTrainerTestMultiGpus(DistributedTestCase): + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_multi_gpus(self): + self.start(train_func, + num_gpus=2, + work_dir=self.tmp_dir, + dist=True, + log_interval=2, + imgs_per_gpu=5) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + + with open(json_files[0], 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 2, + LogKeys.LR: 0.0002 + }, json.loads(lines[0])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 1, + LogKeys.ITER: 5 + }, json.loads(lines[1])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 2, + LogKeys.LR: 0.0018 + }, json.loads(lines[2])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 2, + LogKeys.ITER: 5 + }, json.loads(lines[3])) + + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + for i in [0, 2]: + self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) + self.assertIn(LogKeys.ITER_TIME, lines[i]) + self.assertIn(LogKeys.MEMORY, lines[i]) + self.assertIn('total_loss', lines[i]) + for i in [1, 3]: + self.assertIn( + 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP', + lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP', lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i]) + self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py b/tests/trainers/easycv/test_easycv_trainer_detection_dino.py new file mode 100644 index 0000000..c8c7cbf --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_detection_dino.py @@ -0,0 +1,66 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
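+# Single-GPU training test for the DINO detector
+# (damo/cv_swinl_image-object-detection_dino): one epoch on
+# small_coco_for_test, then checks that one *.log.json and epoch_1.pth exist.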
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestDetectionDino(unittest.TestCase): + model_id = 'damo/cv_swinl_image-object-detection_dino' + + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def _train(self, tmp_dir): + cfg_options = {'train.max_epochs': 1} + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load(dataset_name='small_coco_for_test', + namespace='EasyCV', + split='train') + eval_dataset = MsDataset.load(dataset_name='small_coco_for_test', + namespace='EasyCV', + split='validation') + + kwargs = dict(model=self.model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_single_gpu(self): + temp_file_dir = tempfile.TemporaryDirectory() + tmp_dir = temp_file_dir.name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + self._train(tmp_dir) + + results_files = os.listdir(tmp_dir) + json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + + temp_file_dir.cleanup() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py new file mode 100644 index 0000000..3051065 --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py @@ -0,0 +1,71 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
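+# Training test for the MobileNet face 2D keypoints model on
+# face_2d_keypoints_dataset; currently skipped because the dataset
+# is marked private.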
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestFace2DKeypoints(unittest.TestCase): + model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' + + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def _train(self, tmp_dir): + cfg_options = {'train.max_epochs': 2} + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load( + dataset_name='face_2d_keypoints_dataset', + namespace='modelscope', + split='train', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) + eval_dataset = MsDataset.load( + dataset_name='face_2d_keypoints_dataset', + namespace='modelscope', + split='train', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) + + kwargs = dict(model=self.model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skip( + 'skip since face_2d_keypoints_dataset is set to private for now') + def test_trainer_single_gpu(self): + temp_file_dir = tempfile.TemporaryDirectory() + tmp_dir = temp_file_dir.name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + self._train(tmp_dir) + + results_files = os.listdir(tmp_dir) + json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + temp_file_dir.cleanup() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py new file mode 100644 index 0000000..ae9aa62 --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py @@ -0,0 +1,71 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
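+# Trains the HRNet-w18 hand pose model for 20 epochs on the
+# cv_hand_2d_keypoints_coco_wholebody subtrain split and checks that the
+# epoch_10 and epoch_20 checkpoints are saved.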
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestHand2dKeypoints(unittest.TestCase): + model_id = 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody' + + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + def _train(self): + cfg_options = {'train.max_epochs': 20} + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load( + dataset_name='cv_hand_2d_keypoints_coco_wholebody', + namespace='chenhyer', + split='subtrain', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + eval_dataset = MsDataset.load( + dataset_name='cv_hand_2d_keypoints_coco_wholebody', + namespace='chenhyer', + split='subtrain', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + + kwargs = dict(model=self.model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_single_gpu(self): + self._train() + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_10.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_20.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py b/tests/trainers/easycv/test_easycv_trainer_hand_detection.py new file mode 100644 index 0000000..145e00a --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_hand_detection.py @@ -0,0 +1,62 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
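+# Trains the YOLOX-PAI hand detector for two epochs on hand_detection_dataset
+# and checks that epoch_2.pth is saved; runs at test level >= 1.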
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +class EasyCVTrainerTestHandDetection(unittest.TestCase): + model_id = 'damo/cv_yolox-pai_hand-detection' + + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def _train(self, tmp_dir): + cfg_options = {'train.max_epochs': 2} + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load(dataset_name='hand_detection_dataset', + split='subtrain') + eval_dataset = MsDataset.load(dataset_name='hand_detection_dataset', + split='subtrain') + + kwargs = dict(model=self.model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_single_gpu(self): + temp_file_dir = tempfile.TemporaryDirectory() + tmp_dir = temp_file_dir.name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + self._train(tmp_dir) + + results_files = os.listdir(tmp_dir) + # json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + temp_file_dir.cleanup() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py b/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py new file mode 100644 index 0000000..d81825e --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py @@ -0,0 +1,68 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
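+# Trains damo/cv_r50_panoptic-segmentation_cocopan for one epoch on
+# COCO2017_panopic_subset. The YOLOXLrUpdaterHook is temporarily popped from
+# the mmcv HOOKS registry around train() and restored afterwards, apparently
+# to avoid a duplicate hook registration clash with EasyCV.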
+import glob +import os +import shutil +import tempfile +import unittest + +import torch +from mmcv.runner.hooks import HOOKS as MMCV_HOOKS + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestPanopticMask2Former(unittest.TestCase): + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + def _train(self): + cfg_options = {'train.max_epochs': 1} + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load(dataset_name='COCO2017_panopic_subset', + split='train') + eval_dataset = MsDataset.load(dataset_name='COCO2017_panopic_subset', + split='validation') + kwargs = dict(model='damo/cv_r50_panoptic-segmentation_cocopan', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + + hook_name = 'YOLOXLrUpdaterHook' + mmcv_hook = MMCV_HOOKS._module_dict.pop(hook_name, None) + + trainer.train() + + MMCV_HOOKS._module_dict[hook_name] = mmcv_hook + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_single_gpu_mask2former_r50(self): + self._train() + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py b/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py new file mode 100644 index 0000000..94a89de --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py @@ -0,0 +1,97 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
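+# Fine-tunes damo/cv_cspnet_image-object-detection_yolox for two epochs on
+# small_coco_for_test, initializing weights from the downloaded
+# pytorch_model.bin via the load_from option.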
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestRealtimeObjectDetection(unittest.TestCase): + model_id = 'damo/cv_cspnet_image-object-detection_yolox' + + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def _train(self, tmp_dir): + # cfg_options = {'train.max_epochs': 2} + self.cache_path = snapshot_download(self.model_id) + cfg_options = { + 'train.max_epochs': + 2, + 'train.dataloader.batch_size_per_gpu': + 4, + 'evaluation.dataloader.batch_size_per_gpu': + 2, + 'train.hooks': [ + { + 'type': 'CheckpointHook', + 'interval': 1 + }, + { + 'type': 'EvaluationHook', + 'interval': 1 + }, + { + 'type': 'TextLoggerHook', + 'ignore_rounding_keys': None, + 'interval': 2 + }, + ], + 'load_from': + os.path.join(self.cache_path, 'pytorch_model.bin') + } + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load(dataset_name='small_coco_for_test', + namespace='EasyCV', + split='train') + eval_dataset = MsDataset.load(dataset_name='small_coco_for_test', + namespace='EasyCV', + split='validation') + + kwargs = dict( + model=self.model_id, + # model_revision='v1.0.2', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skipUnless( + test_level() >= 0, + 'skip since face_2d_keypoints_dataset is set to private for now') + def test_trainer_single_gpu(self): + temp_file_dir = tempfile.TemporaryDirectory() + tmp_dir = temp_file_dir.name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + self._train(tmp_dir) + + results_files = os.listdir(tmp_dir) + json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + temp_file_dir.cleanup() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_segformer.py b/tests/trainers/easycv/test_segformer.py new file mode 100644 index 0000000..cd9ff48 --- /dev/null +++ b/tests/trainers/easycv/test_segformer.py @@ -0,0 +1,69 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
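+# Trains the SegFormer-B0 semantic segmentation model for two epochs on
+# small_coco_stuff164k, overriding the decode head norm to plain BN
+# (presumably so the test can run on a single GPU without SyncBN).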
+import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestSegformer(unittest.TestCase): + def setUp(self): + self.logger = get_logger() + self.logger.info( + ('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + def _train(self): + + cfg_options = { + 'train.max_epochs': 2, + 'model.decode_head.norm_cfg.type': 'BN' + } + + trainer_name = Trainers.easycv + train_dataset = MsDataset.load(dataset_name='small_coco_stuff164k', + namespace='EasyCV', + split='train') + eval_dataset = MsDataset.load(dataset_name='small_coco_stuff164k', + namespace='EasyCV', + split='validation') + kwargs = dict( + model= + 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_single_gpu_segformer(self): + self._train() + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/__init__.py b/tests/trainers/hooks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/hooks/compression/__init__.py b/tests/trainers/hooks/compression/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/hooks/compression/test_sparsity_hook.py b/tests/trainers/hooks/compression/test_sparsity_hook.py new file mode 100644 index 0000000..05ce5b7 --- /dev/null +++ b/tests/trainers/hooks/compression/test_sparsity_hook.py @@ -0,0 +1,111 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
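+# Drives a dummy model through a manual hook-based training loop with the
+# SparsityHook (pruning_method 'pst', final_sparsity 0.9) and asserts that
+# 90% of the linear layer weights are zero after training.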
+import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn +from torch.optim import SGD +from torch.optim.lr_scheduler import MultiStepLR + +from modelscope.metainfo import Trainers +from modelscope.models.base import Model +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile, TrainerStages +from modelscope.utils.test_utils import create_dummy_test_dataset + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), 10) + + +class DummyModel(nn.Module, Model): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 10) + self.bn = nn.BatchNorm1d(10) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class SparsityHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_sparsity_hook(self): + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': + self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'hooks': [{ + 'type': 'SparsityHook', + 'pruning_method': 'pst', + 'config': { + 'weight_rank': 1, + 'mask_rank': 1, + 'final_sparsity': 0.9, + 'frequency': 1, + }, + }], + }, + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) + trainer_name = Trainers.default + kwargs = dict( + cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + optimizers=(optimizer, lr_scheduler), + max_epochs=5, + device='cpu', + ) + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.register_optimizers_hook() + trainer.register_hook_from_cfg(trainer.cfg.train.hooks) + trainer.train_dataloader = train_dataloader + trainer.data_loader = train_dataloader + trainer.invoke_hook(TrainerStages.before_run) + for i in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + trainer.invoke_hook(TrainerStages.after_train_epoch) + trainer.invoke_hook(TrainerStages.after_run) + + self.assertEqual(torch.mean(1.0 * (trainer.model.linear.weight == 0)), + 0.9) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/logger/__init__.py b/tests/trainers/hooks/logger/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/hooks/logger/test_tensorboard_hook.py b/tests/trainers/hooks/logger/test_tensorboard_hook.py new file mode 100644 index 0000000..8d854fb --- /dev/null +++ b/tests/trainers/hooks/logger/test_tensorboard_hook.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
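+# Trains a dummy model for two epochs with TensorboardHook enabled, then
+# reads the events.out.tfevents.* file back with EventAccumulator and checks
+# the logged loss and learning-rate scalars.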
+import glob +import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn + +from modelscope.metainfo import Trainers +from modelscope.models.base import TorchModel +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys, ModelFile +from modelscope.utils.test_utils import create_dummy_test_dataset + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), 20) + + +class DummyModel(TorchModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class TensorboardHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_tensorboard_hook(self): + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01 + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2, + }, + 'hooks': [{ + 'type': 'TensorboardHook', + 'interval': 2 + }] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=DummyModel(), + data_collator=None, + train_dataset=dummy_dataset, + max_epochs=2) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + tb_out_dir = os.path.join(self.tmp_dir, 'tensorboard_output') + + events_files = glob.glob( + os.path.join(tb_out_dir, 'events.out.tfevents.*')) + self.assertEqual(len(events_files), 1) + + from tensorboard.backend.event_processing.event_accumulator import EventAccumulator + ea = EventAccumulator(events_files[0]) + ea.Reload() + self.assertEqual(len(ea.Scalars(LogKeys.LOSS)), 10) + self.assertEqual(len(ea.Scalars(LogKeys.LR)), 10) + for i in range(5): + self.assertAlmostEqual(ea.Scalars(LogKeys.LR)[i].value, + 0.01, + delta=0.001) + for i in range(5, 10): + self.assertAlmostEqual(ea.Scalars(LogKeys.LR)[i].value, + 0.01, + delta=0.0001) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/test_checkpoint_hook.py b/tests/trainers/hooks/test_checkpoint_hook.py new file mode 100644 index 0000000..55906f6 --- /dev/null +++ b/tests/trainers/hooks/test_checkpoint_hook.py @@ -0,0 +1,216 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
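+# Hook tests for checkpoint saving: CheckpointHookTest checks that
+# epoch_1.pth / epoch_2.pth and the train-output files are written, and
+# BestCkptSaverHookTest checks that the best checkpoint is named after the
+# epoch and metric value reported by a dummy metric.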
+import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn + +from modelscope.metainfo import Trainers +from modelscope.metrics.builder import METRICS, MetricKeys +from modelscope.models.base import TorchModel +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys, ModelFile +from modelscope.utils.registry import default_group +from modelscope.utils.test_utils import create_dummy_test_dataset + +SRC_DIR = os.path.dirname(__file__) + + +def create_dummy_metric(): + _global_iter = 0 + + @METRICS.register_module(group_key=default_group, + module_name='DummyMetric', + force=True) + class DummyMetric: + + _fake_acc_by_epoch = {1: 0.1, 2: 0.5, 3: 0.2} + + def add(*args, **kwargs): + pass + + def evaluate(self): + global _global_iter + _global_iter += 1 + return {MetricKeys.ACCURACY: self._fake_acc_by_epoch[_global_iter]} + + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), 20) + + +class DummyModel(TorchModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + self.model_dir = SRC_DIR + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class CheckpointHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + create_dummy_metric() + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_checkpoint_hook(self): + global _global_iter + _global_iter = 0 + + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01, + 'options': { + 'grad_clip': { + 'max_norm': 2.0 + } + } + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2, + 'options': { + 'warmup': { + 'type': 'LinearWarmup', + 'warmup_iters': 2 + } + } + }, + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1 + }] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=DummyModel(), + data_collator=None, + train_dataset=dummy_dataset, + max_epochs=2) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + output_files = os.listdir( + os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)) + self.assertIn(ModelFile.CONFIGURATION, output_files) + self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, output_files) + copy_src_files = os.listdir(SRC_DIR) + self.assertIn(copy_src_files[0], output_files) + self.assertIn(copy_src_files[-1], output_files) + + +class BestCkptSaverHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + create_dummy_metric() + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def 
test_best_checkpoint_hook(self): + global _global_iter + _global_iter = 0 + + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': + self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01 + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2 + }, + 'hooks': [{ + 'type': 'BestCkptSaverHook', + 'metric_key': MetricKeys.ACCURACY, + 'rule': 'min' + }, { + 'type': 'EvaluationHook', + 'interval': 1, + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': ['DummyMetric'] + } + } + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=DummyModel(), + data_collator=None, + train_dataset=dummy_dataset, + eval_dataset=dummy_dataset, + max_epochs=3) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'best_{LogKeys.EPOCH}1_{MetricKeys.ACCURACY}0.1.pth', + results_files) + + output_files = os.listdir( + os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)) + self.assertIn(ModelFile.CONFIGURATION, output_files) + self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, output_files) + copy_src_files = os.listdir(SRC_DIR) + self.assertIn(copy_src_files[0], output_files) + self.assertIn(copy_src_files[-1], output_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/test_evaluation_hook.py b/tests/trainers/hooks/test_evaluation_hook.py new file mode 100644 index 0000000..b1865ea --- /dev/null +++ b/tests/trainers/hooks/test_evaluation_hook.py @@ -0,0 +1,113 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
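+# EvaluationHook test: a DummyMetric that always reports accuracy 0.5 is
+# registered, and the trainer's metric_values are asserted to match.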
+import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn + +from modelscope.metainfo import Trainers +from modelscope.metrics.builder import METRICS, MetricKeys +from modelscope.models.base import TorchModel +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile +from modelscope.utils.registry import default_group +from modelscope.utils.test_utils import create_dummy_test_dataset + + +def create_dummy_metric(): + @METRICS.register_module(group_key=default_group, + module_name='DummyMetric', + force=True) + class DummyMetric: + def add(*args, **kwargs): + pass + + def evaluate(self): + return {MetricKeys.ACCURACY: 0.5} + + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), 20) + + +class DummyModel(TorchModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class EvaluationHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + create_dummy_metric() + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_evaluation_hook(self): + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01, + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2, + }, + 'hooks': [{ + 'type': 'EvaluationHook', + 'interval': 1, + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': ['DummyMetric'] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=DummyModel(), + data_collator=None, + train_dataset=dummy_dataset, + eval_dataset=dummy_dataset, + max_epochs=1) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + self.assertDictEqual(trainer.metric_values, {'accuracy': 0.5}) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/test_lr_scheduler_hook.py b/tests/trainers/hooks/test_lr_scheduler_hook.py new file mode 100644 index 0000000..0de58b8 --- /dev/null +++ b/tests/trainers/hooks/test_lr_scheduler_hook.py @@ -0,0 +1,299 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
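+# LrSchedulerHook tests: each case drives the trainer hook stages manually
+# and compares the logged learning rate with the optimizer's, covering a
+# plain MultiStepLR schedule, a LinearWarmup + MultiStepLR schedule, and a
+# ReduceLROnPlateau schedule via PlateauLrSchedulerHook.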
+import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn +from torch.optim import SGD +from torch.optim.lr_scheduler import MultiStepLR + +from modelscope.metainfo import Trainers +from modelscope.metrics.builder import METRICS, MetricKeys +from modelscope.models.base import TorchModel +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages +from modelscope.utils.registry import default_group +from modelscope.utils.test_utils import create_dummy_test_dataset + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), 10) + + +def create_dummy_metric(): + _global_iter = 0 + + @METRICS.register_module(group_key=default_group, + module_name='DummyMetric', + force=True) + class DummyMetric: + + _fake_acc_by_epoch = {1: 0.1, 2: 0.1, 3: 0.1, 4: 0.1, 5: 0.3} + + def add(*args, **kwargs): + pass + + def evaluate(self): + global _global_iter + _global_iter += 1 + return {MetricKeys.ACCURACY: self._fake_acc_by_epoch[_global_iter]} + + +class DummyModel(TorchModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class LrSchedulerHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + create_dummy_metric() + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_lr_scheduler_hook(self): + global _global_iter + _global_iter = 0 + + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + } + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + optimizers=(optimizer, lr_scheduler), + max_epochs=5, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.register_optimizers_hook() + + trainer.invoke_hook(TrainerStages.before_run) + log_lrs = [] + optim_lrs = [] + for _ in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + + log_lrs.append(trainer.log_buffer.output[LogKeys.LR]) + optim_lrs.append(optimizer.param_groups[0]['lr']) + + trainer.invoke_hook(TrainerStages.after_train_epoch) + trainer._epoch += 1 + trainer.invoke_hook(TrainerStages.after_run) + + iters = 5 + target_lrs = [0.01] * iters * 2 + [0.001] * iters * 2 + [0.0001 + ] * iters * 1 + self.assertListEqual(log_lrs, target_lrs) + self.assertListEqual(optim_lrs, target_lrs) + + def test_warmup_lr_scheduler_hook(self): + 
global _global_iter + _global_iter = 0 + + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01 + }, + 'lr_scheduler': { + 'type': 'MultiStepLR', + 'milestones': [4, 6], + 'options': { + 'warmup': { + 'type': 'LinearWarmup', + 'warmup_iters': 3 + } + } + } + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + max_epochs=7, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.register_optimizers_hook() + + trainer.invoke_hook(TrainerStages.before_run) + log_lrs = [] + optim_lrs = [] + for _ in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + + log_lrs.append(round(trainer.log_buffer.output[LogKeys.LR], 5)) + optim_lrs.append( + round(trainer.optimizer.param_groups[0]['lr'], 5)) + + trainer.invoke_hook(TrainerStages.after_train_epoch) + trainer.invoke_hook(TrainerStages.after_run) + + iters = 5 + target_lrs = [0.001] * iters * 1 + [0.004] * iters * 1 + [ + 0.007 + ] * iters * 1 + [0.01] * iters * 1 + [0.001] * iters * 2 + [ + 0.0001 + ] * iters * 1 + + self.assertListEqual(log_lrs, target_lrs) + self.assertListEqual(optim_lrs, target_lrs) + + +class PlateauLrSchedulerHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + create_dummy_metric() + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_plateau_lr_scheduler_hook(self): + global _global_iter + _global_iter = 0 + + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'lr_scheduler': { + 'type': 'ReduceLROnPlateau', + 'mode': 'max', + 'factor': 0.1, + 'patience': 2, + }, + 'lr_scheduler_hook': { + 'type': 'PlateauLrSchedulerHook', + 'metric_key': MetricKeys.ACCURACY + }, + 'hooks': [{ + 'type': 'EvaluationHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': ['DummyMetric'] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimizer = SGD(model.parameters(), lr=0.01) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + eval_dataset=dummy_dataset, + optimizers=(optimizer, None), + max_epochs=5, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.train_dataloader = train_dataloader + 
trainer.data_loader = train_dataloader + trainer.register_optimizers_hook() + trainer.register_hook_from_cfg(trainer.cfg.train.hooks) + + trainer.invoke_hook(TrainerStages.before_run) + log_lrs = [] + optim_lrs = [] + for _ in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + + log_lrs.append(trainer.log_buffer.output[LogKeys.LR]) + optim_lrs.append(optimizer.param_groups[0]['lr']) + + trainer.invoke_hook(TrainerStages.after_train_epoch) + trainer._epoch += 1 + trainer.invoke_hook(TrainerStages.after_run) + + iters = 5 + target_lrs = [0.01] * iters * 4 + [0.001] * iters * 1 + self.assertListEqual(log_lrs, target_lrs) + self.assertListEqual(optim_lrs, target_lrs) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/test_optimizer_hook.py b/tests/trainers/hooks/test_optimizer_hook.py new file mode 100644 index 0000000..b672b51 --- /dev/null +++ b/tests/trainers/hooks/test_optimizer_hook.py @@ -0,0 +1,176 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn +from torch.optim import SGD +from torch.optim.lr_scheduler import MultiStepLR + +from modelscope.metainfo import Trainers +from modelscope.models.base import Model +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile, TrainerStages +from modelscope.utils.test_utils import create_dummy_test_dataset + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(2, )), + np.random.randint(0, 2, (1, )), 10) + + +class DummyModel(nn.Module, Model): + def __init__(self): + super().__init__() + self.linear = nn.Linear(2, 2) + self.bn = nn.BatchNorm1d(2) + + def forward(self, feat, labels): + x = self.linear(feat) + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class OptimizerHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_optimizer_hook(self): + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + } + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + optimizers=(optimizer, lr_scheduler), + max_epochs=2, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.register_optimizers_hook() + + trainer.invoke_hook(TrainerStages.before_run) + + for _ in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + 
trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + + self.assertEqual( + len(trainer.optimizer.param_groups[0]['params']), 4) + for i in range(4): + self.assertTrue(trainer.optimizer.param_groups[0]['params'] + [i].requires_grad) + + trainer.invoke_hook(TrainerStages.after_train_epoch) + trainer._epoch += 1 + trainer.invoke_hook(TrainerStages.after_run) + + +class TorchAMPOptimizerHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + @unittest.skipIf(not torch.cuda.is_available(), + 'skip this test when cuda is not available') + def test_amp_optimizer_hook(self): + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + } + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel().cuda() + optimizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + optimizers=(optimizer, lr_scheduler), + max_epochs=2, + use_fp16=True) + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.register_optimizers_hook() + + trainer.invoke_hook(TrainerStages.before_run) + + for _ in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + for k, v in data_batch.items(): + data_batch[k] = v.cuda() + trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + + self.assertEqual(trainer.train_outputs['logits'].dtype, + torch.float16) + + # test if `after_train_iter`, whether the model is reset to fp32 + trainer.train_step(trainer.model, data_batch) + self.assertEqual(trainer.train_outputs['logits'].dtype, + torch.float32) + + self.assertEqual( + len(trainer.optimizer.param_groups[0]['params']), 4) + for i in range(4): + self.assertTrue(trainer.optimizer.param_groups[0]['params'] + [i].requires_grad) + + trainer.invoke_hook(TrainerStages.after_train_epoch) + trainer._epoch += 1 + trainer.invoke_hook(TrainerStages.after_run) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/hooks/test_timer_hook.py b/tests/trainers/hooks/test_timer_hook.py new file mode 100644 index 0000000..dfc3cc7 --- /dev/null +++ b/tests/trainers/hooks/test_timer_hook.py @@ -0,0 +1,125 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
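+# Unit test for IterTimerHook: after every epoch the hook should have recorded
+# data-loading time, iteration time and loss in the trainer's log buffer, with
+# one entry per iteration (5 iterations per epoch for this dummy dataset).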
+import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn +from torch.optim import SGD +from torch.optim.lr_scheduler import MultiStepLR + +from modelscope.metainfo import Trainers +from modelscope.models.base import Model +from modelscope.trainers import build_trainer +from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages +from modelscope.utils.test_utils import create_dummy_test_dataset + +dummy_dataset = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), 10) + + +class DummyModel(nn.Module, Model): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class IterTimerHookTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_iter_time_hook(self): + json_cfg = { + 'task': 'image_classification', + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'hooks': [{ + 'type': 'IterTimerHook', + }] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + train_dataset=dummy_dataset, + optimizers=(optimizer, lr_scheduler), + max_epochs=5, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + train_dataloader = trainer._build_dataloader_with_dataset( + trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) + trainer.register_optimizers_hook() + trainer.register_hook_from_cfg(trainer.cfg.train.hooks) + trainer.train_dataloader = train_dataloader + trainer.data_loader = train_dataloader + trainer.invoke_hook(TrainerStages.before_run) + for i in range(trainer._epoch, trainer._max_epochs): + trainer.invoke_hook(TrainerStages.before_train_epoch) + for _, data_batch in enumerate(train_dataloader): + trainer.invoke_hook(TrainerStages.before_train_iter) + trainer.train_step(trainer.model, data_batch) + trainer.invoke_hook(TrainerStages.after_train_iter) + + self.assertIn(LogKeys.DATA_LOAD_TIME, + trainer.log_buffer.val_history) + self.assertIn(LogKeys.ITER_TIME, + trainer.log_buffer.val_history) + self.assertIn(LogKeys.LOSS, trainer.log_buffer.val_history) + + trainer.invoke_hook(TrainerStages.after_train_epoch) + + target_len = 5 + self.assertEqual( + len(trainer.log_buffer.val_history[LogKeys.DATA_LOAD_TIME]), + target_len) + self.assertEqual( + len(trainer.log_buffer.val_history[LogKeys.ITER_TIME]), + target_len) + self.assertEqual(len(trainer.log_buffer.val_history[LogKeys.LOSS]), + target_len) + + self.assertEqual( + len(trainer.log_buffer.n_history[LogKeys.DATA_LOAD_TIME]), + target_len) + self.assertEqual( + len(trainer.log_buffer.n_history[LogKeys.ITER_TIME]), + target_len) + self.assertEqual(len(trainer.log_buffer.n_history[LogKeys.LOSS]), + target_len) + + trainer.invoke_hook(TrainerStages.after_run) + + +if __name__ == '__main__': + 
unittest.main() diff --git a/tests/trainers/lrscheduler/__init__.py b/tests/trainers/lrscheduler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/lrscheduler/warmup/__init__.py b/tests/trainers/lrscheduler/warmup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/lrscheduler/warmup/test_warmup_base.py b/tests/trainers/lrscheduler/warmup/test_warmup_base.py new file mode 100644 index 0000000..fb78b86 --- /dev/null +++ b/tests/trainers/lrscheduler/warmup/test_warmup_base.py @@ -0,0 +1,81 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import torch +from torch import nn +from torch.optim.lr_scheduler import MultiStepLR + + +class WarmupTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def test_constant_warmup(self): + from modelscope.trainers.lrscheduler.warmup import ConstantWarmup + + net = nn.Linear(2, 2) + base_lr = 0.02 + warmup_iters = 3 + warmup_ratio = 0.2 + optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9) + lr_scheduler = MultiStepLR(optimizer, milestones=[7, 9]) + lr_scheduler_with_warmup = ConstantWarmup(lr_scheduler, + warmup_iters=warmup_iters, + warmup_ratio=warmup_ratio) + + res = [] + for _ in range(10): + lr_scheduler_with_warmup.step() + for _, group in enumerate(optimizer.param_groups): + res.append(group['lr']) + + base_lrs = [0.02, 0.02, 0.02, 0.002, 0.002, 0.0002, 0.0002] + self.assertListEqual(res, [0.004, 0.004, 0.02] + base_lrs) + + def test_linear_warmup(self): + from modelscope.trainers.lrscheduler.warmup import LinearWarmup + + net = nn.Linear(2, 2) + base_lr = 0.02 + warmup_iters = 3 + warmup_ratio = 0.1 + optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9) + lr_scheduler = MultiStepLR(optimizer, milestones=[7, 9]) + lr_scheduler_with_warmup = LinearWarmup(lr_scheduler, + warmup_iters=warmup_iters, + warmup_ratio=warmup_ratio) + + res = [] + for _ in range(10): + lr_scheduler_with_warmup.step() + for _, group in enumerate(optimizer.param_groups): + res.append(round(group['lr'], 5)) + + base_lrs = [0.02, 0.02, 0.02, 0.002, 0.002, 0.0002, 0.0002] + self.assertListEqual(res, [0.0080, 0.0140, 0.02] + base_lrs) + + def test_exp_warmup(self): + from modelscope.trainers.lrscheduler.warmup import ExponentialWarmup + + net = nn.Linear(2, 2) + base_lr = 0.02 + warmup_iters = 3 + warmup_ratio = 0.1 + optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9) + lr_scheduler = MultiStepLR(optimizer, milestones=[7, 9]) + lr_scheduler_with_warmup = ExponentialWarmup(lr_scheduler, + warmup_iters=warmup_iters, + warmup_ratio=warmup_ratio) + + res = [] + for _ in range(10): + lr_scheduler_with_warmup.step() + for _, group in enumerate(optimizer.param_groups): + res.append(round(group['lr'], 5)) + + base_lrs = [0.02, 0.02, 0.02, 0.002, 0.002, 0.0002, 0.0002] + self.assertListEqual(res, [0.00431, 0.00928, 0.02] + base_lrs) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/model_trainer_map.py b/tests/trainers/model_trainer_map.py new file mode 100644 index 0000000..4057c33 --- /dev/null +++ b/tests/trainers/model_trainer_map.py @@ -0,0 +1,145 @@ +model_trainer_map = { + 'damo/speech_frcrn_ans_cirm_16k': + ['tests/trainers/audio/test_ans_trainer.py'], + 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch': + ['tests/trainers/audio/test_asr_trainer.py'], + 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya': + 
['tests/trainers/audio/test_kws_farfield_trainer.py'], + 'damo/speech_charctc_kws_phone-xiaoyun': + ['tests/trainers/audio/test_kws_nearfield_trainer.py'], + 'damo/speech_mossformer_separation_temporal_8k': + ['tests/trainers/audio/test_separation_trainer.py'], + 'speech_tts/speech_sambert-hifigan_tts_zh-cn_multisp_pretrain_16k': + ['tests/trainers/audio/test_tts_trainer.py'], + 'damo/cv_mobilenet_face-2d-keypoints_alignment': + ['tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py'], + 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody': + ['tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py'], + 'damo/cv_yolox-pai_hand-detection': + ['tests/trainers/easycv/test_easycv_trainer_hand_detection.py'], + 'damo/cv_r50_panoptic-segmentation_cocopan': + ['tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py'], + 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k': + ['tests/trainers/easycv/test_segformer.py'], + 'damo/cv_resnet_carddetection_scrfd34gkps': + ['tests/trainers/test_card_detection_scrfd_trainer.py'], + 'damo/multi-modal_clip-vit-base-patch16_zh': [ + 'tests/trainers/test_clip_trainer.py' + ], + 'damo/nlp_space_pretrained-dialog-model': [ + 'tests/trainers/test_dialog_intent_trainer.py' + ], + 'damo/cv_resnet_facedetection_scrfd10gkps': [ + 'tests/trainers/test_face_detection_scrfd_trainer.py' + ], + 'damo/nlp_structbert_faq-question-answering_chinese-base': [ + 'tests/trainers/test_finetune_faq_question_answering.py' + ], + 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64': [ + 'tests/trainers/test_finetune_gpt_moe.py' + ], + 'damo/nlp_gpt3_text-generation_1.3B': [ + 'tests/trainers/test_finetune_gpt3.py' + ], + 'damo/mgeo_backbone_chinese_base': [ + 'tests/trainers/test_finetune_mgeo.py' + ], + 'damo/mplug_backbone_base_en': ['tests/trainers/test_finetune_mplug.py'], + 'damo/nlp_structbert_backbone_base_std': [ + 'tests/trainers/test_finetune_sequence_classification.py', + 'tests/trainers/test_finetune_token_classification.py' + ], + 'damo/nlp_palm2.0_text-generation_english-base': [ + 'tests/trainers/test_finetune_text_generation.py' + ], + 'damo/nlp_gpt3_text-generation_chinese-base': [ + 'tests/trainers/test_finetune_text_generation.py' + ], + 'damo/nlp_palm2.0_text-generation_chinese-base': [ + 'tests/trainers/test_finetune_text_generation.py' + ], + 'damo/nlp_corom_passage-ranking_english-base': [ + 'tests/trainers/test_finetune_text_ranking.py' + ], + 'damo/nlp_rom_passage-ranking_chinese-base': [ + 'tests/trainers/test_finetune_text_ranking.py' + ], + 'damo/cv_nextvit-small_image-classification_Dailylife-labels': [ + 'tests/trainers/test_general_image_classification_trainer.py' + ], + 'damo/cv_convnext-base_image-classification_garbage': [ + 'tests/trainers/test_general_image_classification_trainer.py' + ], + 'damo/cv_beitv2-base_image-classification_patch16_224_pt1k_ft22k_in1k': [ + 'tests/trainers/test_general_image_classification_trainer.py' + ], + 'damo/cv_csrnet_image-color-enhance-models': [ + 'tests/trainers/test_image_color_enhance_trainer.py' + ], + 'damo/cv_nafnet_image-deblur_gopro': [ + 'tests/trainers/test_image_deblur_trainer.py' + ], + 'damo/cv_resnet101_detection_fewshot-defrcn': [ + 'tests/trainers/test_image_defrcn_fewshot_trainer.py' + ], + 'damo/cv_nafnet_image-denoise_sidd': [ + 'tests/trainers/test_image_denoise_trainer.py' + ], + 'damo/cv_fft_inpainting_lama': [ + 'tests/trainers/test_image_inpainting_trainer.py' + ], + 'damo/cv_swin-b_image-instance-segmentation_coco': [ + 
'tests/trainers/test_image_instance_segmentation_trainer.py' + ], + 'damo/cv_gpen_image-portrait-enhancement': [ + 'tests/trainers/test_image_portrait_enhancement_trainer.py' + ], + 'damo/cv_clip-it_video-summarization_language-guided_en': [ + 'tests/trainers/test_language_guided_video_summarization_trainer.py' + ], + 'damo/cv_resnet50-bert_video-scene-segmentation_movienet': [ + 'tests/trainers/test_movie_scene_segmentation_trainer.py' + ], + 'damo/ofa_mmspeech_pretrain_base_zh': [ + 'tests/trainers/test_ofa_mmspeech_trainer.py' + ], + 'damo/ofa_ocr-recognition_scene_base_zh': [ + 'tests/trainers/test_ofa_trainer.py' + ], + 'damo/nlp_plug_text-generation_27B': [ + 'tests/trainers/test_plug_finetune_text_generation.py' + ], + 'damo/cv_swin-t_referring_video-object-segmentation': [ + 'tests/trainers/test_referring_video_object_segmentation_trainer.py' + ], + 'damo/nlp_convai_text2sql_pretrain_cn': [ + 'tests/trainers/test_table_question_answering_trainer.py' + ], + 'damo/multi-modal_team-vit-large-patch14_multi-modal-similarity': [ + 'tests/trainers/test_team_transfer_trainer.py' + ], + 'damo/cv_tinynas_object-detection_damoyolo': [ + 'tests/trainers/test_tinynas_damoyolo_trainer.py' + ], + 'damo/nlp_structbert_sentence-similarity_chinese-tiny': [ + 'tests/trainers/test_trainer_with_nlp.py' + ], + 'damo/nlp_structbert_sentiment-classification_chinese-base': [ + 'tests/trainers/test_trainer_with_nlp.py' + ], + 'damo/nlp_structbert_sentence-similarity_chinese-base': [ + 'tests/trainers/test_trainer_with_nlp.py' + ], + 'damo/nlp_csanmt_translation_en2zh': [ + 'tests/trainers/test_translation_trainer.py' + ], + 'damo/nlp_csanmt_translation_en2fr': [ + 'tests/trainers/test_translation_trainer.py' + ], + 'damo/nlp_csanmt_translation_en2es': [ + 'tests/trainers/test_translation_trainer.py' + ], + 'damo/cv_googlenet_pgl-video-summarization': [ + 'tests/trainers/test_video_summarization_trainer.py' + ], +} diff --git a/tests/trainers/test_card_detection_scrfd_trainer.py b/tests/trainers/test_card_detection_scrfd_trainer.py new file mode 100644 index 0000000..9fe9bfa --- /dev/null +++ b/tests/trainers/test_card_detection_scrfd_trainer.py @@ -0,0 +1,146 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_resnet_carddetection_scrfd34gkps' + # mini dataset only for unit test, remove '_mini' for full dataset. 
+ ms_ds_syncards = MsDataset.load('SyntheticCards_mini', + namespace='shaoxuan') + + data_path = ms_ds_syncards.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer(name=Trainers.card_detection_scrfd, + default_args=kwargs) + trainer.train() + + +class TestCardDetectionScrfdTrainerSingleGPU(unittest.TestCase): + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict(cfg_file=os.path.join(self.cache_path, 'mmcv_scrfd.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer(name=Trainers.card_detection_scrfd, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict(cfg_file=os.path.join(self.cache_path, 'mmcv_scrfd.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_BIN_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer(name=Trainers.card_detection_scrfd, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestCardDetectionScrfdTrainerMultiGpus(DistributedTestCase): + def setUp(self): + print(('MultiGPUs Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'mmcv_scrfd.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def 
test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict(cfg_file=os.path.join(self.cache_path, 'mmcv_scrfd.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_BIN_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_clip_trainer.py b/tests/trainers/test_clip_trainer.py new file mode 100644 index 0000000..33e0ad4 --- /dev/null +++ b/tests/trainers/test_clip_trainer.py @@ -0,0 +1,82 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import json +import os +import shutil +import unittest + +from modelscope.metainfo import Metrics, Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import test_level + + +class TestClipTrainer(unittest.TestCase): + def setUp(self) -> None: + self.finetune_cfg = \ + {'framework': 'pytorch', + 'task': 'multi-modal-embedding', + 'pipeline': {'type': 'multi-modal-embedding'}, + 'pretrained_model': {'model_name': 'damo/multi-modal_clip-vit-base-patch16_zh'}, + 'dataset': {'column_map': {'img': 'image', 'text': 'query'}}, + 'train': {'work_dir': './workspace/ckpts/clip', + # 'launcher': 'pytorch', + 'max_epochs': 1, + 'use_fp16': True, + 'dataloader': {'batch_size_per_gpu': 8, + 'workers_per_gpu': 0, + 'shuffle': True, + 'drop_last': True}, + 'lr_scheduler': {'name': 'cosine', + 'warmup_proportion': 0.01}, + 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False}, + 'optimizer': {'type': 'AdamW'}, + 'optimizer_hparams': {'lr': 5e-05, 'weight_decay': 0.01}, + 'optimizer_hook': {'type': 'TorchAMPOptimizerHook', + 'cumulative_iters': 1, + 'loss_keys': 'loss'}, + 'loss_cfg': {'aggregate': True}, + 'hooks': [{'type': 'BestCkptSaverHook', + 'metric_key': 'inbatch_t2i_recall_at_1', + 'interval': 100}, + {'type': 'TextLoggerHook', 'interval': 1}, + {'type': 'IterTimerHook'}, + {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}, + {'type': 'ClipClampLogitScaleHook'}]}, + 'evaluation': {'dataloader': {'batch_size_per_gpu': 8, + 'workers_per_gpu': 0, + 'shuffle': True, + 'drop_last': True}, + 'metrics': [{'type': 'inbatch_recall'}]}, + 'preprocessor': []} + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_std(self): + WORKSPACE = './workspace/ckpts/clip' + os.makedirs(WORKSPACE, exist_ok=True) + config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION) + with open(config_file, 'w') as writer: + json.dump(self.finetune_cfg, writer) + + pretrained_model = 'damo/multi-modal_clip-vit-base-patch16_zh' + args = dict(model=pretrained_model, + work_dir=WORKSPACE, + train_dataset=MsDataset.load('muge', + namespace='modelscope', + split='train[:200]'), + eval_dataset=MsDataset.load('muge', + namespace='modelscope', + split='validation[:100]'), + metrics=[Metrics.inbatch_recall], + cfg_file=config_file) + trainer = build_trainer(name=Trainers.clip_multi_modal_embedding, + default_args=args) + trainer.train() + + 
self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, + os.listdir(os.path.join(WORKSPACE, 'output'))) + shutil.rmtree(WORKSPACE) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_dialog_intent_trainer.py b/tests/trainers/test_dialog_intent_trainer.py new file mode 100644 index 0000000..c48b067 --- /dev/null +++ b/tests/trainers/test_dialog_intent_trainer.py @@ -0,0 +1,100 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import json +import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import DownloadMode, ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class TestDialogIntentTrainer(unittest.TestCase): + def setUp(self): + self.save_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.save_dir): + os.mkdir(self.save_dir) + + def tearDown(self): + shutil.rmtree(self.save_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + model_id = 'damo/nlp_space_pretrained-dialog-model' + data_banking = MsDataset.load('banking77') + self.data_dir = data_banking._hf_ds.config_kwargs['split_config'][ + 'train'] + self.model_dir = snapshot_download(model_id) + self.debugging = True + kwargs = dict(model_dir=self.model_dir, + cfg_name='intent_train_config.json', + cfg_modify_fn=self.cfg_modify_fn) + trainer = build_trainer(name=Trainers.dialog_intent_trainer, + default_args=kwargs) + trainer.train() + + def cfg_modify_fn(self, cfg): + config = { + 'num_intent': 77, + 'BPETextField': { + 'vocab_path': '', + 'data_name': 'banking77', + 'data_root': self.data_dir, + 'understand': True, + 'generation': False, + 'max_len': 256 + }, + 'Dataset': { + 'data_dir': self.data_dir, + 'with_contrastive': False, + 'trigger_role': 'user', + 'trigger_data': 'banking' + }, + 'Trainer': { + 'can_norm': True, + 'seed': 11, + 'gpu': 1, + 'save_dir': self.save_dir, + 'batch_size_label': 128, + 'batch_size_nolabel': 0, + 'log_steps': 20 + }, + 'Model': { + 'init_checkpoint': self.model_dir, + 'model': 'IntentUnifiedTransformer', + 'example': False, + 'num_intent': 77, + 'with_rdrop': True, + 'num_turn_embeddings': 21, + 'dropout': 0.25, + 'kl_ratio': 5.0, + 'embed_dropout': 0.25, + 'attn_dropout': 0.25, + 'ff_dropout': 0.25, + 'with_pool': False, + 'warmup_steps': -1 + } + } + cfg.BPETextField.vocab_path = os.path.join(self.model_dir, + ModelFile.VOCAB_FILE) + cfg.num_intent = 77 + cfg.Trainer.update(config['Trainer']) + cfg.BPETextField.update(config['BPETextField']) + cfg.Dataset.update(config['Dataset']) + cfg.Model.update(config['Model']) + if self.debugging: + cfg.Trainer.save_checkpoint = False + cfg.Trainer.num_epochs = 1 + cfg.Trainer.batch_size_label = 64 + return cfg + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_dialog_modeling_trainer.py b/tests/trainers/test_dialog_modeling_trainer.py new file mode 100644 index 0000000..d2aa129 --- /dev/null +++ b/tests/trainers/test_dialog_modeling_trainer.py @@ -0,0 +1,76 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
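+# Test for the SPACE dialog modeling trainer: downloads the MultiWoz2.0 dataset
+# and the pretrained dialog model, builds the trainer with a modified config,
+# and only checks that construction succeeds; the actual train/evaluate run is
+# left commented out because it takes too long.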
+import os +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Preprocessors, Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class TestDialogModelingTrainer(unittest.TestCase): + + model_id = 'damo/nlp_space_pretrained-dialog-model' + output_dir = './dialog_fintune_result' + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + # download data set + data_multiwoz = MsDataset.load( + 'MultiWoz2.0', download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) + data_dir = os.path.join( + data_multiwoz._hf_ds.config_kwargs['split_config']['train'], + 'data') + + # download model + model_dir = snapshot_download(self.model_id) + + # dialog finetune config + def cfg_modify_fn(cfg): + config = { + 'seed': 10, + 'gpu': 1, + 'use_data_distributed': False, + 'valid_metric_name': '-loss', + 'num_epochs': 60, + 'save_dir': self.output_dir, + 'token_loss': True, + 'batch_size': 4, + 'log_steps': 10, + 'valid_steps': 0, + 'save_checkpoint': True, + 'save_summary': False, + 'shuffle': True, + 'sort_pool_size': 0 + } + + cfg.Trainer = config + cfg.use_gpu = torch.cuda.is_available() and config['gpu'] >= 1 + return cfg + + # trainer config + kwargs = dict(model_dir=model_dir, + cfg_name='gen_train_config.json', + data_dir=data_dir, + cfg_modify_fn=cfg_modify_fn) + + trainer = build_trainer(name=Trainers.dialog_modeling_trainer, + default_args=kwargs) + assert trainer is not None + + # todo: it takes too long time to train and evaluate. It will be optimized later. + """ + trainer.train() + checkpoint_path = os.path.join(self.output_dir, + ModelFile.TORCH_MODEL_BIN_FILE) + assert os.path.exists(checkpoint_path) + trainer.evaluate(checkpoint_path=checkpoint_path) + """ + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_document_grounded_dialog_generate_trainer.py b/tests/trainers/test_document_grounded_dialog_generate_trainer.py new file mode 100644 index 0000000..858d90e --- /dev/null +++ b/tests/trainers/test_document_grounded_dialog_generate_trainer.py @@ -0,0 +1,47 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import json +import os +import unittest + +from modelscope.msdatasets import MsDataset +from modelscope.trainers.nlp.document_grounded_dialog_generate_trainer import \ + DocumentGroundedDialogGenerateTrainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class DocumentGroundedDialogGenerateTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'DAMO_ConvAI/nlp_convai_generation_pretrain' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_model_name(self): + # load data + train_dataset = MsDataset.load( + 'DAMO_ConvAI/FrDoc2BotGeneration', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + test_len = 1 + sub_train_dataset = [x for x in train_dataset][:1] + sub_train_dataset = [{ + 'query': + x['query'][:test_len], + 'rerank': + json.dumps([p[:test_len] for p in json.loads(x['rerank'])]), + 'response': + x['response'][:test_len] + } for x in sub_train_dataset] + + trainer = DocumentGroundedDialogGenerateTrainer( + model=self.model_id, + train_dataset=sub_train_dataset, + eval_dataset=sub_train_dataset, + ) + trainer.model.model.config['num_beams'] = 1 + trainer.model.model.config['target_sequence_length'] = test_len + trainer.train(batch_size=1, total_epoches=1, learning_rate=2e-4) + trainer.evaluate(checkpoint_path=os.path.join(trainer.model.model_dir, + 'finetuned_model.bin')) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_document_grounded_dialog_rerank_trainer.py b/tests/trainers/test_document_grounded_dialog_rerank_trainer.py new file mode 100644 index 0000000..05f1971 --- /dev/null +++ b/tests/trainers/test_document_grounded_dialog_rerank_trainer.py @@ -0,0 +1,74 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
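+# Smoke test for DocumentGroundedDialogRerankTrainer: fine-tunes the rerank
+# model for one epoch on ten FrDoc2BotRerank samples, with the full training
+# configuration passed as an explicit args dict.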
+import json +import os +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer import \ + DocumentGroundedDialogRerankTrainer +from modelscope.utils.config import Config +from modelscope.utils.constant import DownloadMode, ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class TestDialogIntentTrainer(unittest.TestCase): + def setUp(self): + self.model_id = 'DAMO_ConvAI/nlp_convai_ranking_pretrain' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + args = { + 'device': 'gpu', + 'tokenizer_name': '', + 'cache_dir': '', + 'instances_size': 1, + 'output_dir': './model', + 'max_num_seq_pairs_per_device': 32, + 'full_train_batch_size': 32, + 'gradient_accumulation_steps': 32, + 'per_gpu_train_batch_size': 1, + 'num_train_epochs': 1, + 'train_instances': -1, + 'learning_rate': 3e-5, + 'max_seq_length': 128, + 'num_labels': 2, + 'fold': '', # IofN + 'doc_match_weight': 0.0, + 'query_length': 64, + 'resume_from': '', # to resume training from a checkpoint + 'config_name': '', + 'do_lower_case': True, + 'weight_decay': 0.0, # previous default was 0.01 + 'adam_epsilon': 1e-8, + 'max_grad_norm': 1.0, + 'warmup_instances': 0, # previous default was 0.1 of total + 'warmup_fraction': 0.0, # only applies if warmup_instances <= 0 + 'no_cuda': False, + 'n_gpu': 1, + 'seed': 42, + 'fp16': False, + 'fp16_opt_level': 'O1', # previous default was O2 + 'per_gpu_eval_batch_size': 8, + 'log_on_all_nodes': False, + 'world_size': 1, + 'global_rank': 0, + 'local_rank': -1, + 'tokenizer_resize': True, + 'model_resize': True + } + args[ + 'gradient_accumulation_steps'] = args['full_train_batch_size'] // ( + args['per_gpu_train_batch_size'] * args['world_size']) + data = MsDataset.load('DAMO_ConvAI/FrDoc2BotRerank', + download_mode=DownloadMode.FORCE_REDOWNLOAD, + split='train') + sub_train_dataset = [x for x in data][:10] + trainer = DocumentGroundedDialogRerankTrainer( + model=self.model_id, dataset=sub_train_dataset, args=args) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_document_grounded_dialog_retrieval_trainer.py b/tests/trainers/test_document_grounded_dialog_retrieval_trainer.py new file mode 100644 index 0000000..590fe1c --- /dev/null +++ b/tests/trainers/test_document_grounded_dialog_retrieval_trainer.py @@ -0,0 +1,40 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
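+# Smoke test for DocumentGroundedDialogRetrievalTrainer: trains for two epochs
+# on ten FrDoc2BotRetrieval samples against a small hand-written passage list,
+# then evaluates from the saved finetuned_model.bin checkpoint.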
+import json +import os +import unittest + +from modelscope.msdatasets import MsDataset +from modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer import \ + DocumentGroundedDialogRetrievalTrainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class DocumentGroundedDialogRetrievalTest(unittest.TestCase): + def setUp(self) -> None: + self.model_id = 'DAMO_ConvAI/nlp_convai_retrieval_pretrain' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_model_name(self): + # load data + train_dataset = MsDataset.load( + 'DAMO_ConvAI/FrDoc2BotRetrieval', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + sub_train_dataset = [x for x in train_dataset][:10] + all_passages = ['阑尾炎', '肠胃炎', '肚脐开始', '肚脐为止'] + + trainer = DocumentGroundedDialogRetrievalTrainer( + model=self.model_id, + train_dataset=sub_train_dataset, + eval_dataset=sub_train_dataset, + all_passages=all_passages) + trainer.train( + batch_size=64, + total_epoches=2, + ) + trainer.evaluate(checkpoint_path=os.path.join(trainer.model.model_dir, + 'finetuned_model.bin')) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_face_detection_damofd_trainer.py b/tests/trainers/test_face_detection_damofd_trainer.py new file mode 100644 index 0000000..4a36791 --- /dev/null +++ b/tests/trainers/test_face_detection_damofd_trainer.py @@ -0,0 +1,150 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_ddsar_face-detection_iclr23-damofd' + # mini dataset only for unit test, remove '_mini' for full dataset. 
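+    # WIDER_FACE_mini resolves to local train/validation folders through the
+    # dataset's split_config below; only one epoch is run in this unit test.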
+ ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') + + data_path = ms_ds_widerface.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + + +class TestFaceDetectionDamofdTrainerSingleGPU(unittest.TestCase): + + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.face_detection_scrfd, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestFaceDetectionDamofdTrainerMultiGpus(DistributedTestCase): + + def setUp(self): + print(('MultiGPUs Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'DamoFD_lms.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def 
test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'DamoFD_lms.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_face_detection_scrfd_trainer.py b/tests/trainers/test_face_detection_scrfd_trainer.py new file mode 100644 index 0000000..e432b09 --- /dev/null +++ b/tests/trainers/test_face_detection_scrfd_trainer.py @@ -0,0 +1,145 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_resnet_facedetection_scrfd10gkps' + # mini dataset only for unit test, remove '_mini' for full dataset. + ms_ds_widerface = MsDataset.load('WIDER_FACE_mini', namespace='shaoxuan') + + data_path = ms_ds_widerface.config_kwargs['split_config'] + train_dir = data_path['train'] + val_dir = data_path['validation'] + train_root = train_dir + '/' + os.listdir(train_dir)[0] + '/' + val_root = val_dir + '/' + os.listdir(val_dir)[0] + '/' + max_epochs = 1 # run epochs in unit test + + cache_path = snapshot_download(model_id) + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + return train_root, val_root, max_epochs, cache_path, tmp_dir + + +def train_func(**kwargs): + trainer = build_trainer(name=Trainers.face_detection_scrfd, + default_args=kwargs) + trainer.train() + + +class TestFaceDetectionScrfdTrainerSingleGPU(unittest.TestCase): + def setUp(self): + print(('SingleGPU Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def _cfg_modify_fn(self, cfg): + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 # batch size + return cfg + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch(self): + kwargs = dict(cfg_file=os.path.join(self.cache_path, 'mmcv_scrfd.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer(name=Trainers.face_detection_scrfd, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + 
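+    # Finetuning resumes from the downloaded pretrained checkpoint (counted as
+    # epoch 640) and trains one additional epoch, so epoch_641.pth is expected
+    # in the work dir.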
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict(cfg_file=os.path.join(self.cache_path, 'mmcv_scrfd.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_BIN_FILE), + cfg_modify_fn=self._cfg_modify_fn) + + trainer = build_trainer(name=Trainers.face_detection_scrfd, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class TestFaceDetectionScrfdTrainerMultiGpus(DistributedTestCase): + def setUp(self): + print(('MultiGPUs Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + self.train_root, self.val_root, self.max_epochs, self.cache_path, self.tmp_dir = _setup( + ) + cfg_file_path = os.path.join(self.cache_path, 'mmcv_scrfd.py') + cfg = Config.from_file(cfg_file_path) + cfg.checkpoint_config.interval = 1 + cfg.log_config.interval = 10 + cfg.evaluation.interval = 1 + cfg.data.workers_per_gpu = 3 + cfg.data.samples_per_gpu = 4 + cfg.dump(cfg_file_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpus_finetune(self): + pretrain_epoch = 640 + self.max_epochs += pretrain_epoch + kwargs = dict(cfg_file=os.path.join(self.cache_path, 'mmcv_scrfd.py'), + work_dir=self.tmp_dir, + train_root=self.train_root, + val_root=self.val_root, + total_epochs=self.max_epochs, + resume_from=os.path.join(self.cache_path, + ModelFile.TORCH_MODEL_BIN_FILE), + launcher='pytorch') + self.start(train_func, num_gpus=2, **kwargs) + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + for i in range(pretrain_epoch, self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_faq_question_answering.py b/tests/trainers/test_finetune_faq_question_answering.py new file mode 100644 index 0000000..1c8c16f --- /dev/null +++ b/tests/trainers/test_finetune_faq_question_answering.py @@ -0,0 +1,141 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
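+# Finetuning tests for the FAQ question-answering models (StructBERT and MGIMN
+# variants): train briefly (50 iterations per epoch) on the DAMO_NLP/jd dataset,
+# then compare pipeline predictions from the original and fine-tuned checkpoints
+# on a fixed query/support set.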
+import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.hub import read_config +from modelscope.utils.test_utils import test_level + + +class TestFinetuneFaqQuestionAnswering(unittest.TestCase): + param = { + 'query_set': ['给妈买的,挺好的,妈妈喜欢。'], + 'support_set': [{ + 'text': '挺好的,质量和服务都蛮好', + 'label': '1' + }, { + 'text': '内容较晦涩,小孩不感兴趣', + 'label': '0' + }, { + 'text': '贵且于我无用,买亏了', + 'label': '0' + }, { + 'text': '挺好,不错,喜欢,,', + 'label': '1' + }] + } + model_id = 'damo/nlp_structbert_faq-question-answering_chinese-base' + mgimn_model_id = 'damo/nlp_mgimn_faq-question-answering_chinese-base' + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def build_trainer(self, model_id, revision): + train_dataset = MsDataset.load('jd', + namespace='DAMO_NLP', + split='train').remap_columns( + {'sentence': 'text'}) + eval_dataset = MsDataset.load('jd', + namespace='DAMO_NLP', + split='validation').remap_columns( + {'sentence': 'text'}) + + cfg: Config = read_config(model_id, revision) + cfg.train.train_iters_per_epoch = 50 + cfg.evaluation.val_iters_per_epoch = 2 + cfg.train.seed = 1234 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'by_epoch': False, + 'interval': 50 + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 50 + }, { + 'type': 'TextLoggerHook', + 'by_epoch': False, + 'rounding_digits': 5, + 'interval': 10 + }] + cfg_file = os.path.join(self.tmp_dir, 'config.json') + cfg.dump(cfg_file) + + trainer = build_trainer(Trainers.faq_question_answering_trainer, + default_args=dict(model=model_id, + work_dir=self.tmp_dir, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + cfg_file=cfg_file)) + return trainer + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_faq_model_finetune(self): + trainer = self.build_trainer(self.model_id, 'v1.0.1') + trainer.train() + evaluate_result = trainer.evaluate() + self.assertAlmostEqual(evaluate_result['accuracy'], 0.95, delta=0.1) + + results_files = os.listdir(self.tmp_dir) + self.assertIn(ModelFile.TRAIN_OUTPUT_DIR, results_files) + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + pipeline_ins = pipeline(task=Tasks.faq_question_answering, + model=self.model_id) + result_before = pipeline_ins(self.param) + self.assertEqual(result_before['output'][0][0]['label'], '1') + self.assertAlmostEqual(result_before['output'][0][0]['score'], + 0.2, + delta=0.2) + pipeline_ins = pipeline(task=Tasks.faq_question_answering, + model=output_dir) + result_after = pipeline_ins(self.param) + self.assertEqual(result_after['output'][0][0]['label'], '1') + self.assertAlmostEqual(result_after['output'][0][0]['score'], + 0.8, + delta=0.2) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_faq_mgimn_model_finetune(self): + trainer = self.build_trainer(self.mgimn_model_id, 'v1.0.0') + trainer.train() + evaluate_result = trainer.evaluate() + self.assertAlmostEqual(evaluate_result['accuracy'], 0.75, delta=0.1) + + results_files 
= os.listdir(self.tmp_dir) + self.assertIn(ModelFile.TRAIN_OUTPUT_DIR, results_files) + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + pipeline_ins = pipeline(task=Tasks.faq_question_answering, + model=self.mgimn_model_id, + model_revision='v1.0.0') + result_before = pipeline_ins(self.param) + self.assertEqual(result_before['output'][0][0]['label'], '1') + self.assertAlmostEqual(result_before['output'][0][0]['score'], + 0.9, + delta=0.2) + pipeline_ins = pipeline(task=Tasks.faq_question_answering, + model=output_dir) + result_after = pipeline_ins(self.param) + self.assertEqual(result_after['output'][0][0]['label'], '1') + self.assertAlmostEqual(result_after['output'][0][0]['score'], + 0.9, + delta=0.2) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_gpt3.py b/tests/trainers/test_finetune_gpt3.py new file mode 100644 index 0000000..7a32761 --- /dev/null +++ b/tests/trainers/test_finetune_gpt3.py @@ -0,0 +1,139 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer + + +class TestFinetuneTextGeneration(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skip( + 'skip since the test requires multiple GPU and takes a long time to run' + ) + def test_finetune_poetry(self): + dataset_dict = MsDataset.load('chinese-poetry-collection') + train_dataset = dataset_dict['train'].remap_columns( + {'text1': 'src_txt'}) + eval_dataset = dataset_dict['test'].remap_columns({'text1': 'src_txt'}) + max_epochs = 10 + tmp_dir = './gpt3_poetry' + + num_warmup_steps = 100 + + def noam_lambda(current_step: int): + current_step += 1 + return min(current_step**(-0.5), + current_step * num_warmup_steps**(-1.5)) + + def cfg_modify_fn(cfg): + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + cfg.train.optimizer = {'type': 'AdamW', 'lr': 3e-4} + cfg.train.dataloader = { + 'batch_size_per_gpu': 16, + 'workers_per_gpu': 1 + } + cfg.train.hooks.append({ + 'type': 'EvaluationHook', + 'by_epoch': True, + 'interval': 1 + }) + cfg.evaluation.dataloader = { + 'batch_size_per_gpu': 8, + 'workers_per_gpu': 1 + } + cfg.evaluation.metrics = 'ppl' + return cfg + + kwargs = dict(model='damo/nlp_gpt3_text-generation_1.3B', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir, + cfg_modify_fn=cfg_modify_fn) + + # Construct trainer and train + trainer = build_trainer(name=Trainers.gpt3_trainer, + default_args=kwargs) + trainer.train() + + @unittest.skip( + 'skip since the test requires multiple GPU and takes a long time to run' + ) + def test_finetune_dureader(self): + # DuReader_robust-QG is an example data set, + # users can also use their own data set for training + + dataset_dict = MsDataset.load('DuReader_robust-QG') + + train_dataset = dataset_dict['train'].remap_columns({'text1': 'src_txt', 'text2': 'tgt_txt'}) \ + .map(lambda example: {'src_txt': example['src_txt'].replace('[SEP]', '') + '\n'}) + eval_dataset = dataset_dict['validation'].remap_columns({'text1': 'src_txt', 'text2': 
'tgt_txt'}) \ + .map(lambda example: {'src_txt': example['src_txt'].replace('[SEP]', '') + '\n'}) + + max_epochs = 10 + + tmp_dir = './gpt3_dureader' + + num_warmup_steps = 200 + + def noam_lambda(current_step: int): + current_step += 1 + return min(current_step**(-0.5), + current_step * num_warmup_steps**(-1.5)) + + def cfg_modify_fn(cfg): + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + cfg.train.optimizer = {'type': 'AdamW', 'lr': 1e-4} + cfg.train.dataloader = { + 'batch_size_per_gpu': 16, + 'workers_per_gpu': 1 + } + cfg.train.hooks.append({ + 'type': 'EvaluationHook', + 'by_epoch': True, + 'interval': 1 + }) + cfg.preprocessor.sequence_length = 512 + cfg.model.checkpoint_model_parallel_size = 1 + return cfg + + kwargs = dict(model='damo/nlp_gpt3_text-generation_1.3B', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir, + cfg_modify_fn=cfg_modify_fn) + + # Construct trainer and train + trainer = build_trainer(name=Trainers.gpt3_trainer, + default_args=kwargs) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_gpt_moe.py b/tests/trainers/test_finetune_gpt_moe.py new file mode 100644 index 0000000..9816857 --- /dev/null +++ b/tests/trainers/test_finetune_gpt_moe.py @@ -0,0 +1,129 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer + + +class TestFinetuneTextGeneration(unittest.TestCase): + + test_model_id = 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64' + + def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skip( + 'skip since the test requires multiple GPU and takes a long time to run' + ) + def test_finetune_poetry(self): + dataset_dict = MsDataset.load('chinese-poetry-collection') + train_dataset = dataset_dict['train'].remap_columns( + {'text1': 'src_txt'}) + eval_dataset = dataset_dict['test'].remap_columns({'text1': 'src_txt'}) + max_epochs = 10 + tmp_dir = './gpt_moe_poetry' + + num_warmup_steps = 100 + + def noam_lambda(current_step: int): + current_step += 1 + return min(current_step**(-0.5), + current_step * num_warmup_steps**(-1.5)) + + def cfg_modify_fn(cfg): + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + cfg.train.optimizer = {'type': 'AdamW', 'lr': 3e-4} + cfg.train.dataloader = { + 'batch_size_per_gpu': 1, + 'workers_per_gpu': 1 + } + return cfg + + kwargs = dict(model=self.test_model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir, + cfg_modify_fn=cfg_modify_fn) + + # Construct trainer and train + trainer = build_trainer(name=Trainers.gpt_moe_trainer, + default_args=kwargs) + trainer.train() + + @unittest.skip( + 'skip since the test requires multiple GPU and takes a long time to run' + ) + def test_finetune_dureader(self): + # DuReader_robust-QG is an example data set, + # users can also use their own data set for training + dataset_dict = MsDataset.load('DuReader_robust-QG') + + train_dataset = dataset_dict['train'].remap_columns({'text1': 'src_txt', 'text2': 'tgt_txt'}) \ + .map(lambda 
example: {'src_txt': example['src_txt'].replace('[SEP]', '') + '\n'}) + eval_dataset = dataset_dict['validation'].remap_columns({'text1': 'src_txt', 'text2': 'tgt_txt'}) \ + .map(lambda example: {'src_txt': example['src_txt'].replace('[SEP]', '') + '\n'}) + + max_epochs = 10 + tmp_dir = './gpt_moe_dureader' + + num_warmup_steps = 200 + + def noam_lambda(current_step: int): + current_step += 1 + return min(current_step**(-0.5), + current_step * num_warmup_steps**(-1.5)) + + def cfg_modify_fn(cfg): + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + cfg.train.optimizer = {'type': 'AdamW', 'lr': 3e-4} + cfg.train.dataloader = { + 'batch_size_per_gpu': 16, + 'workers_per_gpu': 1 + } + cfg.train.hooks.append({ + 'type': 'EvaluationHook', + 'by_epoch': True, + 'interval': 1 + }) + cfg.preprocessor.sequence_length = 512 + cfg.model.checkpoint_model_parallel_size = 1 + return cfg + + kwargs = dict(model=self.test_model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir, + cfg_modify_fn=cfg_modify_fn) + + # Construct trainer and train + trainer = build_trainer(name=Trainers.gpt_moe_trainer, + default_args=kwargs) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_mgeo.py b/tests/trainers/test_finetune_mgeo.py new file mode 100644 index 0000000..ff0b602 --- /dev/null +++ b/tests/trainers/test_finetune_mgeo.py @@ -0,0 +1,282 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest +from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union + +import torch +from transformers.tokenization_utils_base import PreTrainedTokenizerBase + +from modelscope.metainfo import Preprocessors, Trainers +from modelscope.models import Model +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class TestFinetuneMGeo(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def finetune(self, + model_id, + train_dataset, + eval_dataset, + name=Trainers.nlp_text_ranking_trainer, + cfg_modify_fn=None, + **kwargs): + kwargs = dict(model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn, + **kwargs) + + os.environ['LOCAL_RANK'] = '0' + trainer = build_trainer(name=name, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_finetune_geotes_rerank(self): + def cfg_modify_fn(cfg): + neg_sample = 19 + cfg.task = 'text-ranking' + cfg['preprocessor'] = {'type': 'mgeo-ranking'} + cfg.train.optimizer.lr = 5e-5 + cfg['dataset'] = { + 'train': { + 'type': 'mgeo', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text', 'gis'], + 'qid_field': 'query_id', + 'neg_sample': neg_sample, + 'sequence_length': 64 + }, + 
                'val': {
+                    'type': 'mgeo',
+                    'query_sequence': 'query',
+                    'pos_sequence': 'positive_passages',
+                    'neg_sequence': 'negative_passages',
+                    'text_fileds': ['text', 'gis'],
+                    'qid_field': 'query_id'
+                },
+            }
+            cfg.evaluation.dataloader.batch_size_per_gpu = 16
+            cfg.train.dataloader.batch_size_per_gpu = 3
+            cfg.train.dataloader.workers_per_gpu = 16
+            cfg.evaluation.dataloader.workers_per_gpu = 16
+            cfg.train.train_iters_per_epoch = 10
+            cfg.evaluation.val_iters_per_epoch = 10
+            cfg['evaluation']['metrics'] = 'text-ranking-metric'
+            cfg.train.max_epochs = 1
+            cfg.model['neg_sample'] = neg_sample
+            cfg.model['gis_num'] = 2
+            cfg.model['finetune_mode'] = 'multi-modal'
+            cfg.train.hooks = [{
+                'type': 'CheckpointHook',
+                'interval': 1
+            }, {
+                'type': 'TextLoggerHook',
+                'interval': 100
+            }, {
+                'type': 'IterTimerHook'
+            }, {
+                'type': 'EvaluationHook',
+                'by_epoch': True
+            }]
+            # lr_scheduler configuration
+
+            cfg.train.lr_scheduler = {
+                'type': 'LinearLR',
+                'start_factor': 1.0,
+                'end_factor': 0.5,
+                'total_iters':
+                int(len(train_ds) / cfg.train.dataloader.batch_size_per_gpu) *
+                cfg.train.max_epochs,
+                'options': {
+                    'warmup': {
+                        'type': 'LinearWarmup',
+                        'warmup_iters':
+                        int(
+                            len(train_ds) /
+                            cfg.train.dataloader.batch_size_per_gpu)
+                    },
+                    'by_epoch': False
+                }
+            }
+
+            return cfg
+
+        # load dataset
+        train_dataset = MsDataset.load('GeoGLUE',
+                                       subset_name='GeoTES-rerank',
+                                       split='train',
+                                       namespace='damo')
+        dev_dataset = MsDataset.load('GeoGLUE',
+                                     subset_name='GeoTES-rerank',
+                                     split='validation',
+                                     namespace='damo')
+
+        train_ds = train_dataset['train']
+        dev_ds = dev_dataset['validation']
+
+        model_id = 'damo/mgeo_backbone_chinese_base'
+        self.finetune(model_id=model_id,
+                      train_dataset=train_ds,
+                      eval_dataset=dev_ds,
+                      cfg_modify_fn=cfg_modify_fn,
+                      name=Trainers.mgeo_ranking_trainer)
+
+        output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)
+        print(f'model is saved to {output_dir}')
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_finetune_geoeag(self):
+        def cfg_modify_fn(cfg):
+            cfg.task = Tasks.sentence_similarity
+            cfg['preprocessor'] = {'type': Preprocessors.sen_sim_tokenizer}
+
+            cfg.train.dataloader.batch_size_per_gpu = 64
+            cfg.evaluation.dataloader.batch_size_per_gpu = 64
+            cfg.train.optimizer.lr = 2e-5
+            cfg.train.max_epochs = 1
+            cfg.train.train_iters_per_epoch = 10
+            cfg.evaluation.val_iters_per_epoch = 10
+
+            cfg['dataset'] = {
+                'train': {
+                    'labels': ['not_match', 'partial_match', 'exact_match'],
+                    'first_sequence': 'sentence1',
+                    'second_sequence': 'sentence2',
+                    'label': 'label',
+                    'sequence_length': 128
+                }
+            }
+            cfg['evaluation']['metrics'] = 'seq-cls-metric'
+            cfg.train.hooks = [{
+                'type': 'CheckpointHook',
+                'interval': 1
+            }, {
+                'type': 'TextLoggerHook',
+                'interval': 100
+            }, {
+                'type': 'IterTimerHook'
+            }, {
+                'type': 'EvaluationHook',
+                'by_epoch': True
+            }]
+            cfg.train.lr_scheduler.total_iters = int(
+                len(train_dataset) / 32) * cfg.train.max_epochs
+            return cfg
+
+        # load dataset
+        train_dataset = MsDataset.load('GeoGLUE',
+                                       subset_name='GeoEAG',
+                                       split='train',
+                                       namespace='damo')
+        dev_dataset = MsDataset.load('GeoGLUE',
+                                     subset_name='GeoEAG',
+                                     split='validation',
+                                     namespace='damo')
+
+        model_id = 'damo/mgeo_backbone_chinese_base'
+        self.finetune(model_id=model_id,
+                      train_dataset=train_dataset['train'],
+                      eval_dataset=dev_dataset['validation'],
+                      cfg_modify_fn=cfg_modify_fn,
+                      name='nlp-base-trainer')
+
+        output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)
+        print(f'model is
saved to {output_dir}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_finetune_geoeta(self): + def cfg_modify_fn(cfg): + cfg.task = 'token-classification' + cfg['dataset'] = { + 'train': { + 'labels': label_enumerate_values, + 'first_sequence': 'tokens', + 'label': 'ner_tags', + 'sequence_length': 128 + } + } + cfg['preprocessor'] = { + 'type': 'token-cls-tokenizer', + 'padding': 'max_length' + } + cfg.train.max_epochs = 1 + cfg.train.dataloader.batch_size_per_gpu = 32 + cfg.train.train_iters_per_epoch = 10 + cfg.evaluation.val_iters_per_epoch = 10 + cfg.train.optimizer.lr = 3e-5 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 100 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': True + }] + cfg.train.lr_scheduler.total_iters = int( + len(train_dataset) / 32) * cfg.train.max_epochs + + return cfg + + def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = list(unique_labels) + label_list.sort() + return label_list + + # load dataset + train_dataset = MsDataset.load('GeoGLUE', + subset_name='GeoETA', + split='train', + namespace='damo') + dev_dataset = MsDataset.load('GeoGLUE', + subset_name='GeoETA', + split='validation', + namespace='damo') + + label_enumerate_values = get_label_list( + train_dataset._hf_ds['train']['ner_tags'] + + dev_dataset._hf_ds['validation']['ner_tags']) + + model_id = 'damo/mgeo_backbone_chinese_base' + self.finetune(model_id=model_id, + train_dataset=train_dataset['train'], + eval_dataset=dev_dataset['validation'], + cfg_modify_fn=cfg_modify_fn, + name='nlp-base-trainer') + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + print(f'model is saved to {output_dir}') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_mplug.py b/tests/trainers/test_finetune_mplug.py new file mode 100644 index 0000000..dfde1ba --- /dev/null +++ b/tests/trainers/test_finetune_mplug.py @@ -0,0 +1,144 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.multi_modal import MPlugForAllTasks +from modelscope.msdatasets import MsDataset +from modelscope.trainers import EpochBasedTrainer, build_trainer +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class TestFinetuneMPlug(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + datadict = MsDataset.load('coco_captions_small_slice') + self.train_dataset = MsDataset( + datadict['train'].remap_columns({ + 'image:FILE': 'image', + 'answer:Value': 'answer' + }).map(lambda _: {'question': 'what the picture describes?'})) + self.test_dataset = MsDataset( + datadict['test'].remap_columns({ + 'image:FILE': 'image', + 'answer:Value': 'answer' + }).map(lambda _: {'question': 'what the picture describes?'})) + self.max_epochs = 2 + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_caption(self): + kwargs = dict(model='damo/mplug_backbone_base_en', + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir, + task=Tasks.image_captioning) + + trainer: EpochBasedTrainer = build_trainer(name=Trainers.mplug, + default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_caption_with_model_and_args(self): + cache_path = snapshot_download('damo/mplug_backbone_base_en') + model = MPlugForAllTasks.from_pretrained(cache_path, + task=Tasks.image_captioning) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer: EpochBasedTrainer = build_trainer(name=Trainers.mplug, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_vqa(self): + kwargs = dict(model='damo/mplug_backbone_base_en', + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir, + task=Tasks.visual_question_answering) + + trainer: EpochBasedTrainer = build_trainer(name=Trainers.mplug, + default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_vqa_with_model_and_args(self): + cache_path = snapshot_download( + 'damo/mplug_visual-question-answering_coco_large_en') + model = MPlugForAllTasks.from_pretrained( + cache_path, task=Tasks.visual_question_answering) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer: EpochBasedTrainer = build_trainer(name=Trainers.mplug, + default_args=kwargs) + 
trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_retrieval(self): + kwargs = dict(model='damo/mplug_backbone_base_en', + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir, + task=Tasks.image_text_retrieval) + + trainer: EpochBasedTrainer = build_trainer(name=Trainers.mplug, + default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_retrieval_with_model_and_args(self): + cache_path = snapshot_download('damo/mplug_backbone_base_en') + model = MPlugForAllTasks.from_pretrained( + cache_path, task=Tasks.image_text_retrieval) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer: EpochBasedTrainer = build_trainer(name=Trainers.mplug, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_plug_mental.py b/tests/trainers/test_finetune_plug_mental.py new file mode 100644 index 0000000..5c90671 --- /dev/null +++ b/tests/trainers/test_finetune_plug_mental.py @@ -0,0 +1,104 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os
+import shutil
+import tempfile
+import unittest
+from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union
+
+import torch
+from transformers.tokenization_utils_base import PreTrainedTokenizerBase
+
+from modelscope.metainfo import Preprocessors, Trainers
+from modelscope.models import Model
+from modelscope.msdatasets import MsDataset
+from modelscope.pipelines import pipeline
+from modelscope.trainers import build_trainer
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class TestFinetunePlugMental(unittest.TestCase):
+    # `epoch_num` is referenced by finetune() below; define it so the
+    # per-epoch checkpoint assertions do not raise AttributeError.
+    epoch_num = 1
+
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+        self.tmp_dir = tempfile.TemporaryDirectory().name
+        if not os.path.exists(self.tmp_dir):
+            os.makedirs(self.tmp_dir)
+
+    def tearDown(self):
+        shutil.rmtree(self.tmp_dir)
+        super().tearDown()
+
+    def finetune(self,
+                 model_id,
+                 train_dataset,
+                 eval_dataset,
+                 name=Trainers.nlp_base_trainer,
+                 cfg_modify_fn=None,
+                 **kwargs):
+        kwargs = dict(model=model_id,
+                      train_dataset=train_dataset,
+                      eval_dataset=eval_dataset,
+                      work_dir=self.tmp_dir,
+                      cfg_modify_fn=cfg_modify_fn,
+                      **kwargs)
+
+        os.environ['LOCAL_RANK'] = '0'
+        trainer = build_trainer(name=name, default_args=kwargs)
+        trainer.train()
+        results_files = os.listdir(self.tmp_dir)
+        self.assertIn(f'{trainer.timestamp}.log.json', results_files)
+        for i in range(self.epoch_num):
+            self.assertIn(f'epoch_{i + 1}.pth', results_files)
+
+        output_files = os.listdir(
+            os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR))
+        self.assertIn(ModelFile.CONFIGURATION, output_files)
+        self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, output_files)
+        copy_src_files = os.listdir(trainer.model_dir)
+
+        print(f'copy_src_files are {copy_src_files}')
+        print(f'output_files are {output_files}')
+        for item in copy_src_files:
+            if not item.startswith('.'):
+                self.assertIn(item, output_files)
+
+    def pipeline_sentence_similarity(self, model_dir):
+        sentence1 = '今天气温比昨天高么?'
+        sentence2 = '今天湿度比昨天高么?'
+        model = Model.from_pretrained(model_dir)
+        pipeline_ins = pipeline(task=Tasks.sentence_similarity, model=model)
+        print(pipeline_ins(input=(sentence1, sentence2)))
+
+    @unittest.skip
+    def test_finetune_afqmc(self):
+        """This unittest is used to reproduce the training results of the
+        plug-mental model on the clue:afqmc dataset.
+
+        Users can train a custom dataset by modifying this piece of code
+        and commenting out the @unittest.skip decorator.
+ """ + def cfg_modify_fn(cfg): + cfg.task = Tasks.sentence_similarity + cfg['preprocessor'] = {'type': Preprocessors.sen_sim_tokenizer} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'labels': ['0', '1'], + 'first_sequence': 'sentence1', + 'second_sequence': 'sentence2', + 'label': 'label', + } + } + cfg.train.lr_scheduler.total_iters = int( + len(dataset['train']) / 32) * cfg.train.max_epochs + return cfg + + dataset = MsDataset.load('clue', subset_name='afqmc') + self.finetune(model_id='damo/nlp_plug-mental_backbone_base', + train_dataset=dataset['train'], + eval_dataset=dataset['validation'], + cfg_modify_fn=cfg_modify_fn) + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + self.pipeline_sentence_similarity(output_dir) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_sentence_embedding.py b/tests/trainers/test_finetune_sentence_embedding.py new file mode 100644 index 0000000..ab995bc --- /dev/null +++ b/tests/trainers/test_finetune_sentence_embedding.py @@ -0,0 +1,182 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest +from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union + +import torch +from transformers.tokenization_utils_base import PreTrainedTokenizerBase + +from modelscope.metainfo import Trainers +from modelscope.models import Model +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class TestFinetuneSentenceEmbedding(unittest.TestCase): + inputs = { + 'source_sentence': ["how long it take to get a master's degree"], + 'sentences_to_compare': [ + "On average, students take about 18 to 24 months to complete a master's degree.", + 'On the other hand, some students prefer to go at a slower pace and choose to take ' + 'several years to complete their studies.', + 'It can take anywhere from two semesters' + ] + } + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def finetune(self, + model_id, + train_dataset, + eval_dataset, + name=Trainers.nlp_sentence_embedding_trainer, + cfg_modify_fn=None, + **kwargs): + kwargs = dict(model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn, + **kwargs) + + os.environ['LOCAL_RANK'] = '0' + trainer = build_trainer(name=name, default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_finetune_msmarco(self): + def cfg_modify_fn(cfg): + neg_sample = 2 + cfg.task = 'sentence-embedding' + cfg['preprocessor'] = {'type': 'sentence-embedding'} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['title', 'text'], + 'qid_field': 'query_id', + 'neg_sample': neg_sample + }, + 'val': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['title', 'text'], + 'qid_field': 
'query_id' + }, + } + cfg['evaluation']['dataloader']['batch_size_per_gpu'] = 30 + cfg.train.max_epochs = 1 + cfg.train.train_batch_size = 2 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'options': { + 'by_epoch': False + } + } + cfg.model['neg_sample'] = 4 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }] + return cfg + + # load dataset + ds = MsDataset.load('passage-ranking-demo', 'zyznull') + train_ds = ds['train'].to_hf_dataset() + dev_ds = ds['dev'].to_hf_dataset() + + model_id = 'damo/nlp_corom_sentence-embedding_english-base' + self.finetune(model_id=model_id, + train_dataset=train_ds, + eval_dataset=dev_ds, + cfg_modify_fn=cfg_modify_fn) + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + self.pipeline_sentence_embedding(output_dir) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_finetune_dureader(self): + def cfg_modify_fn(cfg): + cfg.task = 'sentence-embedding' + cfg['preprocessor'] = { + 'type': 'sentence-embedding', + 'max_length': 384 + } + cfg.train.optimizer.lr = 3e-5 + cfg['dataset'] = { + 'train': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text'], + 'qid_field': 'query_id', + 'neg_sample': 4 + }, + 'val': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text'], + 'qid_field': 'query_id' + }, + } + cfg['evaluation']['dataloader']['batch_size_per_gpu'] = 3 + cfg.train.max_epochs = 2 + cfg.train.train_batch_size = 4 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }] + return cfg + + # load dataset + ds = MsDataset.load('dureader-retrieval-ranking', 'zyznull') + train_ds = ds['train'].to_hf_dataset().shard(1000, index=0) + dev_ds = ds['dev'].to_hf_dataset() + model_id = 'damo/nlp_corom_sentence-embedding_chinese-base' + self.finetune(model_id=model_id, + train_dataset=train_ds, + eval_dataset=dev_ds, + cfg_modify_fn=cfg_modify_fn) + + def pipeline_sentence_embedding(self, model_dir): + model = Model.from_pretrained(model_dir) + pipeline_ins = pipeline(task=Tasks.sentence_embedding, model=model) + print('inputs', self.inputs) + print(pipeline_ins(input=self.inputs)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_sequence_classification.py b/tests/trainers/test_finetune_sequence_classification.py new file mode 100644 index 0000000..f298a66 --- /dev/null +++ b/tests/trainers/test_finetune_sequence_classification.py @@ -0,0 +1,522 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest + +from modelscope.metainfo import Preprocessors, Trainers +from modelscope.models import Model +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import build_trainer +from modelscope.trainers.hooks import Hook +from modelscope.trainers.nlp_trainer import (EpochBasedTrainer, + NlpEpochBasedTrainer) +from modelscope.trainers.optimizer.child_tuning_adamw_optimizer import \ + calculate_fisher +from modelscope.trainers.training_args import TrainingArgs +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.data_utils import to_device +from modelscope.utils.regress_test_utils import (MsRegressTool, + compare_arguments_nested) +from modelscope.utils.test_utils import test_level + + +class TestFinetuneSequenceClassification(unittest.TestCase): + epoch_num = 1 + + sentence1 = '今天气温比昨天高么?' + sentence2 = '今天湿度比昨天高么?' + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + self.regress_tool = MsRegressTool(baseline=False) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skip + def test_trainer_cfg_class(self): + dataset = MsDataset.load('clue', subset_name='tnews') + train_dataset = dataset['train'] + validation_dataset = dataset['validation'] + cfg_modify_fn = TrainingArgs( + task=Tasks.text_classification, + preprocessor_type=Preprocessors.sen_cls_tokenizer, + train_first_sequence='sentence', + train_label='label', + labels=[ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', + '12', '13', '14' + ], + max_epochs=5, + optimizer_args={ + 'lr': 3e-5, + }, + lr_scheduler_args={ + 'total_iters': int(len(train_dataset) / 32) * 5, + }, + checkpoint_saving_type='BestCkptSaverHook', + metric_key='accuracy', + train_batch_size_per_gpu=32, + checkpoint_interval=1, + train_workers_per_gpu=0, + checkpoint_by_epoch=False, + evaluation_interval=1, + evaluation_by_epoch=False, + eval_workers_per_gpu=0, + metrics=['seq-cls-metric'], + ) + + kwargs = dict(model='damo/nlp_structbert_backbone_base_std', + train_dataset=train_dataset, + eval_dataset=validation_dataset, + work_dir=self.tmp_dir, + seed=42, + cfg_modify_fn=cfg_modify_fn) + + os.environ['LOCAL_RANK'] = '0' + trainer: EpochBasedTrainer = build_trainer( + name=Trainers.nlp_base_trainer, default_args=kwargs) + trainer.train() + + @unittest.skip( + 'Skip testing trainer repeatable, because it\'s unstable in daily UT') + def test_trainer_repeatable(self): + import torch # noqa + + def compare_fn(value1, value2, key, type): + # Ignore the differences between optimizers of two torch versions + if type != 'optimizer': + return None + + match = (value1['type'] == value2['type']) + shared_defaults = set(value1['defaults'].keys()).intersection( + set(value2['defaults'].keys())) + match = all([ + compare_arguments_nested(f'Optimizer defaults {key} not match', + value1['defaults'][key], + value2['defaults'][key]) + for key in shared_defaults + ]) and match + match = (len(value1['state_dict']['param_groups']) == len( + value2['state_dict']['param_groups'])) and match + for group1, group2 in zip(value1['state_dict']['param_groups'], + value2['state_dict']['param_groups']): + shared_keys = set(group1.keys()).intersection( + set(group2.keys())) + match = all([ + compare_arguments_nested( + f'Optimizer param_groups {key} 
not match', group1[key], + group2[key]) for key in shared_keys + ]) and match + return match + + def cfg_modify_fn(cfg): + cfg.task = 'nli' + cfg['preprocessor'] = {'type': 'nli-tokenizer'} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'labels': [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + '11', '12', '13', '14' + ], + 'first_sequence': + 'sentence', + 'label': + 'label', + } + } + cfg.train.max_epochs = 5 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'total_iters': + int(len(dataset['train']) / 32) * cfg.train.max_epochs, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 100 + }] + return cfg + + dataset = MsDataset.load('clue', subset_name='tnews') + + kwargs = dict(model='damo/nlp_structbert_backbone_base_std', + train_dataset=dataset['train'], + eval_dataset=dataset['validation'], + work_dir=self.tmp_dir, + seed=42, + cfg_modify_fn=cfg_modify_fn) + + os.environ['LOCAL_RANK'] = '0' + trainer: EpochBasedTrainer = build_trainer( + name=Trainers.nlp_base_trainer, default_args=kwargs) + + with self.regress_tool.monitor_ms_train(trainer, + 'sbert-base-tnews', + level='strict', + compare_fn=compare_fn): + trainer.train() + + def finetune(self, + model_id, + train_dataset, + eval_dataset, + name=Trainers.nlp_base_trainer, + cfg_modify_fn=None, + **kwargs): + kwargs = dict(model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn, + **kwargs) + + os.environ['LOCAL_RANK'] = '0' + trainer = build_trainer(name=name, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.epoch_num): + self.assertIn(f'epoch_{i + 1}.pth', results_files) + + output_files = os.listdir( + os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)) + self.assertIn(ModelFile.CONFIGURATION, output_files) + self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, output_files) + copy_src_files = os.listdir(trainer.model_dir) + + print(f'copy_src_files are {copy_src_files}') + print(f'output_files are {output_files}') + for item in copy_src_files: + if not item.startswith('.'): + self.assertIn(item, output_files) + + def pipeline_sentence_similarity(self, model_dir): + model = Model.from_pretrained(model_dir) + pipeline_ins = pipeline(task=Tasks.sentence_similarity, model=model) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + @unittest.skip + def test_finetune_afqmc(self): + """This unittest is used to reproduce the clue:afqmc dataset + structbert model training results. + + User can train a custom dataset by modifying this piece of code and comment the @unittest.skip. 
+ """ + def cfg_modify_fn(cfg): + cfg.task = Tasks.sentence_similarity + cfg['preprocessor'] = {'type': Preprocessors.sen_sim_tokenizer} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'labels': ['0', '1'], + 'first_sequence': 'sentence1', + 'second_sequence': 'sentence2', + 'label': 'label', + } + } + cfg.train.max_epochs = self.epoch_num + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'total_iters': + int(len(dataset['train']) / 32) * cfg.train.max_epochs, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 100 + }] + return cfg + + dataset = MsDataset.load('clue', subset_name='afqmc') + self.finetune(model_id='damo/nlp_structbert_backbone_base_std', + train_dataset=dataset['train'], + eval_dataset=dataset['validation'], + cfg_modify_fn=cfg_modify_fn) + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + self.pipeline_sentence_similarity(output_dir) + + @unittest.skip + def test_finetune_tnews(self): + """This unittest is used to reproduce the clue:tnews dataset + structbert model training results. + + User can train a custom dataset by modifying this piece of code and comment the @unittest.skip. + """ + def cfg_modify_fn(cfg): + # TODO no proper task for tnews + cfg.task = 'nli' + cfg['preprocessor'] = {'type': 'nli-tokenizer'} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'labels': [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + '11', '12', '13', '14' + ], + 'first_sequence': + 'sentence', + 'label': + 'label', + } + } + cfg.train.max_epochs = 5 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'total_iters': + int(len(dataset['train']) / 32) * cfg.train.max_epochs, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 100 + }] + return cfg + + dataset = MsDataset.load('clue', subset_name='tnews') + + self.finetune(model_id='damo/nlp_structbert_backbone_base_std', + train_dataset=dataset['train'], + eval_dataset=dataset['validation'], + cfg_modify_fn=cfg_modify_fn) + + @unittest.skip + def test_veco_xnli(self): + """This unittest is used to reproduce the xnli dataset + veco model training results. + + Here we follow the training scenario listed in the Alicemind open source project: + https://github.com/alibaba/AliceMind/tree/main/VECO + by training the english language subset. + User can train a custom dataset by modifying this piece of code and comment the @unittest.skip. 
+ """ + + langs = ['en'] + langs_eval = ['en'] + train_datasets = [] + for lang in langs: + train_datasets.append( + MsDataset.load('xnli', subset_name=lang, split='train')) + eval_datasets = [] + for lang in langs_eval: + eval_datasets.append( + MsDataset.load('xnli', subset_name=lang, split='validation')) + train_len = sum([len(dataset) for dataset in train_datasets]) + labels = ['0', '1', '2'] + + def cfg_modify_fn(cfg): + cfg.task = 'nli' + cfg['preprocessor'] = {'type': 'nli-tokenizer'} + cfg['dataset'] = { + 'train': { + 'first_sequence': 'premise', + 'second_sequence': 'hypothesis', + 'labels': labels, + 'label': 'label', + } + } + cfg['train'] = { + 'work_dir': + '/tmp', + 'max_epochs': + 2, + 'dataloader': { + 'batch_size_per_gpu': 16, + 'workers_per_gpu': 0 + }, + 'optimizer': { + 'type': 'AdamW', + 'lr': 2e-5, + 'options': { + 'cumulative_iters': 8, + } + }, + 'lr_scheduler': { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'total_iters': int(train_len / 16) * 2, + 'options': { + 'by_epoch': False + } + }, + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1, + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 500 + }] + } + cfg['evaluation'] = { + 'dataloader': { + 'batch_size_per_gpu': 128, + 'workers_per_gpu': 0, + 'shuffle': False + } + } + return cfg + + self.finetune('damo/nlp_veco_fill-mask-large', + train_datasets, + eval_datasets, + name=Trainers.nlp_veco_trainer, + cfg_modify_fn=cfg_modify_fn) + + @unittest.skip + def test_finetune_cluewsc(self): + """This unittest is used to reproduce the clue:wsc dataset + structbert model training results. + + A runnable sample of child-tuning is also showed here. + + User can train a custom dataset by modifying this piece of code and comment the @unittest.skip. 
+ """ + + child_tuning_type = 'ChildTuning-F' + mode = {} + if child_tuning_type is not None: + mode = {'mode': child_tuning_type, 'reserve_p': 0.2} + + def cfg_modify_fn(cfg): + cfg.task = 'nli' + cfg['preprocessor'] = {'type': 'nli-tokenizer'} + cfg['dataset'] = { + 'train': { + 'labels': ['0', '1'], + 'first_sequence': 'text', + 'second_sequence': 'text2', + 'label': 'label', + } + } + cfg.train.dataloader.batch_size_per_gpu = 16 + cfg.train.max_epochs = 30 + cfg.train.optimizer = { + 'type': + 'AdamW' if child_tuning_type is None else 'ChildTuningAdamW', + 'lr': 1e-5, + 'options': {}, + **mode, + } + cfg.train.lr_scheduler = { + 'type': + 'LinearLR', + 'start_factor': + 1.0, + 'end_factor': + 0.0, + 'total_iters': + int( + len(dataset['train']) / + cfg.train.dataloader.batch_size_per_gpu) * + cfg.train.max_epochs, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 30 + }] + return cfg + + def add_sentence2(features): + return { + 'text2': + features['target']['span2_text'] + '指代' + + features['target']['span1_text'] + } + + dataset = MsDataset.load('clue', subset_name='cluewsc2020') + dataset = { + k: v.to_hf_dataset().map(add_sentence2) + for k, v in dataset.items() + } + + kwargs = dict(model='damo/nlp_structbert_backbone_base_std', + train_dataset=dataset['train'], + eval_dataset=dataset['validation'], + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn) + + os.environ['LOCAL_RANK'] = '0' + trainer: NlpEpochBasedTrainer = build_trainer( + name=Trainers.nlp_base_trainer, default_args=kwargs) + + class CalculateFisherHook(Hook): + @staticmethod + def forward_step(model, inputs): + inputs = to_device(inputs, trainer.device) + trainer.train_step(model, inputs) + return trainer.train_outputs['loss'] + + def before_run(self, trainer: NlpEpochBasedTrainer): + v = calculate_fisher(trainer.model, trainer.train_dataloader, + self.forward_step, 0.2) + trainer.optimizer.set_gradient_mask(v) + + if child_tuning_type == 'ChildTuning-D': + trainer.register_hook(CalculateFisherHook()) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_text_generation.py b/tests/trainers/test_finetune_text_generation.py new file mode 100644 index 0000000..793791b --- /dev/null +++ b/tests/trainers/test_finetune_text_generation.py @@ -0,0 +1,169 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.nlp import GPT3ForTextGeneration, PalmForTextGeneration +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import test_level + + +class TestFinetuneTextGeneration(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + from datasets import Dataset + + src_dataset_dict = { + 'src_txt': [ + 'This is test sentence1-1', 'This is test sentence2-1', + 'This is test sentence3-1' + ] + } + src_tgt_dataset_dict = { + 'src_txt': + src_dataset_dict['src_txt'], + 'tgt_txt': [ + 'This is test sentence1-2', 'This is test sentence2-2', + 'This is test sentence3-2' + ] + } + + self.src_dataset = MsDataset(Dataset.from_dict(src_dataset_dict)) + self.src_tgt_dataset = MsDataset( + Dataset.from_dict(src_tgt_dataset_dict)) + + self.max_epochs = 3 + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_palm(self): + + kwargs = dict(model='damo/nlp_palm2.0_text-generation_english-base', + train_dataset=self.src_tgt_dataset, + eval_dataset=self.src_tgt_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.text_generation_trainer, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_palm_with_model_and_args(self): + + cache_path = snapshot_download( + 'damo/nlp_palm2.0_text-generation_english-base') + model = PalmForTextGeneration.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.src_tgt_dataset, + eval_dataset=self.src_tgt_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.text_generation_trainer, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_gpt3(self): + + kwargs = dict(model='damo/nlp_gpt3_text-generation_chinese-base', + train_dataset=self.src_dataset, + eval_dataset=self.src_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.text_generation_trainer, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_gpt3_with_model_and_args(self): + + cache_path = snapshot_download( + 
'damo/nlp_gpt3_text-generation_chinese-base') + model = GPT3ForTextGeneration.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.src_dataset, + eval_dataset=self.src_dataset, + max_epochs=self.max_epochs, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skip + def test_finetune_cnndm(self): + from modelscope.msdatasets import MsDataset + dataset_dict = MsDataset.load('DuReader_robust-QG') + train_dataset = dataset_dict['train'].remap_columns({ + 'text1': 'src_txt', + 'text2': 'tgt_txt' + }) + eval_dataset = dataset_dict['validation'].remap_columns({ + 'text1': + 'src_txt', + 'text2': + 'tgt_txt' + }) + num_warmup_steps = 200 + + def noam_lambda(current_step: int): + current_step += 1 + return min(current_step**(-0.5), + current_step * num_warmup_steps**(-1.5)) + + def cfg_modify_fn(cfg): + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + return cfg + + kwargs = dict(model='damo/nlp_palm2.0_text-generation_chinese-base', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn) + trainer = build_trainer(name=Trainers.text_generation_trainer, + default_args=kwargs) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_text_ranking.py b/tests/trainers/test_finetune_text_ranking.py new file mode 100644 index 0000000..d582695 --- /dev/null +++ b/tests/trainers/test_finetune_text_ranking.py @@ -0,0 +1,195 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest +from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union + +import torch +from transformers.tokenization_utils_base import PreTrainedTokenizerBase + +from modelscope.metainfo import Trainers +from modelscope.models import Model +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class TestFinetuneSequenceClassification(unittest.TestCase): + inputs = { + 'source_sentence': ["how long it take to get a master's degree"], + 'sentences_to_compare': [ + "On average, students take about 18 to 24 months to complete a master's degree.", + 'On the other hand, some students prefer to go at a slower pace and choose to take ' + 'several years to complete their studies.', + 'It can take anywhere from two semesters' + ] + } + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def finetune(self, + model_id, + train_dataset, + eval_dataset, + name=Trainers.nlp_text_ranking_trainer, + cfg_modify_fn=None, + **kwargs): + kwargs = dict(model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn, + **kwargs) + + os.environ['LOCAL_RANK'] = '0' + trainer = build_trainer(name=name, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_finetune_msmarco(self): + def cfg_modify_fn(cfg): + neg_sample = 4 + cfg.task = 'text-ranking' + cfg['preprocessor'] = {'type': 'text-ranking'} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['title', 'text'], + 'qid_field': 'query_id', + 'neg_sample': neg_sample + }, + 'val': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['title', 'text'], + 'qid_field': 'query_id' + }, + } + cfg['evaluation']['dataloader']['batch_size_per_gpu'] = 30 + cfg.train.max_epochs = 1 + cfg.train.train_batch_size = 4 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'options': { + 'by_epoch': False + } + } + cfg.model['neg_sample'] = 4 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 15 + }] + return cfg + + # load dataset + ds = MsDataset.load('passage-ranking-demo', 'zyznull') + train_ds = ds['train'].to_hf_dataset() + dev_ds = ds['dev'].to_hf_dataset() + + model_id = 'damo/nlp_corom_passage-ranking_english-base' + self.finetune(model_id=model_id, + train_dataset=train_ds, + eval_dataset=dev_ds, + cfg_modify_fn=cfg_modify_fn) + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + self.pipeline_text_ranking(output_dir) + + @unittest.skipUnless(test_level() >= 2, 
'skip test in current test level') + def test_finetune_dureader(self): + def cfg_modify_fn(cfg): + cfg.task = 'text-ranking' + cfg['preprocessor'] = {'type': 'text-ranking'} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text'], + 'qid_field': 'query_id' + }, + 'val': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text'], + 'qid_field': 'query_id' + }, + } + cfg['evaluation']['dataloader']['batch_size_per_gpu'] = 30 + cfg.train.max_epochs = 1 + cfg.train.train_batch_size = 4 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 5000 + }] + return cfg + + # load dataset + ds = MsDataset.load('dureader-retrieval-ranking', 'zyznull') + train_ds = ds['train'].to_hf_dataset().shard(1000, index=0) + dev_ds = ds['dev'].to_hf_dataset() + model_id = 'damo/nlp_rom_passage-ranking_chinese-base' + self.finetune(model_id=model_id, + train_dataset=train_ds, + eval_dataset=dev_ds, + cfg_modify_fn=cfg_modify_fn) + + def pipeline_text_ranking(self, model_dir): + model = Model.from_pretrained(model_dir) + pipeline_ins = pipeline(task=Tasks.text_ranking, model=model) + print(pipeline_ins(input=self.inputs)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_token_classification.py b/tests/trainers/test_finetune_token_classification.py new file mode 100644 index 0000000..9dbec2d --- /dev/null +++ b/tests/trainers/test_finetune_token_classification.py @@ -0,0 +1,131 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest +from functools import reduce + +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.utils.test_utils import test_level + + +class TestFinetuneTokenClassification(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def finetune(self, + model_id, + train_dataset, + eval_dataset, + name=Trainers.nlp_base_trainer, + cfg_modify_fn=None, + **kwargs): + kwargs = dict(model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=self.tmp_dir, + cfg_modify_fn=cfg_modify_fn, + **kwargs) + + os.environ['LOCAL_RANK'] = '0' + trainer = build_trainer(name=name, default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(10): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skip + def test_word_segmentation(self): + """This unittest is used to reproduce the icwb2:pku dataset + structbert model training results. + + User can train a custom dataset by modifying this piece of code and comment the @unittest.skip. 
+ """ + + os.system( + f'curl http://sighan.cs.uchicago.edu/bakeoff2005/data/icwb2-data.zip > {self.tmp_dir}/icwb2-data.zip' + ) + shutil.unpack_archive(f'{self.tmp_dir}/icwb2-data.zip', self.tmp_dir) + from datasets import load_dataset + from modelscope.preprocessors.nlp import WordSegmentationBlankSetToLabelPreprocessor + preprocessor = WordSegmentationBlankSetToLabelPreprocessor() + dataset = load_dataset( + 'text', + data_files=f'{self.tmp_dir}/icwb2-data/training/pku_training.utf8') + + def split_to_dict(examples): + return preprocessor(examples['text']) + + dataset = dataset.map(split_to_dict, batched=False) + + def reducer(x, y): + x = x.split(' ') if isinstance(x, str) else x + y = y.split(' ') if isinstance(y, str) else y + return x + y + + label_enumerate_values = list( + set(reduce(reducer, dataset['train'][:1000]['labels']))) + label_enumerate_values.sort() + + train_len = int(len(dataset['train']) * 0.7) + train_dataset = dataset['train'].select(range(train_len)) + dev_dataset = dataset['train'].select( + range(train_len, len(dataset['train']))) + + def cfg_modify_fn(cfg): + cfg.task = 'token-classification' + cfg['dataset'] = { + 'train': { + 'labels': label_enumerate_values, + 'first_sequence': 'tokens', + 'label': 'labels', + } + } + cfg['preprocessor'] = { + 'type': 'token-cls-tokenizer', + 'padding': 'max_length' + } + cfg.train.max_epochs = 2 + cfg.train.dataloader.workers_per_gpu = 0 + cfg.evaluation.dataloader.workers_per_gpu = 0 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'total_iters': + int(len(train_dataset) / 32) * cfg.train.max_epochs, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 50 + }] + return cfg + + self.finetune('damo/nlp_structbert_backbone_base_std', + train_dataset, + dev_dataset, + cfg_modify_fn=cfg_modify_fn) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_general_image_classification_trainer.py b/tests/trainers/test_general_image_classification_trainer.py new file mode 100644 index 0000000..64212bb --- /dev/null +++ b/tests/trainers/test_general_image_classification_trainer.py @@ -0,0 +1,169 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest +import zipfile +from functools import partial + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class TestGeneralImageClassificationTestTrainer(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + try: + self.train_dataset = MsDataset.load('cats_and_dogs', + namespace='tany0699', + subset_name='default', + split='train') + + self.eval_dataset = MsDataset.load('cats_and_dogs', + namespace='tany0699', + subset_name='default', + split='validation') + except Exception as e: + print(f'Download dataset error: {e}') + + self.max_epochs = 1 + + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_nextvit_dailylife_train(self): + model_id = 'damo/cv_nextvit-small_image-classification_Dailylife-labels' + + def cfg_modify_fn(cfg): + cfg.train.dataloader.batch_size_per_gpu = 32 + cfg.train.dataloader.workers_per_gpu = 1 + cfg.train.max_epochs = self.max_epochs + cfg.model.mm_model.head.num_classes = 2 + cfg.train.optimizer.lr = 1e-4 + cfg.train.lr_config.warmup_iters = 1 + cfg.train.evaluation.metric_options = {'topk': (1, )} + cfg.evaluation.metric_options = {'topk': (1, )} + return cfg + + kwargs = dict(model=model_id, + work_dir=self.tmp_dir, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + cfg_modify_fn=cfg_modify_fn) + + trainer = build_trainer(name=Trainers.image_classification, + default_args=kwargs) + trainer.train() + + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_nextvit_dailylife_eval(self): + model_id = 'damo/cv_nextvit-small_image-classification_Dailylife-labels' + + kwargs = dict(model=model_id, + work_dir=self.tmp_dir, + train_dataset=None, + eval_dataset=self.eval_dataset) + + trainer = build_trainer(name=Trainers.image_classification, + default_args=kwargs) + result = trainer.evaluate() + print(result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_convnext_garbage_train(self): + model_id = 'damo/cv_convnext-base_image-classification_garbage' + + def cfg_modify_fn(cfg): + cfg.train.dataloader.batch_size_per_gpu = 16 + cfg.train.dataloader.workers_per_gpu = 1 + cfg.train.max_epochs = self.max_epochs + cfg.model.mm_model.head.num_classes = 2 + cfg.train.optimizer.lr = 1e-4 + cfg.train.lr_config.warmup_iters = 1 + cfg.train.evaluation.metric_options = {'topk': (1, )} + cfg.evaluation.metric_options = {'topk': (1, )} + return cfg + + kwargs = dict(model=model_id, + work_dir=self.tmp_dir, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + cfg_modify_fn=cfg_modify_fn) + + trainer = build_trainer(name=Trainers.image_classification, + default_args=kwargs) + trainer.train() + + results_files = 
os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_convnext_garbage_eval(self): + model_id = 'damo/cv_convnext-base_image-classification_garbage' + + kwargs = dict(model=model_id, + work_dir=self.tmp_dir, + train_dataset=None, + eval_dataset=self.eval_dataset) + + trainer = build_trainer(name=Trainers.image_classification, + default_args=kwargs) + result = trainer.evaluate() + print(result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_beitv2_train_eval(self): + model_id = 'damo/cv_beitv2-base_image-classification_patch16_224_pt1k_ft22k_in1k' + + def cfg_modify_fn(cfg): + cfg.train.dataloader.batch_size_per_gpu = 16 + cfg.train.dataloader.workers_per_gpu = 1 + cfg.train.max_epochs = self.max_epochs + cfg.model.mm_model.head.num_classes = 2 + cfg.model.mm_model.head.loss.num_classes = 2 + cfg.train.optimizer.lr = 1e-4 + cfg.train.lr_config.warmup_iters = 1 + cfg.train.evaluation.metric_options = {'topk': (1, )} + cfg.evaluation.metric_options = {'topk': (1, )} + return cfg + + kwargs = dict(model=model_id, + work_dir=self.tmp_dir, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + cfg_modify_fn=cfg_modify_fn) + + trainer = build_trainer(name=Trainers.image_classification, + default_args=kwargs) + trainer.train() + + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + result = trainer.evaluate() + print(result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_color_enhance_trainer.py b/tests/trainers/test_image_color_enhance_trainer.py new file mode 100644 index 0000000..0d68af8 --- /dev/null +++ b/tests/trainers/test_image_color_enhance_trainer.py @@ -0,0 +1,105 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import os.path as osp +import shutil +import tempfile +import unittest +from typing import Callable, List, Optional, Tuple, Union + +import cv2 +import torch +from torch.utils import data as data + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models.cv.image_color_enhance import ImageColorEnhance +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import test_level + + +class PairedImageDataset(data.Dataset): + def __init__(self, root): + super(PairedImageDataset, self).__init__() + gt_dir = osp.join(root, 'gt') + lq_dir = osp.join(root, 'lq') + self.gt_filelist = os.listdir(gt_dir) + self.gt_filelist = sorted(self.gt_filelist, key=lambda x: int(x[:-4])) + self.gt_filelist = [osp.join(gt_dir, f) for f in self.gt_filelist] + self.lq_filelist = os.listdir(lq_dir) + self.lq_filelist = sorted(self.lq_filelist, key=lambda x: int(x[:-4])) + self.lq_filelist = [osp.join(lq_dir, f) for f in self.lq_filelist] + + def _img_to_tensor(self, img): + return torch.from_numpy(img[:, :, [2, 1, 0]]).permute(2, 0, 1).type( + torch.float32) / 255. 
+ + def __getitem__(self, index): + lq = cv2.imread(self.lq_filelist[index]) + gt = cv2.imread(self.gt_filelist[index]) + lq = cv2.resize(lq, (256, 256), interpolation=cv2.INTER_CUBIC) + gt = cv2.resize(gt, (256, 256), interpolation=cv2.INTER_CUBIC) + return \ + {'src': self._img_to_tensor(lq), 'target': self._img_to_tensor(gt)} + + def __len__(self): + return len(self.gt_filelist) + + def to_torch_dataset(self, + columns: Union[str, List[str]] = None, + preprocessors: Union[Callable, List[Callable]] = None, + **format_kwargs): + return self + + +class TestImageColorEnhanceTrainer(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_csrnet_image-color-enhance-models' + + self.dataset = PairedImageDataset( + './data/test/images/image_color_enhance/') + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset, + eval_dataset=self.dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(3): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + cache_path = snapshot_download(self.model_id) + model = ImageColorEnhance.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset, + eval_dataset=self.dataset, + max_epochs=2, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_deblur_trainer.py b/tests/trainers/test_image_deblur_trainer.py new file mode 100644 index 0000000..55fa1b0 --- /dev/null +++ b/tests/trainers/test_image_deblur_trainer.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
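The PairedImageDataset defined above feeds the color-enhance trainer raw cv2 images, so _img_to_tensor has to reorder OpenCV's BGR channels to RGB, move the channel axis to the front and rescale to [0, 1]; to_torch_dataset then simply returns self so the trainer can consume the plain torch Dataset as-is. The channel conversion in isolation (the file path is illustrative):

    import cv2
    import torch

    img = cv2.imread('lq/0.png')                    # H x W x 3, BGR, uint8
    t = torch.from_numpy(img[:, :, [2, 1, 0]])      # reorder BGR -> RGB (fancy indexing copies the array)
    t = t.permute(2, 0, 1).float() / 255.           # 3 x H x W, float32 in [0, 1]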
+import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models.cv.image_deblur import NAFNetForImageDeblur +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.task_datasets.gopro_image_deblurring_dataset import \ + GoproImageDeblurringDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class ImageDeblurTrainerTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_nafnet_image-deblur_gopro' + self.cache_path = snapshot_download(self.model_id) + self.config = Config.from_file( + os.path.join(self.cache_path, ModelFile.CONFIGURATION)) + dataset_train = MsDataset.load( + 'GOPRO', + namespace='damo', + subset_name='default', + split='test', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)._hf_ds + dataset_val = MsDataset.load( + 'GOPRO', + namespace='damo', + subset_name='subset', + split='test', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)._hf_ds + self.dataset_train = GoproImageDeblurringDataset(dataset_train, + self.config.dataset, + is_train=True) + self.dataset_val = GoproImageDeblurringDataset(dataset_val, + self.config.dataset, + is_train=False) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(1): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + model = NAFNetForImageDeblur.from_pretrained(self.cache_path) + kwargs = dict(cfg_file=os.path.join(self.cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + max_epochs=1, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(1): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_defrcn_fewshot_trainer.py b/tests/trainers/test_image_defrcn_fewshot_trainer.py new file mode 100644 index 0000000..b780e75 --- /dev/null +++ b/tests/trainers/test_image_defrcn_fewshot_trainer.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
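The deblur test above uses a two-step dataset setup that several of the image-restoration tests repeat: MsDataset.load pulls the hub split and exposes the underlying HuggingFace dataset through ._hf_ds, which is then wrapped in the task-specific dataset class together with the dataset section of the model's configuration. Condensed, with the same GOPRO subset the test loads:

    import os

    from modelscope.hub.snapshot_download import snapshot_download
    from modelscope.msdatasets import MsDataset
    from modelscope.msdatasets.task_datasets.gopro_image_deblurring_dataset import \
        GoproImageDeblurringDataset
    from modelscope.utils.config import Config
    from modelscope.utils.constant import DownloadMode, ModelFile

    cache_path = snapshot_download('damo/cv_nafnet_image-deblur_gopro')
    config = Config.from_file(os.path.join(cache_path, ModelFile.CONFIGURATION))

    raw = MsDataset.load('GOPRO', namespace='damo', subset_name='default', split='test',
                         download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)._hf_ds
    train_ds = GoproImageDeblurringDataset(raw, config.dataset, is_train=True)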
+import os +import shutil +import subprocess +import sys +import tempfile +import unittest + +from modelscope.hub.utils.utils import get_cache_dir +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode +from modelscope.utils.test_utils import test_level + + +class TestImageDefrcnFewShotTrainer(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + cmd = [ + sys.executable, '-m', 'pip', 'install', 'detectron2==0.3', '-f', + 'https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html' + ] + subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_resnet101_detection_fewshot-defrcn' + + data_voc = MsDataset.load( + dataset_name='VOC_fewshot', + namespace='shimin2023', + split='train', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) + self.data_dir = os.path.join( + data_voc.config_kwargs['split_config']['train'], 'data') + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + + split = 1 + + def base_cfg_modify_fn(cfg): + cfg.train.work_dir = self.tmp_dir + + cfg.model.roi_heads.backward_scale = 0.75 + cfg.model.roi_heads.num_classes = 15 + cfg.model.roi_heads.freeze_feat = False + cfg.model.roi_heads.cls_dropout = False + cfg.model.weights = os.path.join( + get_cache_dir(), self.model_id, + 'ImageNetPretrained/MSRA/R-101.pkl') + + cfg.datasets.root = self.data_dir + cfg.datasets.type = 'pascal_voc' + cfg.datasets.train = [ + 'voc_2007_trainval_base{}'.format(split), + 'voc_2012_trainval_base{}'.format(split) + ] + cfg.datasets.test = ['voc_2007_test_base{}'.format(split)] + cfg.input.min_size_test = 50 + cfg.train.dataloader.ims_per_batch = 4 + cfg.train.max_iter = 300 + cfg.train.optimizer.lr = 0.001 + cfg.train.lr_scheduler.warmup_iters = 100 + + cfg.test.pcb_enable = False + return cfg + + kwargs = dict(model=self.model_id, cfg_modify_fn=base_cfg_modify_fn) + trainer = build_trainer(name=Trainers.image_fewshot_detection, + default_args=kwargs) + trainer.train() + + results_files = os.listdir(self.tmp_dir) + self.assertIn('metrics.json', results_files) + self.assertIn('model_final.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_denoise_trainer.py b/tests/trainers/test_image_denoise_trainer.py new file mode 100644 index 0000000..bce0bae --- /dev/null +++ b/tests/trainers/test_image_denoise_trainer.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models.cv.image_denoise import NAFNetForImageDenoise +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.task_datasets.sidd_image_denoising import \ + SiddImageDenoisingDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class ImageDenoiseTrainerTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_nafnet_image-denoise_sidd' + self.cache_path = snapshot_download(self.model_id) + self.config = Config.from_file( + os.path.join(self.cache_path, ModelFile.CONFIGURATION)) + dataset_train = MsDataset.load( + 'SIDD', + namespace='huizheng', + subset_name='default', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD)._hf_ds + dataset_val = MsDataset.load( + 'SIDD', + namespace='huizheng', + subset_name='default', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD)._hf_ds + self.dataset_train = SiddImageDenoisingDataset(dataset_train, + self.config.dataset, + is_train=True) + self.dataset_val = SiddImageDenoisingDataset(dataset_val, + self.config.dataset, + is_train=False) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(1): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + model = NAFNetForImageDenoise.from_pretrained(self.cache_path) + kwargs = dict(cfg_file=os.path.join(self.cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + max_epochs=1, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(1): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_inpainting_trainer.py b/tests/trainers/test_image_inpainting_trainer.py new file mode 100644 index 0000000..6fec3be --- /dev/null +++ b/tests/trainers/test_image_inpainting_trainer.py @@ -0,0 +1,81 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.cv.image_inpainting import FFTInpainting +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class ImageInpaintingTrainerTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_fft_inpainting_lama' + self.cache_path = snapshot_download(self.model_id) + cfg = Config.from_file( + os.path.join(self.cache_path, ModelFile.CONFIGURATION)) + + train_data_cfg = ConfigDict( + name='PlacesToydataset', + split='train', + mask_gen_kwargs=cfg.dataset.mask_gen_kwargs, + out_size=cfg.dataset.train_out_size, + test_mode=False) + + test_data_cfg = ConfigDict(name='PlacesToydataset', + split='test', + mask_gen_kwargs=cfg.dataset.mask_gen_kwargs, + out_size=cfg.dataset.val_out_size, + test_mode=True) + + self.train_dataset = MsDataset.load( + dataset_name=train_data_cfg.name, + split=train_data_cfg.split, + mask_gen_kwargs=train_data_cfg.mask_gen_kwargs, + out_size=train_data_cfg.out_size, + test_mode=train_data_cfg.test_mode) + assert next( + iter(self.train_dataset.config_kwargs['split_config'].values())) + + self.test_dataset = MsDataset.load( + dataset_name=test_data_cfg.name, + split=test_data_cfg.split, + mask_gen_kwargs=test_data_cfg.mask_gen_kwargs, + out_size=test_data_cfg.out_size, + test_mode=test_data_cfg.test_mode) + assert next( + iter(self.test_dataset.config_kwargs['split_config'].values())) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset) + + trainer = build_trainer(name=Trainers.image_inpainting, + default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_instance_segmentation_trainer.py b/tests/trainers/test_image_instance_segmentation_trainer.py new file mode 100644 index 0000000..4435cf4 --- /dev/null +++ b/tests/trainers/test_image_instance_segmentation_trainer.py @@ -0,0 +1,125 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
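The instance-segmentation test that follows does not rely on the default collate function: it binds mmcv's collate to the configured samples_per_gpu with functools.partial and passes the result to the trainer as data_collator, since mmcv's collate knows how to batch the DataContainer objects that mmdetection-style pipelines emit. A minimal sketch (the batch size is illustrative; the test reads it from the model configuration, and the datasets stand in for the MsDataset splits built in setUp):

    from functools import partial

    from mmcv.parallel import collate

    collate_fn = partial(collate, samples_per_gpu=2)

    kwargs = dict(model='damo/cv_swin-b_image-instance-segmentation_coco',
                  data_collator=collate_fn,
                  train_dataset=train_dataset,   # MsDataset splits, as in setUp below
                  eval_dataset=eval_dataset,
                  work_dir='./tmp_work_dir')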
+import os +import shutil +import tempfile +import unittest +import zipfile +from functools import partial + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.cv.image_instance_segmentation import \ + CascadeMaskRCNNSwinModel +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.task_datasets import \ + ImageInstanceSegmentationCocoDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class TestImageInstanceSegmentationTrainer(unittest.TestCase): + + model_id = 'damo/cv_swin-b_image-instance-segmentation_coco' + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + cache_path = snapshot_download(self.model_id) + config_path = os.path.join(cache_path, ModelFile.CONFIGURATION) + cfg = Config.from_file(config_path) + + max_epochs = cfg.train.max_epochs + samples_per_gpu = cfg.train.dataloader.batch_size_per_gpu + try: + train_data_cfg = cfg.dataset.train + val_data_cfg = cfg.dataset.val + except Exception: + train_data_cfg = None + val_data_cfg = None + if train_data_cfg is None: + # use default toy data + train_data_cfg = ConfigDict(name='pets_small', + split='train', + test_mode=False) + if val_data_cfg is None: + val_data_cfg = ConfigDict(name='pets_small', + split='validation', + test_mode=True) + + self.train_dataset = MsDataset.load( + dataset_name=train_data_cfg.name, + split=train_data_cfg.split, + test_mode=train_data_cfg.test_mode, + download_mode=DownloadMode.FORCE_REDOWNLOAD) + assert self.train_dataset.config_kwargs['classes'] + assert next( + iter(self.train_dataset.config_kwargs['split_config'].values())) + + self.eval_dataset = MsDataset.load( + dataset_name=val_data_cfg.name, + split=val_data_cfg.split, + test_mode=val_data_cfg.test_mode, + download_mode=DownloadMode.FORCE_REDOWNLOAD) + assert self.eval_dataset.config_kwargs['classes'] + assert next( + iter(self.eval_dataset.config_kwargs['split_config'].values())) + + from mmcv.parallel import collate + + self.collate_fn = partial(collate, samples_per_gpu=samples_per_gpu) + + self.max_epochs = max_epochs + + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + data_collator=self.collate_fn, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.image_instance_segmentation, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + cache_path = snapshot_download(self.model_id) + model = CascadeMaskRCNNSwinModel.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + 
data_collator=self.collate_fn, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.image_instance_segmentation, + default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(self.max_epochs): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_image_portrait_enhancement_trainer.py b/tests/trainers/test_image_portrait_enhancement_trainer.py new file mode 100644 index 0000000..a0e9312 --- /dev/null +++ b/tests/trainers/test_image_portrait_enhancement_trainer.py @@ -0,0 +1,92 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import os.path as osp +import shutil +import tempfile +import unittest +from typing import Callable, List, Optional, Tuple, Union + +import cv2 +import torch +from torch.utils import data as data + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.cv.image_portrait_enhancement import \ + ImagePortraitEnhancement +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.task_datasets.image_portrait_enhancement import \ + ImagePortraitEnhancementDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class TestImagePortraitEnhancementTrainer(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_gpen_image-portrait-enhancement' + + dataset_train = MsDataset.load( + 'image-portrait-enhancement-dataset', + namespace='modelscope', + subset_name='default', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD)._hf_ds + dataset_val = MsDataset.load( + 'image-portrait-enhancement-dataset', + namespace='modelscope', + subset_name='default', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD)._hf_ds + + self.dataset_train = ImagePortraitEnhancementDataset(dataset_train, + is_train=True) + self.dataset_val = ImagePortraitEnhancementDataset(dataset_val, + is_train=False) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + device='gpu', + max_epochs=1, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.image_portrait_enhancement, + default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + cache_path = snapshot_download(self.model_id) + model = ImagePortraitEnhancement.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + device='gpu', + max_epochs=1, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.image_portrait_enhancement, + 
default_args=kwargs) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_language_guided_video_summarization_trainer.py b/tests/trainers/test_language_guided_video_summarization_trainer.py new file mode 100644 index 0000000..37ca75e --- /dev/null +++ b/tests/trainers/test_language_guided_video_summarization_trainer.py @@ -0,0 +1,74 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models.cv.language_guided_video_summarization import \ + ClipItVideoSummarization +from modelscope.msdatasets.task_datasets import \ + LanguageGuidedVideoSummarizationDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class LanguageGuidedVideoSummarizationTrainerTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_clip-it_video-summarization_language-guided_en' + self.cache_path = snapshot_download(self.model_id) + self.config = Config.from_file( + os.path.join(self.cache_path, ModelFile.CONFIGURATION)) + self.dataset_train = LanguageGuidedVideoSummarizationDataset( + 'train', self.config.dataset, self.cache_path) + self.dataset_val = LanguageGuidedVideoSummarizationDataset( + 'test', self.config.dataset, self.cache_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + max_epochs=2, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + model = ClipItVideoSummarization.from_pretrained(self.cache_path) + kwargs = dict(cfg_file=os.path.join(self.cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + max_epochs=2, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_movie_scene_segmentation_trainer.py b/tests/trainers/test_movie_scene_segmentation_trainer.py new file mode 100644 index 0000000..17134ca --- /dev/null +++ b/tests/trainers/test_movie_scene_segmentation_trainer.py @@ -0,0 +1,102 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import tempfile +import unittest +import zipfile + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.cv.movie_scene_segmentation import \ + MovieSceneSegmentationModel +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import test_level + + +class TestImageInstanceSegmentationTrainer(unittest.TestCase): + + model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet' + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + cache_path = snapshot_download(self.model_id) + config_path = os.path.join(cache_path, ModelFile.CONFIGURATION) + cfg = Config.from_file(config_path) + + max_epochs = cfg.train.max_epochs + + train_data_cfg = ConfigDict(name='movie_scene_seg_toydata', + split='train', + cfg=cfg.preprocessor, + test_mode=False) + + test_data_cfg = ConfigDict(name='movie_scene_seg_toydata', + split='test', + cfg=cfg.preprocessor, + test_mode=True) + + self.train_dataset = MsDataset.load(dataset_name=train_data_cfg.name, + split=train_data_cfg.split, + cfg=train_data_cfg.cfg, + test_mode=train_data_cfg.test_mode) + assert next( + iter(self.train_dataset.config_kwargs['split_config'].values())) + + self.test_dataset = MsDataset.load(dataset_name=test_data_cfg.name, + split=test_data_cfg.split, + cfg=test_data_cfg.cfg, + test_mode=test_data_cfg.test_mode) + assert next( + iter(self.test_dataset.config_kwargs['split_config'].values())) + + self.max_epochs = max_epochs + + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(name=Trainers.movie_scene_segmentation, + default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + cache_path = snapshot_download(self.model_id) + model = MovieSceneSegmentationModel.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + work_dir=tmp_dir) + + trainer = build_trainer(name=Trainers.movie_scene_segmentation, + default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_ofa_mmspeech_trainer.py b/tests/trainers/test_ofa_mmspeech_trainer.py new file mode 100644 index 0000000..781e992 --- /dev/null +++ b/tests/trainers/test_ofa_mmspeech_trainer.py @@ -0,0 +1,105 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
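Unlike the cfg_modify_fn-based tests, the two OFA tests below (MMSpeech ASR and OCR recognition) keep their entire finetuning configuration as an inline dict, dump it to a configuration.json inside the work dir, and point the trainer at it via cfg_file. A trimmed sketch of that flow for the OCR case; the dict shown here is only a fragment, and a real run needs the full finetune_cfg from the test body:

    import json
    import os

    from modelscope.metainfo import Trainers
    from modelscope.trainers import build_trainer
    from modelscope.utils.constant import ModelFile

    workspace = './workspace/ckpts/recognition'
    os.makedirs(workspace, exist_ok=True)

    finetune_cfg = {'framework': 'pytorch',
                    'task': 'ocr-recognition',
                    'train': {'work_dir': workspace, 'max_epochs': 1}}   # fragment only
    config_file = os.path.join(workspace, ModelFile.CONFIGURATION)
    with open(config_file, 'w') as writer:
        json.dump(finetune_cfg, writer, indent=4)

    args = dict(model='damo/ofa_ocr-recognition_scene_base_zh',
                work_dir=workspace,
                cfg_file=config_file,
                train_dataset=train_dataset,    # ocr_fudanvi_zh splits, loaded as in the test
                eval_dataset=eval_dataset)
    trainer = build_trainer(name=Trainers.ofa, default_args=args)
    trainer.train()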
+import json +import os +import shutil +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class TestMMSpeechTrainer(unittest.TestCase): + def setUp(self) -> None: + self.finetune_cfg = \ + {'framework': 'pytorch', + 'task': 'auto-speech-recognition', + 'model': {'type': 'ofa', + 'beam_search': {'beam_size': 5, + 'max_len_b': 128, + 'min_len': 1, + 'no_repeat_ngram_size': 5, + 'constraint_range': '4,21134'}, + 'seed': 7, + 'max_src_length': 256, + 'language': 'zh', + 'gen_type': 'generation', + 'multimodal_type': 'mmspeech'}, + 'pipeline': {'type': 'ofa-asr'}, + 'n_frames_per_step': 1, + 'dataset': {'column_map': {'wav': 'Audio:FILE', 'text': 'Text:LABEL'}}, + 'train': {'work_dir': 'work/ckpts/asr_recognition', + # 'launcher': 'pytorch', + 'max_epochs': 1, + 'use_fp16': True, + 'dataloader': {'batch_size_per_gpu': 16, 'workers_per_gpu': 0}, + 'lr_scheduler': {'name': 'polynomial_decay', + 'warmup_proportion': 0.01, + 'lr_end': 1e-07}, + 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False}, + 'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01}, + 'optimizer_hook': {'type': 'TorchAMPOptimizerHook', + 'cumulative_iters': 1, + 'grad_clip': {'max_norm': 1.0, 'norm_type': 2}, + 'loss_keys': 'loss'}, + 'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion', + 'constraint_range': '4,21134', + 'drop_worst_after': 0, + 'drop_worst_ratio': 0.0, + 'ignore_eos': False, + 'ignore_prefix_size': 0, + 'label_smoothing': 0.1, + 'reg_alpha': 1.0, + 'report_accuracy': False, + 'sample_patch_num': 196, + 'sentence_avg': True, + 'use_rdrop': False, + 'ctc_weight': 1.0}, + 'hooks': [{'type': 'BestCkptSaverHook', + 'metric_key': 'accuracy', + 'interval': 100}, + {'type': 'TextLoggerHook', 'interval': 1}, + {'type': 'IterTimerHook'}, + {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]}, + 'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, + 'metrics': [{'type': 'accuracy'}]}, + 'preprocessor': []} + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_std(self): + WORKSPACE = './workspace/ckpts/asr_recognition' + os.makedirs(WORKSPACE, exist_ok=True) + config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION) + with open(config_file, 'w') as writer: + json.dump(self.finetune_cfg, writer) + + pretrained_model = 'damo/ofa_mmspeech_pretrain_base_zh' + + args = dict(model=pretrained_model, + work_dir=WORKSPACE, + train_dataset=MsDataset.load( + 'aishell1_subset', + subset_name='default', + namespace='modelscope', + split='train', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS), + eval_dataset=MsDataset.load( + 'aishell1_subset', + subset_name='default', + namespace='modelscope', + split='test', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS), + cfg_file=config_file) + trainer = build_trainer(name=Trainers.ofa, default_args=args) + trainer.train() + + self.assertIn( + ModelFile.TORCH_MODEL_BIN_FILE, + os.listdir(os.path.join(WORKSPACE, ModelFile.TRAIN_OUTPUT_DIR))) + shutil.rmtree(WORKSPACE) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_ofa_trainer.py b/tests/trainers/test_ofa_trainer.py new file mode 100644 index 0000000..c08d2be --- /dev/null +++ b/tests/trainers/test_ofa_trainer.py @@ -0,0 +1,105 @@ +# Copyright (c) Alibaba, 
Inc. and its affiliates. +import json +import os +import shutil +import unittest + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.hub import read_config +from modelscope.utils.test_utils import test_level + + +class TestOfaTrainer(unittest.TestCase): + def setUp(self) -> None: + self.finetune_cfg = \ + {'framework': 'pytorch', + 'task': 'ocr-recognition', + 'model': {'type': 'ofa', + 'beam_search': {'beam_size': 5, + 'max_len_b': 64, + 'min_len': 1, + 'no_repeat_ngram_size': 0}, + 'seed': 7, + 'max_src_length': 128, + 'language': 'zh', + 'gen_type': 'generation', + 'patch_image_size': 480, + 'is_document': False, + 'max_image_size': 480, + 'imagenet_default_mean_and_std': False}, + 'pipeline': {'type': 'ofa-ocr-recognition'}, + 'dataset': {'column_map': {'text': 'label'}}, + 'train': {'work_dir': 'work/ckpts/recognition', + # 'launcher': 'pytorch', + 'max_epochs': 1, + 'use_fp16': False, + 'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, + 'lr_scheduler': {'name': 'polynomial_decay', + 'warmup_proportion': 0.01, + 'lr_end': 1e-07}, + 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False}, + 'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01}, + 'optimizer_hook': {'type': 'TorchAMPOptimizerHook', + 'cumulative_iters': 1, + 'grad_clip': {'max_norm': 1.0, 'norm_type': 2}, + 'loss_keys': 'loss'}, + 'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion', + 'constraint_range': None, + 'drop_worst_after': 0, + 'drop_worst_ratio': 0.0, + 'ignore_eos': False, + 'ignore_prefix_size': 0, + 'label_smoothing': 0.1, + 'reg_alpha': 1.0, + 'report_accuracy': False, + 'sample_patch_num': 196, + 'sentence_avg': False, + 'use_rdrop': True}, + 'hooks': [{'type': 'BestCkptSaverHook', + 'metric_key': 'accuracy', + 'interval': 100}, + {'type': 'TextLoggerHook', 'interval': 1}, + {'type': 'IterTimerHook'}, + {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]}, + 'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, + 'metrics': [{'type': 'accuracy'}]}, + 'preprocessor': []} + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_std(self): + WORKSPACE = './workspace/ckpts/recognition' + os.makedirs(WORKSPACE, exist_ok=True) + config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION) + with open(config_file, 'w') as writer: + json.dump(self.finetune_cfg, writer, indent=4) + pretrained_model = 'damo/ofa_ocr-recognition_scene_base_zh' + + args = dict(model=pretrained_model, + work_dir=WORKSPACE, + train_dataset=MsDataset.load( + 'ocr_fudanvi_zh', + subset_name='scene', + namespace='modelscope', + split='train[800:900]', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS), + eval_dataset=MsDataset.load( + 'ocr_fudanvi_zh', + subset_name='scene', + namespace='modelscope', + split='test[:20]', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS), + cfg_file=config_file) + trainer = build_trainer(name=Trainers.ofa, default_args=args) + trainer.train() + + self.assertIn( + ModelFile.TORCH_MODEL_BIN_FILE, + os.listdir(os.path.join(WORKSPACE, ModelFile.TRAIN_OUTPUT_DIR))) + shutil.rmtree(WORKSPACE) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_plug_finetune_text_generation.py b/tests/trainers/test_plug_finetune_text_generation.py new file mode 100644 index 0000000..9d9b5ee --- 
/dev/null +++ b/tests/trainers/test_plug_finetune_text_generation.py @@ -0,0 +1,51 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import argparse +import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import test_level + + +def test_trainer_with_model_and_args(): + def concat_answer_context(dataset): + dataset['src_txt'] = dataset['answers']['text'][0] + '[SEP]' + dataset[ + 'context'] + return dataset + + from datasets import load_dataset + dataset_dict = load_dataset('luozhouyang/dureader', 'robust') + + train_dataset = dataset_dict['train'].map(concat_answer_context) \ + .rename_columns({'question': 'tgt_txt'}).remove_columns('context') \ + .remove_columns('id').remove_columns('answers') + eval_dataset = dataset_dict['validation'].map(concat_answer_context) \ + .rename_columns({'question': 'tgt_txt'}).remove_columns('context') \ + .remove_columns('id').remove_columns('answers') + + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_plug_text-generation_27B' + + kwargs = dict(model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=tmp_dir) + + trainer = build_trainer(name=Trainers.nlp_plug_trainer, + default_args=kwargs) + trainer.train() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--local_rank') + test_trainer_with_model_and_args() diff --git a/tests/trainers/test_referring_video_object_segmentation_trainer.py b/tests/trainers/test_referring_video_object_segmentation_trainer.py new file mode 100644 index 0000000..6040940 --- /dev/null +++ b/tests/trainers/test_referring_video_object_segmentation_trainer.py @@ -0,0 +1,94 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
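The PLUG finetuning script above turns DuReader-robust into a generation dataset by concatenating the first answer span with its passage into src_txt and renaming the question column to tgt_txt, i.e. the trainer sees a question-generation style pairing. The preprocessing in isolation, mirroring the script:

    from datasets import load_dataset

    def concat_answer_context(example):
        # first answer text + '[SEP]' + passage becomes the source sequence
        example['src_txt'] = example['answers']['text'][0] + '[SEP]' + example['context']
        return example

    dataset_dict = load_dataset('luozhouyang/dureader', 'robust')
    train_dataset = dataset_dict['train'].map(concat_answer_context) \
        .rename_columns({'question': 'tgt_txt'}) \
        .remove_columns(['context', 'id', 'answers'])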
+import os +import shutil +import tempfile +import unittest +import zipfile + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.models.cv.referring_video_object_segmentation import \ + ReferringVideoObjectSegmentation +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import test_level + + +class TestImageInstanceSegmentationTrainer(unittest.TestCase): + + model_id = 'damo/cv_swin-t_referring_video-object-segmentation' + dataset_name = 'referring_vos_toydata' + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + cache_path = snapshot_download(self.model_id) + config_path = os.path.join(cache_path, ModelFile.CONFIGURATION) + cfg = Config.from_file(config_path) + + max_epochs = cfg.train.max_epochs + + train_data_cfg = ConfigDict(name=self.dataset_name, + split='train', + test_mode=False, + cfg=cfg.dataset) + + test_data_cfg = ConfigDict(name=self.dataset_name, + split='test', + test_mode=True, + cfg=cfg.dataset) + + self.train_dataset = MsDataset.load(dataset_name=train_data_cfg.name, + split=train_data_cfg.split, + cfg=train_data_cfg.cfg, + test_mode=train_data_cfg.test_mode) + assert next( + iter(self.train_dataset.config_kwargs['split_config'].values())) + + self.test_dataset = MsDataset.load(dataset_name=test_data_cfg.name, + split=test_data_cfg.split, + cfg=test_data_cfg.cfg, + test_mode=test_data_cfg.test_mode) + assert next( + iter(self.test_dataset.config_kwargs['split_config'].values())) + + self.max_epochs = max_epochs + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + work_dir='./work_dir') + + trainer = build_trainer( + name=Trainers.referring_video_object_segmentation, + default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + + cache_path = snapshot_download(self.model_id) + model = ReferringVideoObjectSegmentation.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.train_dataset, + eval_dataset=self.test_dataset, + work_dir='./work_dir') + + trainer = build_trainer( + name=Trainers.referring_video_object_segmentation, + default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_table_question_answering_trainer.py b/tests/trainers/test_table_question_answering_trainer.py new file mode 100644 index 0000000..f51485e --- /dev/null +++ b/tests/trainers/test_table_question_answering_trainer.py @@ -0,0 +1,43 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import json +import os +import unittest + +from modelscope.msdatasets import MsDataset +from modelscope.trainers.nlp.table_question_answering_trainer import \ + TableQuestionAnsweringTrainer +from modelscope.utils.constant import DownloadMode, ModelFile +from modelscope.utils.test_utils import test_level + + +class TableQuestionAnsweringTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_model_name(self): + # load data + input_dataset = MsDataset.load( + 'ChineseText2SQL', download_mode=DownloadMode.FORCE_REDOWNLOAD) + train_dataset = [] + for name in input_dataset['train']._hf_ds.data[1]: + train_dataset.append(json.load(open(str(name), 'r'))) + eval_dataset = [] + for name in input_dataset['test']._hf_ds.data[1]: + eval_dataset.append(json.load(open(str(name), 'r'))) + print('size of training set', len(train_dataset)) + print('size of evaluation set', len(eval_dataset)) + + model_id = 'damo/nlp_convai_text2sql_pretrain_cn' + trainer = TableQuestionAnsweringTrainer( + model=model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + ) + trainer.train( + batch_size=8, + total_epoches=2, + ) + trainer.evaluate(checkpoint_path=os.path.join(trainer.model.model_dir, + 'finetuned_model.bin')) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_team_transfer_trainer.py b/tests/trainers/test_team_transfer_trainer.py new file mode 100644 index 0000000..2e87f69 --- /dev/null +++ b/tests/trainers/test_team_transfer_trainer.py @@ -0,0 +1,92 @@ +import json +import os +import unittest + +import requests +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.trainers.multi_modal.team.team_trainer_utils import ( + collate_fn, train_mapping, val_mapping) +from modelscope.utils.config import Config +from modelscope.utils.constant import DownloadMode, ModeKeys, ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +def train_worker(device_id): + model_id = 'damo/multi-modal_team-vit-large-patch14_multi-modal-similarity' + ckpt_dir = './ckpt' + os.makedirs(ckpt_dir, exist_ok=True) + # Use epoch=1 for faster training here + cfg = Config({ + 'framework': 'pytorch', + 'task': 'multi-modal-similarity', + 'pipeline': { + 'type': 'multi-modal-similarity' + }, + 'model': { + 'type': 'team-multi-modal-similarity' + }, + 'dataset': { + 'name': 'Caltech101', + 'class_num': 101 + }, + 'preprocessor': {}, + 'train': { + 'epoch': 1, + 'batch_size': 32, + 'ckpt_dir': ckpt_dir + }, + 'evaluation': { + 'batch_size': 64 + } + }) + cfg_file = '{}/{}'.format(ckpt_dir, ModelFile.CONFIGURATION) + cfg.dump(cfg_file) + + train_dataset = MsDataset.load( + cfg.dataset.name, + namespace='modelscope', + split='train', + download_mode=DownloadMode.FORCE_REDOWNLOAD).to_hf_dataset() + train_dataset = train_dataset.with_transform(train_mapping) + val_dataset = MsDataset.load( + cfg.dataset.name, + namespace='modelscope', + split='validation', + download_mode=DownloadMode.FORCE_REDOWNLOAD).to_hf_dataset() + val_dataset = val_dataset.with_transform(val_mapping) + + default_args = dict(cfg_file=cfg_file, + model=model_id, + device_id=device_id, + data_collator=collate_fn, + 
train_dataset=train_dataset, + val_dataset=val_dataset) + + trainer = build_trainer(name=Trainers.image_classification_team, + default_args=default_args) + trainer.train() + trainer.evaluate() + + +class TEAMTransferTrainerTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + if torch.cuda.device_count() > 0: + train_worker(device_id=0) + else: + train_worker(device_id=-1) + logger.info('Training done') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_tinynas_damoyolo_trainer.py b/tests/trainers/test_tinynas_damoyolo_trainer.py new file mode 100644 index 0000000..0770d3b --- /dev/null +++ b/tests/trainers/test_tinynas_damoyolo_trainer.py @@ -0,0 +1,127 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.test_utils import DistributedTestCase, test_level + + +def _setup(): + model_id = 'damo/cv_tinynas_object-detection_damoyolo' + cache_path = snapshot_download(model_id) + return cache_path + + +class TestTinynasDamoyoloTrainerSingleGPU(unittest.TestCase): + def setUp(self): + self.model_id = 'damo/cv_tinynas_object-detection_damoyolo' + self.cache_path = _setup() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch_singleGPU(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'configuration.json'), + gpu_ids=[ + 0, + ], + batch_size=2, + max_epochs=3, + num_classes=80, + base_lr_per_img=0.001, + cache_path=self.cache_path, + train_image_dir='./data/test/images/image_detection/images', + val_image_dir='./data/test/images/image_detection/images', + train_ann= + './data/test/images/image_detection/annotations/coco_sample.json', + val_ann= + './data/test/images/image_detection/annotations/coco_sample.json', + ) + trainer = build_trainer(name=Trainers.tinynas_damoyolo, + default_args=kwargs) + trainer.train() + trainer.evaluate(checkpoint_path=os.path.join('./workdirs/damoyolo_s', + 'epoch_3_ckpt.pth')) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_from_scratch_singleGPU_model_id(self): + kwargs = dict( + model=self.model_id, + gpu_ids=[ + 0, + ], + batch_size=2, + max_epochs=3, + num_classes=80, + load_pretrain=True, + base_lr_per_img=0.001, + train_image_dir='./data/test/images/image_detection/images', + val_image_dir='./data/test/images/image_detection/images', + train_ann= + './data/test/images/image_detection/annotations/coco_sample.json', + val_ann= + './data/test/images/image_detection/annotations/coco_sample.json', + ) + trainer = build_trainer(name=Trainers.tinynas_damoyolo, + default_args=kwargs) + trainer.train() + trainer.evaluate(checkpoint_path=os.path.join( + self.cache_path, 'damoyolo_tinynasL25_S.pt')) + + @unittest.skip('multiGPU test is varified offline') + def test_trainer_from_scratch_multiGPU(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'configuration.json'), + gpu_ids=[ + 0, + 1, + ], + batch_size=32, + max_epochs=3, + num_classes=1, + cache_path=self.cache_path, + train_image_dir='./data/test/images/image_detection/images', + 
val_image_dir='./data/test/images/image_detection/images', + train_ann= + './data/test/images/image_detection/annotations/coco_sample.json', + val_ann= + './data/test/images/image_detection/annotations/coco_sample.json') + trainer = build_trainer(name=Trainers.tinynas_damoyolo, + default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_finetune_singleGPU(self): + kwargs = dict( + cfg_file=os.path.join(self.cache_path, 'configuration.json'), + gpu_ids=[ + 0, + ], + batch_size=16, + max_epochs=3, + num_classes=1, + load_pretrain=True, + pretrain_model=os.path.join(self.cache_path, + 'damoyolo_tinynasL25_S.pt'), + cache_path=self.cache_path, + train_image_dir='./data/test/images/image_detection/images', + val_image_dir='./data/test/images/image_detection/images', + train_ann= + './data/test/images/image_detection/annotations/coco_sample.json', + val_ann= + './data/test/images/image_detection/annotations/coco_sample.json') + trainer = build_trainer(name=Trainers.tinynas_damoyolo, + default_args=kwargs) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_trainer.py b/tests/trainers/test_trainer.py new file mode 100644 index 0000000..c3230d4 --- /dev/null +++ b/tests/trainers/test_trainer.py @@ -0,0 +1,557 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import json +import os +import shutil +import tempfile +import unittest + +import cv2 +import numpy as np +import torch +from torch import nn +from torch.optim import SGD +from torch.optim.lr_scheduler import StepLR +from torch.utils.data import IterableDataset + +from modelscope.metainfo import Metrics, Trainers +from modelscope.metrics.builder import MetricKeys +from modelscope.models.base import TorchModel +from modelscope.trainers import build_trainer +from modelscope.trainers.base import DummyTrainer +from modelscope.trainers.builder import TRAINERS +from modelscope.trainers.trainer import EpochBasedTrainer +from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile, Tasks +from modelscope.utils.test_utils import create_dummy_test_dataset, test_level + + +class DummyIterableDataset(IterableDataset): + def __iter__(self): + feat = np.random.random(size=(5, )).astype(np.float32) + labels = np.random.randint(0, 4, (1, )) + iterations = [{'feat': feat, 'labels': labels}] * 500 + return iter(iterations) + + +dummy_dataset_small = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), + 20) + +dummy_dataset_big = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), + 40) + + +class DummyModel(TorchModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +@TRAINERS.register_module(module_name='test_vis') +class VisTrainer(EpochBasedTrainer): + def visualization(self, results, dataset, **kwargs): + num_image = 5 + f = 'data/test/images/bird.JPEG' + filenames = [f for _ in range(num_image)] + imgs = [cv2.imread(f) for f in filenames] + filenames = [f + str(i) for i in range(num_image)] + vis_results = {'images': imgs, 'filenames': filenames} + + # visualization results will be displayed in group named eva_vis + self.visualization_buffer.output['eval_vis'] = vis_results + + +class TrainerTest(unittest.TestCase): + def setUp(self): + 
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_train_0(self): + json_cfg = { + 'task': Tasks.image_classification, + 'train': { + 'work_dir': + self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01, + 'options': { + 'grad_clip': { + 'max_norm': 2.0 + } + } + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2, + 'options': { + 'warmup': { + 'type': 'LinearWarmup', + 'warmup_iters': 2 + } + } + }, + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'interval': 1 + }, { + 'type': 'TensorboardHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': [Metrics.seq_cls_metric], + } + } + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=DummyModel(), + data_collator=None, + train_dataset=dummy_dataset_small, + eval_dataset=dummy_dataset_small, + max_epochs=3, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + with open(f'{self.tmp_dir}/{trainer.timestamp}.log', 'r') as infile: + lines = infile.readlines() + self.assertTrue(len(lines) > 20) + self.assertIn(f'{trainer.timestamp}.log', results_files) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + self.assertIn('tensorboard_output', results_files) + self.assertTrue(len(glob.glob(f'{self.tmp_dir}/*/*events*')) > 0) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_train_visualization(self): + json_cfg = { + 'task': Tasks.image_classification, + 'train': { + 'work_dir': + self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'optimizer': { + 'type': 'SGD', + 'lr': 0.01, + 'options': { + 'grad_clip': { + 'max_norm': 2.0 + } + } + }, + 'lr_scheduler': { + 'type': 'StepLR', + 'step_size': 2, + 'options': { + 'warmup': { + 'type': 'LinearWarmup', + 'warmup_iters': 2 + } + } + }, + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'interval': 1 + }, { + 'type': 'TensorboardHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': [Metrics.seq_cls_metric], + 'visualization': {}, + } + } + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + trainer_name = 'test_vis' + kwargs = dict(cfg_file=config_path, + model=DummyModel(), + data_collator=None, + train_dataset=dummy_dataset_small, + eval_dataset=dummy_dataset_small, + max_epochs=3, + device='cpu') + + 
trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + self.assertTrue(len(glob.glob(f'{self.tmp_dir}/*/*events*')) > 0) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_train_1(self): + json_cfg = { + 'task': Tasks.image_classification, + 'train': { + 'work_dir': + self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'hooks': [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'interval': 1 + }, { + 'type': 'TensorboardHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': [Metrics.seq_cls_metric] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimmizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = StepLR(optimmizer, 2) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + data_collator=None, + train_dataset=dummy_dataset_small, + eval_dataset=dummy_dataset_small, + optimizers=(optimmizer, lr_scheduler), + max_epochs=3, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + self.assertTrue(len(glob.glob(f'{self.tmp_dir}/*/*events*')) > 0) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_train_with_default_config(self): + json_cfg = { + 'task': Tasks.image_classification, + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'hooks': [{ + 'type': 'EvaluationHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': [Metrics.seq_cls_metric] + } + } + + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimmizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = StepLR(optimmizer, 2) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + data_collator=None, + train_dataset=dummy_dataset_big, + eval_dataset=dummy_dataset_small, + optimizers=(optimmizer, lr_scheduler), + max_epochs=3, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + + json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json') + with open(json_file, 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[0])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + 
LogKeys.ITER: 20, + LogKeys.LR: 0.01 + }, json.loads(lines[1])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10 + }, json.loads(lines[2])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[3])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 20, + LogKeys.LR: 0.01 + }, json.loads(lines[4])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10 + }, json.loads(lines[5])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10, + LogKeys.LR: 0.001 + }, json.loads(lines[6])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 20, + LogKeys.LR: 0.001 + }, json.loads(lines[7])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10 + }, json.loads(lines[8])) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + for i in [0, 1, 3, 4, 6, 7]: + self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) + self.assertIn(LogKeys.ITER_TIME, lines[i]) + for i in [2, 5, 8]: + self.assertIn(MetricKeys.ACCURACY, lines[i]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_train_with_iters_per_epoch(self): + json_cfg = { + 'task': Tasks.image_classification, + 'train': { + 'work_dir': self.tmp_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'hooks': [{ + 'type': 'EvaluationHook', + 'interval': 1 + }] + }, + 'evaluation': { + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': [Metrics.seq_cls_metric] + } + } + config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + model = DummyModel() + optimmizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = StepLR(optimmizer, 2) + trainer_name = Trainers.default + kwargs = dict(cfg_file=config_path, + model=model, + data_collator=None, + optimizers=(optimmizer, lr_scheduler), + train_dataset=DummyIterableDataset(), + eval_dataset=DummyIterableDataset(), + train_iters_per_epoch=20, + val_iters_per_epoch=10, + max_epochs=3, + device='cpu') + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json') + with open(json_file, 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[0])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 20, + LogKeys.LR: 0.01 + }, json.loads(lines[1])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10 + }, json.loads(lines[2])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[3])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 20, + LogKeys.LR: 0.01 + }, 
json.loads(lines[4])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10 + }, json.loads(lines[5])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10, + LogKeys.LR: 0.001 + }, json.loads(lines[6])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 20, + LogKeys.LR: 0.001 + }, json.loads(lines[7])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10 + }, json.loads(lines[8])) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + for i in [0, 1, 3, 4, 6, 7]: + self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) + self.assertIn(LogKeys.ITER_TIME, lines[i]) + for i in [2, 5, 8]: + self.assertIn(MetricKeys.ACCURACY, lines[i]) + + +class DummyTrainerTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_dummy(self): + default_args = dict(cfg_file='configs/examples/train.json') + trainer = build_trainer('dummy', default_args) + + trainer.train() + trainer.evaluate() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_trainer_gpu.py b/tests/trainers/test_trainer_gpu.py new file mode 100644 index 0000000..993fef4 --- /dev/null +++ b/tests/trainers/test_trainer_gpu.py @@ -0,0 +1,326 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import json +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from torch import nn +from torch.optim import SGD +from torch.optim.lr_scheduler import StepLR +from torch.utils.data import IterableDataset + +from modelscope.metainfo import Metrics, Trainers +from modelscope.metrics.builder import MetricKeys +from modelscope.models.base import Model, TorchModel +from modelscope.trainers import EpochBasedTrainer, build_trainer +from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile, Tasks +from modelscope.utils.test_utils import (DistributedTestCase, + create_dummy_test_dataset, test_level) + + +class DummyIterableDataset(IterableDataset): + def __iter__(self): + feat = np.random.random(size=(5, )).astype(np.float32) + labels = np.random.randint(0, 4, (1, )) + iterations = [{'feat': feat, 'labels': labels}] * 500 + return iter(iterations) + + +dummy_dataset_small = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), + 20) + +dummy_dataset_big = create_dummy_test_dataset(np.random.random(size=(5, )), + np.random.randint(0, 4, (1, )), + 40) + + +class DummyModel(TorchModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class DummyModelForwardInputs(DummyModel): + def forward(self, inputs): + feat, labels = inputs['feat'], inputs['labels'] + return super().forward(feat, labels) + + +def train_func(work_dir, + dist=False, + iterable_dataset=False, + forward_inputs=False, + **kwargs): + json_cfg = { + 'task': Tasks.image_classification, + 'model': {}, + 'train': { + 'work_dir': work_dir, + 'dataloader': { + 'batch_size_per_gpu': 2, + 'workers_per_gpu': 1 + }, + 'hooks': [{ + 'type': 'EvaluationHook', + 'interval': 1 + }] + }, + 'evaluation': { + 
'dataloader': { + 'batch_size_per_gpu': 1, + 'workers_per_gpu': 1, + 'shuffle': False + }, + 'metrics': [Metrics.seq_cls_metric] + } + } + + config_path = os.path.join(work_dir, ModelFile.CONFIGURATION) + with open(config_path, 'w') as f: + json.dump(json_cfg, f) + + if forward_inputs: + model = DummyModelForwardInputs() + else: + model = DummyModel() + optimmizer = SGD(model.parameters(), lr=0.01) + lr_scheduler = StepLR(optimmizer, 2) + trainer_name = Trainers.default + if iterable_dataset: + train_dataset = DummyIterableDataset() + eval_dataset = DummyIterableDataset() + else: + train_dataset = dummy_dataset_big + eval_dataset = dummy_dataset_small + _kwargs = dict(cfg_file=config_path, + model=model, + data_collator=None, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + optimizers=(optimmizer, lr_scheduler), + max_epochs=3, + device='gpu', + launcher='pytorch' if dist else None, + **kwargs) + + trainer = build_trainer(trainer_name, _kwargs) + trainer.train() + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class TrainerTestSingleGpu(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_single_gpu(self): + train_func(self.tmp_dir) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + + with open(json_files[0], 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[0])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 20, + LogKeys.LR: 0.01 + }, json.loads(lines[1])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 1, + LogKeys.ITER: 20 + }, json.loads(lines[2])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[3])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 20, + LogKeys.LR: 0.01 + }, json.loads(lines[4])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 2, + LogKeys.ITER: 20 + }, json.loads(lines[5])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10, + LogKeys.LR: 0.001 + }, json.loads(lines[6])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 20, + LogKeys.LR: 0.001 + }, json.loads(lines[7])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 3, + LogKeys.ITER: 20 + }, json.loads(lines[8])) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + for i in [0, 1, 3, 4, 6, 7]: + self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) + self.assertIn(LogKeys.ITER_TIME, lines[i]) + for i in [2, 5, 8]: + self.assertIn(MetricKeys.ACCURACY, lines[i]) + + +@unittest.skipIf(not torch.cuda.is_available() + or 
torch.cuda.device_count() <= 1, 'distributed unittest') +class TrainerTestMultiGpus(DistributedTestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpus(self): + self.start(train_func, num_gpus=2, work_dir=self.tmp_dir, dist=True) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + + with open(json_files[0], 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[0])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 1, + LogKeys.ITER: 10 + }, json.loads(lines[1])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10, + LogKeys.LR: 0.01 + }, json.loads(lines[2])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 2, + LogKeys.ITER: 10 + }, json.loads(lines[3])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.TRAIN, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10, + LogKeys.LR: 0.001 + }, json.loads(lines[4])) + self.assertDictContainsSubset( + { + LogKeys.MODE: ModeKeys.EVAL, + LogKeys.EPOCH: 3, + LogKeys.ITER: 10 + }, json.loads(lines[5])) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + for i in [0, 2, 4]: + self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) + self.assertIn(LogKeys.ITER_TIME, lines[i]) + for i in [1, 3, 5]: + self.assertIn(MetricKeys.ACCURACY, lines[i]) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpus_forward_inputs(self): + self.start(train_func, + num_gpus=2, + work_dir=self.tmp_dir, + dist=True, + forward_inputs=True) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + + # TODO: support iters_per_epoch for dist mode + @unittest.skipIf(True, 'need to adapt to DistributedSampler') + def test_multi_gpus_with_iters_per_epoch(self): + self.start( + train_func, + num_gpus=2, + work_dir=self.tmp_dir, + dist=True, + iterable_dataset=True, + train_iters_per_epoch=20, + val_iters_per_epoch=10, + ) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + + with open(json_files[0], 'r', encoding='utf-8') as f: + lines = [i.strip() for i in f.readlines()] + + print(results_files, lines) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py new file mode 100644 index 0000000..cf9d206 --- /dev/null +++ b/tests/trainers/test_trainer_with_nlp.py @@ -0,0 +1,487 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
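# --- Editorial note: illustrative sketch only; not part of this diff. ---
# The TrainerTestMultiGpus cases above rely on DistributedTestCase.start() to
# run train_func once per GPU with launcher='pytorch'. The generic PyTorch
# launch pattern such a helper is assumed to wrap looks roughly like the
# following; every name here is a hypothetical stand-in, not ModelScope code.
#
#   import os
#   import torch.distributed as dist
#   import torch.multiprocessing as mp
#
#   def _demo_worker(rank, world_size):
#       os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
#       os.environ.setdefault('MASTER_PORT', '29500')
#       # Every worker joins the same process group before training starts.
#       dist.init_process_group('nccl', rank=rank, world_size=world_size)
#       # ... build the model, wrap it in DistributedDataParallel, train ...
#       dist.destroy_process_group()
#
#   mp.spawn(_demo_worker, args=(2,), nprocs=2)  # roughly what num_gpus=2 implies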
+import os +import shutil +import tempfile +import unittest + +import numpy as np + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Metrics +from modelscope.models.base import Model +from modelscope.models.nlp import SbertForSequenceClassification +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import EpochBasedTrainer, build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.hub import read_config +from modelscope.utils.test_utils import test_level + + +class TestTrainerWithNlp(unittest.TestCase): + sentence1 = '今天气温比昨天高么?' + sentence2 = '今天湿度比昨天高么?' + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.dataset = MsDataset.load('clue', + subset_name='afqmc', + split='train').to_hf_dataset().select( + range(2)) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + kwargs = dict(model=model_id, + train_dataset=self.dataset, + eval_dataset=self.dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(10): + self.assertIn(f'epoch_{i + 1}.pth', results_files) + + output_files = os.listdir( + os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)) + self.assertIn(ModelFile.CONFIGURATION, output_files) + self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, output_files) + copy_src_files = os.listdir(trainer.model_dir) + + print(f'copy_src_files are {copy_src_files}') + print(f'output_files are {output_files}') + for item in copy_src_files: + if not item.startswith('.'): + self.assertIn(item, output_files) + + def pipeline_sentence_similarity(model_dir): + model = Model.from_pretrained(model_dir) + pipeline_ins = pipeline(task=Tasks.sentence_similarity, + model=model) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) + pipeline_sentence_similarity(output_dir) + + @unittest.skip + def test_trainer_with_backbone_head(self): + model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base' + kwargs = dict(model=model_id, + train_dataset=self.dataset, + eval_dataset=self.dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(10): + self.assertIn(f'epoch_{i + 1}.pth', results_files) + + eval_results = trainer.evaluate( + checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth')) + self.assertTrue(Metrics.accuracy in eval_results) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_user_defined_config(self): + model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base' + cfg = read_config(model_id) + cfg.train.max_epochs = 20 + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} + cfg.train.work_dir = self.tmp_dir + 
cfg_file = os.path.join(self.tmp_dir, 'config.json') + cfg.dump(cfg_file) + kwargs = dict(model=model_id, + train_dataset=self.dataset, + eval_dataset=self.dataset, + cfg_file=cfg_file) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(20): + self.assertIn(f'epoch_{i + 1}.pth', results_files) + + eval_results = trainer.evaluate( + checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth')) + self.assertTrue(Metrics.accuracy in eval_results) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_save_best_ckpt(self): + class MockTrainer(EpochBasedTrainer): + def evaluation_loop(self, data_loader, metric_classes): + return {'accuracy': 10 + (-1)**self.iter * 1 * self.iter} + + from modelscope.utils.regress_test_utils import MsRegressTool + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' + cfg: Config = read_config(model_id) + cfg.train.max_epochs = 10 + cfg.preprocessor.first_sequence = 'sentence1' + cfg.preprocessor.second_sequence = 'sentence2' + cfg.preprocessor.label = 'label' + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} + cfg.train.dataloader.batch_size_per_gpu = 2 + cfg.train.hooks = [{ + 'type': 'BestCkptSaverHook', + 'interval': 1, + 'by_epoch': False, + 'metric_key': 'accuracy', + 'max_checkpoint_num': 4, + 'restore_best': True, + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 1 + }] + cfg.train.work_dir = self.tmp_dir + cfg_file = os.path.join(self.tmp_dir, 'config.json') + cfg.dump(cfg_file) + dataset = MsDataset.load('clue', subset_name='afqmc', split='train') + dataset = dataset.to_hf_dataset().select(range(4)) + kwargs = dict(model=model_id, + train_dataset=dataset, + eval_dataset=dataset, + cfg_file=cfg_file) + + regress_tool = MsRegressTool(baseline=True) + trainer: MockTrainer = MockTrainer(**kwargs) + + def lazy_stop_callback(): + from modelscope.trainers.hooks.hook import Hook, Priority + + class EarlyStopHook(Hook): + PRIORITY = Priority.VERY_LOW + + def after_iter(self, trainer): + if trainer.iter == 10: + raise MsRegressTool.EarlyStopError('Test finished.') + + if 'EarlyStopHook' not in [ + hook.__class__.__name__ for hook in trainer.hooks + ]: + trainer.register_hook(EarlyStopHook()) + + with regress_tool.monitor_ms_train( + trainer, + 'trainer_continue_train', + level='strict', + lazy_stop_callback=lazy_stop_callback): + trainer.train() + + results_files = os.listdir(self.tmp_dir) + print(results_files) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in [22, 24, 26, 28]: + self.assertTrue( + any([ + f'accuracy{i}.pth' in filename + for filename in results_files + ])) + self.assertTrue( + os.path.isfile( + os.path.join(self.tmp_dir, 'output', 'pytorch_model.bin'))) + self.assertTrue( + os.path.isfile( + os.path.join(self.tmp_dir, 'output_best', + 'pytorch_model.bin'))) + + @unittest.skip('skip for now before test is re-configured') + def test_trainer_with_configured_datasets(self): + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' + cfg: Config = read_config(model_id) + cfg.train.max_epochs = 20 + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} + cfg.train.work_dir = self.tmp_dir + cfg.dataset = { + 
'train': { + 'name': 'clue', + 'subset_name': 'afqmc', + 'split': 'train', + }, + 'val': { + 'name': 'clue', + 'subset_name': 'afqmc', + 'split': 'train', + }, + } + cfg_file = os.path.join(self.tmp_dir, 'config.json') + cfg.dump(cfg_file) + kwargs = dict(model=model_id, cfg_file=cfg_file) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(cfg.train.max_epochs): + self.assertIn(f'epoch_{i + 1}.pth', results_files) + + eval_results = trainer.evaluate( + checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth')) + self.assertTrue(Metrics.accuracy in eval_results) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_continue_train(self): + from modelscope.utils.regress_test_utils import MsRegressTool + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' + cfg: Config = read_config(model_id) + cfg.train.max_epochs = 3 + cfg.preprocessor.first_sequence = 'sentence1' + cfg.preprocessor.second_sequence = 'sentence2' + cfg.preprocessor.label = 'label' + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} + cfg.train.dataloader.batch_size_per_gpu = 2 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 3, + 'by_epoch': False, + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'interval': 1 + }] + cfg.train.work_dir = self.tmp_dir + cfg_file = os.path.join(self.tmp_dir, 'config.json') + cfg.dump(cfg_file) + dataset = MsDataset.load('clue', subset_name='afqmc', split='train') + dataset = dataset.to_hf_dataset().select(range(4)) + kwargs = dict(model=model_id, + train_dataset=dataset, + eval_dataset=dataset, + cfg_file=cfg_file) + + regress_tool = MsRegressTool(baseline=True) + trainer: EpochBasedTrainer = build_trainer(default_args=kwargs) + + def lazy_stop_callback(): + from modelscope.trainers.hooks.hook import Hook, Priority + + class EarlyStopHook(Hook): + PRIORITY = Priority.VERY_LOW + + _should_save = False + + def after_iter(self, trainer): + if trainer.iter == 3: + raise MsRegressTool.EarlyStopError('Test finished.') + + if 'EarlyStopHook' not in [ + hook.__class__.__name__ for hook in trainer.hooks + ]: + trainer.register_hook(EarlyStopHook()) + + with regress_tool.monitor_ms_train( + trainer, + 'trainer_continue_train', + level='strict', + lazy_stop_callback=lazy_stop_callback): + trainer.train() + + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + trainer = build_trainer(default_args=kwargs) + regress_tool = MsRegressTool(baseline=False) + with regress_tool.monitor_ms_train(trainer, + 'trainer_continue_train', + level='strict'): + trainer.train(os.path.join(self.tmp_dir, 'iter_3.pth')) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_new_style_configuration(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + def cfg_modify_fn(cfg): + cfg.train['checkpoint'] = { + # 保存最优metric对应的checkpoint + 'best': { + # 是否按照epoch进行保存,false为按照iter + 'by_epoch': True, + # 保存的间隔 + 'interval': 2, + # 保存checkpoint数量的最大值 + 'max_checkpoint_num': 2, + # 根据指定的指标判断当前checkpoint是否为历史最优 + 'metric_key': 'f1', + } + } + return cfg + + kwargs = dict( + model='damo/nlp_structbert_sentence-similarity_chinese-tiny', 
+ train_dataset=self.dataset, + eval_dataset=self.dataset, + cfg_modify_fn=cfg_modify_fn, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_evaluation(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + cache_path = snapshot_download(model_id) + model = SbertForSequenceClassification.from_pretrained(cache_path) + + def cfg_modify_fn(cfg): + cfg.preprocessor.val.keep_original_columns = [ + 'sentence1', 'sentence2' + ] + return cfg + + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + eval_dataset=self.dataset, + cfg_modify_fn=cfg_modify_fn, + work_dir=self.tmp_dir, + remove_unused_data=True) + + trainer = build_trainer(default_args=kwargs) + + def saving_fn(inputs, outputs): + with open(f'{tmp_dir}/predicts.txt', 'a') as f: + sentence1 = inputs.sentence1 + sentence2 = inputs.sentence2 + labels = inputs['labels'] + predictions = np.argmax(outputs['logits'].cpu().numpy(), + axis=1) + labels = labels.cpu().numpy() + for sent1, sent2, pred, label in zip(sentence1, sentence2, + predictions, labels): + f.writelines(f'{sent1}, {sent2}, {pred}, {label}\n') + + print( + trainer.evaluate(cache_path + '/pytorch_model.bin', + saving_fn=saving_fn)) + self.assertTrue(os.path.isfile(f'{tmp_dir}/predicts.txt')) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_prediction(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + cache_path = snapshot_download(model_id) + model = SbertForSequenceClassification.from_pretrained(cache_path) + + def cfg_modify_fn(cfg): + cfg.preprocessor.val.keep_original_columns = [ + 'sentence1', 'sentence2' + ] + return cfg + + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + eval_dataset=self.dataset, + cfg_modify_fn=cfg_modify_fn, + work_dir=self.tmp_dir, + remove_unused_data=True) + + trainer = build_trainer(default_args=kwargs) + + def saving_fn(inputs, outputs): + with open(f'{tmp_dir}/predicts.txt', 'a') as f: + sentence1 = inputs.sentence1 + sentence2 = inputs.sentence2 + predictions = np.argmax(outputs['logits'].cpu().numpy(), + axis=1) + for sent1, sent2, pred in zip(sentence1, sentence2, + predictions): + f.writelines(f'{sent1}, {sent2}, {pred}\n') + + trainer.predict(predict_datasets=self.dataset, + saving_fn=saving_fn, + checkpoint_path=cache_path + '/pytorch_model.bin') + self.assertTrue(os.path.isfile(f'{tmp_dir}/predicts.txt')) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_prediction_msdataset(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + cache_path = snapshot_download(model_id) + model = SbertForSequenceClassification.from_pretrained(cache_path) + + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + eval_dataset=self.dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + + def saving_fn(inputs, outputs): + with open(f'{tmp_dir}/predicts.txt', 'a') as f: + predictions = 
np.argmax(outputs['logits'].cpu().numpy(), + axis=1) + for pred in predictions: + f.writelines(f'{pred}\n') + + dataset = MsDataset.load('afqmc_small', split='train') + + trainer.predict(predict_datasets=dataset, + saving_fn=saving_fn, + checkpoint_path=cache_path + '/pytorch_model.bin') + self.assertTrue(os.path.isfile(f'{tmp_dir}/predicts.txt')) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + cache_path = snapshot_download(model_id) + model = SbertForSequenceClassification.from_pretrained(cache_path) + kwargs = dict(cfg_file=os.path.join(cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset, + eval_dataset=self.dataset, + max_epochs=2, + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i + 1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_training_args.py b/tests/trainers/test_training_args.py new file mode 100644 index 0000000..cafb230 --- /dev/null +++ b/tests/trainers/test_training_args.py @@ -0,0 +1,47 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.trainers.default_config import DEFAULT_CONFIG +from modelscope.trainers.training_args import CliArgumentParser, TrainingArgs +from modelscope.utils.test_utils import test_level + + +class TrainingArgsTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def tearDown(self): + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_define_args(self): + myparser = CliArgumentParser(TrainingArgs()) + input_args = [ + '--max_epochs', '100', '--work_dir', 'ddddd', + '--per_device_train_batch_size', '8', '--unkown', 'unkown' + ] + args, remainning = myparser.parse_known_args(input_args) + myparser.print_help() + self.assertTrue(args.max_epochs == 100) + self.assertTrue(args.work_dir == 'ddddd') + self.assertTrue(args.per_device_train_batch_size == 8) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_flatten_args(self): + cfg = DEFAULT_CONFIG + input_args = [ + '--optimizer_params', + 'weight_decay=0.8,eps=1e-6,correct_bias=False', + '--lr_scheduler_params', 'initial_lr=3e-5,niter_decay=1' + ] + training_args = TrainingArgs.from_cli(input_args) + cfg = training_args(cfg) + self.assertAlmostEqual(cfg.train.optimizer.weight_decay, 0.8) + self.assertAlmostEqual(cfg.train.optimizer.eps, 1e-6) + self.assertFalse(cfg.train.optimizer.correct_bias) + self.assertAlmostEqual(cfg.train.lr_scheduler.initial_lr, 3e-5) + self.assertEqual(cfg.train.lr_scheduler.niter_decay, 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_translation_trainer.py b/tests/trainers/test_translation_trainer.py new file mode 100644 index 0000000..52800bd --- /dev/null +++ b/tests/trainers/test_translation_trainer.py @@ -0,0 +1,29 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from modelscope.trainers.nlp import CsanmtTranslationTrainer +from modelscope.utils.test_utils import test_level + + +class TranslationTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name_for_en2zh(self): + model_id = 'damo/nlp_csanmt_translation_en2zh' + trainer = CsanmtTranslationTrainer(model=model_id) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name_for_en2fr(self): + model_id = 'damo/nlp_csanmt_translation_en2fr' + trainer = CsanmtTranslationTrainer(model=model_id) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name_for_en2es(self): + model_id = 'damo/nlp_csanmt_translation_en2es' + trainer = CsanmtTranslationTrainer(model=model_id) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_video_summarization_trainer.py b/tests/trainers/test_video_summarization_trainer.py new file mode 100644 index 0000000..f48215c --- /dev/null +++ b/tests/trainers/test_video_summarization_trainer.py @@ -0,0 +1,73 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models.cv.video_summarization import PGLVideoSummarization +from modelscope.msdatasets.task_datasets import VideoSummarizationDataset +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + +logger = get_logger() + + +class VideoSummarizationTrainerTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + self.model_id = 'damo/cv_googlenet_pgl-video-summarization' + self.cache_path = snapshot_download(self.model_id) + self.config = Config.from_file( + os.path.join(self.cache_path, ModelFile.CONFIGURATION)) + self.dataset_train = VideoSummarizationDataset('train', + self.config.dataset, + self.cache_path) + self.dataset_val = VideoSummarizationDataset('test', + self.config.dataset, + self.cache_path) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer(self): + kwargs = dict(model=self.model_id, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_trainer_with_model_and_args(self): + model = PGLVideoSummarization.from_pretrained(self.cache_path) + kwargs = dict(cfg_file=os.path.join(self.cache_path, + ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, + max_epochs=2, + work_dir=self.tmp_dir) + trainer = build_trainer(default_args=kwargs) + trainer.train() + results_files = 
os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + for i in range(2): + self.assertIn(f'epoch_{i+1}.pth', results_files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/utils/__init__.py b/tests/trainers/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/trainers/utils/test_inference.py b/tests/trainers/utils/test_inference.py new file mode 100644 index 0000000..7744b32 --- /dev/null +++ b/tests/trainers/utils/test_inference.py @@ -0,0 +1,121 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import unittest + +import torch +from torch import nn +from torch.utils.data import DataLoader + +from modelscope.metrics.builder import MetricKeys +from modelscope.metrics.sequence_classification_metric import \ + SequenceClassificationMetric +from modelscope.models.base import Model +from modelscope.trainers import EpochBasedTrainer +from modelscope.trainers.utils.inference import multi_gpu_test, single_gpu_test +from modelscope.utils.test_utils import (DistributedTestCase, + create_dummy_test_dataset, test_level) +from modelscope.utils.torch_utils import get_dist_info, init_dist + +dummy_dataset = create_dummy_test_dataset(torch.rand((5, )), + torch.randint(0, 4, (1, )), 20) + + +class DummyModel(nn.Module, Model): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 4) + self.bn = nn.BatchNorm1d(4) + + def forward(self, feat, labels): + x = self.linear(feat) + + x = self.bn(x) + loss = torch.sum(x) + return dict(logits=x, loss=loss) + + +class DummyTrainer(EpochBasedTrainer): + def __init__(self, model): + self.model = model + + +def test_func(dist=False): + dummy_model = DummyModel() + dataset = dummy_dataset.to_torch_dataset() + + dummy_loader = DataLoader( + dataset, + batch_size=2, + ) + + metric_class = SequenceClassificationMetric() + + if dist: + init_dist(launcher='pytorch') + + rank, world_size = get_dist_info() + device = torch.device(f'cuda:{rank}') + dummy_model.cuda() + + if world_size > 1: + from torch.nn.parallel.distributed import DistributedDataParallel + dummy_model = DistributedDataParallel( + dummy_model, device_ids=[torch.cuda.current_device()]) + test_func = multi_gpu_test + else: + test_func = single_gpu_test + + dummy_trainer = DummyTrainer(dummy_model) + + metric_results = test_func(dummy_trainer, + dummy_loader, + device=device, + metric_classes=[metric_class]) + + return metric_results + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class SingleGpuTestTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_single_gpu_test(self): + metric_results = test_func() + self.assertIn(MetricKeys.ACCURACY, metric_results) + + +@unittest.skipIf(not torch.cuda.is_available() + or torch.cuda.device_count() <= 1, 'distributed unittest') +class MultiGpuTestTest(DistributedTestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) 
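# --- Editorial note: illustrative sketch only; not part of this diff. ---
# test_func above hands the model, a DataLoader and metric objects to
# single_gpu_test / multi_gpu_test; those helpers are assumed to follow the
# usual evaluation loop, roughly as sketched here (all names are hypothetical
# stand-ins for the real implementations):
#
#   model.eval()
#   with torch.no_grad():
#       for batch in data_loader:
#           batch = {k: v.to(device) for k, v in batch.items()}
#           outputs = model(**batch)
#           for metric in metric_classes:
#               metric.add(outputs, batch)     # accumulate per-batch results
#   results = {}
#   for metric in metric_classes:
#       results.update(metric.evaluate())      # e.g. {'accuracy': ...}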
+ + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpu_test(self): + self.start( + test_func, + num_gpus=2, + assert_callback=lambda x: self.assertIn(MetricKeys.ACCURACY, x), + dist=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 0000000..f1a5003 --- /dev/null +++ b/tests/utils/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from .profiler import * # noqa F403 diff --git a/tests/utils/case_file_analyzer.py b/tests/utils/case_file_analyzer.py new file mode 100644 index 0000000..a5100e7 --- /dev/null +++ b/tests/utils/case_file_analyzer.py @@ -0,0 +1,414 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from __future__ import print_function + +import ast +import os +from typing import Any + +from modelscope.utils.logger import get_logger + +logger = get_logger() +SYSTEM_TRAINER_BUILDER_FINCTION_NAME = 'build_trainer' +SYSTEM_TRAINER_BUILDER_PARAMETER_NAME = 'name' +SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME = 'pipeline' +SYSTEM_PIPELINE_BUILDER_PARAMETER_NAME = 'task' + + +class AnalysisTestFile(ast.NodeVisitor): + """Analysis test suite files. + Get global function and test class + + Args: + ast (NodeVisitor): The ast node. + Examples: + >>> with open(test_suite_file, "rb") as f: + >>> src = f.read() + >>> analyzer = AnalysisTestFile(test_suite_file) + >>> analyzer.visit(ast.parse(src, filename=test_suite_file)) + """ + def __init__(self, test_suite_file, builder_function_name) -> None: + super().__init__() + self.test_classes = [] + self.builder_function_name = builder_function_name + self.global_functions = [] + self.custom_global_builders = [ + ] # global trainer builder method(call build_trainer) + self.custom_global_builder_calls = [] # the builder call statement + + def visit_ClassDef(self, node) -> bool: + """Check if the class is a unittest suite. + + Args: + node (ast.Node): the ast node + + Returns: True if is a test class. 
+ """ + for base in node.bases: + if isinstance(base, ast.Attribute) and base.attr == 'TestCase': + self.test_classes.append(node) + elif isinstance(base, ast.Name) and 'TestCase' in base.id: + self.test_classes.append(node) + + def visit_FunctionDef(self, node: ast.FunctionDef): + self.global_functions.append(node) + for statement in ast.walk(node): + if isinstance(statement, ast.Call) and \ + isinstance(statement.func, ast.Name): + if statement.func.id == self.builder_function_name: + self.custom_global_builders.append(node) + self.custom_global_builder_calls.append(statement) + + +class AnalysisTestClass(ast.NodeVisitor): + def __init__(self, test_class_node, builder_function_name) -> None: + super().__init__() + self.test_class_node = test_class_node + self.builder_function_name = builder_function_name + self.setup_variables = {} + self.test_methods = [] + self.custom_class_method_builders = [ + ] # class method trainer builder(call build_trainer) + self.custom_class_method_builder_calls = [ + ] # the builder call statement + + def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: + if node.name.startswith('setUp'): + for statement in node.body: + if isinstance(statement, ast.Assign): + if len(statement.targets) == 1 and \ + isinstance(statement.targets[0], ast.Attribute) and \ + isinstance(statement.value, ast.Attribute): + self.setup_variables[str( + statement.targets[0].attr)] = str( + statement.value.attr) + elif node.name.startswith('test_'): + self.test_methods.append(node) + else: + for statement in ast.walk(node): + if isinstance(statement, ast.Call) and \ + isinstance(statement.func, ast.Name): + if statement.func.id == self.builder_function_name: + self.custom_class_method_builders.append(node) + self.custom_class_method_builder_calls.append( + statement) + + +def get_local_arg_value(target_method, args_name): + for statement in target_method.body: + if isinstance(statement, ast.Assign): + for target in statement.targets: + if isinstance(target, ast.Name) and target.id == args_name: + if isinstance(statement.value, ast.Attribute): + return statement.value.attr + elif isinstance(statement.value, ast.Str): + return statement.value.s + return None + + +def get_custom_builder_parameter_name(args, keywords, builder, builder_call, + builder_arg_name): + # get build_trainer call name argument name. 
+ arg_name = None + if len(builder_call.args) > 0: + if isinstance(builder_call.args[0], ast.Name): + # build_trainer name is a variable + arg_name = builder_call.args[0].id + elif isinstance(builder_call.args[0], ast.Attribute): + # Attribute access, such as Trainers.image_classification_team + return builder_call.args[0].attr + else: + raise Exception('Invalid argument name') + else: + use_default_name = True + for kw in builder_call.keywords: + if kw.arg == builder_arg_name: + use_default_name = False + if isinstance(kw.value, ast.Attribute): + return kw.value.attr + elif isinstance(kw.value, + ast.Name) and kw.arg == builder_arg_name: + arg_name = kw.value.id + else: + raise Exception('Invalid keyword argument') + if use_default_name: + return 'default' + + if arg_name is None: + raise Exception('Invalid build_trainer call') + + arg_value = get_local_arg_value(builder, arg_name) + if arg_value is not None: # trainer_name is a local variable + return arg_value + # get build_trainer name parameter, if it's passed + default_name = None + arg_idx = 100000 + for idx, arg in enumerate(builder.args.args): + if arg.arg == arg_name: + arg_idx = idx + if idx >= len(builder.args.args) - len(builder.args.defaults): + default_name = builder.args.defaults[idx - ( + len(builder.args.args) - len(builder.args.defaults))].attr + break + if len(builder.args.args + ) > 0 and builder.args.args[0].arg == 'self': # class method + if len(args) > arg_idx - 1: # - self + if isinstance(args[arg_idx - 1], ast.Attribute): + return args[arg_idx - 1].attr + + for keyword in keywords: + if keyword.arg == arg_name: + if isinstance(keyword.value, ast.Attribute): + return keyword.value.attr + + return default_name + + +def get_system_builder_parameter_value(builder_call, test_method, + setup_attributes, + builder_parameter_name): + if len(builder_call.args) > 0: + if isinstance(builder_call.args[0], ast.Name): + return get_local_arg_value(test_method, builder_call.args[0].id) + elif isinstance(builder_call.args[0], ast.Attribute): + if builder_call.args[0].attr in setup_attributes: + return setup_attributes[builder_call.args[0].attr] + return builder_call.args[0].attr + elif isinstance(builder_call.args[0], ast.Str): # TODO check py38 + return builder_call.args[0].s + + for kw in builder_call.keywords: + if kw.arg == builder_parameter_name: + if isinstance(kw.value, ast.Attribute): + if kw.value.attr in setup_attributes: + return setup_attributes[kw.value.attr] + else: + return kw.value.attr + elif isinstance(kw.value, + ast.Name) and kw.arg == builder_parameter_name: + return kw.value.id + + return 'default' # use build_trainer default argument. 
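# --- Editorial note: a minimal, self-contained sketch; not part of this diff.
# It shows the basic AST pattern the helpers in this module build on: walk a
# parsed test file, find calls to build_trainer, and read the trainer name from
# the first positional argument when it is an attribute access such as
# Trainers.default. Only the stdlib `ast` module is used; the sample source and
# helper names below are illustrative.
import ast  # already imported at the top of this module; repeated so the sketch stands alone

_DEMO_TEST_SOURCE = """
from modelscope.trainers import build_trainer
trainer = build_trainer(Trainers.tinynas_damoyolo, default_args={})
"""


def _demo_find_trainer_names(source):
    names = []
    for node in ast.walk(ast.parse(source)):
        # A call whose callee is the bare name `build_trainer`.
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) \
                and node.func.id == 'build_trainer':
            if node.args and isinstance(node.args[0], ast.Attribute):
                # e.g. Trainers.tinynas_damoyolo -> 'tinynas_damoyolo'
                names.append(node.args[0].attr)
    return names

# _demo_find_trainer_names(_DEMO_TEST_SOURCE) == ['tinynas_damoyolo']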
+ + +def get_builder_parameter_value(test_method, setup_variables, builder, + builder_call, system_builder_func_name, + builder_parameter_name): + """ + get target builder parameter name, for tariner we get trainer name, for pipeline we get pipeline task + """ + for node in ast.walk(test_method): + if builder is None: # direct call build_trainer + for node in ast.walk(test_method): + if (isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == system_builder_func_name): + return get_system_builder_parameter_value( + node, test_method, setup_variables, + builder_parameter_name) + elif (isinstance(node, ast.Call) + and isinstance(node.func, ast.Attribute) + and node.func.attr == builder.name): + return get_custom_builder_parameter_name(node.args, node.keywords, + builder, builder_call, + builder_parameter_name) + elif (isinstance(node, ast.Expr) and isinstance(node.value, ast.Call) + and isinstance(node.value.func, ast.Name) + and node.value.func.id == builder.name): + return get_custom_builder_parameter_name(node.value.args, + node.value.keywords, + builder, builder_call, + builder_parameter_name) + elif (isinstance(node, ast.Expr) and isinstance(node.value, ast.Call) + and isinstance(node.value.func, ast.Attribute) + and node.value.func.attr == builder.name): + # self.class_method_builder + return get_custom_builder_parameter_name(node.value.args, + node.value.keywords, + builder, builder_call, + builder_parameter_name) + elif isinstance(node, ast.Expr) and isinstance(node.value, ast.Call): + for arg in node.value.args: + if isinstance(arg, ast.Name) and arg.id == builder.name: + # self.start(train_func, num_gpus=2, **kwargs) + return get_custom_builder_parameter_name( + None, None, builder, builder_call, + builder_parameter_name) + + return None + + +def get_class_constructor(test_method, modified_register_modules, module_name): + # module_name 'TRAINERS' | 'PIPELINES' + for node in ast.walk(test_method): + if isinstance(node, ast.Assign) and isinstance(node.value, ast.Call): + # trainer = CsanmtTranslationTrainer(model=model_id) + for modified_register_module in modified_register_modules: + if isinstance(node.value.func, ast.Name) and \ + node.value.func.id == modified_register_module[3] and \ + modified_register_module[0] == module_name: + if module_name == 'TRAINERS': + return modified_register_module[2] + elif module_name == 'PIPELINES': + return modified_register_module[1] # pipeline + + return None + + +def analysis_trainer_test_suite(test_file, modified_register_modules): + tested_trainers = [] + with open(test_file, 'rb') as tsf: + src = tsf.read() + # get test file global function and test class + test_suite_root = ast.parse(src, test_file) + test_suite_analyzer = AnalysisTestFile( + test_file, SYSTEM_TRAINER_BUILDER_FINCTION_NAME) + test_suite_analyzer.visit(test_suite_root) + + for test_class in test_suite_analyzer.test_classes: + test_class_analyzer = AnalysisTestClass( + test_class, SYSTEM_TRAINER_BUILDER_FINCTION_NAME) + test_class_analyzer.visit(test_class) + for test_method in test_class_analyzer.test_methods: + for idx, custom_global_builder in enumerate( + test_suite_analyzer.custom_global_builders + ): # custom test method is global method + trainer_name = get_builder_parameter_value( + test_method, test_class_analyzer.setup_variables, + custom_global_builder, + test_suite_analyzer.custom_global_builder_calls[idx], + SYSTEM_TRAINER_BUILDER_FINCTION_NAME, + SYSTEM_TRAINER_BUILDER_PARAMETER_NAME) + if trainer_name is not None: + 
tested_trainers.append(trainer_name) + for idx, custom_class_method_builder in enumerate( + test_class_analyzer.custom_class_method_builders + ): # custom class method builder. + trainer_name = get_builder_parameter_value( + test_method, test_class_analyzer.setup_variables, + custom_class_method_builder, + test_class_analyzer.custom_class_method_builder_calls[idx], + SYSTEM_TRAINER_BUILDER_FINCTION_NAME, + SYSTEM_TRAINER_BUILDER_PARAMETER_NAME) + if trainer_name is not None: + tested_trainers.append(trainer_name) + + trainer_name = get_builder_parameter_value( + test_method, test_class_analyzer.setup_variables, None, None, + SYSTEM_TRAINER_BUILDER_FINCTION_NAME, + SYSTEM_TRAINER_BUILDER_PARAMETER_NAME + ) # direct call the build_trainer + if trainer_name is not None: + tested_trainers.append(trainer_name) + + if len(tested_trainers + ) == 0: # suppose no builder call is direct construct. + trainer_name = get_class_constructor( + test_method, modified_register_modules, 'TRAINERS') + if trainer_name is not None: + tested_trainers.append(trainer_name) + + return tested_trainers + + +def analysis_pipeline_test_suite(test_file, modified_register_modules): + tested_tasks = [] + with open(test_file, 'rb') as tsf: + src = tsf.read() + # get test file global function and test class + test_suite_root = ast.parse(src, test_file) + test_suite_analyzer = AnalysisTestFile( + test_file, SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME) + test_suite_analyzer.visit(test_suite_root) + + for test_class in test_suite_analyzer.test_classes: + test_class_analyzer = AnalysisTestClass( + test_class, SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME) + test_class_analyzer.visit(test_class) + for test_method in test_class_analyzer.test_methods: + for idx, custom_global_builder in enumerate( + test_suite_analyzer.custom_global_builders + ): # custom test method is global method + task_name = get_builder_parameter_value( + test_method, test_class_analyzer.setup_variables, + custom_global_builder, + test_suite_analyzer.custom_global_builder_calls[idx], + SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME, + SYSTEM_PIPELINE_BUILDER_PARAMETER_NAME) + if task_name is not None: + tested_tasks.append(task_name) + for idx, custom_class_method_builder in enumerate( + test_class_analyzer.custom_class_method_builders + ): # custom class method builder. + task_name = get_builder_parameter_value( + test_method, test_class_analyzer.setup_variables, + custom_class_method_builder, + test_class_analyzer.custom_class_method_builder_calls[idx], + SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME, + SYSTEM_PIPELINE_BUILDER_PARAMETER_NAME) + if task_name is not None: + tested_tasks.append(task_name) + + task_name = get_builder_parameter_value( + test_method, test_class_analyzer.setup_variables, None, None, + SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME, + SYSTEM_PIPELINE_BUILDER_PARAMETER_NAME + ) # direct call the build_trainer + if task_name is not None: + tested_tasks.append(task_name) + + if len(tested_tasks + ) == 0: # suppose no builder call is direct construct. 
+ task_name = get_class_constructor(test_method, + modified_register_modules, + 'PIPELINES') + if task_name is not None: + tested_tasks.append(task_name) + + return tested_tasks + + +def get_pipelines_trainers_test_info(register_modules): + all_trainer_cases = [ + os.path.join(dp, f) for dp, dn, filenames in os.walk( + os.path.join(os.getcwd(), 'tests', 'trainers')) for f in filenames + if os.path.splitext(f)[1] == '.py' + ] + trainer_test_info = {} + for test_file in all_trainer_cases: + tested_trainers = analysis_trainer_test_suite(test_file, + register_modules) + if len(tested_trainers) == 0: + logger.warn('test_suite: %s has no trainer name' % test_file) + else: + tested_trainers = list(set(tested_trainers)) + for trainer_name in tested_trainers: + if trainer_name not in trainer_test_info: + trainer_test_info[trainer_name] = [] + trainer_test_info[trainer_name].append(test_file) + + pipeline_test_info = {} + all_pipeline_cases = [ + os.path.join(dp, f) for dp, dn, filenames in os.walk( + os.path.join(os.getcwd(), 'tests', 'pipelines')) for f in filenames + if os.path.splitext(f)[1] == '.py' + ] + for test_file in all_pipeline_cases: + tested_pipelines = analysis_pipeline_test_suite( + test_file, register_modules) + if len(tested_pipelines) == 0: + logger.warn('test_suite: %s has no pipeline task' % test_file) + else: + tested_pipelines = list(set(tested_pipelines)) + for pipeline_task in tested_pipelines: + if pipeline_task not in pipeline_test_info: + pipeline_test_info[pipeline_task] = [] + pipeline_test_info[pipeline_task].append(test_file) + return pipeline_test_info, trainer_test_info + + +if __name__ == '__main__': + test_file = 'tests/pipelines/test_action_detection.py' + tasks = analysis_pipeline_test_suite(test_file, None) + + print(tasks) diff --git a/tests/utils/plugins/.modelscope_plugins b/tests/utils/plugins/.modelscope_plugins new file mode 100644 index 0000000..421376d --- /dev/null +++ b/tests/utils/plugins/.modelscope_plugins @@ -0,0 +1 @@ +dummy diff --git a/tests/utils/plugins/dummy/__init__.py b/tests/utils/plugins/dummy/__init__.py new file mode 100644 index 0000000..a0d8600 --- /dev/null +++ b/tests/utils/plugins/dummy/__init__.py @@ -0,0 +1 @@ +import dummy.dummy_model diff --git a/tests/utils/plugins/dummy/dummy_model.py b/tests/utils/plugins/dummy/dummy_model.py new file mode 100644 index 0000000..8a89c12 --- /dev/null +++ b/tests/utils/plugins/dummy/dummy_model.py @@ -0,0 +1,8 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.models.base import Model +from modelscope.models.builder import MODELS + + +@MODELS.register_module(group_key='dummy-group', module_name='dummy-model') +class DummyModel(Model): + pass diff --git a/tests/utils/profiler.py b/tests/utils/profiler.py new file mode 100644 index 0000000..e843e1b --- /dev/null +++ b/tests/utils/profiler.py @@ -0,0 +1,58 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
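# --- Editorial note: illustrative sketch only; not part of this diff. ---
# The dummy plugin files above work purely by import side effect: the
# .modelscope_plugins file names the `dummy` package, its __init__.py imports
# dummy.dummy_model, and that import runs the @MODELS.register_module(...)
# decorator, recording DummyModel under ('dummy-group', 'dummy-model'). A
# minimal stand-in registry that mimics this behaviour (hypothetical names,
# not ModelScope's actual API) could look like:
#
#   class MiniRegistry:
#       def __init__(self):
#           self._modules = {}
#
#       def register_module(self, group_key='default', module_name=None):
#           def _decorator(cls):
#               self._modules[(group_key, module_name or cls.__name__)] = cls
#               return cls
#           return _decorator
#
#   MODELS_DEMO = MiniRegistry()
#
#   @MODELS_DEMO.register_module(group_key='dummy-group', module_name='dummy-model')
#   class DemoPluginModel:
#       pass
#
#   # MODELS_DEMO._modules[('dummy-group', 'dummy-model')] is DemoPluginModel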
+ +import importlib +import sys +from functools import wraps +from typing import Any, Callable, Dict, Tuple, Type + + +def reraise(tp, value, tb): + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None + + +class Profiler: + def __init__(self) -> None: + import cProfile + self.pr = cProfile.Profile() + + def __enter__(self): + self.pr.enable() + + def __exit__(self, tp, exc, tb): + self.pr.disable() + if tp is not None: + reraise(tp, exc, tb) + + import pstats + ps = pstats.Stats(self.pr, stream=sys.stderr).sort_stats('tottime') + ps.print_stats(20) + + +def wrapper(tp: Type[Profiler]) -> Callable[[], Callable[..., Any]]: + def _inner(func: Callable[..., Any]) -> Callable[..., Any]: + @wraps(func) + def executor(*args: Tuple[Any, ...], **kwargs: Dict[str, Any]) -> Any: + with tp(): + return func(*args, **kwargs) + + return executor + + return _inner + + +PIPELINE_BASE_MODULE = 'modelscope.pipelines.base' +PIPELINE_BASE_CLASS = 'Pipeline' + + +def enable(): + base = importlib.import_module(PIPELINE_BASE_MODULE) + Pipeline = getattr(base, PIPELINE_BASE_CLASS) + Pipeline.__call__ = wrapper(Profiler)(Pipeline.__call__) diff --git a/tests/utils/source_file_analyzer.py b/tests/utils/source_file_analyzer.py new file mode 100644 index 0000000..78c6fe5 --- /dev/null +++ b/tests/utils/source_file_analyzer.py @@ -0,0 +1,292 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from __future__ import print_function + +import ast +import importlib.util +import os +import pkgutil +import site +import sys + +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def is_relative_import(path): + # from .x import y or from ..x import y + return path.startswith('.') + + +def resolve_import(module_name): + try: + spec = importlib.util.find_spec(module_name) + return spec and spec.origin + except Exception: + return None + + +def convert_to_path(name): + if name.startswith('.'): + remainder = name.lstrip('.') + dot_count = (len(name) - len(remainder)) + prefix = '../' * (dot_count - 1) + else: + remainder = name + dot_count = 0 + prefix = '' + filename = prefix + os.path.join(*remainder.split('.')) + return filename + + +def resolve_relative_import(source_file_path, module_name): + current_package = os.path.dirname(source_file_path).replace('/', '.') + absolute_name = importlib.util.resolve_name(module_name, + current_package) # get + return resolve_absolute_import(absolute_name) + + +def onerror(name): + logger.error('Importing module %s error!' % name) + + +def resolve_absolute_import(module_name): + module_file_path = resolve_import(module_name) + if module_file_path is None: + # find from base module. + parent_module, sub_module = module_name.rsplit('.', 1) + if parent_module in sys.modules: + if hasattr(sys.modules[parent_module], '_import_structure'): + import_structure = sys.modules[parent_module]._import_structure + for k, v in import_structure.items(): + if sub_module in v: + parent_module = parent_module + '.' 
+ k + break + module_file_path = resolve_absolute_import(parent_module) + # the parent_module is a package, we need find the module_name's file + if os.path.basename(module_file_path) == '__init__.py' and \ + (os.path.relpath(module_file_path, site.getsitepackages()[0]) != 'modelscope/__init__.py' + or os.path.relpath(module_file_path, os.getcwd()) != 'modelscope/__init__.py'): + for _, sub_module_name, _ in pkgutil.walk_packages( + [os.path.dirname(module_file_path)], + parent_module + '.', + onerror=onerror): + try: + module_ = importlib.import_module(sub_module_name) + for k, v in module_.__dict__.items(): + if k == sub_module and v.__module__ == module_.__name__: + module_file_path = module_.__file__ + break + except ModuleNotFoundError as e: + logger.warn( + 'Import error in %s, ModuleNotFoundError: %s' % + (sub_module_name, e)) + continue + except Exception as e: + logger.warn('Import error in %s, Exception: %s' % + (sub_module_name, e)) + continue + else: + return module_file_path + else: + module_file_path = resolve_absolute_import(parent_module) + return module_file_path + + +class AnalysisSourceFileImports(ast.NodeVisitor): + """Analysis source file imports + List imports of the modelscope. + """ + def __init__(self, source_file_path) -> None: + super().__init__() + self.imports = [] + self.source_file_path = source_file_path + + def visit_Import(self, node): + """Processing import x,y,z or import os.path as osp""" + for alias in node.names: + if alias.name.startswith('modelscope'): + file_path = resolve_absolute_import(alias.name) + if file_path.startswith(site.getsitepackages()[0]): + self.imports.append( + os.path.relpath(file_path, + site.getsitepackages()[0])) + else: + self.imports.append(os.path.relpath( + file_path, os.getcwd())) + + def visit_ImportFrom(self, node): + # level 0 absolute import such as from os.path import join + # level 1 from .x import y + # level 2 from ..x import y + module_name = '.' * node.level + (node.module or '') + for alias in node.names: + if alias.name == '*': # from x import * + if is_relative_import(module_name): + # resolve model path. + file_path = resolve_relative_import( + self.source_file_path, module_name) + elif module_name.startswith('modelscope'): + file_path = resolve_absolute_import(module_name) + else: + file_path = None # ignore other package. + else: + if not module_name.endswith('.'): + module_name = module_name + '.' + name = module_name + alias.name + if is_relative_import(name): + # resolve model path. + file_path = resolve_relative_import( + self.source_file_path, name) + elif name.startswith('modelscope'): + file_path = resolve_absolute_import(name) + else: + file_path = None # ignore other package. + + if file_path is not None: + if file_path.startswith(site.getsitepackages()[0]): + self.imports.append( + os.path.relpath(file_path, + site.getsitepackages()[0])) + else: + self.imports.append(os.path.relpath( + file_path, os.getcwd())) + + +class AnalysisSourceFileRegisterModules(ast.NodeVisitor): + """Get register_module call of the python source file. + + + Args: + ast (NodeVisitor): The ast node. 
+ + Examples: + >>> with open(source_file_path, "rb") as f: + >>> src = f.read() + >>> analyzer = AnalysisSourceFileRegisterModules(source_file_path) + >>> analyzer.visit(ast.parse(src, filename=source_file_path)) + """ + def __init__(self, source_file_path) -> None: + super().__init__() + self.source_file_path = source_file_path + self.register_modules = [] + + def visit_ClassDef(self, node: ast.ClassDef): + if len(node.decorator_list) > 0: + for dec in node.decorator_list: + if isinstance(dec, ast.Call): + target_name = '' + module_name_param = '' + task_param = '' + if isinstance(dec.func, ast.Attribute + ) and dec.func.attr == 'register_module': + target_name = dec.func.value.id # MODELS + if len(dec.args) > 0: + if isinstance(dec.args[0], ast.Attribute): + task_param = dec.args[0].attr + elif isinstance(dec.args[0], ast.Constant): + task_param = dec.args[0].value + if len(dec.keywords) > 0: + for kw in dec.keywords: + if kw.arg == 'module_name': + if isinstance(kw.value, ast.Str): + module_name_param = kw.value.s + else: + module_name_param = kw.value.attr + elif kw.arg == 'group_key': + if isinstance(kw.value, ast.Str): + task_param = kw.value.s + elif isinstance(kw.value, ast.Name): + task_param = kw.value.id + else: + task_param = kw.value.attr + if task_param == '' and module_name_param == '': + logger.warn( + 'File %s %s.register_module has no parameters' + % (self.source_file_path, target_name)) + continue + if target_name == 'PIPELINES' and task_param == '': + logger.warn( + 'File %s %s.register_module has no task_param' + % (self.source_file_path, target_name)) + self.register_modules.append( + (target_name, task_param, module_name_param, + node.name)) # PIPELINES, task, module, class_name + + +def get_imported_files(file_path): + """Get file dependencies. 
+    """
+    if os.path.isabs(file_path):
+        file_path = os.path.relpath(file_path, os.getcwd())
+    with open(file_path, 'rb') as f:
+        src = f.read()
+    analyzer = AnalysisSourceFileImports(file_path)
+    analyzer.visit(ast.parse(src, filename=file_path))
+    return list(set(analyzer.imports))
+
+
+def path_to_module_name(file_path):
+    if os.path.isabs(file_path):
+        file_path = os.path.relpath(file_path, os.getcwd())
+    module_name = os.path.dirname(file_path).replace('/', '.')
+    return module_name
+
+
+def get_file_register_modules(file_path):
+    logger.info('Scanning file %s for register_module calls' % file_path)
+    with open(file_path, 'rb') as f:
+        src = f.read()
+    analyzer = AnalysisSourceFileRegisterModules(file_path)
+    analyzer.visit(ast.parse(src, filename=file_path))
+    return analyzer.register_modules
+
+
+def get_import_map():
+    all_files = [
+        os.path.join(dp, f) for dp, dn, filenames in os.walk(
+            os.path.join(os.getcwd(), 'modelscope')) for f in filenames
+        if os.path.splitext(f)[1] == '.py'
+    ]
+    import_map = {}
+    for f in all_files:
+        files = get_imported_files(f)
+        import_map[os.path.relpath(f, os.getcwd())] = files
+
+    return import_map
+
+
+def get_reverse_import_map():
+    all_files = [
+        os.path.join(dp, f) for dp, dn, filenames in os.walk(
+            os.path.join(os.getcwd(), 'modelscope')) for f in filenames
+        if os.path.splitext(f)[1] == '.py'
+    ]
+    import_map = get_import_map()
+
+    reverse_depend_map = {}
+    for f in all_files:
+        # import_map stores repo-relative paths; compare relative forms
+        rel_f = os.path.relpath(f, os.getcwd())
+        depend_by = []
+        for k, v in import_map.items():
+            if rel_f in v and rel_f != k:
+                depend_by.append(k)
+        reverse_depend_map[f] = depend_by
+
+    return reverse_depend_map, import_map
+
+
+def get_all_register_modules():
+    all_files = [
+        os.path.join(dp, f) for dp, dn, filenames in os.walk(
+            os.path.join(os.getcwd(), 'modelscope')) for f in filenames
+        if os.path.splitext(f)[1] == '.py'
+    ]
+    all_register_modules = []
+    for f in all_files:
+        all_register_modules.extend(get_file_register_modules(f))
+    return all_register_modules
+
+
+if __name__ == '__main__':
+    pass
diff --git a/tests/utils/test_ast.py b/tests/utils/test_ast.py
new file mode 100644
index 0000000..84cf8a8
--- /dev/null
+++ b/tests/utils/test_ast.py
@@ -0,0 +1,201 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
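+# Shape of the index these tests assert on (a sketch inferred from the
+# assertions below, not from the modelscope.utils.ast_utils implementation):
+#   output[INDEX_KEY]       -> dict keyed by tuples (apparently the decorator
+#                              triples listed below), each value a dict with an
+#                              'imports' list and a 'module' string
+#   output[REQUIREMENT_KEY] -> dict whose values are lists of requirements
+#   output[MD5_KEY], output[VERSION_KEY], output[MODELSCOPE_PATH_KEY] -> str
+#   output[FILES_MTIME_KEY] -> dict mapping file path to modification time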
+ +import os +import shutil +import tempfile +import time +import unittest +from pathlib import Path + +from modelscope.utils.ast_utils import (FILES_MTIME_KEY, INDEX_KEY, MD5_KEY, + MODELSCOPE_PATH_KEY, REQUIREMENT_KEY, + VERSION_KEY, AstScanning, + FilesAstScanning, + generate_ast_template, + load_from_prebuilt, load_index) + +p = Path(__file__) + +MODELSCOPE_PATH = p.resolve().parents[2].joinpath('modelscope') + + +class AstScaningTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + self.test_file = os.path.join(self.tmp_dir, 'test.py') + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmp_dir) + + def test_ast_scaning_class(self): + astScaner = AstScanning() + pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp', + 'text_generation_pipeline.py') + output = astScaner.generate_ast(pipeline_file) + self.assertTrue(output['imports'] is not None) + self.assertTrue(output['from_imports'] is not None) + self.assertTrue(output['decorators'] is not None) + imports, from_imports, decorators = output['imports'], output[ + 'from_imports'], output['decorators'] + self.assertIsInstance(imports, dict) + self.assertIsInstance(from_imports, dict) + self.assertIsInstance(decorators, list) + self.assertListEqual(list(set(imports.keys()) - set(['torch', 'os'])), + []) + self.assertEqual(len(from_imports.keys()), 10) + self.assertTrue(from_imports['modelscope.metainfo'] is not None) + self.assertEqual(from_imports['modelscope.metainfo'], ['Pipelines']) + self.assertEqual( + decorators, + [('PIPELINES', 'text-generation', 'text-generation'), + ('PIPELINES', 'text2text-generation', 'translation_en_to_de'), + ('PIPELINES', 'text2text-generation', 'translation_en_to_ro'), + ('PIPELINES', 'text2text-generation', 'translation_en_to_fr'), + ('PIPELINES', 'text2text-generation', 'text2text-generation')]) + + def test_files_scaning_method(self): + fileScaner = FilesAstScanning() + # case of pass in files directly + pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp', + 'text_generation_pipeline.py') + file_list = [pipeline_file] + output = fileScaner.get_files_scan_results(file_list) + self.assertTrue(output[INDEX_KEY] is not None) + self.assertTrue(output[REQUIREMENT_KEY] is not None) + index, requirements = output[INDEX_KEY], output[REQUIREMENT_KEY] + self.assertIsInstance(index, dict) + self.assertIsInstance(requirements, dict) + self.assertIsInstance(list(index.keys())[0], tuple) + index_0 = list(index.keys())[0] + self.assertIsInstance(index[index_0], dict) + self.assertTrue(index[index_0]['imports'] is not None) + self.assertIsInstance(index[index_0]['imports'], list) + self.assertTrue(index[index_0]['module'] is not None) + self.assertIsInstance(index[index_0]['module'], str) + index_0 = list(requirements.keys())[0] + self.assertIsInstance(requirements[index_0], list) + + def test_file_mtime_md5_method(self): + fileScaner = FilesAstScanning() + # create first file + with open(self.test_file, 'w', encoding='utf-8') as f: + f.write('This is the new test!') + + md5_1, mtime_1 = fileScaner.files_mtime_md5(self.tmp_dir, []) + md5_2, mtime_2 = fileScaner.files_mtime_md5(self.tmp_dir, []) + self.assertEqual(md5_1, md5_2) + self.assertEqual(mtime_1, mtime_2) + self.assertIsInstance(mtime_1, dict) + self.assertEqual(list(mtime_1.keys()), [self.test_file]) + 
self.assertEqual(mtime_1[self.test_file], mtime_2[self.test_file]) + + time.sleep(2) + # case of revise + with open(self.test_file, 'w', encoding='utf-8') as f: + f.write('test again') + md5_3, mtime_3 = fileScaner.files_mtime_md5(self.tmp_dir, []) + self.assertNotEqual(md5_1, md5_3) + self.assertNotEqual(mtime_1[self.test_file], mtime_3[self.test_file]) + + # case of create + self.test_file_new = os.path.join(self.tmp_dir, 'test_1.py') + time.sleep(2) + with open(self.test_file_new, 'w', encoding='utf-8') as f: + f.write('test again') + md5_4, mtime_4 = fileScaner.files_mtime_md5(self.tmp_dir, []) + self.assertNotEqual(md5_1, md5_4) + self.assertNotEqual(md5_3, md5_4) + self.assertEqual( + set(mtime_4.keys()) - set([self.test_file, self.test_file_new]), + set()) + + def test_load_index_method(self): + # test full indexing case + output = load_index() + self.assertTrue(output[INDEX_KEY] is not None) + self.assertTrue(output[REQUIREMENT_KEY] is not None) + index, requirements = output[INDEX_KEY], output[REQUIREMENT_KEY] + self.assertIsInstance(index, dict) + self.assertIsInstance(requirements, dict) + self.assertIsInstance(list(index.keys())[0], tuple) + index_0 = list(index.keys())[0] + self.assertIsInstance(index[index_0], dict) + self.assertTrue(index[index_0]['imports'] is not None) + self.assertIsInstance(index[index_0]['imports'], list) + self.assertTrue(index[index_0]['module'] is not None) + self.assertIsInstance(index[index_0]['module'], str) + index_0 = list(requirements.keys())[0] + self.assertIsInstance(requirements[index_0], list) + self.assertIsInstance(output[MD5_KEY], str) + self.assertIsInstance(output[MODELSCOPE_PATH_KEY], str) + self.assertIsInstance(output[VERSION_KEY], str) + self.assertIsInstance(output[FILES_MTIME_KEY], dict) + + # generate ast_template + file_path = os.path.join(self.tmp_dir, 'index_file.py') + index = generate_ast_template(file_path=file_path, force_rebuild=False) + self.assertTrue(os.path.exists(file_path)) + self.assertEqual(output, index) + index_from_prebuilt = load_from_prebuilt(file_path) + self.assertEqual(index, index_from_prebuilt) + + @unittest.skip( + 'skipped the method for not cpu time on this case not stable') + def test_update_load_index_method(self): + file_number = 20 + file_list = [] + for i in range(file_number): + filename = os.path.join(self.tmp_dir, f'test_{i}.py') + with open(filename, 'w', encoding='utf-8') as f: + f.write('import os') + file_list.append(filename) + + index_file = 'ast_indexer_1' + + start = time.time() + index = load_index(file_list=file_list, + indexer_file_dir=self.tmp_dir, + indexer_file=index_file) + duration_1 = time.time() - start + self.assertEqual(len(index[FILES_MTIME_KEY]), file_number) + + # no changing case, time should be less than original + start = time.time() + index = load_index(file_list=file_list, + indexer_file_dir=self.tmp_dir, + indexer_file=index_file) + duration_2 = time.time() - start + self.assertGreater(duration_1, duration_2) + self.assertEqual(len(index[FILES_MTIME_KEY]), file_number) + + # adding new file, time should be less than original + test_file_new_2 = os.path.join(self.tmp_dir, 'test_new.py') + with open(test_file_new_2, 'w', encoding='utf-8') as f: + f.write('import os') + file_list.append(test_file_new_2) + + start = time.time() + index = load_index(file_list=file_list, + indexer_file_dir=self.tmp_dir, + indexer_file=index_file) + duration_3 = time.time() - start + self.assertGreater(duration_1, duration_3) + self.assertEqual(len(index[FILES_MTIME_KEY]), file_number + 
1) + + # deleting one file, time should be less than original + file_list.pop() + start = time.time() + index = load_index(file_list=file_list, + indexer_file_dir=self.tmp_dir, + indexer_file=index_file) + duration_4 = time.time() - start + self.assertGreater(duration_1, duration_4) + self.assertEqual(len(index[FILES_MTIME_KEY]), file_number) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_compatibility.py b/tests/utils/test_compatibility.py new file mode 100644 index 0000000..7884160 --- /dev/null +++ b/tests/utils/test_compatibility.py @@ -0,0 +1,18 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + + +class CompatibilityTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def tearDown(self): + super().tearDown() + + def test_xtcocotools(self): + from xtcocotools.coco import COCO + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py new file mode 100644 index 0000000..e65c384 --- /dev/null +++ b/tests/utils/test_config.py @@ -0,0 +1,226 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import argparse +import copy +import json +import tempfile +import unittest + +from modelscope.utils.config import Config, check_config + +obj = {'a': 1, 'b': {'c': [1, 2, 3], 'd': 'dd'}} + + +class ConfigTest(unittest.TestCase): + def test_json(self): + config_file = 'configs/examples/configuration.json' + cfg = Config.from_file(config_file) + self.assertEqual(cfg.a, 1) + self.assertEqual(cfg.b, obj['b']) + + def test_yaml(self): + config_file = 'configs/examples/configuration.yaml' + cfg = Config.from_file(config_file) + self.assertEqual(cfg.a, 1) + self.assertEqual(cfg.b, obj['b']) + + def test_py(self): + config_file = 'configs/examples/configuration.py' + cfg = Config.from_file(config_file) + self.assertEqual(cfg.a, 1) + self.assertEqual(cfg.b, obj['b']) + + def test_dump(self): + config_file = 'configs/examples/configuration.py' + cfg = Config.from_file(config_file) + self.assertEqual(cfg.a, 1) + self.assertEqual(cfg.b, obj['b']) + pretty_text = 'a = 1\n' + pretty_text += "b = dict(c=[1, 2, 3], d='dd')\n" + + json_str = '{"a": 1, "b": {"c": [1, 2, 3], "d": "dd"}}' + yaml_str = 'a: 1\nb:\n c:\n - 1\n - 2\n - 3\n d: dd\n' + with tempfile.NamedTemporaryFile(suffix='.json') as ofile: + self.assertEqual(pretty_text, cfg.dump()) + cfg.dump(ofile.name) + with open(ofile.name, 'r') as infile: + self.assertDictEqual(json.loads(json_str), + json.loads(infile.read())) + + with tempfile.NamedTemporaryFile(suffix='.yaml') as ofile: + cfg.dump(ofile.name) + with open(ofile.name, 'r') as infile: + self.assertEqual(yaml_str, infile.read()) + + def test_to_dict(self): + config_file = 'configs/examples/configuration.json' + cfg = Config.from_file(config_file) + d = cfg.to_dict() + print(d) + self.assertTrue(isinstance(d, dict)) + + def test_to_args(self): + def parse_fn(args): + parser = argparse.ArgumentParser(prog='PROG') + parser.add_argument('--model-dir', default='') + parser.add_argument('--lr', type=float, default=0.001) + parser.add_argument('--optimizer', default='') + parser.add_argument('--weight-decay', type=float, default=1e-7) + parser.add_argument('--save-checkpoint-epochs', + type=int, + default=30) + return parser.parse_args(args) + + cfg = Config.from_file('configs/examples/plain_args.yaml') + args = cfg.to_args(parse_fn) + + self.assertEqual(args.model_dir, 'path/to/model') + self.assertAlmostEqual(args.lr, 
0.01) + self.assertAlmostEqual(args.weight_decay, 1e-6) + self.assertEqual(args.optimizer, 'Adam') + self.assertEqual(args.save_checkpoint_epochs, 20) + + def test_check_config(self): + check_config('configs/cv/configuration.json') + check_config('configs/nlp/sbert_sentence_similarity.json') + + def test_merge_from_dict(self): + base_cfg = copy.deepcopy(obj) + base_cfg.update({'dict_list': [dict(l1=1), dict(l2=2)]}) + + cfg = Config(base_cfg) + + merge_dict = { + 'a': 2, + 'b.d': 'ee', + 'b.c': [3, 3, 3], + 'dict_list': { + '0': dict(l1=3) + }, + 'c': 'test' + } + + cfg1 = copy.deepcopy(cfg) + cfg1.merge_from_dict(merge_dict) + self.assertDictEqual( + cfg1._cfg_dict, { + 'a': 2, + 'b': { + 'c': [3, 3, 3], + 'd': 'ee' + }, + 'dict_list': [dict(l1=3), dict(l2=2)], + 'c': 'test' + }) + + cfg2 = copy.deepcopy(cfg) + cfg2.merge_from_dict(merge_dict, force=False) + self.assertDictEqual( + cfg2._cfg_dict, { + 'a': 1, + 'b': { + 'c': [1, 2, 3], + 'd': 'dd' + }, + 'dict_list': [dict(l1=1), dict(l2=2)], + 'c': 'test' + }) + + def test_merge_from_dict_with_list(self): + base_cfg = { + 'a': + 1, + 'b': { + 'c': [1, 2, 3], + 'd': 'dd' + }, + 'dict_list': [dict(type='l1', v=1), + dict(type='l2', v=2)], + 'dict_list2': [ + dict(type='l1', + v=[dict(type='l1_1', v=1), + dict(type='l1_2', v=2)]), + dict(type='l2', v=2) + ] + } + cfg = Config(base_cfg) + + merge_dict_for_list = { + 'a': + 2, + 'b.c': [3, 3, 3], + 'b.d': + 'ee', + 'dict_list': [dict(type='l1', v=8), + dict(type='l3', v=8)], + 'dict_list2': [ + dict(type='l1', + v=[ + dict(type='l1_1', v=8), + dict(type='l1_2', v=2), + dict(type='l1_3', v=8), + ]), + dict(type='l2', v=8) + ], + 'c': + 'test' + } + + cfg1 = copy.deepcopy(cfg) + cfg1.merge_from_dict(merge_dict_for_list, force=False) + self.assertDictEqual( + cfg1._cfg_dict, { + 'a': + 1, + 'b': { + 'c': [1, 2, 3], + 'd': 'dd' + }, + 'dict_list': [ + dict(type='l1', v=1), + dict(type='l2', v=2), + dict(type='l3', v=8) + ], + 'dict_list2': [ + dict(type='l1', + v=[ + dict(type='l1_1', v=1), + dict(type='l1_2', v=2), + dict(type='l1_3', v=8), + ]), + dict(type='l2', v=2) + ], + 'c': + 'test' + }) + + cfg2 = copy.deepcopy(cfg) + cfg2.merge_from_dict(merge_dict_for_list, force=True) + self.assertDictEqual( + cfg2._cfg_dict, { + 'a': + 2, + 'b': { + 'c': [3, 3, 3], + 'd': 'ee' + }, + 'dict_list': [ + dict(type='l1', v=8), + dict(type='l2', v=2), + dict(type='l3', v=8) + ], + 'dict_list2': [ + dict(type='l1', + v=[ + dict(type='l1_1', v=8), + dict(type='l1_2', v=2), + dict(type='l1_3', v=8), + ]), + dict(type='l2', v=8) + ], + 'c': + 'test' + }) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_device.py b/tests/utils/test_device.py new file mode 100644 index 0000000..4af6cf1 --- /dev/null +++ b/tests/utils/test_device.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
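+# Device-string normalisation exercised below (as implied by the assertions;
+# the behaviour itself lives in modelscope.utils.device):
+#   verify_device('cpu') / verify_device('CPU')   -> ('cpu', None)
+#   verify_device('gpu') / 'cuda' / 'cuda:0'      -> ('gpu', 0)
+#   verify_device('gpu:1')                        -> ('gpu', 1)
+#   verify_device('xgu') / '' / None              -> AssertionError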
+ +import os +import shutil +import tempfile +import time +import unittest + +import torch + +from modelscope.utils.constant import Frameworks +from modelscope.utils.device import (create_device, device_placement, + verify_device) + +# import tensorflow must be imported after torch is imported when using tf1.15 +import tensorflow as tf # isort:skip + + +class DeviceTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def tearDown(self): + super().tearDown() + + def test_verify(self): + device_name, device_id = verify_device('cpu') + self.assertEqual(device_name, 'cpu') + self.assertTrue(device_id is None) + device_name, device_id = verify_device('CPU') + self.assertEqual(device_name, 'cpu') + + device_name, device_id = verify_device('gpu') + self.assertEqual(device_name, 'gpu') + self.assertTrue(device_id == 0) + + device_name, device_id = verify_device('cuda') + self.assertEqual(device_name, 'gpu') + self.assertTrue(device_id == 0) + + device_name, device_id = verify_device('cuda:0') + self.assertEqual(device_name, 'gpu') + self.assertTrue(device_id == 0) + + device_name, device_id = verify_device('gpu:1') + self.assertEqual(device_name, 'gpu') + self.assertTrue(device_id == 1) + + with self.assertRaises(AssertionError): + verify_device('xgu') + + with self.assertRaises(AssertionError): + verify_device('') + + with self.assertRaises(AssertionError): + verify_device(None) + + def test_create_device_torch(self): + if torch.cuda.is_available(): + target_device_type = 'cuda' + target_device_index = 0 + else: + target_device_type = 'cpu' + target_device_index = None + device = create_device('gpu') + self.assertTrue(isinstance(device, torch.device)) + self.assertTrue(device.type == target_device_type) + self.assertTrue(device.index == target_device_index) + + device = create_device('gpu:0') + self.assertTrue(isinstance(device, torch.device)) + self.assertTrue(device.type == target_device_type) + self.assertTrue(device.index == target_device_index) + + device = create_device('cuda') + self.assertTrue(device.type == target_device_type) + self.assertTrue(isinstance(device, torch.device)) + self.assertTrue(device.index == target_device_index) + + device = create_device('cuda:0') + self.assertTrue(isinstance(device, torch.device)) + self.assertTrue(device.type == target_device_type) + self.assertTrue(device.index == target_device_index) + + def test_device_placement_cpu(self): + with device_placement(Frameworks.torch, 'cpu'): + pass + + @unittest.skip('skip this test to avoid debug logging.') + def test_device_placement_tf_gpu(self): + tf.debugging.set_log_device_placement(True) + with device_placement(Frameworks.tf, 'gpu:0'): + a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) + c = tf.matmul(a, b) + s = tf.Session() + s.run(c) + tf.debugging.set_log_device_placement(False) + + def test_device_placement_torch_gpu(self): + with device_placement(Frameworks.torch, 'gpu:0'): + if torch.cuda.is_available(): + self.assertEqual(torch.cuda.current_device(), 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_plugin.py b/tests/utils/test_plugin.py new file mode 100644 index 0000000..2210175 --- /dev/null +++ b/tests/utils/test_plugin.py @@ -0,0 +1,40 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
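+# Plugin fixture layout assumed by these tests (the files added under
+# tests/utils/plugins/ earlier in this diff):
+#   .modelscope_plugins  - plain-text list of plugin package names ('dummy')
+#   dummy/__init__.py    - imports dummy.dummy_model so registration runs on import
+#   dummy/dummy_model.py - registers DummyModel via
+#                          @MODELS.register_module(group_key='dummy-group',
+#                                                   module_name='dummy-model')
+# pushd() switches the working directory to the plugins root for the duration
+# of the block, so discover_plugins() can find the 'dummy' package.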
+import unittest + +from modelscope.models.builder import MODELS +from modelscope.utils.plugins import (discover_plugins, import_all_plugins, + import_file_plugins, import_plugins, + pushd) + + +class PluginTest(unittest.TestCase): + def setUp(self): + self.plugins_root = 'tests/utils/plugins/' + + def test_no_plugins(self): + available_plugins = set(discover_plugins()) + assert available_plugins == set() + + def test_file_plugins(self): + with pushd(self.plugins_root): + available_plugins = set(discover_plugins()) + assert available_plugins == {'dummy'} + + import_file_plugins() + assert MODELS.get('dummy-model', 'dummy-group') is not None + + def test_custom_plugins(self): + with pushd(self.plugins_root): + available_plugins = set(discover_plugins()) + assert available_plugins == {'dummy'} + + import_plugins(['dummy']) + assert MODELS.get('dummy-model', 'dummy-group') is not None + + def test_all_plugins(self): + with pushd(self.plugins_root): + available_plugins = set(discover_plugins()) + assert available_plugins == {'dummy'} + + import_all_plugins() + assert MODELS.get('dummy-model', 'dummy-group') is not None diff --git a/tests/utils/test_registry.py b/tests/utils/test_registry.py new file mode 100644 index 0000000..59ba955 --- /dev/null +++ b/tests/utils/test_registry.py @@ -0,0 +1,93 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.utils.constant import Tasks +from modelscope.utils.registry import Registry, build_from_cfg, default_group + + +class RegistryTest(unittest.TestCase): + def test_register_class_no_task(self): + MODELS = Registry('models') + self.assertTrue(MODELS.name == 'models') + self.assertTrue(default_group in MODELS.modules) + self.assertTrue(MODELS.modules[default_group] == {}) + + self.assertEqual(len(MODELS.modules), 1) + + @MODELS.register_module(module_name='cls-resnet') + class ResNetForCls(object): + pass + + self.assertTrue(default_group in MODELS.modules) + self.assertTrue(MODELS.get('cls-resnet') is ResNetForCls) + + def test_register_class_with_task(self): + MODELS = Registry('models') + + @MODELS.register_module(Tasks.image_classification, 'SwinT') + class SwinTForCls(object): + pass + + self.assertTrue(Tasks.image_classification in MODELS.modules) + self.assertTrue( + MODELS.get('SwinT', Tasks.image_classification) is SwinTForCls) + + @MODELS.register_module(Tasks.sentiment_analysis, 'Bert') + class BertForSentimentAnalysis(object): + pass + + self.assertTrue(Tasks.sentiment_analysis in MODELS.modules) + self.assertTrue( + MODELS.get('Bert', Tasks.sentiment_analysis) is + BertForSentimentAnalysis) + + @MODELS.register_module(Tasks.image_object_detection) + class DETR(object): + pass + + self.assertTrue(Tasks.image_object_detection in MODELS.modules) + self.assertTrue( + MODELS.get('DETR', Tasks.image_object_detection) is DETR) + + self.assertEqual(len(MODELS.modules), 4) + + def test_list(self): + MODELS = Registry('models') + + @MODELS.register_module(Tasks.image_classification, 'SwinT') + class SwinTForCls(object): + pass + + @MODELS.register_module(Tasks.sentiment_analysis, 'Bert') + class BertForSentimentAnalysis(object): + pass + + MODELS.list() + print(MODELS) + + def test_build(self): + MODELS = Registry('models') + + @MODELS.register_module(Tasks.image_classification, 'SwinT') + class SwinTForCls(object): + pass + + @MODELS.register_module(Tasks.sentiment_analysis, 'Bert') + class BertForSentimentAnalysis(object): + pass + + cfg = dict(type='SwinT') + model = build_from_cfg(cfg, MODELS, 
Tasks.image_classification) + self.assertTrue(isinstance(model, SwinTForCls)) + + cfg = dict(type='Bert') + model = build_from_cfg(cfg, MODELS, Tasks.sentiment_analysis) + self.assertTrue(isinstance(model, BertForSentimentAnalysis)) + + with self.assertRaises(KeyError): + cfg = dict(type='Bert') + model = build_from_cfg(cfg, MODELS, Tasks.image_classification) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_type_assert.py b/tests/utils/test_type_assert.py new file mode 100644 index 0000000..12b83a2 --- /dev/null +++ b/tests/utils/test_type_assert.py @@ -0,0 +1,21 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest +from typing import List, Union + +from modelscope.utils.type_assert import type_assert + + +class type_assertTest(unittest.TestCase): + @type_assert(object, list, (int, str)) + def a(self, a: List[int], b: Union[int, str]): + print(a, b) + + def test_type_assert(self): + with self.assertRaises(TypeError): + self.a([1], 2) + self.a(1, [123]) + + +if __name__ == '__main__': + unittest.main()
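
The last file exercises the type_assert decorator from modelscope.utils.type_assert. As a rough sketch of the behaviour the test relies on (an illustrative assumption, not the actual implementation), such a decorator can bind the declared types to the function's parameters and raise TypeError on the first mismatch:

from functools import wraps
from inspect import signature


def type_assert(*ty_args, **ty_kwargs):
    """Raise TypeError when a call argument does not match its declared type."""

    def decorate(func):
        sig = signature(func)
        # map the declared types onto the function's parameter names
        bound_types = sig.bind_partial(*ty_args, **ty_kwargs).arguments

        @wraps(func)
        def wrapper(*args, **kwargs):
            for name, value in sig.bind(*args, **kwargs).arguments.items():
                if name in bound_types and not isinstance(
                        value, bound_types[name]):
                    raise TypeError('Argument %s must be %s' %
                                    (name, bound_types[name]))
            return func(*args, **kwargs)

        return wrapper

    return decorate

Under this behaviour, self.a([1], 2) passes (list and int match the declared types) while self.a(1, [123]) raises TypeError, which is what test_type_assert asserts.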