mirror of
https://github.com/deepinsight/insightface.git
synced 2025-12-30 08:02:27 +00:00
536 lines
65 KiB
Plaintext
536 lines
65 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/home/jd4615/miniconda3/envs/insightface/lib/python2.7/site-packages/sklearn/utils/fixes.py:313: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
|
||
" _nan_object_mask = _nan_object_array != _nan_object_array\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import os\n",
|
||
"import numpy as np\n",
|
||
"import cPickle\n",
|
||
"from sklearn.metrics import roc_curve, auc\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import timeit\n",
|
||
"import sklearn\n",
|
||
"import cv2\n",
|
||
"import sys\n",
|
||
"import glob\n",
|
||
"sys.path.append('./recognition')\n",
|
||
"from embedding import Embedding\n",
|
||
"from menpo.visualize import print_progress\n",
|
||
"from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
|
||
"from prettytable import PrettyTable\n",
|
||
"from pathlib import Path\n",
|
||
"import warnings \n",
|
||
"warnings.filterwarnings(\"ignore\") "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def read_template_media_list(path):\n",
"    \"\"\"Load the template id and media id of every image from a text file.\n",
"\n",
"    The file is parsed as whitespace-separated columns; column 1 is read as\n",
"    the template id and column 2 as the media id (rows look like\n",
"    `image_name tid mid`).\n",
"\n",
"    Returns a tuple (templates, medias) of 1-D integer arrays with one\n",
"    entry per row of the file.\n",
"    \"\"\"\n",
"    # ndmin=2 keeps a single-row file two-dimensional, so the column slices below still work.\n",
"    ijb_meta = np.loadtxt(path, dtype=str, ndmin=2)\n",
"    # np.int was only an alias of the builtin int and was removed in NumPy 1.24.\n",
"    templates = ijb_meta[:, 1].astype(int)\n",
"    medias = ijb_meta[:, 2].astype(int)\n",
"    return templates, medias"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def read_template_pair_list(path):\n",
"    \"\"\"Load the template pairs and their labels from a text file.\n",
"\n",
"    The file is parsed as whitespace-separated columns; columns 0 and 1 are\n",
"    read as the two template ids of a pair and column 2 as its label (rows\n",
"    look like `tid_1 tid_2 label`).\n",
"\n",
"    Returns a tuple (t1, t2, label) of 1-D integer arrays with one entry\n",
"    per row of the file.\n",
"    \"\"\"\n",
"    # ndmin=2 keeps a single-row file two-dimensional, so the column slices below still work.\n",
"    pairs = np.loadtxt(path, dtype=str, ndmin=2)\n",
"    # np.int was only an alias of the builtin int and was removed in NumPy 1.24.\n",
"    t1 = pairs[:, 0].astype(int)\n",
"    t2 = pairs[:, 1].astype(int)\n",
"    label = pairs[:, 2].astype(int)\n",
"    return t1, t2, label"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def read_image_feature(path):\n",
"    \"\"\"Unpickle and return the object stored in the binary file at `path`.\n",
"\n",
"    NOTE(review): unpickling can execute arbitrary code, so only point this\n",
"    at files from a trusted source.\n",
"    \"\"\"\n",
"    with open(path, 'rb') as feature_file:\n",
"        return cPickle.load(feature_file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def get_image_feature(img_path, img_list_path, model_path, gpu_id):\n",
"    \"\"\"Compute one embedding per image named in the list file.\n",
"\n",
"    Every line of `img_list_path` is split on single spaces: the first field\n",
"    is the image file name (joined onto `img_path`), the last field is the\n",
"    faceness score, and the fields in between are landmark coordinates that\n",
"    are reshaped to (5, 2).\n",
"\n",
"    Returns a tuple (img_feats, faceness_scores), both float32 arrays with\n",
"    one entry per line of the list file.\n",
"    \"\"\"\n",
"    # Read the list inside a with-block so the file handle is closed again;\n",
"    # the previous bare open() call never released it.\n",
"    with open(img_list_path) as img_list:\n",
"        files = img_list.readlines()\n",
"    # NOTE(review): the literal 0 is handed to Embedding unchanged -- presumably\n",
"    # a checkpoint epoch; confirm against the Embedding class.\n",
"    embedding = Embedding(model_path, 0, gpu_id)\n",
"    img_feats = []\n",
"    faceness_scores = []\n",
"    for each_line in print_progress(files):\n",
"        name_lmk_score = each_line.strip().split(' ')\n",
"        img_name = os.path.join(img_path, name_lmk_score[0])\n",
"        img = cv2.imread(img_name)\n",
"        lmk = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32)\n",
"        lmk = lmk.reshape((5, 2))\n",
"        img_feats.append(embedding.get(img, lmk))\n",
"        faceness_scores.append(name_lmk_score[-1])\n",
"    img_feats = np.array(img_feats).astype(np.float32)\n",
"    faceness_scores = np.array(faceness_scores).astype(np.float32)\n",
"    return img_feats, faceness_scores"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def image2template_feature(img_feats = None, templates = None, medias = None):\n",
"    \"\"\"Pool per-image features into one L2-normalised feature per template.\n",
"\n",
"    img_feats is indexed as [number_image x feats_dim]; templates and medias\n",
"    hold the template id and media id of each image row.\n",
"\n",
"    For every distinct template id the images are grouped by media id. A\n",
"    media with several images contributes the mean of their features, a\n",
"    media with a single image contributes that feature unchanged. The media\n",
"    features are summed into the template feature, and every template\n",
"    feature is finally divided by its L2 norm.\n",
"\n",
"    Returns (template_norm_feats, unique_templates); row i of the former\n",
"    belongs to unique_templates[i].\n",
"    \"\"\"\n",
"    unique_templates = np.unique(templates)\n",
"    feats_dim = img_feats.shape[1]\n",
"    template_feats = np.zeros((len(unique_templates), feats_dim))\n",
"\n",
"    for row, template_id in enumerate(unique_templates):\n",
"        (in_template,) = np.where(templates == template_id)\n",
"        feats_t = img_feats[in_template]\n",
"        medias_t = medias[in_template]\n",
"        media_ids, media_sizes = np.unique(medias_t, return_counts=True)\n",
"        pooled = []\n",
"        for media_id, media_size in zip(media_ids, media_sizes):\n",
"            (in_media,) = np.where(medias_t == media_id)\n",
"            media_feats = feats_t[in_media]\n",
"            # Several images of one media (e.g. frames of a video) collapse to their mean.\n",
"            pooled.append(media_feats if media_size == 1 else np.mean(media_feats, 0, keepdims=True))\n",
"        template_feats[row] = np.asarray(pooled).sum(axis=0)\n",
"        if row % 2000 == 0:\n",
"            print('Finish Calculating {} template features.'.format(row))\n",
"\n",
"    l2_norms = np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))\n",
"    return template_feats / l2_norms, unique_templates"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def verification(template_norm_feats = None, unique_templates = None, p1 = None, p2 = None):\n",
"    \"\"\"Compute the similarity score of every template pair (p1[i], p2[i]).\n",
"\n",
"    Row i of template_norm_feats is the feature of template id\n",
"    unique_templates[i]. The score of a pair is the sum of the element-wise\n",
"    product of its two template features.\n",
"\n",
"    Returns a 1-D array with one score per pair.\n",
"    \"\"\"\n",
"    # Lookup table: template id -> row index into template_norm_feats.\n",
"    row_of_template = np.zeros((max(unique_templates) + 1,), dtype=int)\n",
"    row_of_template[unique_templates] = np.arange(len(unique_templates))\n",
"\n",
"    num_pairs = len(p1)\n",
"    score = np.zeros((num_pairs,))\n",
"\n",
"    # Score the pairs in batches instead of all at once because of the memory limitation.\n",
"    batchsize = 100000\n",
"    batch_starts = range(0, num_pairs, batchsize)\n",
"    total_sublists = len(batch_starts)\n",
"    for c, first in enumerate(batch_starts):\n",
"        last = first + batchsize\n",
"        feat1 = template_norm_feats[row_of_template[p1[first:last]]]\n",
"        feat2 = template_norm_feats[row_of_template[p2[first:last]]]\n",
"        score[first:last] = np.sum(feat1 * feat2, -1)\n",
"        if c % 10 == 0:\n",
"            print('Finish {}/{} pairs.'.format(c, total_sublists))\n",
"    return score"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def read_score(path):\n",
"    \"\"\"Unpickle and return the object stored in the binary file at `path`.\n",
"\n",
"    NOTE(review): unpickling can execute arbitrary code, so only point this\n",
"    at files from a trusted source.\n",
"    \"\"\"\n",
"    with open(path, 'rb') as score_file:\n",
"        return cPickle.load(score_file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
"# Step 1: Load Meta Data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Time: 0.83 s. \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# -------------------------------------------------------------\n",
"# Image -> template / media assignment used to build template features.\n",
"# Each row of the meta file reads: image_name tid mid\n",
"# (tid = template id, mid = media id).\n",
"# -------------------------------------------------------------\n",
"start = timeit.default_timer()\n",
"face_tid_mid_path = os.path.join('IJBB/meta', 'ijbb_face_tid_mid.txt')\n",
"templates, medias = read_template_media_list(face_tid_mid_path)\n",
"stop = timeit.default_timer()\n",
"print('Time: %.2f s. ' % (stop - start))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Time: 31.75 s. \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# -------------------------------------------------------------\n",
"# Template pairs for template-to-template verification.\n",
"# Each row of the pair file reads: tid_1 tid_2 label\n",
"# (tid = template id, label = 1/0).\n",
"# -------------------------------------------------------------\n",
"start = timeit.default_timer()\n",
"pair_label_path = os.path.join('IJBB/meta', 'ijbb_template_pair_label.txt')\n",
"p1, p2, label = read_template_pair_list(pair_label_path)\n",
"stop = timeit.default_timer()\n",
"print('Time: %.2f s. ' % (stop - start))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Step 2: Get Image Features"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"('loading', './pretrained_models/VGG2-ResNet50-Arcface/model', 0)\n",
|
||
"[====================] 100% (227630/227630) - done. \n",
|
||
"Time: 2386.28 s. \n",
|
||
"Feature Shape: (227630 , 1024) .\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# =============================================================\n",
"# Extract one embedding per image listed in the IJB-B landmark file.\n",
"#   img_feats:       [image_num x feats_dim]; the recorded run printed (227630 , 1024)\n",
"#   faceness_scores: one detector score per image\n",
"# Features cached in a pickle can be loaded instead, e.g.\n",
"#   img_feats = read_image_feature('./MS1MV2/IJBB_MS1MV2_r100_arcface.pkl')\n",
"# =============================================================\n",
"start = timeit.default_timer()\n",
"img_path = './IJBB/loose_crop'\n",
"img_list_path = './IJBB/meta/ijbb_name_5pts_score.txt'\n",
"model_path = './pretrained_models/VGG2-ResNet50-Arcface/model'\n",
"gpu_id = 0\n",
"img_feats, faceness_scores = get_image_feature(img_path, img_list_path, model_path, gpu_id)\n",
"stop = timeit.default_timer()\n",
"print('Time: %.2f s. ' % (stop - start))\n",
"print('Feature Shape: ({} , {}) .'.format(*img_feats.shape[:2]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
"# Step 3: Get Template Features"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Finish Calculating 0 template features.\n",
|
||
"Finish Calculating 2000 template features.\n",
|
||
"Finish Calculating 4000 template features.\n",
|
||
"Finish Calculating 6000 template features.\n",
|
||
"Finish Calculating 8000 template features.\n",
|
||
"Finish Calculating 10000 template features.\n",
|
||
"Finish Calculating 12000 template features.\n",
|
||
"Time: 3.41 s. \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# =============================================================\n",
"# compute template features from image features.\n",
"# =============================================================\n",
"start = timeit.default_timer()\n",
"# ==========================================================\n",
"# Test-mode switches. The N/D/F tags below are the ones used in the saved score file names.\n",
"# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).\n",
"# ==========================================================\n",
"use_norm_score = False      # True: keep the feature norm, TestMode(N1); False: L2-normalise every image feature\n",
"use_detector_score = True   # True: weight every image feature by its faceness score, TestMode(D1)\n",
"use_flip_test = True        # True: add the two halves of every feature, TestMode(F2); False: use the first half only\n",
"\n",
"# Floor division keeps the slice bound an integer; on Python 3 a plain '/' yields a float, which cannot be used as a slice index.\n",
"half_dim = img_feats.shape[1] // 2\n",
"if use_flip_test:\n",
"    # concat --- F1\n",
"    #img_input_feats = img_feats\n",
"    # add --- F2\n",
"    img_input_feats = img_feats[:, :half_dim] + img_feats[:, half_dim:]\n",
"else:\n",
"    img_input_feats = img_feats[:, :half_dim]\n",
"\n",
"if not use_norm_score:\n",
"    # normalise features to remove norm information\n",
"    img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True))\n",
"\n",
"if use_detector_score:\n",
"    # Broadcasting the score column scales every feature row; this gives the same\n",
"    # result as tiling the scores with np.matlib.repmat but does not rely on\n",
"    # numpy.matlib, which this notebook never imports.\n",
"    img_input_feats = img_input_feats * faceness_scores[:, np.newaxis]\n",
"\n",
"template_norm_feats, unique_templates = image2template_feature(img_input_feats, templates, medias)\n",
"stop = timeit.default_timer()\n",
"print('Time: %.2f s. ' % (stop - start))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Step 4: Get Template Similarity Scores"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Finish 0/81 pairs.\n",
|
||
"Finish 10/81 pairs.\n",
|
||
"Finish 20/81 pairs.\n",
|
||
"Finish 30/81 pairs.\n",
|
||
"Finish 40/81 pairs.\n",
|
||
"Finish 50/81 pairs.\n",
|
||
"Finish 60/81 pairs.\n",
|
||
"Finish 70/81 pairs.\n",
|
||
"Finish 80/81 pairs.\n",
|
||
"Time: 38.38 s. \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# -------------------------------------------------------------\n",
"# Score every template pair (p1[i], p2[i]) and report the elapsed time.\n",
"# -------------------------------------------------------------\n",
"start = timeit.default_timer()\n",
"score = verification(template_norm_feats=template_norm_feats, unique_templates=unique_templates, p1=p1, p2=p2)\n",
"stop = timeit.default_timer()\n",
"print('Time: %.2f s. ' % (stop - start))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Save the pair scores as a .npy file so the ROC cell below can pick them up via its 'VGG2*.npy' glob.\n",
"# NOTE(review): the TestMode tag in the file name is hard-coded; keep it in sync with the\n",
"# use_norm_score / use_detector_score / use_flip_test switches used for this run.\n",
"score_save_name = './IJBB/result/VGG2-ResNet50-ArcFace-TestMode(N0D1F2).npy'\n",
|
||
"np.save(score_save_name, score)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Step 5: Get ROC Curves and TPR@FPR Table"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
"# Plot one ROC curve per saved score file and collect TPR at fixed FPR values into a table.\n",
"score_save_path = './IJBB/result'\n",
"# glob returns files in arbitrary order; sorting keeps legend and table rows stable between runs.\n",
"files = sorted(glob.glob(score_save_path + '/VGG2*.npy'))\n",
"methods = []\n",
"scores = []\n",
"for score_file in files:\n",
"    methods.append(Path(score_file).stem)\n",
"    scores.append(np.load(score_file))\n",
"methods = np.array(methods)\n",
"scores = dict(zip(methods, scores))\n",
"colours = dict(zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))\n",
"# FPR operating points reported in the table and used as x ticks.\n",
"x_labels = [10**-6, 10**-5, 10**-4, 10**-3, 10**-2, 10**-1]\n",
"# Build the header from a real list: on Python 3 map() returns an iterator, which cannot be added to a list.\n",
"tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels])\n",
"fig = plt.figure()\n",
"for method in methods:\n",
"    fpr, tpr, _ = roc_curve(label, scores[method])\n",
"    roc_auc = auc(fpr, tpr)\n",
"    fpr = np.flipud(fpr)\n",
"    tpr = np.flipud(tpr) # select largest tpr at same fpr\n",
"    plt.plot(fpr, tpr, color=colours[method], lw=1, label=('[%s (AUC = %0.4f %%)]' % (method.split('-')[-1], roc_auc*100)))\n",
"    tpr_fpr_row = [method]\n",
"    for target_fpr in x_labels:\n",
"        # argmin returns the first of several equally close points, which is the same\n",
"        # entry the previous min-over-(distance, index) search selected.\n",
"        min_index = np.argmin(np.abs(fpr - target_fpr))\n",
"        tpr_fpr_row.append('%.4f' % tpr[min_index])\n",
"    tpr_fpr_table.add_row(tpr_fpr_row)\n",
"plt.xlim([10**-6, 0.1])\n",
"plt.ylim([0.3, 1.0])\n",
"plt.grid(linestyle='--', linewidth=1)\n",
"plt.xticks(x_labels)\n",
"plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True))\n",
"plt.xscale('log')\n",
"plt.xlabel('False Positive Rate')\n",
"plt.ylabel('True Positive Rate')\n",
"plt.title('ROC on IJB-B')\n",
"plt.legend(loc=\"lower right\")\n",
"plt.show()\n",
"# Uncomment to export the figure:\n",
"#fig.savefig('IJB-B.pdf')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"+----------------------------------------+--------+--------+--------+--------+--------+--------+\n",
|
||
"| Methods | 1e-06 | 1e-05 | 0.0001 | 0.001 | 0.01 | 0.1 |\n",
|
||
"+----------------------------------------+--------+--------+--------+--------+--------+--------+\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N1D1F2) | 0.4044 | 0.8145 | 0.9056 | 0.9497 | 0.9779 | 0.9922 |\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N1D0F0) | 0.4035 | 0.8038 | 0.8976 | 0.9437 | 0.9755 | 0.9914 |\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N1D1F1) | 0.3940 | 0.8124 | 0.9028 | 0.9479 | 0.9770 | 0.9919 |\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N0D0F0) | 0.3893 | 0.8050 | 0.8990 | 0.9448 | 0.9759 | 0.9918 |\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N1D1F0) | 0.4098 | 0.8123 | 0.9022 | 0.9463 | 0.9766 | 0.9918 |\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N0D1F0) | 0.3949 | 0.8130 | 0.9036 | 0.9471 | 0.9767 | 0.9919 |\n",
|
||
"| VGG2-ResNet50-ArcFace-TestMode(N0D1F2) | 0.4011 | 0.8210 | 0.9069 | 0.9500 | 0.9779 | 0.9924 |\n",
|
||
"+----------------------------------------+--------+--------+--------+--------+--------+--------+\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Show the TPR@FPR table (one row per score file, one column per FPR value) filled in by the ROC cell.\n",
"print(tpr_fpr_table)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
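"# Setting N0D1F2 is the best overall: it has the highest (or tied-highest) TPR at every FPR from 1e-05 to 0.1; only at 1e-06 do some other settings score higher."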
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Test Setting Conclusions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
"#### (1) Adding the two flip-test features is better than concatenating them (N1D1F2 vs. N1D1F1).\n",
"#### (2) The detection score carries faceness information that decreases the weights of noisy samples within a template (N0D1F0 vs. N0D0F0)."
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 2",
|
||
"language": "python",
|
||
"name": "python2"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.15"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|