mirror of
https://github.com/deepinsight/insightface.git
synced 2025-12-30 08:02:27 +00:00
812 lines
33 KiB
Python
812 lines
33 KiB
Python
import numbers
|
|
import os
|
|
import os.path as osp
|
|
import pickle
|
|
import queue as Queue
|
|
import threading
|
|
import logging
|
|
import numbers
|
|
import math
|
|
import pandas as pd
|
|
from scipy.spatial.transform import Rotation
|
|
|
|
import mxnet as mx
|
|
from pathlib import Path
|
|
import numpy as np
|
|
import torch
|
|
from torch.utils.data import DataLoader, Dataset
|
|
from skimage import transform as sktrans
|
|
import cv2
|
|
import albumentations as A
|
|
from albumentations.pytorch import ToTensorV2
|
|
from augs import *
|
|
|
|
|
|
def Rt26dof(R_t, degrees=False):
|
|
yaw_gt, pitch_gt, roll_gt = Rotation.from_matrix(R_t[:3, :3].T).as_euler('yxz', degrees=degrees)
|
|
label_euler = np.array([pitch_gt, yaw_gt, roll_gt])
|
|
label_translation = R_t[3, :3]
|
|
label_6dof = np.concatenate([label_euler, label_translation])
|
|
return label_6dof
|
|
|
|
|
|
def gen_target_pip(target, target_map, target_local_x, target_local_y):
|
|
map_channel, map_height, map_width = target_map.shape
|
|
target = target.reshape(-1, 2)
|
|
assert map_channel == target.shape[0]
|
|
|
|
for i in range(map_channel):
|
|
mu_x = int(math.floor(target[i][0] * map_width))
|
|
mu_y = int(math.floor(target[i][1] * map_height))
|
|
mu_x = max(0, mu_x)
|
|
mu_y = max(0, mu_y)
|
|
mu_x = min(mu_x, map_width-1)
|
|
mu_y = min(mu_y, map_height-1)
|
|
target_map[i, mu_y, mu_x] = 1
|
|
shift_x = target[i][0] * map_width - mu_x
|
|
shift_y = target[i][1] * map_height - mu_y
|
|
target_local_x[i, mu_y, mu_x] = shift_x
|
|
target_local_y[i, mu_y, mu_x] = shift_y
|
|
|
|
|
|
return target_map, target_local_x, target_local_y
|
|
|
|
def get_tris(cfg):
|
|
import trimesh
|
|
data_root = Path(cfg.root_dir)
|
|
obj_path = data_root / 'resources/example.obj'
|
|
mesh = trimesh.load(obj_path, process=False)
|
|
verts_template = np.array(mesh.vertices, dtype=np.float32)
|
|
tris = np.array(mesh.faces, dtype=np.int32)
|
|
#print(verts_template.shape, tris.shape)
|
|
return tris
|
|
|
|
|
|
class BackgroundGenerator(threading.Thread):
|
|
def __init__(self, generator, local_rank, max_prefetch=6):
|
|
super(BackgroundGenerator, self).__init__()
|
|
self.queue = Queue.Queue(max_prefetch)
|
|
self.generator = generator
|
|
self.local_rank = local_rank
|
|
self.daemon = True
|
|
self.start()
|
|
|
|
def run(self):
|
|
torch.cuda.set_device(self.local_rank)
|
|
for item in self.generator:
|
|
self.queue.put(item)
|
|
self.queue.put(None)
|
|
|
|
def next(self):
|
|
next_item = self.queue.get()
|
|
if next_item is None:
|
|
raise StopIteration
|
|
return next_item
|
|
|
|
def __next__(self):
|
|
return self.next()
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
|
|
class DataLoaderX(DataLoader):
|
|
def __init__(self, local_rank, **kwargs):
|
|
super(DataLoaderX, self).__init__(**kwargs)
|
|
self.stream = torch.cuda.Stream(local_rank)
|
|
self.local_rank = local_rank
|
|
|
|
def __iter__(self):
|
|
self.iter = super(DataLoaderX, self).__iter__()
|
|
self.iter = BackgroundGenerator(self.iter, self.local_rank)
|
|
self.preload()
|
|
return self
|
|
|
|
def preload(self):
|
|
self.batch = next(self.iter, None)
|
|
if self.batch is None:
|
|
return None
|
|
with torch.cuda.stream(self.stream):
|
|
for k in range(len(self.batch)):
|
|
self.batch[k] = self.batch[k].to(device=self.local_rank,
|
|
non_blocking=True)
|
|
|
|
def __next__(self):
|
|
torch.cuda.current_stream().wait_stream(self.stream)
|
|
batch = self.batch
|
|
if batch is None:
|
|
raise StopIteration
|
|
self.preload()
|
|
return batch
|
|
|
|
|
|
class FaceDataset(Dataset):
|
|
def __init__(self, cfg, is_train=True, is_test=False, local_rank=0):
|
|
super(FaceDataset, self).__init__()
|
|
|
|
|
|
self.data_root = Path(cfg.root_dir)
|
|
self.input_size = cfg.input_size
|
|
self.transform = get_aug_transform(cfg)
|
|
self.local_rank = local_rank
|
|
self.is_test = is_test
|
|
txt_path = self.data_root / 'resources/projection_matrix.txt'
|
|
self.M_proj = np.loadtxt(txt_path, dtype=np.float32)
|
|
if is_test:
|
|
data_root = Path(cfg.root_dir)
|
|
csv_path = data_root / 'list/WCPA_track2_test.csv'
|
|
self.df = pd.read_csv(csv_path, dtype={'subject_id': str, 'facial_action': str, 'img_id': str})
|
|
else:
|
|
if is_train:
|
|
self.df = pd.read_csv(osp.join(cfg.cache_dir, 'train_list.csv'), dtype={'subject_id': str, 'facial_action': str, 'img_id': str})
|
|
else:
|
|
self.df = pd.read_csv(osp.join(cfg.cache_dir, 'val_list.csv'), dtype={'subject_id': str, 'facial_action': str, 'img_id': str})
|
|
self.label_6dof_mean = [-0.018197, -0.017891, 0.025348, -0.005368, 0.001176, -0.532206] # mean of pitch, yaw, roll, tx, ty, tz
|
|
self.label_6dof_std = [0.314015, 0.271809, 0.081881, 0.022173, 0.048839, 0.065444] # std of pitch, yaw, roll, tx, ty, tz
|
|
self.align_face = cfg.align_face
|
|
if not self.align_face:
|
|
self.dst_pts = np.float32([
|
|
[0, 0],
|
|
[0, cfg.input_size- 1],
|
|
[cfg.input_size- 1, 0]
|
|
])
|
|
else:
|
|
dst_pts = np.array([
|
|
[38.2946, 51.6963],
|
|
[73.5318, 51.5014],
|
|
[56.0252, 71.7366],
|
|
[41.5493, 92.3655],
|
|
[70.7299, 92.2041] ], dtype=np.float32 )
|
|
|
|
new_size = 144
|
|
dst_pts[:,0] += ((new_size-112)//2)
|
|
dst_pts[:,1] += 8
|
|
dst_pts[:,:] *= (self.input_size/float(new_size))
|
|
self.dst_pts = dst_pts
|
|
|
|
if local_rank==0:
|
|
logging.info('data_transform_list:%s'%self.transform)
|
|
logging.info('len:%d'%len(self.df))
|
|
self.is_test_aug = False
|
|
self.eye_dataset = None
|
|
if cfg.eyes is not None:
|
|
from eye_dataset import EyeDataset
|
|
self.eye_dataset = EyeDataset(cfg.eyes['root'])
|
|
|
|
def set_test_aug(self):
|
|
if not self.is_test_aug:
|
|
from easydict import EasyDict as edict
|
|
cfg = edict()
|
|
cfg.aug_modes = ['test-aug']
|
|
cfg.input_size = self.input_size
|
|
cfg.task = 0
|
|
self.transform = get_aug_transform(cfg)
|
|
self.is_test_aug = True
|
|
|
|
def get_names(self, index):
|
|
subject_id = self.df['subject_id'][index]
|
|
facial_action = self.df['facial_action'][index]
|
|
img_id = self.df['img_id'][index]
|
|
return subject_id, facial_action, img_id
|
|
|
|
def __getitem__(self, index):
|
|
subject_id = self.df['subject_id'][index]
|
|
facial_action = self.df['facial_action'][index]
|
|
img_id = self.df['img_id'][index]
|
|
|
|
img_path = self.data_root / 'image' / subject_id / facial_action / f'{img_id}_ar.jpg'
|
|
npz_path = self.data_root / 'info' / subject_id / facial_action / f'{img_id}_info.npz'
|
|
txt_path = self.data_root / '68landmarks' / subject_id / facial_action / f'{img_id}_68landmarks.txt'
|
|
#if not osp.exists(img_path):
|
|
# continue
|
|
|
|
#print(img_path)
|
|
img_raw = cv2.imread(str(img_path))
|
|
#if img_raw is None:
|
|
# print('XXX ERR:', img_path)
|
|
img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
|
|
#print(img_raw.shape)
|
|
img_h, img_w, _ = img_raw.shape
|
|
pts68 = np.loadtxt(txt_path, dtype=np.int32)
|
|
|
|
x_min, y_min = pts68.min(axis=0)
|
|
x_max, y_max = pts68.max(axis=0)
|
|
x_center = (x_min + x_max) / 2
|
|
y_center = (y_min + y_max) / 2
|
|
w, h = x_max - x_min, y_max - y_min
|
|
|
|
|
|
if not self.align_face:
|
|
size = max(w, h)
|
|
ss = np.array([0.75, 0.75, 0.85, 0.65]) # predefined expand size
|
|
|
|
left = x_center - ss[0] * size
|
|
right = x_center + ss[1] * size
|
|
top = y_center - ss[2] * size
|
|
bottom = y_center + ss[3] * size
|
|
|
|
src_pts = np.float32([
|
|
[left, top],
|
|
[left, bottom],
|
|
[right, top]
|
|
])
|
|
tform = cv2.getAffineTransform(src_pts, self.dst_pts)
|
|
else:
|
|
src_pts = np.float32([
|
|
(pts68[36] + pts68[39])/2,
|
|
(pts68[42] + pts68[45])/2,
|
|
pts68[30],
|
|
pts68[48],
|
|
pts68[54]
|
|
])
|
|
tf = sktrans.SimilarityTransform()
|
|
tf.estimate(src_pts, self.dst_pts)
|
|
tform = tf.params[0:2,:]
|
|
|
|
img_local = cv2.warpAffine(img_raw, tform, (self.input_size,)*2, flags=cv2.INTER_CUBIC)
|
|
fake_points2d = np.ones( (1,2), dtype=np.float32) * (self.input_size//2)
|
|
|
|
#tform_inv = cv2.invertAffineTransform(tform)
|
|
#img_global = cv2.warpAffine(img_local, tform_inv, (img_w, img_h), borderValue=0.0)
|
|
#img_global = cv2.resize(img_global, (self.input_size, self.input_size))
|
|
if self.transform is not None:
|
|
t = self.transform(image=img_local, keypoints=fake_points2d)
|
|
img_local = t['image']
|
|
if self.is_test_aug:
|
|
height, width = img_local.shape[:2]
|
|
for trans in t["replay"]["transforms"]:
|
|
if trans['__class_fullname__']=='ShiftScaleRotate' and trans['applied']:
|
|
param = trans['params']
|
|
dx, dy, angle, scale = param['dx'], param['dy'], param['angle'], param['scale']
|
|
center = (width / 2, height / 2)
|
|
matrix = cv2.getRotationMatrix2D(center, angle, scale)
|
|
matrix[0, 2] += dx * width
|
|
matrix[1, 2] += dy * height
|
|
new_matrix = np.identity(3)
|
|
new_matrix[:2,:3] = matrix
|
|
old_tform = np.identity(3)
|
|
old_tform[:2,:3] = tform
|
|
#new_tform = np.dot(old_tform, new_matrix)
|
|
new_tform = np.dot(new_matrix, old_tform)
|
|
#print('label_tform:')
|
|
#print(label_tform.flatten())
|
|
#print(new_matrix.flatten())
|
|
#print(new_tform.flatten())
|
|
tform = new_tform[:2,:3]
|
|
break
|
|
#print('trans param:', param)
|
|
#img_global = self.transform(image=img_global)['image']
|
|
|
|
tform_tensor = torch.tensor(tform, dtype=torch.float32)
|
|
d = {'img_local': img_local, 'tform': tform_tensor}
|
|
if self.eye_dataset is not None:
|
|
eye_key = str(Path('image') / subject_id / facial_action / f'{img_id}_ar.jpg')
|
|
#print(eye_key)
|
|
eyel, eyer = self.eye_dataset.get(eye_key, to_homo=True)
|
|
if eyel is not None:
|
|
#print(eye_key, el_inv.shape, er_inv.shape)
|
|
d['eye_world_left'] = torch.tensor(eyel, dtype=torch.float32)
|
|
d['eye_world_right'] = torch.tensor(eyer, dtype=torch.float32)
|
|
if not self.is_test:
|
|
M = np.load(npz_path)
|
|
#yaw_gt, pitch_gt, roll_gt = Rotation.from_matrix(M['R_t'][:3, :3].T).as_euler('yxz', degrees=False)
|
|
#label_euler = np.array([pitch_gt, yaw_gt, roll_gt])
|
|
#label_translation = M['R_t'][3, :3]
|
|
#label_6dof = np.concatenate([label_euler, label_translation])
|
|
#label_6dof = (label_6dof - self.label_6dof_mean) / self.label_6dof_std
|
|
#label_6dof_tensor = torch.tensor(label_6dof, dtype=torch.float32)
|
|
#label_verts = M['verts'] * 10.0 # roughly [-1, 1]
|
|
#label_verts_tensor = torch.tensor(label_verts, dtype=torch.float32)
|
|
#return img_local, label_verts_tensor, label_6dof_tensor
|
|
label_verts_tensor = torch.tensor(M['verts'], dtype=torch.float32)
|
|
label_Rt_tensor = torch.tensor(M['R_t'], dtype=torch.float32)
|
|
d['verts'] = label_verts_tensor
|
|
d['rt'] = label_Rt_tensor
|
|
#return img_local, img_global, label_verts_tensor, label_Rt_tensor, tform_tensor
|
|
#return img_local, label_verts_tensor, label_Rt_tensor, tform_tensor
|
|
else:
|
|
#return img_local, img_global, tform_tensor
|
|
index_tensor = torch.tensor(index, dtype=torch.long)
|
|
d['index'] = index_tensor
|
|
#return img_local, tform_tensor, index_tensor
|
|
return d
|
|
|
|
|
|
def __len__(self):
|
|
return len(self.df)
|
|
|
|
class MXFaceDataset(Dataset):
|
|
def __init__(self, cfg, is_train=True, norm_6dof=True, degrees_6dof=False, local_rank=0):
|
|
super(MXFaceDataset, self).__init__()
|
|
|
|
|
|
self.is_train = is_train
|
|
self.data_root = Path(cfg.root_dir)
|
|
self.input_size = cfg.input_size
|
|
self.transform = get_aug_transform(cfg)
|
|
self.local_rank = local_rank
|
|
self.use_trainval = cfg.use_trainval
|
|
self.use_eye = cfg.eyes is not None
|
|
if is_train:
|
|
#self.df = pd.read_csv(osp.join(cfg.cache_dir, 'train_list.csv'), dtype={'subject_id': str, 'facial_action': str, 'img_id': str})
|
|
path_imgrec = os.path.join(cfg.cache_dir, 'train.rec')
|
|
path_imgidx = os.path.join(cfg.cache_dir, 'train.idx')
|
|
self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
|
|
self.imgidx = list(self.imgrec.keys)
|
|
self.imggroup = [0] * len(self.imgidx)
|
|
self.size_train = len(self.imgidx)
|
|
if self.use_trainval:
|
|
assert not cfg.sampling_hard
|
|
path_imgrec = os.path.join(cfg.cache_dir, 'val.rec')
|
|
path_imgidx = os.path.join(cfg.cache_dir, 'val.idx')
|
|
self.imgrec2 = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
|
|
imgidx2 = list(self.imgrec2.keys)
|
|
self.imggroup += [1] * len(imgidx2)
|
|
self.imgidx += imgidx2
|
|
else:
|
|
#self.df = pd.read_csv(osp.join(cfg.cache_dir, 'val_list.csv'), dtype={'subject_id': str, 'facial_action': str, 'img_id': str})
|
|
path_imgrec = os.path.join(cfg.cache_dir, 'val.rec')
|
|
path_imgidx = os.path.join(cfg.cache_dir, 'val.idx')
|
|
self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
|
|
self.imgidx = list(self.imgrec.keys)
|
|
self.imggroup = [0] * len(self.imgidx)
|
|
self.imgidx = np.array(self.imgidx)
|
|
self.imggroup = np.array(self.imggroup)
|
|
if cfg.sampling_hard and is_train:
|
|
meta = np.load(os.path.join(cfg.cache_dir, 'train.meta.npy'))
|
|
assert meta.shape[0]==len(self.imgidx)
|
|
new_imgidx = []
|
|
for i in range(len(self.imgidx)):
|
|
idx = self.imgidx[i]
|
|
assert i==idx
|
|
pose = np.abs(meta[i,:2])
|
|
#repeat = np.sum(pose>=35)*3+1
|
|
if np.max(pose)<15:
|
|
repeat = 2
|
|
else:
|
|
repeat = 1
|
|
new_imgidx += [idx]*repeat
|
|
if local_rank==0:
|
|
print('new-imgidx:', len(self.imgidx), len(new_imgidx))
|
|
self.imgidx = np.array(new_imgidx)
|
|
self.label_6dof_mean = [-0.018197, -0.017891, 0.025348, -0.005368, 0.001176, -0.532206] # mean of pitch, yaw, roll, tx, ty, tz
|
|
self.label_6dof_std = [0.314015, 0.271809, 0.081881, 0.022173, 0.048839, 0.065444] # std of pitch, yaw, roll, tx, ty, tz
|
|
txt_path = self.data_root / 'resources/projection_matrix.txt'
|
|
self.M_proj = np.loadtxt(txt_path, dtype=np.float32)
|
|
self.M1 = np.array([
|
|
[400.0, 0, 0, 0],
|
|
[ 0, 400.0, 0, 0],
|
|
[ 0, 0, 1, 0],
|
|
[400.0, 400.0, 0, 1]
|
|
])
|
|
self.dst_pts = np.float32([
|
|
[0, 0],
|
|
[0, cfg.input_size- 1],
|
|
[cfg.input_size- 1, 0]
|
|
])
|
|
self.norm_6dof = norm_6dof
|
|
self.degrees_6dof = degrees_6dof
|
|
self.task = cfg.task
|
|
self.num_verts = cfg.num_verts
|
|
self.loss_pip = cfg.loss_pip
|
|
self.net_stride = 32
|
|
if local_rank==0:
|
|
logging.info('data_transform_list:%s'%self.transform)
|
|
logging.info('len:%d'%len(self.imgidx))
|
|
logging.info('glen:%d'%len(self.imggroup))
|
|
self.is_test_aug = False
|
|
|
|
self.enable_flip = cfg.enable_flip
|
|
self.flipindex = cfg.flipindex.copy()
|
|
self.verts3d_central_index = cfg.verts3d_central_index
|
|
self.eye_dataset = None
|
|
self.use_eye = False
|
|
if cfg.eyes is not None:
|
|
#from eye_dataset import EyeDataset
|
|
#self.eye_dataset = EyeDataset(cfg.eyes['root'], load_data=False)
|
|
self.use_eye = True
|
|
|
|
def set_test_aug(self):
|
|
if not self.is_test_aug:
|
|
from easydict import EasyDict as edict
|
|
cfg = edict()
|
|
cfg.aug_modes = ['test-aug']
|
|
cfg.input_size = self.input_size
|
|
cfg.task = 0
|
|
self.transform = get_aug_transform(cfg)
|
|
self.is_test_aug = True
|
|
|
|
def __getitem__(self, index):
|
|
idx = self.imgidx[index]
|
|
group = self.imggroup[index]
|
|
if group==0:
|
|
imgrec = self.imgrec
|
|
elif group==1:
|
|
imgrec = self.imgrec2
|
|
elif group==2:
|
|
imgrec = self.imgrec3
|
|
|
|
s = imgrec.read_idx(idx)
|
|
header, img = mx.recordio.unpack(s)
|
|
hlabel = header.label
|
|
img = mx.image.imdecode(img).asnumpy() #rgb numpy
|
|
|
|
label_verts = np.array(hlabel[:1220*3], dtype=np.float32).reshape(-1,3)
|
|
label_Rt = np.array(hlabel[1220*3:1220*3+16], dtype=np.float32).reshape(4,4)
|
|
label_tform = np.array(hlabel[1220*3+16:1220*3+16+6], dtype=np.float32).reshape(2,3)
|
|
label_6dof = Rt26dof(label_Rt, self.degrees_6dof)
|
|
if self.norm_6dof:
|
|
label_6dof = (label_6dof - self.label_6dof_mean) / self.label_6dof_std
|
|
label_6dof_tensor = torch.tensor(label_6dof, dtype=torch.float32)
|
|
el_inv = None
|
|
er_inv = None
|
|
if self.use_eye:
|
|
a = 1220*3+16+6
|
|
el_inv = np.array(hlabel[a:a+481*3], dtype=np.float32).reshape(-1,3)
|
|
a+=481*3
|
|
er_inv = np.array(hlabel[a:a+481*3], dtype=np.float32).reshape(-1,3)
|
|
#el_inv = torch.tensor(el_inv, dtype=torch.float32)
|
|
#er_inv = torch.tensor(er_inv, dtype=torch.float32)
|
|
#eye_verts = [el_inv, er_inv]
|
|
eye_verts = np.concatenate( (el_inv, er_inv), axis=0 )
|
|
|
|
#img_local = None
|
|
img_raw = None
|
|
#if self.task==0 or self.task==2:
|
|
# img_raw = img[:,self.input_size:,:]
|
|
#if self.task==0 or self.task==1 or self.task==3:
|
|
# img_local = img[:,:self.input_size,:]
|
|
assert img.shape[0]==img.shape[1] and img.shape[0]>=self.input_size
|
|
if img.shape[0]>self.input_size:
|
|
scale = float(self.input_size) / img.shape[0]
|
|
#print('scale:', scale)
|
|
#src_pts = np.float32([
|
|
# [0, 0],
|
|
# [0, 799],
|
|
# [799, 0]
|
|
#])
|
|
#tform = cv2.getAffineTransform(src_pts, self.dst_pts)
|
|
#new_tform = np.identity(3)
|
|
#new_tform[:2,:3] = tform
|
|
#label_tform = np.dot(new_tform, label_tform.T).T
|
|
|
|
src_pts = np.float32([
|
|
[0, 0, 1],
|
|
[0, 799, 1],
|
|
[799, 0, 1]
|
|
])
|
|
dst_pts = np.dot(label_tform, src_pts.T).T
|
|
dst_pts *= scale
|
|
dst_pts = dst_pts.copy()
|
|
src_pts = src_pts[:,:2].copy()
|
|
#print('index:', index)
|
|
#print(src_pts.shape, dst_pts.shape)
|
|
#print(label_tform.shape)
|
|
#print(src_pts.dtype)
|
|
#print(dst_pts.dtype)
|
|
tform = cv2.getAffineTransform(src_pts, dst_pts)
|
|
label_tform = tform
|
|
|
|
img = cv2.resize(img, (self.input_size, self.input_size))
|
|
|
|
img_local = img
|
|
need_points2d = (self.task==0 or self.task==3)
|
|
|
|
if need_points2d:
|
|
ones = np.ones([label_verts.shape[0], 1])
|
|
verts_homo = np.concatenate([label_verts, ones], axis=1)
|
|
verts = verts_homo @ label_Rt @ self.M_proj @ self.M1
|
|
w_ = verts[:, [3]]
|
|
verts = verts / w_
|
|
points2d = verts[:, :3]
|
|
points2d[:, 1] = 800.0 - points2d[:, 1]
|
|
verts2d = points2d[:,:2].copy()
|
|
points2d[:,2] = 1.0
|
|
points2d = np.dot(label_tform, points2d.T).T
|
|
else:
|
|
points2d = np.ones( (1,2), dtype=np.float32) * (self.input_size//2)
|
|
if self.use_eye:
|
|
verts_homo = eye_verts
|
|
if verts_homo.shape[1] == 3:
|
|
ones = np.ones([verts_homo.shape[0], 1])
|
|
verts_homo = np.concatenate([verts_homo, ones], axis=1)
|
|
verts_out = verts_homo @ label_Rt @ self.M_proj @ self.M1
|
|
w_ = verts_out[:, [3]]
|
|
verts_out = verts_out / w_
|
|
_points2d = verts_out[:, :3]
|
|
_points2d[:, 1] = 800.0 - _points2d[:, 1]
|
|
_points2d[:,2] = 1.0
|
|
_points2d = np.dot(label_tform, _points2d.T).T
|
|
eye_points = _points2d
|
|
#if img.shape[0]!=self.input_size:
|
|
# assert img.shape[0]>self.input_size
|
|
#img = cv2.resize(img, (self.input_size, self.input_size))
|
|
#scale = float(self.input_size) / img.shape[0]
|
|
#points2d *= scale
|
|
|
|
if self.transform is not None:
|
|
if img_raw is not None:
|
|
img_raw = self.transform(image=img_raw, keypoints=points2d)['image']
|
|
if img_local is not None:
|
|
height, width = img_local.shape[:2]
|
|
x = self.transform(image=img_local, keypoints=points2d)
|
|
img_local = x['image']
|
|
points2d = x['keypoints']
|
|
points2d = np.array(points2d, dtype=np.float32)
|
|
if self.is_test_aug:
|
|
for trans in x["replay"]["transforms"]:
|
|
if trans['__class_fullname__']=='ShiftScaleRotate' and trans['applied']:
|
|
param = trans['params']
|
|
dx, dy, angle, scale = param['dx'], param['dy'], param['angle'], param['scale']
|
|
center = (width / 2, height / 2)
|
|
matrix = cv2.getRotationMatrix2D(center, angle, scale)
|
|
matrix[0, 2] += dx * width
|
|
matrix[1, 2] += dy * height
|
|
new_matrix = np.identity(3)
|
|
new_matrix[:2,:3] = matrix
|
|
old_tform = np.identity(3)
|
|
old_tform[:2,:3] = label_tform
|
|
#new_tform = np.dot(old_tform, new_matrix)
|
|
new_tform = np.dot(new_matrix, old_tform)
|
|
#print('label_tform:')
|
|
#print(label_tform.flatten())
|
|
#print(new_matrix.flatten())
|
|
#print(new_tform.flatten())
|
|
label_tform = new_tform[:2,:3]
|
|
break
|
|
#print('trans param:', param)
|
|
|
|
|
|
if self.loss_pip:
|
|
target_map = np.zeros((self.num_verts, int(self.input_size/self.net_stride), int(self.input_size/self.net_stride)))
|
|
target_local_x = np.zeros((self.num_verts, int(self.input_size/self.net_stride), int(self.input_size/self.net_stride)))
|
|
target_local_y = np.zeros((self.num_verts, int(self.input_size/self.net_stride), int(self.input_size/self.net_stride)))
|
|
target = points2d / self.input_size
|
|
target_map, target_local_x, target_local_y = gen_target_pip(target, target_map, target_local_x, target_local_y)
|
|
target_map_tensor = torch.tensor(target_map, dtype=torch.float32)
|
|
target_x_tensor = torch.tensor(target_local_x, dtype=torch.float32)
|
|
target_y_tensor = torch.tensor(target_local_y, dtype=torch.float32)
|
|
d['pip_map'] = target_map_tensor
|
|
d['pip_x'] = target_x_tensor
|
|
d['pip_y'] = target_y_tensor
|
|
|
|
if self.is_train and self.enable_flip and np.random.random()<0.5:
|
|
#if self.local_rank==0:
|
|
# print('XXX:', label_verts[:5,:2])
|
|
img_local = img_local.flip([2])
|
|
x_of_central = 0.0
|
|
#x_of_central = label_verts[self.verts3d_central_index,0]
|
|
#x_of_central = np.mean(x_of_central)
|
|
label_verts = label_verts[self.flipindex,:]
|
|
label_verts[:,0] -= x_of_central
|
|
label_verts[:,0] *= -1.0
|
|
label_verts[:,0] += x_of_central
|
|
|
|
if need_points2d:
|
|
flipped_p2d = points2d[self.flipindex,:].copy()
|
|
flipped_p2d[:,0] = self.input_size - 1 - flipped_p2d[:,0]
|
|
points2d = flipped_p2d
|
|
if self.use_eye:
|
|
flipped_p2d = eye_points[self.flipindex,:].copy()
|
|
flipped_p2d[:,0] = self.input_size - 1 - flipped_p2d[:,0]
|
|
eye_points = flipped_p2d
|
|
label_verts_tensor = torch.tensor(label_verts*10.0, dtype=torch.float32)
|
|
d = {}
|
|
d['img_local'] = img_local
|
|
d['verts'] = label_verts_tensor
|
|
d['6dof'] = label_6dof_tensor
|
|
d['rt'] = torch.tensor(label_Rt, dtype=torch.float32)
|
|
if need_points2d:
|
|
points2d = points2d / (self.input_size//2) - 1.0
|
|
points2d_tensor = torch.tensor(points2d, dtype=torch.float32)
|
|
d['points2d'] = points2d_tensor
|
|
if self.use_eye:
|
|
d['eye_verts'] = torch.tensor(eye_verts, dtype=torch.float32)
|
|
eye_points = eye_points / (self.input_size//2) - 1.0
|
|
eye_points_tensor = torch.tensor(eye_points, dtype=torch.float32)
|
|
d['eye_points'] = eye_points_tensor
|
|
|
|
loss_weight = 1.0
|
|
if group!=0:
|
|
loss_weight = 0.0
|
|
loss_weight_tensor = torch.tensor(loss_weight, dtype=torch.float32)
|
|
d['loss_weight'] = loss_weight_tensor
|
|
label_tform_tensor = torch.tensor(label_tform, dtype=torch.float32)
|
|
d['tform'] = label_tform_tensor
|
|
|
|
#if img_local is None:
|
|
# image = (img_raw,)
|
|
#elif img_raw is None:
|
|
# image = (img_local,)
|
|
#else:
|
|
# image = (img_local,img_raw)
|
|
#ret = image + (label_verts_tensor, label_6dof_tensor, points2d_tensor)
|
|
if not self.is_train:
|
|
idx_tensor = torch.tensor([idx], dtype=torch.long)
|
|
d['idx'] = idx_tensor
|
|
d['verts2d'] = torch.tensor(verts2d, dtype=torch.float32)
|
|
return d
|
|
|
|
|
|
def __len__(self):
|
|
return len(self.imgidx)
|
|
|
|
def test_dataset1(cfg):
|
|
cfg.task = 0
|
|
is_train = False
|
|
center_axis = []
|
|
dataset = MXFaceDataset(cfg, is_train=is_train, norm_6dof=False, local_rank=0)
|
|
for i in range(len(dataset.flipindex)):
|
|
if i==dataset.flipindex[i]:
|
|
center_axis.append(i)
|
|
print(center_axis)
|
|
#dataset.transform = None
|
|
print('total:', len(dataset))
|
|
total = len(dataset)
|
|
#total = 50
|
|
list_6dof = []
|
|
all_mean_xs = []
|
|
for idx in range(total):
|
|
#img_local, img_raw, label_verts, label_6dof, = dataset[idx]
|
|
#img_local, img_raw, label_verts, label_6dof, points2d, tform, data_idx = dataset[idx]
|
|
#img_local, label_verts, label_6dof, points2d, tform, data_idx = dataset[idx]
|
|
d = dataset[idx]
|
|
img_local = d['img_local']
|
|
label_verts = d['verts']
|
|
label_6dof = d['6dof']
|
|
points2d = d['points2d']
|
|
label_verts = label_verts.numpy()
|
|
label_6dof = label_6dof.numpy()
|
|
points2d = points2d.numpy()
|
|
#print(img_local.shape, label_verts.shape, label_6dof.shape, points2d.shape)
|
|
verts3d = label_verts / 10.0
|
|
xs = []
|
|
for c in center_axis:
|
|
_x = verts3d[c,0]
|
|
xs.append(_x)
|
|
_std = np.std(xs)
|
|
print(xs)
|
|
print(_std)
|
|
#print(np.mean(xs))
|
|
all_mean_xs.append(np.mean(xs))
|
|
if idx%100==0:
|
|
print('processing:', idx, np.mean(all_mean_xs))
|
|
#print(label_verts[:3,:], label_6dof)
|
|
#list_6dof.append(label_6dof)
|
|
#print(image.__class__, label_verts.__class__)
|
|
#label = list(label_verts.numpy().flatten()) + list(label_6dof.numpy().flatten())
|
|
#points2d = label_verts2[:,:2]
|
|
#points2d = (points2d+1) * 128.0
|
|
#img_local = img_local.numpy()
|
|
#img_local = (img_local+1.0) * 128.0
|
|
#draw = img_local.astype(np.uint8).transpose( (1,2,0) )[:,:,::-1].copy()
|
|
#for i in range(points2d.shape[0]):
|
|
# pt = points2d[i].astype(np.int)
|
|
# cv2.circle(draw, pt, 2, (255,0,0), 2)
|
|
##output_path = "outputs/%d_%.3f_%.3f_%.3f.jpg"%(idx, label_6dof[0], label_6dof[1], label_6dof[2])
|
|
#output_path = "outputs/%06d.jpg"%(idx)
|
|
#cv2.imwrite(output_path, draw)
|
|
#list_6dof = np.array(list_6dof)
|
|
#print('MEAN:')
|
|
#print(np.mean(list_6dof, axis=0))
|
|
|
|
def test_loader1(cfg):
|
|
cfg.task = 0
|
|
is_train = True
|
|
dataset = MXFaceDataset(cfg, is_train=is_train, norm_6dof=False, local_rank=0)
|
|
loader = DataLoader(dataset, batch_size=64, shuffle=True)
|
|
for index, d in enumerate(loader):
|
|
#img_local = d['img_local']
|
|
label_verts = d['verts']
|
|
points2d = d['points2d']
|
|
tform = d['tform']
|
|
label_verts /= 10.0
|
|
points2d = (points2d + 1.0) * (cfg.input_size//2)
|
|
tform = tform.numpy()
|
|
verts = label_verts.numpy()
|
|
points2d = points2d.numpy()
|
|
print(verts.shape, points2d.shape, tform.shape)
|
|
np.save("temp/verts3d.npy", verts)
|
|
np.save("temp/points2d.npy", points2d)
|
|
np.save("temp/tform.npy", tform)
|
|
break
|
|
|
|
def test_facedataset1(cfg):
|
|
cfg.task = 0
|
|
cfg.input_size = 512
|
|
dataset = FaceDataset(cfg, is_train=True, local_rank=0)
|
|
for idx in range(100000):
|
|
img_local, label_verts, label_Rt, tform = dataset[idx]
|
|
label_Rt = label_Rt.numpy()
|
|
if label_Rt[0,0]>1.0:
|
|
print(idx, label_Rt.shape)
|
|
print(label_Rt)
|
|
break
|
|
|
|
def test_arcface(cfg):
|
|
cfg.task = 0
|
|
is_train = True
|
|
dataset = MXFaceDataset(cfg, is_train=is_train, norm_6dof=False, local_rank=0)
|
|
loader = DataLoader(dataset, batch_size=1, shuffle=True)
|
|
for index, d in enumerate(loader):
|
|
img = d['img_local'].numpy()
|
|
img /= 2.0
|
|
img += 0.5
|
|
img *= 255.0
|
|
img = img[0]
|
|
img = img.transpose( (1,2,0) )
|
|
img = img.astype(np.uint8)
|
|
img = cv2.resize(img, (144,144))
|
|
img = img[:,:,::-1]
|
|
img = img[8:120,16:128,:]
|
|
print(img.shape)
|
|
cv2.imwrite("temp/arc_%d.jpg"%index, img)
|
|
#np.save("temp/verts3d.npy", verts)
|
|
#np.save("temp/points2d.npy", points2d)
|
|
#np.save("temp/tform.npy", tform)
|
|
if index>100:
|
|
break
|
|
|
|
def test_dataset2(cfg):
|
|
cfg.task = 0
|
|
is_train = False
|
|
center_axis = []
|
|
dataset = MXFaceDataset(cfg, is_train=is_train, norm_6dof=False, local_rank=0)
|
|
for i in range(len(dataset.flipindex)):
|
|
if i==dataset.flipindex[i]:
|
|
center_axis.append(i)
|
|
print(center_axis)
|
|
#dataset.transform = None
|
|
print('total:', len(dataset))
|
|
total = len(dataset)
|
|
total = 50
|
|
list_6dof = []
|
|
all_mean_xs = []
|
|
for idx in range(total):
|
|
d = dataset[idx]
|
|
img_local = d['img_local']
|
|
label_verts = d['verts']
|
|
label_6dof = d['6dof']
|
|
points2d = d['points2d']
|
|
label_verts = label_verts.numpy()
|
|
label_6dof = label_6dof.numpy()
|
|
points2d = points2d.numpy()
|
|
eye_points = d['eye_points'].numpy()
|
|
eye_verts = d['eye_verts'].numpy()
|
|
print(eye_verts[:5,:])
|
|
#print(img_local.shape, label_verts.shape, label_6dof.shape, points2d.shape)
|
|
verts3d = label_verts / 10.0
|
|
#print(label_verts[:3,:], label_6dof)
|
|
#list_6dof.append(label_6dof)
|
|
#print(image.__class__, label_verts.__class__)
|
|
#label = list(label_verts.numpy().flatten()) + list(label_6dof.numpy().flatten())
|
|
#points2d = label_verts2[:,:2]
|
|
points2d = (points2d+1) * 128.0
|
|
eye_points = (eye_points+1) * 128.0
|
|
img_local = img_local.numpy()
|
|
img_local = (img_local+1.0) * 128.0
|
|
draw = img_local.astype(np.uint8).transpose( (1,2,0) )[:,:,::-1].copy()
|
|
for i in range(points2d.shape[0]):
|
|
pt = points2d[i].astype(np.int)
|
|
cv2.circle(draw, pt, 2, (255,0,0), 2)
|
|
for i in range(eye_points.shape[0]):
|
|
pt = eye_points[i].astype(np.int)
|
|
cv2.circle(draw, pt, 2, (0,255,0), 2)
|
|
##output_path = "outputs/%d_%.3f_%.3f_%.3f.jpg"%(idx, label_6dof[0], label_6dof[1], label_6dof[2])
|
|
output_path = "outputs/%06d.jpg"%(idx)
|
|
cv2.imwrite(output_path, draw)
|
|
#list_6dof = np.array(list_6dof)
|
|
#print('MEAN:')
|
|
#print(np.mean(list_6dof, axis=0))
|
|
|
|
if __name__ == "__main__":
|
|
from utils.utils_config import get_config
|
|
#cfg = get_config('configs/r0_a1.py')
|
|
cfg = get_config('configs/s2')
|
|
#test_loader1(cfg)
|
|
#test_facedataset1(cfg)
|
|
#test_arcface(cfg)
|
|
test_dataset2(cfg)
|
|
|
|
|