mirror of
https://github.com/deepinsight/insightface.git
synced 2026-05-15 04:37:50 +00:00
241 lines
9.6 KiB
Python
241 lines
9.6 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
from scipy import misc
|
|
import sys
|
|
import os
|
|
import argparse
|
|
import tensorflow as tf
|
|
import numpy as np
|
|
#import facenet
|
|
import detect_face
|
|
import random
|
|
from time import sleep
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
|
|
import face_image
|
|
from skimage import transform as trans
|
|
import cv2
|
|
|
|
def to_rgb(img):
    """Expand a 2-D single-channel image into a 3-channel ``uint8`` image.

    The input is copied unchanged into each of the three output channels;
    the output buffer is ``uint8``, so values are cast on assignment.
    Unpacking ``img.shape`` into two names means a non-2-D input raises
    ``ValueError``.
    """
    rows, cols = img.shape
    rgb = np.empty((rows, cols, 3), dtype=np.uint8)
    for channel in range(3):
        rgb[:, :, channel] = img
    return rgb
|
|
|
|
|
|
def IOU(Reframe, GTframe):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are indexed as ``[x_min, y_min, x_max, y_max]`` (width is
    taken as ``box[2] - box[0]`` and height as ``box[3] - box[1]``).

    Returns ``0`` when the boxes do not overlap with positive area,
    otherwise the float ratio ``intersection / union``.
    """
    ax, ay = Reframe[0], Reframe[1]
    aw = Reframe[2] - Reframe[0]
    ah = Reframe[3] - Reframe[1]

    bx, by = GTframe[0], GTframe[1]
    bw = GTframe[2] - GTframe[0]
    bh = GTframe[3] - GTframe[1]

    # Overlap along an axis = sum of both extents minus the extent of the
    # smallest span that contains both boxes.
    span_x = max(ax + aw, bx + bw) - min(ax, bx)
    overlap_w = aw + bw - span_x

    span_y = max(ay + ah, by + bh) - min(ay, by)
    overlap_h = ah + bh - span_y

    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection = overlap_w * overlap_h
    area_a = aw * ah
    area_b = bw * bh
    return intersection * 1. / (area_a + area_b - intersection)
|
|
|
|
|
|
def _similarity_matrix(from_points, to_points):
    """Estimate a similarity transform and return the first two rows of its matrix.

    The returned array is what this script hands to ``cv2.warpAffine`` in
    order to move ``from_points`` onto ``to_points``.
    """
    tform = trans.SimilarityTransform()
    tform.estimate(from_points, to_points)
    return tform.params[0:2, :]


def _detected_landmarks(points, index):
    """Return the landmarks of detection ``index`` as a (5, 2) array.

    Column ``index`` of ``points`` is reshaped to (2, 5) and transposed.
    NOTE(review): presumably the column stores the five x values followed by
    the five y values -- confirm against ``detect_face``.
    """
    return points[:, index].reshape((2, 5)).T


def _warp_to_template(image, landmarks, template, image_size):
    """Warp ``image`` so that ``landmarks`` are mapped onto ``template``.

    ``image_size`` is ``[height, width]`` of the output; borders are filled
    with zeros.
    """
    matrix = _similarity_matrix(landmarks, template)
    return cv2.warpAffine(image, matrix, (image_size[1], image_size[0]),
                          borderValue=0.0)


def _best_overlap_index(bbox, boxes, min_iou=0.3):
    """Return the row of ``boxes`` with the highest IOU against ``bbox``.

    Returns -1 when no row has an IOU strictly greater than ``min_iou``.
    On ties the first row wins.
    """
    best_iou, best_index = 0.0, -1
    # range (not xrange) so the script also runs on Python 3.
    for i in range(boxes.shape[0]):
        iou = IOU(bbox, boxes[i])
        if iou > best_iou:
            best_iou, best_index = iou, i
    return best_index if best_iou > min_iou else -1


def _fallback_crop(img, bbox, image_size):
    """Crop ``img`` without landmark alignment and resize it to ``image_size``.

    With a ``bbox`` the crop keeps the box height and uses a width matching
    the output aspect ratio, centred horizontally on the box.  Without one,
    the image is cropped with a 6% margin on every side.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    if bbox is None:
        margin_x = int(img_w * 0.06)
        margin_y = int(img_h * 0.06)
        roi = np.array((margin_x, margin_y, img_w - margin_x, img_h - margin_y),
                       dtype=np.int32)
    else:
        h = bbox[3] - bbox[1]
        w = bbox[2] - bbox[0]
        crop_w = int((float(h) / image_size[0]) * image_size[1])
        x = max(0, bbox[0] + (w - crop_w) // 2)
        xw = min(x + crop_w, img_w)
        # Clamp the top edge as well: a negative start index would be read
        # by NumPy as "counted from the end" and yield an empty/wrong crop.
        y = max(0, bbox[1])
        roi = np.array((x, y, xw, y + h), dtype=np.int32)
    crop = img[roi[1]:roi[3], roi[0]:roi[2], :]
    return cv2.resize(crop, (image_size[1], image_size[0]))


def _align_face(img, fimage, nets, template, image_size, threshold, factor, nrof):
    """Return an aligned RGB face crop of size ``image_size`` for one image.

    Strategies are tried in order and ``nrof[k]`` is incremented for the one
    that produced the result:

    0. pre-warp with the dataset landmarks, detect, align on detected points;
    1. detect on the full image and use the detection that overlaps the
       dataset bbox best (IOU > 0.3);
    2. forced detection inside the dataset bbox (score >= 0.3);
    3. plain crop derived from the dataset bbox;
    4. centre crop with a 6% margin (no bbox available).
    """
    pnet, rnet, onet = nets

    if fimage.landmark is not None:
        landmark = fimage.landmark.astype(np.float32)
        # Only the first three template points are used here, scaled by 1.5
        # and offset, to place the face inside a canvas twice the output size.
        matrix = _similarity_matrix(
            landmark, template[0:3, :] * 1.5 + image_size[0] * 0.25)
        enlarged = cv2.warpAffine(
            img, matrix, (image_size[1] * 2, image_size[0] * 2), borderValue=0.0)
        boxes, points = detect_face.detect_face(
            enlarged, image_size[0], pnet, rnet, onet, threshold, factor)
        if boxes.shape[0] > 0:
            warped = _warp_to_template(
                enlarged, _detected_landmarks(points, 0), template, image_size)
            nrof[0] += 1
            return warped

    if fimage.bbox is not None:
        boxes, points = detect_face.detect_face(
            img, img.shape[0] // 4, pnet, rnet, onet, threshold, factor)
        if boxes.shape[0] > 0:
            bindex = _best_overlap_index(fimage.bbox, boxes[:, 0:4])
            if bindex >= 0:
                warped = _warp_to_template(
                    img, _detected_landmarks(points, bindex), template, image_size)
                nrof[1] += 1
                return warped

        boxes, points = detect_face.detect_face_force(
            img, fimage.bbox, pnet, rnet, onet)
        assert boxes.shape[0] == 1
        if boxes[0][4] >= 0.3:
            warped = _warp_to_template(
                img, _detected_landmarks(points, 0), template, image_size)
            nrof[2] += 1
            return warped

    warped = _fallback_crop(img, fimage.bbox, image_size)
    nrof[3 if fimage.bbox is not None else 4] += 1
    return warped


def main(args):
    """Align every image of a dataset and write the crops plus an ``lst`` index.

    Args:
        args: namespace with ``output_dir``, ``name`` and ``input_dir``.
            ``name`` and ``input_dir`` are forwarded to
            ``face_image.get_dataset``; ``output_dir`` (with ``~`` expanded)
            receives the aligned images and the ``lst`` file.

    Each aligned image is stored under ``<output_dir>/<a>/<b>/<c>`` where
    ``a/b/c`` are the last three ``/``-separated components of the source
    path, and one line ``1\\t<target_file>\\t<class id>`` is appended to
    ``<output_dir>/lst``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dataset = face_image.get_dataset(args.name, args.input_dir)
    print('dataset size', args.name, len(dataset))

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
    nets = (pnet, rnet, onet)

    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = [112, 112]  # [height, width] of the aligned output
    # Five reference landmark positions the detected points are aligned to.
    src = np.array([
        [30.2946, 51.6963],
        [65.5318, 51.5014],
        [48.0252, 71.7366],
        [33.5493, 92.3655],
        [62.7299, 92.2041]], dtype=np.float32)
    if image_size[1] == 112:
        # Shift the template horizontally for the 112-wide output.
        src[:, 0] += 8.0

    # Use the expanded directory everywhere so that e.g. "~/out" does not
    # create a literal "~" directory next to the real output.
    output_filename = os.path.join(output_dir, 'lst')

    try:
        with open(output_filename, "w") as text_file:
            # Per-strategy success counters, see _align_face.
            nrof = np.zeros((5,), dtype=np.int32)
            for nrof_images_total, fimage in enumerate(dataset):
                if nrof_images_total % 100 == 0:
                    print("Processing %d, (%s)" % (nrof_images_total, nrof))
                image_path = fimage.image_path
                if not os.path.exists(image_path):
                    print('image not found (%s)' % image_path)
                    continue
                try:
                    # NOTE: scipy.misc.imread only exists in older SciPy
                    # releases (it was removed in SciPy 1.2).
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    print('{}: {}'.format(image_path, e))
                    continue
                if img.ndim < 2:
                    print('Unable to align "%s", img dim error' % image_path)
                    continue
                if img.ndim == 2:
                    img = to_rgb(img)
                img = img[:, :, 0:3]  # keep only the first three channels

                _paths = image_path.split('/')
                a, b, c = _paths[-3], _paths[-2], _paths[-1]
                target_dir = os.path.join(output_dir, a, b)
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                target_file = os.path.join(target_dir, c)

                warped = _align_face(img, fimage, nets, src, image_size,
                                     threshold, factor, nrof)

                # Reverse the channel order before handing the image to OpenCV.
                bgr = warped[..., ::-1]
                cv2.imwrite(target_file, bgr)
                oline = '%d\t%s\t%d\n' % (1, target_file, int(fimage.classname))
                text_file.write(oline)
    finally:
        # Release the TensorFlow session once all images are processed.
        sess.close()
|
|
|
|
|
|
def parse_arguments(argv):
    """Parse the command-line options of the alignment script.

    Args:
        argv: list of argument strings (without the program name).

    Returns:
        ``argparse.Namespace`` with ``input_dir``, ``name`` and ``output_dir``.

    All three options are required: ``main`` passes ``output_dir`` straight
    to ``os.path.expanduser`` and forwards the other two to
    ``face_image.get_dataset`` without checking for ``None``, so a missing
    option is rejected here with a regular argparse usage error.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input-dir', type=str, required=True,
                        help='Directory with unaligned images.')
    parser.add_argument('--name', type=str, required=True,
                        help='dataset name, can be facescrub, megaface, webface, celeb.')
    parser.add_argument('--output-dir', type=str, required=True,
                        help='Directory with aligned face thumbnails.')
    return parser.parse_args(argv)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: parse the CLI options (program name stripped)
    # and run the alignment.
    cli_args = parse_arguments(sys.argv[1:])
    main(cli_args)
|
|
|