Initial Matlab version of MTCNN.

This commit is contained in:
Tadas Baltrusaitis
2017-08-07 14:49:29 -04:00
parent cdd258330f
commit 6a613dffb6
10 changed files with 430 additions and 1 deletions

Binary file not shown.

View File

@@ -0,0 +1,20 @@
function [ out_map ] = PReLU( input_maps, PReLU_params )
%PRELU Parametric ReLU: scale the negative entries of each channel by its slope.
%
%   out_map = PReLU(input_maps, PReLU_params)
%
%   input_maps   - activations. When the array has more than two dimensions
%                  the channels run along dimension 3; otherwise every
%                  column is treated as one channel.
%   PReLU_params - one slope per channel, read as PReLU_params(i).
%
%   out_map      - array of the same size and class as input_maps in which
%                  the negative values of channel i are multiplied by
%                  PReLU_params(i); all other values are left untouched.

    % Start from a copy so the result is allocated once instead of being
    % regrown with cat() for every channel. This also keeps the shape of an
    % input that has zero channels.
    out_map = input_maps;

    if ndims(input_maps) > 2
        for i = 1:size(input_maps, 3)
            % The trailing colon keeps every sample of a 4-D batch
            channel = input_maps(:, :, i, :);
            is_negative = channel < 0;
            channel(is_negative) = channel(is_negative) * PReLU_params(i);
            out_map(:, :, i, :) = channel;
        end
    else
        for i = 1:size(input_maps, 2)
            column = input_maps(:, i);
            is_negative = column < 0;
            column(is_negative) = column(is_negative) * PReLU_params(i);
            out_map(:, i) = column;
        end
    end
end

View File

@@ -0,0 +1,22 @@
function [ output_maps ] = convolution( input_maps, kernels, biases )
%CONVOLUTION Convolve the input maps with a bank of kernels through vl_nnconv.
%
%   output_maps = convolution(input_maps, kernels, biases)
%
%   The input is cast to single before the call; kernels and biases are
%   forwarded to vl_nnconv unchanged.
%
%   Earlier convn-based implementation, kept for reference only:
%
%     n_filters = size(kernels, 4);
%     kernels2 = kernels(:,:,end:-1:1,:);
%     for i=1:n_filters
%         for n_in_maps=1:size(kernels,3)
%             kernels2(:,:,n_in_maps,i) = fliplr(squeeze(kernels2(:,:,n_in_maps,i)));
%             kernels2(:,:,n_in_maps,i) = flipud(squeeze(kernels2(:,:,n_in_maps,i)));
%         end
%     end
%     output_maps_1 = [];
%     for i=1:n_filters
%         output_maps_1 = cat(3, output_maps_1, convn(input_maps, kernels2(:,:,:,i), 'valid') + biases(i));
%     end

    single_maps = single(input_maps);
    output_maps = vl_nnconv(single_maps, kernels, biases);
end

View File

@@ -0,0 +1,178 @@
% Runs the three-stage MTCNN cascade (PNet -> RNet -> ONet) on test1.jpg.
% After the script finishes, total_bboxes holds one row per detection
% ([x1 y1 x2 y2 score]) and lmarks one row of 10 landmark values per
% detection (columns 1:5 are scaled by box width, columns 6:10 by height).
clear;

% Make sure we have the dependencies for convolution
od = cd('../../face_validation');
setup;
cd(od);

img = imread('test1.jpg');

% The crops below copy three colour planes per proposal, so replicate a
% single-channel (grayscale) image across three channels up front.
if size(img, 3) == 1
    img = repmat(img, [1 1 3]);
end

height_orig = size(img,1);
width_orig = size(img,2);

% Everything is done in floats
img = single(img);

% Minimum face size
min_face_size = 30;

% Image pyramid scaling factor
factor = 0.709;

% Thresholds for the PNet, RNet, and ONet (indexed in that order below)
threshold=[0.6 0.7 0.7];

min_dim = min([width_orig height_orig]);

% Face support region is 12x12 px, so from that can work out the largest
% scale (which is 12 / min), and work down from there to smallest scale (no smaller than
% 12x12px)
face_support = 12;
num_scales = floor(log(min_face_size / min_dim) / log(factor));
scales = (face_support / min_face_size)*factor.^(0:num_scales);

load('PNet_mlab');
load('RNet_mlab');
load('ONet_mlab');

total_bboxes = [];
% Landmarks are only produced by the ONet stage; keep the variable defined
% (and empty) when no candidate gets that far.
lmarks = zeros(0, 10);

% First the PNet stage on image pyramid
for s = scales

    h_pyr = ceil(height_orig * s);
    w_pyr = ceil(width_orig * s);

    % Resize the image and normalize to what MTCNN expects it to be
    im_data=(imresize(img, [h_pyr w_pyr],'bilinear')-127.5)*0.0078125;

    [ out_prob, out_correction ] = PNet( im_data, PNet_mlab );

    % Generate bounding boxes from the heatmap
    bboxes = generate_bounding_boxes(out_prob, out_correction, s, threshold(1), face_support);

    % TODO correct bboxes before running NMS?, as now lots of overlapping
    % boxes are present
    % Perform non-maximum suppression to remove redundant bounding boxes
    pick = non_maximum_supression(bboxes, 0.5, 'Union');
    bboxes=bboxes(pick,:);

    if ~isempty(bboxes)
        total_bboxes = cat(1, total_bboxes, bboxes);
    end
end

if ~isempty(total_bboxes)
    % Non-maximum suppression across bounding boxes, and their offset
    % correction
    total_bboxes = correct_bbox(total_bboxes(:,1:5), total_bboxes(:,6:end), false, true, true);
end

num_bbox = size(total_bboxes,1);

% RNet stage
if num_bbox > 0

    proposal_imgs = zeros(24, 24, 3, num_bbox);
    for k=1:num_bbox

        width_target = total_bboxes(k,3) - total_bboxes(k,1) + 1;
        height_target = total_bboxes(k,4) - total_bboxes(k,2) + 1;

        % Work out the start and end indices in the original image
        start_x_in = max(total_bboxes(k,1), 1);
        start_y_in = max(total_bboxes(k,2), 1);
        end_x_in = min(total_bboxes(k,3), width_orig);
        end_y_in = min(total_bboxes(k,4), height_orig);

        % Work out the start and end indices in the target image
        start_x_out = max(-total_bboxes(k,1)+2, 1);
        start_y_out = max(-total_bboxes(k,2)+2, 1);
        end_x_out = min(width_target - (total_bboxes(k,3)-width_orig), width_target);
        end_y_out = min(height_target - (total_bboxes(k,4)-height_orig), height_target);

        % Parts of the box that fall outside the image stay zero
        tmp = zeros(height_target, width_target, 3);

        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
            img(start_y_in:end_y_in, start_x_in:end_x_in,:);

        proposal_imgs(:,:,:,k) = imresize(tmp, [24 24], 'bilinear');
    end

    % Normalize the proposal images
    proposal_imgs = (proposal_imgs - 127.5) * 0.0078125;

    % Apply RNet to proposal faces
    [ score, out_correction ] = RNet( proposal_imgs, RNet_mlab );
    out_correction = out_correction';

    % Find faces above the threshold
    to_keep = find(score > threshold(2));

    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
    out_correction = out_correction(to_keep,:);

    if ~isempty(total_bboxes)
        % Non-maximum suppression across bounding boxes, and their offset
        % correction
        total_bboxes = correct_bbox(total_bboxes, out_correction, true, true, true);
    end
end

num_bbox = size(total_bboxes,1);

% ONet stage
if num_bbox > 0

    proposal_imgs = zeros(48, 48, 3, num_bbox);
    for k=1:num_bbox

        width_target = total_bboxes(k,3) - total_bboxes(k,1) + 1;
        height_target = total_bboxes(k,4) - total_bboxes(k,2) + 1;

        % Work out the start and end indices in the original image
        start_x_in = max(total_bboxes(k,1), 1);
        start_y_in = max(total_bboxes(k,2), 1);
        end_x_in = min(total_bboxes(k,3), width_orig);
        end_y_in = min(total_bboxes(k,4), height_orig);

        % Work out the start and end indices in the target image
        start_x_out = max(-total_bboxes(k,1)+2, 1);
        start_y_out = max(-total_bboxes(k,2)+2, 1);
        end_x_out = min(width_target - (total_bboxes(k,3)-width_orig), width_target);
        end_y_out = min(height_target - (total_bboxes(k,4)-height_orig), height_target);

        % Parts of the box that fall outside the image stay zero
        tmp = zeros(height_target, width_target, 3);

        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
            img(start_y_in:end_y_in, start_x_in:end_x_in,:);

        proposal_imgs(:,:,:,k) = imresize(tmp, [48 48], 'bilinear');
    end

    % Normalize the proposal images
    proposal_imgs = (proposal_imgs - 127.5) * 0.0078125;

    % Apply ONet to proposal faces
    [ score, out_correction, lmarks ] = ONet( proposal_imgs, ONet_mlab );
    out_correction = out_correction';
    lmarks = lmarks';

    % Pick the final faces above the threshold
    to_keep = find(score > threshold(3));
    lmarks = lmarks(to_keep, :);
    out_correction = out_correction(to_keep, :);
    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];

    % Map the landmarks from box-relative values to image coordinates
    bbw = total_bboxes(:,3) - total_bboxes(:,1) + 1;
    bbh = total_bboxes(:,4) - total_bboxes(:,2) + 1;

    lmarks(:, 1:5) = bbw .* lmarks(:,1:5) + total_bboxes(:,1) - 1;
    lmarks(:, 6:10) = bbh .* lmarks(:,6:10) + total_bboxes(:,2) - 1;

    % Correct the bounding boxes
    if size(total_bboxes,1)>0
        [total_bboxes, to_keep] = correct_bbox(total_bboxes, out_correction, true, false, false);
        lmarks = lmarks(to_keep, :);
    end
end

View File

@@ -0,0 +1,25 @@
function [bboxes] = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
%GENERATE_BOUNDING_BOXES Turn a thresholded heatmap into boxes in original image space.
%
%   bboxes = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
%
%   heatmap      - map of scores; every cell with a score >= t yields a box.
%   correction   - array whose planes (:,:,1) .. (:,:,4) hold the dx1, dy1,
%                  dx2 and dy2 offsets for each heatmap cell.
%   scale        - scale of the image the heatmap was computed on, relative
%                  to the original image; coordinates are divided by it.
%   t            - score threshold.
%   face_support - side length covered by one heatmap cell.
%
%   bboxes       - K-by-9 matrix, one row per cell above the threshold:
%                  [x1 y1 x2 y2 score dx1 dy1 dx2 dy2].

    % Correction for the pooling
    stride = 2;

    % Offsets for x, y, width and height
    dx1 = correction(:,:,1);
    dy1 = correction(:,:,2);
    dx2 = correction(:,:,3);
    dy2 = correction(:,:,4);

    % Find the cells above the threshold. Searching the flattened map keeps
    % the index list a column even when the heatmap has a single row; find()
    % on a row vector returns row vectors, which would glue all hits into
    % one long row instead of one row per box.
    inds = find(heatmap(:) >= t);
    [row, col] = ind2sub(size(heatmap), inds);

    % Find the corresponding scores and bbox corrections
    score = column_at(heatmap, inds);
    correction = [column_at(dx1, inds) column_at(dy1, inds) ...
                  column_at(dx2, inds) column_at(dy2, inds)];

    % Zero-based (x, y) cell position: x follows the columns, y the rows
    origin = [col(:) - 1, row(:) - 1];

    top_left = fix((stride * origin + 1) / scale);
    bottom_right = fix((stride * origin + face_support) / scale);

    bboxes = [top_left bottom_right score correction];
end

function values = column_at(map, inds)
% Values of map at the linear indices inds, always returned as a column.
    values = map(inds);
    values = values(:);
end

View File

@@ -0,0 +1,57 @@
function [ output_maps ] = max_pooling( input_maps)
%MAX_POOLING Max pooling over distinct 2x2 blocks of every map input_maps(:,:,i).
%
%   The result has ceil(rows/2) x ceil(cols/2) cells per map. When the row
%   or column count is odd, the last output row/column is pooled from the
%   cells that exist: 2x1 or 1x2 blocks along the edge, and the single
%   corner cell when both counts are odd.

    n_rows = size(input_maps, 1);
    n_cols = size(input_maps, 2);
    n_maps = size(input_maps, 3);

    % Number of complete 2x2 blocks in each direction and the input extent
    % they cover
    full_rows_out = floor(n_rows / 2);
    full_cols_out = floor(n_cols / 2);
    even_rows = 2 * full_rows_out;
    even_cols = 2 * full_cols_out;

    has_odd_row = mod(n_rows, 2) ~= 0;
    has_odd_col = mod(n_cols, 2) ~= 0;

    output_maps = zeros(ceil(n_rows / 2), ceil(n_cols / 2), n_maps);

    for k = 1:n_maps
        % Complete blocks: one im2col column per block, maximum per column
        blocks = im2col(input_maps(1:even_rows, 1:even_cols, k), [2,2], 'distinct');
        output_maps(1:full_rows_out, 1:full_cols_out, k) = ...
            reshape(max(blocks), full_rows_out, full_cols_out);

        % Left-over last column, pooled in vertical pairs
        if has_odd_col
            pairs = im2col(input_maps(1:even_rows, end, k), [2,1], 'distinct');
            output_maps(1:full_rows_out, end, k) = max(pairs);
        end

        % Left-over last row, pooled in horizontal pairs
        if has_odd_row
            pairs = im2col(input_maps(end, 1:even_cols, k), [1,2], 'distinct');
            output_maps(end, 1:full_cols_out, k) = max(pairs);
        end
    end

    % The bottom-right cell has no neighbours left to pool with
    if has_odd_col && has_odd_row
        output_maps(end, end, :) = input_maps(end, end, :);
    end
end

View File

@@ -0,0 +1,66 @@
function [ output_maps ] = max_pooling2( input_maps, kernel_size, stride)
%MAX_POOLING2 Max pooling with a square window and a configurable stride.
%
%   output_maps = max_pooling2(input_maps, kernel_size, stride)
%
%   input_maps  - array whose first two dimensions are rows and columns;
%                 dimensions 3 and 4 are pooled independently.
%   kernel_size - side length of the square pooling window.
%   stride      - step between the top-left corners of successive windows.
%
%   output_maps - double array with ceil((n - kernel_size)/stride) + 1 cells
%                 along each pooled dimension. A window that runs past the
%                 bottom or right edge is pooled over the cells that exist.
%
%   Window positions are computed directly rather than by filtering sliding
%   im2col columns with mod(idx, stride) == 1, which selects nothing for a
%   stride of 1. The output extent uses ceil instead of round, so for
%   strides of 3 or more a partial edge window gets its own output cell
%   instead of overwriting the last complete one.

    orig_rows = size(input_maps, 1);
    orig_cols = size(input_maps, 2);

    pooled_rows = pooled_extent(orig_rows, kernel_size, stride);
    pooled_cols = pooled_extent(orig_cols, kernel_size, stride);

    output_maps = zeros(pooled_rows, pooled_cols, size(input_maps,3), size(input_maps,4));

    for r = 1:pooled_rows
        row_first = (r - 1) * stride + 1;
        % Clip the window at the bottom edge
        row_last = min(row_first + kernel_size - 1, orig_rows);

        for c = 1:pooled_cols
            col_first = (c - 1) * stride + 1;
            % Clip the window at the right edge
            col_last = min(col_first + kernel_size - 1, orig_cols);

            % All maps and batch entries are reduced at once; the explicit
            % dimension arguments keep one-row or one-column windows correct
            window = input_maps(row_first:row_last, col_first:col_last, :, :);
            output_maps(r, c, :, :) = max(max(window, [], 1), [], 2);
        end
    end
end

function n_out = pooled_extent(n_in, kernel_size, stride)
% Number of pooling windows along one dimension of length n_in.
    if n_in == 0
        n_out = 0;
        return;
    end

    n_out = max(ceil((n_in - kernel_size) / stride), 0) + 1;

    % Every window has to start inside the input; this can only trigger
    % when the stride is larger than the kernel
    if (n_out - 1) * stride >= n_in
        n_out = n_out - 1;
    end
end

View File

@@ -0,0 +1,46 @@
function pick = non_maximum_supression(boxes, overlap_threshold,type)
%NON_MAXIMUM_SUPRESSION Greedy non-maximum suppression of scored boxes.
%
%   pick = non_maximum_supression(boxes, overlap_threshold, type)
%
%   boxes             - one row per box: [x1 y1 x2 y2 score ...].
%   overlap_threshold - a remaining box is dropped when its overlap with the
%                       box just picked exceeds this value.
%   type              - 'Min' divides the intersection by the smaller of the
%                       two areas; any other value divides by the union.
%
%   pick              - row indices into boxes of the boxes that survive,
%                       in order of decreasing score ([] for empty input).

    if isempty(boxes)
        pick = [];
        return;
    end

    % Corners, scores and (inclusive pixel) areas of all boxes
    x1 = boxes(:,1);
    y1 = boxes(:,2);
    x2 = boxes(:,3);
    y2 = boxes(:,4);
    scores = boxes(:,5);
    area = (x2 - x1 + 1) .* (y2 - y1 + 1);

    % Ascending order, so the best remaining candidate is always the last one
    [~, order] = sort(scores);

    use_min = strcmp(type, 'Min');

    pick = zeros(numel(scores), 1);
    n_picked = 0;

    while ~isempty(order)
        best = order(end);
        rest = order(1:end-1);

        n_picked = n_picked + 1;
        pick(n_picked) = best;

        % Intersection of the picked box with every remaining candidate
        left = max(x1(best), x1(rest));
        top = max(y1(best), y1(rest));
        right = min(x2(best), x2(rest));
        bottom = min(y2(best), y2(rest));

        w = max(0.0, right - left + 1);
        h = max(0.0, bottom - top + 1);
        inter = w .* h;

        if use_min
            overlap = inter ./ min(area(best), area(rest));
        else
            overlap = inter ./ (area(best) + area(rest) - inter);
        end

        % Keep only the candidates that do not overlap too much
        order = rest(overlap <= overlap_threshold);
    end

    pick = pick(1:n_picked);
end

View File

@@ -0,0 +1,15 @@
function [bbox_out] = rectify(bbox_in)
%RECTIFY Expand every box to a square that keeps the centre of the original.
%
%   bbox_in  - one row per box, columns 1:4 being [x1 y1 x2 y2].
%   bbox_out - N-by-4 matrix [x1 y1 x2 y2] of squares whose side equals the
%              longer side of the corresponding input box.

    box_w = bbox_in(:,3) - bbox_in(:,1);
    box_h = bbox_in(:,4) - bbox_in(:,2);

    % Side of the square is the longer of the two sides
    side = max(box_w, box_h);

    % Shift the top-left corner by half of the growth in each direction
    left = bbox_in(:,1) + 0.5 * (box_w - side);
    top = bbox_in(:,2) + 0.5 * (box_h - side);

    bbox_out = [left, top, left + side, top + side];
end

View File

@@ -83,7 +83,7 @@ function WriteOutFaceCheckersCNNbinary(locationTxt, faceCheckers)
for k=1:num_in_map
for k2=1:num_out_kerns
% Write out the bias term
% Write out the kernel
W = squeeze(cnn.layers{layers}.weights{1}(:,:,k,k2));
writeMatrixBin(faceCheckerFile, W, 5);
end