mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2025-12-30 13:02:30 +00:00
Initial Matlab version of MTCNN.
This commit is contained in:
BIN
matlab_version/face_detection/mtcnn/PNet_mlab.mat
Normal file
BIN
matlab_version/face_detection/mtcnn/PNet_mlab.mat
Normal file
Binary file not shown.
20
matlab_version/face_detection/mtcnn/PReLU.m
Normal file
20
matlab_version/face_detection/mtcnn/PReLU.m
Normal file
@@ -0,0 +1,20 @@
|
||||
function [ out_map ] = PReLU( input_maps, PReLU_params )
%PRELU Parametric ReLU: scale the negative entries of each channel.
%
%   out_map = PRELU(input_maps, PReLU_params) returns an array of the same
%   size and class as input_maps in which every negative entry has been
%   multiplied by the slope of its channel; non-negative entries are copied
%   unchanged.
%
%   input_maps    - Either an array with more than two dimensions, where
%                   the 3rd dimension indexes the channel (any 4th
%                   dimension is processed along with it), or a 2-D matrix
%                   where each column is treated as one channel.
%   PReLU_params  - Slopes; element i is applied to channel i.
%
%   The output is preallocated from the input rather than grown with cat()
%   inside the loop, which avoids re-copying the whole result for every
%   channel and keeps the input's shape even when it has no channels.

    out_map = input_maps;

    if(ndims(input_maps) > 2)
        % Channels live in the 3rd dimension
        for i=1:size(input_maps,3)
            in_map = input_maps(:,:,i,:);
            negative = in_map < 0;
            in_map(negative) = in_map(negative) * PReLU_params(i);
            out_map(:,:,i,:) = in_map;
        end
    else
        % 2-D input: one channel per column
        for i=1:size(input_maps,2)
            in_map = input_maps(:,i);
            negative = in_map < 0;
            in_map(negative) = in_map(negative) * PReLU_params(i);
            out_map(:,i) = in_map;
        end
    end
end
|
||||
|
||||
22
matlab_version/face_detection/mtcnn/convolution.m
Normal file
22
matlab_version/face_detection/mtcnn/convolution.m
Normal file
@@ -0,0 +1,22 @@
|
||||
function [ output_maps ] = convolution( input_maps, kernels, biases )
%CONVOLUTION Convolve feature maps with a bank of kernels and add biases.
%
%   output_maps = CONVOLUTION(input_maps, kernels, biases) converts
%   input_maps to single precision and passes it, together with kernels and
%   biases, to vl_nnconv; whatever vl_nnconv returns is returned unchanged.
%
%   NOTE(review): vl_nnconv is not defined alongside this file; it is
%   presumably supplied by MatConvNet -- confirm it is on the path.
%
%   Disabled pure-MATLAB reference, kept for comparison (it reverses the
%   kernels along their first three dimensions and applies convn per
%   filter):
%
%       n_filters = size(kernels, 4);
%       kernels2 = kernels(:,:,end:-1:1,:);
%       for i=1:n_filters
%           for n_in_maps=1:size(kernels,3)
%               kernels2(:,:,n_in_maps,i) = fliplr(squeeze(kernels2(:,:,n_in_maps,i)));
%               kernels2(:,:,n_in_maps,i) = flipud(squeeze(kernels2(:,:,n_in_maps,i)));
%           end
%       end
%       output_maps_1 = [];
%       for i=1:n_filters
%           output_maps_1 = cat(3, output_maps_1, ...
%               convn(input_maps, kernels2(:,:,:,i), 'valid') + biases(i));
%       end

    single_maps = single(input_maps);
    output_maps = vl_nnconv(single_maps, kernels, biases);
end
|
||||
|
||||
178
matlab_version/face_detection/mtcnn/detect_face.m
Normal file
178
matlab_version/face_detection/mtcnn/detect_face.m
Normal file
@@ -0,0 +1,178 @@
|
||||
% DETECT_FACE  Script running a three-stage MTCNN cascade on 'test1.jpg'.
%
% Stages: PNet over an image pyramid, then RNet on 24x24 crops, then ONet
% on 48x48 crops. On completion total_bboxes holds one row per detection
% ([x1 y1 x2 y2 score]) and, if the ONet stage ran, lmarks holds the
% corresponding landmark coordinates (columns 1:5 are x, 6:10 are y).
%
% Requires R2016b or newer: the script relies on implicit expansion and on
% a local function defined at the end of the script.

clear;

% Make sure we have the dependencies for convolution. The original folder
% is restored even when setup fails, so an error does not leave the
% session stranded in another directory.
od = cd('../../face_validation');
try
    setup;
catch setup_err
    cd(od);
    rethrow(setup_err);
end
cd(od);

img = imread('test1.jpg');
height_orig = size(img,1);
width_orig = size(img,2);

% Everything is done in floats
img = single(img);

% Minimum face size
min_face_size = 30;

% Image pyramid scaling factor
factor = 0.709;

% Thresholds for the PNet, RNet, and ONet (in that order)
threshold=[0.6 0.7 0.7];

min_dim = min([width_orig height_orig]);

% Face support region is 12x12 px. The largest scale is
% face_support / min_face_size; each further pyramid level is shrunk by
% `factor`, stopping while the shorter image side still maps to at least
% face_support pixels.
face_support = 12;
num_scales = floor(log(min_face_size / min_dim) / log(factor));
scales = (face_support / min_face_size)*factor.^(0:num_scales);

% NOTE(review): these .mat files presumably define the variables
% PNet_mlab, RNet_mlab and ONet_mlab used below -- confirm.
load('PNet_mlab');
load('RNet_mlab');
load('ONet_mlab');

total_bboxes = [];

% First the PNet stage on image pyramid
for s = scales
    h_pyr = ceil(height_orig * s);
    w_pyr = ceil(width_orig * s);

    % Resize the image and normalize to what MTCNN expects it to be
    % (0.0078125 is 1/128)
    im_data=(imresize(img, [h_pyr w_pyr],'bilinear')-127.5)*0.0078125;

    [ out_prob, out_correction ] = PNet( im_data, PNet_mlab );

    % Generate bounding boxes from the heatmap
    bboxes = generate_bounding_boxes(out_prob, out_correction, s, threshold(1), face_support);

    % TODO correct bboxes before running NMS?, as now lots of overlapping
    % boxes are present

    % Perform non-maximum suppression to remove redundant bounding boxes
    pick = non_maximum_supression(bboxes, 0.5, 'Union');
    bboxes=bboxes(pick,:);
    if ~isempty(bboxes)
        total_bboxes = cat(1, total_bboxes, bboxes);
    end
end

if ~isempty(total_bboxes)
    % Non-maximum suppression across bounding boxes, and their offset
    % correction (columns 1:5 are box + score, 6:end are the offsets)
    total_bboxes = correct_bbox(total_bboxes(:,1:5), total_bboxes(:,6:end), false, true, true);
end
num_bbox = size(total_bboxes,1);

% RNet stage
if num_bbox > 0

    % Cut out, zero-pad, resize to 24x24 and normalize every proposal
    proposal_imgs = crop_proposals(img, total_bboxes, 24);

    % Apply RNet to proposal faces
    [ score, out_correction ] = RNet( proposal_imgs, RNet_mlab );
    out_correction = out_correction';

    % Find faces above the threshold
    to_keep = find(score > threshold(2));

    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
    out_correction = out_correction(to_keep,:);

    if ~isempty(total_bboxes)
        % Non-maximum suppression across bounding boxes, and their offset
        % correction
        total_bboxes = correct_bbox(total_bboxes, out_correction, true, true, true);
    end
end

num_bbox = size(total_bboxes,1);

% ONet stage
if num_bbox > 0

    % Cut out, zero-pad, resize to 48x48 and normalize every proposal
    proposal_imgs = crop_proposals(img, total_bboxes, 48);

    % Apply ONet to proposal faces
    [ score, out_correction, lmarks ] = ONet( proposal_imgs, ONet_mlab );
    out_correction = out_correction';
    lmarks = lmarks';

    % Pick the final faces above the threshold
    to_keep = find(score > threshold(3));
    lmarks = lmarks(to_keep, :);
    out_correction = out_correction(to_keep, :);
    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];

    % Map the landmarks from box-relative values to image coordinates,
    % using the boxes as they were fed to ONet (before their correction)
    bbw = total_bboxes(:,3) - total_bboxes(:,1) + 1;
    bbh = total_bboxes(:,4) - total_bboxes(:,2) + 1;

    lmarks(:, 1:5) = bbw .* lmarks(:,1:5) + total_bboxes(:,1) - 1;
    lmarks(:, 6:10) = bbh .* lmarks(:,6:10) + total_bboxes(:,2) - 1;

    % Correct the bounding boxes
    if size(total_bboxes,1)>0
        [total_bboxes, to_keep] = correct_bbox(total_bboxes, out_correction, true, false, false);
        lmarks = lmarks(to_keep, :);
    end

end

function proposal_imgs = crop_proposals(img, bboxes, out_size)
%CROP_PROPOSALS Cut each box out of img, resize it and normalize it.
%
%   img       - Image to crop from (three channels are assumed).
%   bboxes    - One row per box; columns 1:4 are [x1 y1 x2 y2] in 1-based
%               pixel coordinates and may extend past the image border.
%               Assumes integer coordinates -- TODO confirm correct_bbox
%               guarantees this.
%   out_size  - Side length of the square output patches.
%
%   Returns an out_size x out_size x 3 x N array, normalized with
%   (x - 127.5) * 0.0078125. Parts of a box that fall outside the image
%   are filled with zeros before resizing.

    height_orig = size(img,1);
    width_orig = size(img,2);
    num_bbox = size(bboxes,1);

    proposal_imgs = zeros(out_size, out_size, 3, num_bbox);
    for k=1:num_bbox

        width_target = bboxes(k,3) - bboxes(k,1) + 1;
        height_target = bboxes(k,4) - bboxes(k,2) + 1;

        % Work out the start and end indices in the original image
        start_x_in = max(bboxes(k,1), 1);
        start_y_in = max(bboxes(k,2), 1);
        end_x_in = min(bboxes(k,3), width_orig);
        end_y_in = min(bboxes(k,4), height_orig);

        % Work out the start and end indices in the target image
        start_x_out = max(-bboxes(k,1)+2, 1);
        start_y_out = max(-bboxes(k,2)+2, 1);
        end_x_out = min(width_target - (bboxes(k,3)-width_orig), width_target);
        end_y_out = min(height_target - (bboxes(k,4)-height_orig), height_target);

        tmp = zeros(height_target, width_target, 3);

        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
            img(start_y_in:end_y_in, start_x_in:end_x_in,:);

        proposal_imgs(:,:,:,k) = imresize(tmp, [out_size out_size], 'bilinear');
    end

    % Normalize the proposal images
    proposal_imgs = (proposal_imgs - 127.5) * 0.0078125;
end
|
||||
@@ -0,0 +1,25 @@
|
||||
function [bboxes] = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
%GENERATE_BOUNDING_BOXES Turn a score heatmap into boxes in image space.
%
%   bboxes = GENERATE_BOUNDING_BOXES(heatmap, correction, scale, t, face_support)
%
%   heatmap       - 2-D map of scores.
%   correction    - Array whose slices (:,:,1) .. (:,:,4) hold the dx1, dy1,
%                   dx2 and dy2 offsets for each heatmap cell.
%   scale         - Pyramid scale the heatmap was computed at; box
%                   coordinates are divided by it.
%   t             - Score threshold; cells with heatmap >= t are kept.
%   face_support  - Side length of the region one heatmap cell covers.
%
%   Returns an N-by-9 matrix, one row per kept cell:
%   [x1 y1 x2 y2 score dx1 dy1 dx2 dy2].
%
%   Every per-detection quantity is forced into a column vector. For a
%   heatmap with a single row, find() and vector indexing return row
%   vectors, which would otherwise concatenate several detections into one
%   long row instead of N rows.

    % Correction for the pooling
    stride = 2;

    % Find the parts of the heatmap above the threshold
    above = heatmap >= t;
    [row, col] = find(above);
    row = row(:);
    col = col(:);
    inds = find(above(:));

    % Find the corresponding scores and bbox corrections
    heat_flat = heatmap(:);
    score = heat_flat(inds);

    % Offsets for x1, y1, x2 and y2, flattened to match the linear indices
    dx1 = reshape(correction(:,:,1), [], 1);
    dy1 = reshape(correction(:,:,2), [], 1);
    dx2 = reshape(correction(:,:,3), [], 1);
    dy2 = reshape(correction(:,:,4), [], 1);
    correction = [dx1(inds) dy1(inds) dx2(inds) dy2(inds)];

    % Zero-based (x, y) cell positions: the column index is x, the row is y
    cells = [col - 1, row - 1];
    bboxes = [fix((stride*cells+1)/scale) fix((stride*cells+face_support)/scale) score correction];
end
|
||||
|
||||
57
matlab_version/face_detection/mtcnn/max_pooling.m
Normal file
57
matlab_version/face_detection/mtcnn/max_pooling.m
Normal file
@@ -0,0 +1,57 @@
|
||||
function [ output_maps ] = max_pooling( input_maps)
%MAX_POOLING 2x2 max pooling with step 2, keeping partial edge windows.
%
%   output_maps = MAX_POOLING(input_maps) pools every map input_maps(:,:,i)
%   over non-overlapping 2x2 blocks. When the number of rows or columns is
%   odd, the leftover last row/column is pooled over the 2x1 / 1x2 pieces
%   that remain, and the leftover corner element is copied as is, so the
%   result has ceil(rows/2) x ceil(cols/2) entries per map. The output is
%   allocated with zeros().

    n_rows = size(input_maps,1);
    n_cols = size(input_maps,2);
    n_maps = size(input_maps,3);

    odd_rows = mod(n_rows,2) ~= 0;
    odd_cols = mod(n_cols,2) ~= 0;

    % Number of complete 2x2 blocks in each direction ...
    full_rows_out = floor(n_rows / 2);
    full_cols_out = floor(n_cols / 2);

    % ... and the part of the input that those blocks cover
    full_rows_in = n_rows - odd_rows;
    full_cols_in = n_cols - odd_cols;

    output_maps = zeros(ceil(n_rows / 2), ceil(n_cols / 2), n_maps);

    for m=1:n_maps
        % Complete 2x2 blocks; im2col lists them column by column, which
        % matches the column-major reshape
        blocks = im2col(input_maps(1:full_rows_in,1:full_cols_in,m), [2,2], 'distinct');
        output_maps(1:full_rows_out,1:full_cols_out,m) = ...
            reshape(max(blocks), full_rows_out, full_cols_out);

        % Leftover last column, pooled over 2x1 pieces
        if(odd_cols)
            blocks = im2col(input_maps(1:full_rows_in,end,m), [2,1], 'distinct');
            output_maps(1:full_rows_out,end,m) = max(blocks);
        end

        % Leftover last row, pooled over 1x2 pieces
        if(odd_rows)
            blocks = im2col(input_maps(end, 1:full_cols_in,m), [1,2], 'distinct');
            output_maps(end, 1:full_cols_out,m) = max(blocks);
        end
    end

    % Leftover corner element is a window of its own
    if(odd_cols && odd_rows)
        output_maps(end,end,:) = input_maps(end,end,:);
    end

end
|
||||
|
||||
66
matlab_version/face_detection/mtcnn/max_pooling2.m
Normal file
66
matlab_version/face_detection/mtcnn/max_pooling2.m
Normal file
@@ -0,0 +1,66 @@
|
||||
function [ output_maps ] = max_pooling2( input_maps, kernel_size, stride)
%MAX_POOLING2 Max pooling with a square kernel, keeping partial edge windows.
%
%   output_maps = MAX_POOLING2(input_maps, kernel_size, stride)
%
%   input_maps   - rows x cols x maps x images array.
%   kernel_size  - Side length of the square pooling window.
%   stride       - Step between consecutive windows, in both directions.
%
%   Window (r, c) starts at row (r-1)*stride+1 and column (c-1)*stride+1
%   and is clipped at the input border, so trailing rows/columns that do
%   not fill a whole window are still pooled. Each direction yields
%   ceil((n - kernel_size)/stride) + 1 windows, minus one if the last
%   window would start outside the input. The output is double.
%
%   For stride 2 with kernel_size >= 2 this gives the same sizes and values
%   as the previous round()-based implementation. That implementation
%   selected no windows for stride 1 and could overwrite the last complete
%   row/column for larger strides; both cases are handled here, and im2col
%   is no longer needed.

    orig_rows = size(input_maps,1);
    orig_cols = size(input_maps,2);
    n_maps = size(input_maps,3);
    n_imgs = size(input_maps,4);

    pooled_rows = pooled_length(orig_rows, kernel_size, stride);
    pooled_cols = pooled_length(orig_cols, kernel_size, stride);

    output_maps = zeros(pooled_rows, pooled_cols, n_maps, n_imgs);
    if isempty(output_maps)
        return;
    end

    % First row/column of every pooling window
    row_starts = 1 + (0:pooled_rows-1) * stride;
    col_starts = 1 + (0:pooled_cols-1) * stride;

    % Seed with the top-left element of each window (always inside the
    % input), then fold in the other kernel offsets one at a time.
    output_maps(:,:,:,:) = input_maps(row_starts, col_starts, 1:n_maps, 1:n_imgs);

    for dr = 0:kernel_size-1
        % Windows whose row at this offset is still inside the input; these
        % always form a prefix because row_starts is increasing
        rows = row_starts + dr;
        rows = rows(rows <= orig_rows);
        nr = numel(rows);

        for dc = 0:kernel_size-1
            cols = col_starts + dc;
            cols = cols(cols <= orig_cols);
            nc = numel(cols);

            candidates = double(input_maps(rows, cols, 1:n_maps, 1:n_imgs));
            output_maps(1:nr,1:nc,:,:) = max(output_maps(1:nr,1:nc,:,:), candidates);
        end
    end

end

function n_out = pooled_length(n_in, kernel_size, stride)
%POOLED_LENGTH Number of (possibly partial) pooling windows along one axis.
    n_out = ceil((n_in - kernel_size) / stride) + 1;
    % Drop a last window that would start beyond the input
    if (n_out - 1) * stride >= n_in
        n_out = n_out - 1;
    end
    n_out = max(n_out, 0);
end
|
||||
|
||||
46
matlab_version/face_detection/mtcnn/non_maximum_supression.m
Normal file
46
matlab_version/face_detection/mtcnn/non_maximum_supression.m
Normal file
@@ -0,0 +1,46 @@
|
||||
function pick = non_maximum_supression(boxes, overlap_threshold,type)
%NON_MAXIMUM_SUPRESSION Greedy non-maximum suppression of scored boxes.
%
%   pick = NON_MAXIMUM_SUPRESSION(boxes, overlap_threshold, type)
%
%   boxes              - One row per box: [x1 y1 x2 y2 score ...].
%   overlap_threshold  - A remaining box survives a round only if its
%                        overlap with the picked box is <= this value.
%   type               - 'Min' measures overlap as intersection over the
%                        smaller of the two areas; any other value uses
%                        intersection over union.
%
%   Returns the row indices of the kept boxes, highest score first, or []
%   when boxes is empty.

    if isempty(boxes)
        pick = [];
        return;
    end

    % Box corners, scores and (inclusive-pixel) areas
    left   = boxes(:,1);
    top    = boxes(:,2);
    right  = boxes(:,3);
    bottom = boxes(:,4);
    scores = boxes(:,5);
    box_area = (right-left+1) .* (bottom-top+1);

    % Ascending sort: the best remaining candidate is always the last one
    [~, order] = sort(scores);

    use_min = strcmp(type,'Min');

    pick = zeros(numel(scores),1);
    n_picked = 0;
    while ~isempty(order)
        best = order(end);
        rest = order(1:end-1);

        n_picked = n_picked + 1;
        pick(n_picked) = best;

        % Intersection of the picked box with every remaining candidate
        inter_w = max(0.0, min(right(best), right(rest)) - max(left(best), left(rest)) + 1);
        inter_h = max(0.0, min(bottom(best), bottom(rest)) - max(top(best), top(rest)) + 1);
        inter = inter_w.*inter_h;

        if use_min
            overlap = inter ./ min(box_area(best), box_area(rest));
        else
            overlap = inter ./ (box_area(best) + box_area(rest) - inter);
        end

        % Only candidates that do not overlap too much stay in the running
        order = rest(overlap <= overlap_threshold);
    end

    pick = pick(1:n_picked);
end
|
||||
15
matlab_version/face_detection/mtcnn/rectify.m
Normal file
15
matlab_version/face_detection/mtcnn/rectify.m
Normal file
@@ -0,0 +1,15 @@
|
||||
function [bbox_out] = rectify(bbox_in)
%RECTIFY Turn bounding boxes into squares around the same centre.
%
%   bbox_out = RECTIFY(bbox_in) takes one box per row, [x1 y1 x2 y2] in the
%   first four columns, and returns an N-by-4 matrix [x1 y1 x2 y2] in which
%   each box has been grown along its shorter side to match the longer
%   one, while the box centre stays where it was.

    x_min = bbox_in(:,1);
    y_min = bbox_in(:,2);
    box_w = bbox_in(:,3) - x_min;
    box_h = bbox_in(:,4) - y_min;

    % Side of the square: the longer of the two extents, per box
    side = max(box_w, box_h);

    % Shift the top-left corner back by half of the growth
    sq_x = x_min + 0.5 * (box_w - side);
    sq_y = y_min + 0.5 * (box_h - side);

    bbox_out = [sq_x, sq_y, sq_x + side, sq_y + side];
end
|
||||
|
||||
@@ -83,7 +83,7 @@ function WriteOutFaceCheckersCNNbinary(locationTxt, faceCheckers)
|
||||
|
||||
for k=1:num_in_map
|
||||
for k2=1:num_out_kerns
|
||||
% Write out the bias term
|
||||
% Write out the kernel
|
||||
W = squeeze(cnn.layers{layers}.weights{1}(:,:,k,k2));
|
||||
writeMatrixBin(faceCheckerFile, W, 5);
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user