diff --git a/matlab_version/face_detection/mtcnn/PNet_mlab.mat b/matlab_version/face_detection/mtcnn/PNet_mlab.mat
new file mode 100644
index 00000000..40726a48
Binary files /dev/null and b/matlab_version/face_detection/mtcnn/PNet_mlab.mat differ
diff --git a/matlab_version/face_detection/mtcnn/PReLU.m b/matlab_version/face_detection/mtcnn/PReLU.m
new file mode 100644
index 00000000..37150154
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/PReLU.m
@@ -0,0 +1,36 @@
+function [ out_map ] = PReLU( input_maps, PReLU_params )
+%PRELU Parametric ReLU with one slope per channel.
+%   out_map = PReLU(input_maps, PReLU_params) keeps non-negative values
+%   and multiplies every negative value by the slope of its channel.
+%
+%   input_maps   - activations. With more than two dimensions the
+%                  channels are indexed along the 3rd dimension (a 4th
+%                  dimension is carried along); otherwise each column is
+%                  treated as one channel.
+%   PReLU_params - slopes, PReLU_params(i) is applied to channel i.
+%
+%   NOTE(review): a single-channel H x W map has only two dimensions in
+%   Matlab and therefore takes the column-per-channel path - confirm that
+%   callers never pass one.
+
+    % Start from a copy of the input so the result is allocated once
+    % rather than regrown with cat() on every iteration
+    out_map = input_maps;
+
+    if(numel(size(input_maps)) > 2)
+        for i=1:size(input_maps,3)
+            in_map = input_maps(:,:,i,:);
+            negative = in_map < 0;
+            in_map(negative) = in_map(negative) * PReLU_params(i);
+            out_map(:,:,i,:) = in_map;
+        end
+    else
+        for i=1:size(input_maps,2)
+            in_map = input_maps(:,i);
+            negative = in_map < 0;
+            in_map(negative) = in_map(negative) * PReLU_params(i);
+            out_map(:,i) = in_map;
+        end
+    end
+end
+
diff --git a/matlab_version/face_detection/mtcnn/convolution.m b/matlab_version/face_detection/mtcnn/convolution.m
new file mode 100644
index 00000000..dcbe63e2
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/convolution.m
@@ -0,0 +1,26 @@
+function [ output_maps ] = convolution( input_maps, kernels, biases )
+%CONVOLUTION Convolve the input maps with a bank of kernels.
+%   output_maps = convolution(input_maps, kernels, biases) casts
+%   input_maps to single and hands all three arguments to vl_nnconv, which
+%   therefore has to be on the Matlab path.
+%
+%   The commented-out block below is an earlier convn based version that is
+%   kept for reference only.
+
+% n_filters = size(kernels, 4);
+
+% kernels2 = kernels(:,:,end:-1:1,:);
+% for i=1:n_filters
+%     for n_in_maps=1:size(kernels,3)
+%         kernels2(:,:,n_in_maps,i) = fliplr(squeeze(kernels2(:,:,n_in_maps,i)));
+%         kernels2(:,:,n_in_maps,i) = flipud(squeeze(kernels2(:,:,n_in_maps,i)));
+%     end
+% end
+% output_maps_1 = [];
+% for i=1:n_filters
+%     output_maps_1 = cat(3, output_maps_1, convn(input_maps, kernels2(:,:,:,i), 'valid') + biases(i));
+% end
+
+    output_maps = vl_nnconv(single(input_maps), kernels, biases);
+end
+
diff --git a/matlab_version/face_detection/mtcnn/detect_face.m
b/matlab_version/face_detection/mtcnn/detect_face.m
new file mode 100644
index 00000000..a1ff6b77
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/detect_face.m
@@ -0,0 +1,181 @@
+clear;
+
+% Make sure we have the dependencies for convolution
+od = cd('../../face_validation');
+setup;
+cd(od);
+
+img = imread('test1.jpg');
+height_orig = size(img,1);
+width_orig = size(img,2);
+
+% Everything is done in floats
+img = single(img);
+
+% Minimum face size
+min_face_size = 30;
+
+% Image pyramid scaling factor
+factor = 0.709;
+
+% Thresholds for the PNet, RNet, and ONet (the order they are indexed in below)
+threshold=[0.6 0.7 0.7];
+
+min_dim = min([width_orig height_orig]);
+
+% Face support region is 12x12 px, so the largest scale is
+% face_support / min_face_size; every further level shrinks by factor for
+% as long as the smaller image side stays at least 12 px after scaling
+face_support = 12;
+num_scales = floor(log(min_face_size / min_dim) / log(factor));
+scales = (face_support / min_face_size)*factor.^(0:num_scales);
+
+load('PNet_mlab');
+load('RNet_mlab');
+load('ONet_mlab');
+
+total_bboxes = [];
+
+% First the PNet stage on image pyramid
+for s = scales
+    h_pyr = ceil(height_orig * s);
+    w_pyr = ceil(width_orig * s);
+
+    % Resize the image and normalize to what MTCNN expects it to be
+    im_data=(imresize(img, [h_pyr w_pyr],'bilinear')-127.5)*0.0078125;
+
+    [ out_prob, out_correction ] = PNet( im_data, PNet_mlab );
+
+    % Generate bounding boxes from the heatmap
+    bboxes = generate_bounding_boxes(out_prob, out_correction, s, threshold(1), face_support);
+
+    % TODO correct bboxes before running NMS?, as now lots of overlapping
+    % boxes are present
+
+    % Perform non maximum suppression to remove redundant bounding boxes
+    pick = non_maximum_supression(bboxes, 0.5, 'Union');
+    bboxes=bboxes(pick,:);
+    if ~isempty(bboxes)
+        total_bboxes = cat(1, total_bboxes, bboxes);
+    end
+end
+
+if ~isempty(total_bboxes)
+    % Non maximum suppression across bounding boxes, and their offset
+    % correction
+    total_bboxes = correct_bbox(total_bboxes(:,1:5), total_bboxes(:,6:end), false, true, true);
+
+end
+num_bbox = size(total_bboxes,1);
+
+% RNet stage
+if num_bbox > 0
+
+    proposal_imgs = zeros(24, 24, 3, num_bbox);
+    for k=1:num_bbox
+
+        width_target = total_bboxes(k,3) - total_bboxes(k,1) + 1;
+        height_target = total_bboxes(k,4) - total_bboxes(k,2) + 1;
+
+        % Work out the start and end indices in the original image
+        start_x_in = max(total_bboxes(k,1), 1);
+        start_y_in = max(total_bboxes(k,2), 1);
+        end_x_in = min(total_bboxes(k,3), width_orig);
+        end_y_in = min(total_bboxes(k,4), height_orig);
+
+        % Work out the start and end indices in the target image
+        start_x_out = max(-total_bboxes(k,1)+2, 1);
+        start_y_out = max(-total_bboxes(k,2)+2, 1);
+        end_x_out = min(width_target - (total_bboxes(k,3)-width_orig), width_target);
+        end_y_out = min(height_target - (total_bboxes(k,4)-height_orig), height_target);
+
+        tmp = zeros(height_target, width_target, 3);
+
+        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
+            img(start_y_in:end_y_in, start_x_in:end_x_in,:);
+
+        proposal_imgs(:,:,:,k) = imresize(tmp, [24 24], 'bilinear');
+    end
+
+    % Normalize the proposal images
+    proposal_imgs = (proposal_imgs - 127.5) * 0.0078125;
+
+    % Apply RNet to proposal faces
+    [ score, out_correction ] = RNet( proposal_imgs, RNet_mlab );
+    out_correction = out_correction';
+
+    % Find faces above the threshold
+    to_keep = find(score > threshold(2));
+
+    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
+    out_correction = out_correction(to_keep,:);
+
+    if ~isempty(total_bboxes)
+        % Non maximum suppression across bounding boxes, and their offset
+        % correction
+        total_bboxes = correct_bbox(total_bboxes, out_correction, true, true, true);
+    end
+end
+
+num_bbox = size(total_bboxes,1);
+
+% Landmarks stay empty when no box reaches the ONet stage
+lmarks = [];
+
+% ONet stage
+if num_bbox > 0
+
+    proposal_imgs = zeros(48, 48, 3, num_bbox);
+    for k=1:num_bbox
+
+        width_target = total_bboxes(k,3) - total_bboxes(k,1) + 1;
+        height_target = total_bboxes(k,4) - total_bboxes(k,2) + 1;
+
+        % Work out the start and end indices in the original image
+        start_x_in = max(total_bboxes(k,1), 1);
+        start_y_in = max(total_bboxes(k,2), 1);
+        end_x_in = min(total_bboxes(k,3), width_orig);
+        end_y_in = min(total_bboxes(k,4), height_orig);
+
+        % Work out the start and end indices in the target image
+        start_x_out = max(-total_bboxes(k,1)+2, 1);
+        start_y_out = max(-total_bboxes(k,2)+2, 1);
+        end_x_out = min(width_target - (total_bboxes(k,3)-width_orig), width_target);
+        end_y_out = min(height_target - (total_bboxes(k,4)-height_orig), height_target);
+
+        tmp = zeros(height_target, width_target, 3);
+
+        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
+            img(start_y_in:end_y_in, start_x_in:end_x_in,:);
+
+        proposal_imgs(:,:,:,k) = imresize(tmp, [48 48], 'bilinear');
+    end
+
+    % Normalize the proposal images
+    proposal_imgs = (proposal_imgs - 127.5) * 0.0078125;
+
+    % Apply ONet to proposal faces
+    [ score, out_correction, lmarks ] = ONet( proposal_imgs, ONet_mlab );
+    out_correction = out_correction';
+    lmarks = lmarks';
+
+    % Pick the final faces above the threshold
+    to_keep = find(score > threshold(3));
+    lmarks = lmarks(to_keep, :);
+    out_correction = out_correction(to_keep, :);
+    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
+
+    % Correct for the landmarks
+    bbw = total_bboxes(:,3) - total_bboxes(:,1) + 1;
+    bbh = total_bboxes(:,4) - total_bboxes(:,2) + 1;
+
+    lmarks(:, 1:5) = bbw .* lmarks(:,1:5) + total_bboxes(:,1) - 1;
+    lmarks(:, 6:10) = bbh .* lmarks(:,6:10) + total_bboxes(:,2) - 1;
+
+    % Correct the bounding boxes
+    if size(total_bboxes,1)>0
+        [total_bboxes, to_keep] = correct_bbox(total_bboxes, out_correction, true, false, false);
+        lmarks = lmarks(to_keep, :);
+    end
+
+end
\ No newline at end of file
diff --git a/matlab_version/face_detection/mtcnn/generate_bounding_boxes.m b/matlab_version/face_detection/mtcnn/generate_bounding_boxes.m
new file mode 100644
index 00000000..cf6e6077
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/generate_bounding_boxes.m
@@ -0,0 +1,29 @@
+function [bboxes] = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
+%GENERATE_BOUNDING_BOXES Use a heatmap to generate boxes in original image space.
+%   Returns one row per heatmap cell with a value >= t, laid out as
+%       [x1 y1 x2 y2 score dx1 dy1 dx2 dy2]
+%   where x/y are the cell position mapped back through the stride and
+%   divided by scale, and dx1..dy2 are the first four channels of
+%   correction at that cell.
+
+    % Correction for the pooling
+    stride = 2;
+
+    % Work on the linearised heatmap: find() and vector indexing follow the
+    % orientation of a row vector, which would break the concatenations
+    % below whenever the heatmap has a single row
+    heat = heatmap(:);
+    inds = find(heat >= t);
+    [rows, cols] = ind2sub(size(heatmap), inds);
+
+    % Find the corresponding scores and bbox corrections (one column per
+    % correction channel)
+    score = heat(inds);
+    offsets = reshape(correction, numel(heat), []);
+    offsets = offsets(inds, 1:4);
+
+    % Correcting for Matlab's 1-based format
+    top_left = [cols - 1, rows - 1];
+    bboxes = [fix((stride*top_left+1)/scale) fix((stride*top_left+face_support)/scale) score offsets];
+end
+
diff --git a/matlab_version/face_detection/mtcnn/max_pooling.m b/matlab_version/face_detection/mtcnn/max_pooling.m
new file mode 100644
index 00000000..b49ed0c3
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/max_pooling.m
@@ -0,0 +1,54 @@
+function [ output_maps ] = max_pooling( input_maps)
+%MAX_POOLING 2x2 max pooling with stride 2 over the first two dimensions.
+%   output_maps = max_pooling(input_maps) pools every slice along the 3rd
+%   dimension separately. For an odd number of rows or columns the
+%   trailing row/column is pooled over the part of the window that is
+%   inside the map, so the output has ceil(rows/2) x ceil(cols/2) entries
+%   per slice. Relies on im2col.
+
+    orig_rows = size(input_maps,1);
+    orig_cols = size(input_maps,2);
+
+    pooled_rows = ceil(orig_rows / 2);
+    pooled_cols = ceil(orig_cols / 2);
+
+    % Number of complete 2x2 windows, and the input extent they cover
+    up_to_rows_out = floor(orig_rows / 2);
+    up_to_cols_out = floor(orig_cols / 2);
+    up_to_rows = 2 * up_to_rows_out;
+    up_to_cols = 2 * up_to_cols_out;
+
+    odd_rows = mod(orig_rows,2) ~= 0;
+    odd_cols = mod(orig_cols,2) ~= 0;
+
+    output_maps = zeros(pooled_rows, pooled_cols, size(input_maps,3));
+    for i=1:size(input_maps,3)
+        temp = im2col(input_maps(1:up_to_rows,1:up_to_cols,i), [2,2], 'distinct');
+        max_val = max(temp);
+        output_maps(1:up_to_rows_out,1:up_to_cols_out,i) = reshape(max_val, up_to_rows_out, up_to_cols_out);
+    end
+
+    % A bit of a hack for non-even number of rows or columns: the last
+    % column/row only has a 2x1 / 1x2 window inside the map
+    if(odd_cols)
+        for i=1:size(input_maps,3)
+            temp = im2col(input_maps(1:up_to_rows,end,i), [2,1], 'distinct');
+            max_val = max(temp);
+            output_maps(1:up_to_rows_out,end,i) = max_val;
+        end
+    end
+
+    if(odd_rows)
+        for i=1:size(input_maps,3)
+            temp = im2col(input_maps(end, 1:up_to_cols,i), [1,2], 'distinct');
+            max_val = max(temp);
+            output_maps(end, 1:up_to_cols_out,i) = max_val;
+        end
+    end
+
+    % The bottom-right corner is covered by a single input element
+    if(odd_cols && odd_rows)
+        output_maps(end,end,:) = input_maps(end,end,:);
+    end
+end
+
diff --git a/matlab_version/face_detection/mtcnn/max_pooling2.m
b/matlab_version/face_detection/mtcnn/max_pooling2.m
new file mode 100644
index 00000000..bcaf8732
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/max_pooling2.m
@@ -0,0 +1,83 @@
+function [ output_maps ] = max_pooling2( input_maps, kernel_size, stride)
+%MAX_POOLING2 Max pooling with a square kernel and a configurable stride.
+%   output_maps = max_pooling2(input_maps, kernel_size, stride) pools the
+%   first two dimensions of every input_maps(:,:,i,m) slice. When the
+%   windows do not tile the map exactly, one extra output row/column is
+%   produced from the part of the last window that lies inside the map,
+%   so each pooled dimension has ceil((n - kernel_size)/stride) + 1
+%   entries. Relies on im2col.
+%
+%   NOTE(review): assumes kernel_size >= stride and a map that is at least
+%   kernel_size in both directions - smaller inputs are not handled.
+
+    orig_rows = size(input_maps,1);
+    orig_cols = size(input_maps,2);
+
+    % ceil (not round) so that a trailing partial window always gets its
+    % own output cell, whatever the stride
+    pooled_rows = ceil((orig_rows - kernel_size)/stride) + 1;
+    pooled_cols = ceil((orig_cols - kernel_size)/stride) + 1;
+
+    % How many full max-pooling steps are there
+    up_to_rows_out = floor((orig_rows - kernel_size)/stride) + 1;
+    up_to_cols_out = floor((orig_cols - kernel_size)/stride) + 1;
+
+    % Input extent covered by the full steps
+    up_to_cols = kernel_size + (up_to_cols_out-1) * stride;
+    up_to_rows = kernel_size + (up_to_rows_out-1) * stride;
+
+    output_maps = zeros(pooled_rows, pooled_cols, size(input_maps,3), size(input_maps,4));
+
+    % Pick only the striding elements out of all sliding window positions.
+    % The offset is tested zero-based so that stride 1 keeps every position
+    [y, x] = meshgrid(1:up_to_cols-kernel_size+1, 1:up_to_rows-kernel_size+1);
+    to_keep_map = mod(y - 1, stride) == 0 & mod(x - 1, stride) == 0;
+    to_keep = find(to_keep_map);
+
+    for m=1:size(input_maps,4)
+        for i=1:size(input_maps,3)
+            temp = im2col(input_maps(1:up_to_rows,1:up_to_cols,i,m), [kernel_size, kernel_size], 'sliding');
+            temp = temp(:,to_keep);
+            % Explicit dimension: max() of a single-row temp would
+            % otherwise collapse it to a scalar
+            max_val = max(temp, [], 1);
+            output_maps(1:up_to_rows_out,1:up_to_cols_out,i,m) = reshape(max_val, up_to_rows_out, up_to_cols_out);
+        end
+    end
+
+    % A bit of a hack for a trailing partial window along the columns
+    if(orig_cols ~= up_to_cols)
+        span = orig_cols - (up_to_cols - kernel_size + stride);
+        for m=1:size(input_maps,4)
+            for i=1:size(input_maps,3)
+                temp = im2col(input_maps(1:up_to_rows,end-span+1:end,i,m), [kernel_size, span], 'sliding');
+                max_val = max(temp(:,1:stride:end), [], 1);
+                output_maps(1:up_to_rows_out,end,i,m) = max_val;
+            end
+        end
+    end
+
+    % ... and along the rows
+    if(orig_rows ~= up_to_rows)
+        span = orig_rows - (up_to_rows - kernel_size + stride);
+        for m=1:size(input_maps,4)
+            for i=1:size(input_maps,3)
+                temp = im2col(input_maps(end-span+1:end, 1:up_to_cols,i,m), [span, kernel_size], 'sliding');
+                max_val = max(temp(:,1:stride:end), [], 1);
+                output_maps(end, 1:up_to_cols_out,i,m) = max_val;
+            end
+        end
+    end
+
+    % Bottom-right corner where both partial windows meet
+    if(orig_cols ~= up_to_cols && orig_rows ~= up_to_rows)
+        for m=1:size(input_maps,4)
+            for i=1:size(input_maps,3)
+                tmp = input_maps(up_to_rows- kernel_size + stride + 1:end,up_to_cols - kernel_size + stride+1:end,i,m);
+                output_maps(end,end,i,m) = max(tmp(:));
+            end
+        end
+    end
+
+end
+
diff --git a/matlab_version/face_detection/mtcnn/non_maximum_supression.m b/matlab_version/face_detection/mtcnn/non_maximum_supression.m
new file mode 100644
index 00000000..6c23c871
--- /dev/null
+++ b/matlab_version/face_detection/mtcnn/non_maximum_supression.m
@@ -0,0 +1,60 @@
+function pick = non_maximum_supression(boxes, overlap_threshold,type)
+%NON_MAXIMUM_SUPRESSION Greedy non-maximum suppression of bounding boxes.
+%   pick = non_maximum_supression(boxes, overlap_threshold, type) returns
+%   the row indices of the boxes to keep, best score first.
+%
+%   boxes             - one row per box, [x1 y1 x2 y2 score ...]
+%   overlap_threshold - boxes overlapping a kept box by more than this
+%                       are dropped
+%   type              - 'Min' measures overlap as intersection over the
+%                       smaller area, anything else as intersection over
+%                       union
+
+    if isempty(boxes)
+        pick = [];
+        return;
+    end
+
+    % Compute the corners of boxes and the area
+    x1 = boxes(:,1);
+    y1 = boxes(:,2);
+    x2 = boxes(:,3);
+    y2 = boxes(:,4);
+    s = boxes(:,5);
+    area = (x2-x1+1) .* (y2-y1+1);
+
+    % Sorting based on confidence scores (ascending, so the best remaining
+    % candidate is always the last entry)
+    [~, I] = sort(s);
+
+    pick = zeros(numel(s),1);
+
+    counter = 1;
+    while ~isempty(I)
+        last = length(I);
+        i = I(last);
+        pick(counter) = i;
+        counter = counter + 1;
+
+        % Intersection of the picked box with every remaining candidate
+        rest = I(1:last-1);
+        xx1 = max(x1(i), x1(rest));
+        yy1 = max(y1(i), y1(rest));
+        xx2 = min(x2(i), x2(rest));
+        yy2 = min(y2(i), y2(rest));
+        w = max(0.0, xx2-xx1+1);
+        h = max(0.0, yy2-yy1+1);
+        inter = w.*h;
+
+        if strcmp(type,'Min')
+            o = inter ./ min(area(i),area(rest));
+        else
+            o = inter ./ (area(i) + area(rest) - inter);
+        end
+
+        % Keep only the candidates that do not overlap too much
+        I = rest(o<=overlap_threshold);
+    end
+
+    pick = pick(1:(counter-1));
+end
\ No newline at end of file
diff --git a/matlab_version/face_detection/mtcnn/rectify.m b/matlab_version/face_detection/mtcnn/rectify.m
new file mode 100644
index 00000000..29123251
--- /dev/null
+++ 
b/matlab_version/face_detection/mtcnn/rectify.m
@@ -0,0 +1,19 @@
+function [bbox_out] = rectify(bbox_in)
+%RECTIFY Convert bounding boxes to squares around the same centre.
+%   bbox_out = rectify(bbox_in) takes rows of [x1 y1 x2 y2 ...] and
+%   returns rows of [x1 y1 x2 y2] whose side is the longer of the
+%   original width and height; any further input columns are dropped.
+
+    box_w = bbox_in(:,3) - bbox_in(:,1);
+    box_h = bbox_in(:,4) - bbox_in(:,2);
+
+    % Side of the square: the longer of the two extents of every box
+    side = max(box_w, box_h);
+
+    % Shift the top-left corner so that the centre stays where it was
+    left = bbox_in(:,1) + 0.5 * (box_w - side);
+    top = bbox_in(:,2) + 0.5 * (box_h - side);
+
+    bbox_out = [left, top, left + side, top + side];
+end
+
diff --git a/matlab_version/face_validation/WriteOutFaceCheckersCNNbinary.m b/matlab_version/face_validation/WriteOutFaceCheckersCNNbinary.m
index 24b28ece..cba61d57 100644
--- a/matlab_version/face_validation/WriteOutFaceCheckersCNNbinary.m
+++ b/matlab_version/face_validation/WriteOutFaceCheckersCNNbinary.m
@@ -83,7 +83,7 @@ function WriteOutFaceCheckersCNNbinary(locationTxt, faceCheckers)
 
                 for k=1:num_in_map
                     for k2=1:num_out_kerns
-                        % Write out the bias term
+                        % Write out the kernel
                         W = squeeze(cnn.layers{layers}.weights{1}(:,:,k,k2));
                         writeMatrixBin(faceCheckerFile, W, 5);
                     end