diff --git a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp index 94969ed3..fa24e1da 100644 --- a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp +++ b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp @@ -152,6 +152,7 @@ std::vector> CNN::Inference(const cv::Mat& input_img) cv::Mat channels[3]; cv::split(input_img, channels); + // Flip the BGR order to RGB vector > input_maps; input_maps.push_back(channels[2]); input_maps.push_back(channels[1]); @@ -341,14 +342,6 @@ std::vector> CNN::Inference(const cv::Mat& input_img) // Set the outputs of this layer to inputs of the next input_maps = outputs; - // TODO rem - //cv::Mat to_vis = input_maps[0]; - //cout << to_vis << endl; - //double min, max; - //cv::minMaxIdx(to_vis, &min, &max); - //cv::imshow("image 1", (to_vis - min)/(max-min)); - //cv::waitKey(0); - } @@ -531,32 +524,114 @@ void FaceDetectorMTCNN::Read(string location) } } -cv::Mat_ generate_bounding_boxes(cv::Mat_ heatmap, vector > corrections, double scale, double threshold, int face_support) +// Perform non maximum supression on proposal bounding boxes prioritizing boxes with high score/confidence +std::vector non_maximum_supression(const std::vector >& original_bb, const std::vector& scores, float thresh) +{ + + // Sort the input bounding boxes by the detection score, using the nice trick of multimap always being sorted internally + std::multimap idxs; + for (size_t i = 0; i < original_bb.size(); ++i) + { + idxs.insert(std::pair(scores[i], i)); + } + + std::vector output_ids; + + // keep looping while some indexes still remain in the indexes list + while (idxs.size() > 0) + { + // grab the last rectangle + auto lastElem = --std::end(idxs); + size_t curr_id = lastElem->second; + + const cv::Rect& rect1 = original_bb[curr_id]; + + idxs.erase(lastElem); + + // Iterate through remaining bounding boxes and choose which ones to remove + for (auto pos = std::begin(idxs); pos != std::end(idxs); ) 
+ { + // grab the current rectangle + const cv::Rect& rect2 = original_bb[pos->second]; + + float intArea = (rect1 & rect2).area(); + float unionArea = rect1.area() + rect2.area() - intArea; + float overlap = intArea / unionArea; + + // Remove the bounding boxes with less confidence but with significant overlap with the current one + if (overlap > thresh) + { + pos = idxs.erase(pos); + } + else + { + ++pos; + } + } + output_ids.push_back(curr_id); + + } + + return output_ids; + +} + +// Helper function for selecting a subset of bounding boxes based on indices +void select_subset(const vector& to_keep, vector >& bounding_boxes, vector& scores, vector >& corrections) +{ + vector > bounding_boxes_tmp; + vector scores_tmp; + vector > corrections_tmp; + + for (size_t i = 0; i < to_keep.size(); ++i) + { + bounding_boxes_tmp.push_back(bounding_boxes[to_keep[i]]); + scores_tmp.push_back(scores[to_keep[i]]); + corrections_tmp.push_back(corrections[to_keep[i]]); + } + + bounding_boxes = bounding_boxes_tmp; + scores = scores_tmp; + corrections = corrections_tmp; +} + +// Use the heatmap generated by PNet to generate bounding boxes in the original image space, along with the correction values and scores of those bounding boxes +void generate_bounding_boxes(vector >& o_bounding_boxes, vector& o_scores, vector >& o_corrections, const cv::Mat_& heatmap, const vector >& corrections, double scale, double threshold, int face_support) { - // use heatmap to generate bounding boxes in the original image space // Correction for the pooling int stride = 2; - // Offsets for, x, y, width and height - //cv::Mat_ dx1 = corrections.col(1); - //cv::Mat_ dy1 = corrections.col(2); - //cv::Mat_ dx2 = corrections.col(3); - //cv::Mat_ dy2 = corrections.col(4); + o_bounding_boxes.clear(); + o_scores.clear(); + o_corrections.clear(); - // Find the parts of a heatmap above the threshold(x, y, and indices) - cv::Mat_ mask = heatmap >= threshold; + int counter = 0; + for (int x = 0; x < 
heatmap.cols; ++x) + { + for(int y = 0; y < heatmap.rows; ++y) + { + if (heatmap.at(y, x) >= threshold) + { + float min_x = int((stride * x + 1) / scale); + float max_x = int((stride * x + face_support) / scale); + float min_y = int((stride * y + 1) / scale); + float max_y = int((stride * y + face_support) / scale); - // Find the corresponding scores and bbox corrections - //score = heatmap(inds); - //correction = [dx1(inds) dy1(inds) dx2(inds) dy2(inds)]; + o_bounding_boxes.push_back(cv::Rect_(min_x, min_y, max_x - min_x, max_y - min_y)); + o_scores.push_back(heatmap.at(y, x)); - // Correcting for Matlab's format - //bboxes = [y - 1 x - 1]; - //bboxes = [fix((stride*(bboxes)+1) / scale) fix((stride*(bboxes)+face_support) / scale) score correction]; - - return cv::Mat_(); + float corr_x = corrections[0].at(y, x); + float corr_y = corrections[1].at(y, x); + float corr_width = corrections[2].at(y, x); + float corr_height = corrections[3].at(y, x); + o_corrections.push_back(cv::Rect_(corr_x, corr_y, corr_width, corr_height)); + counter++; + } + } + } + } @@ -585,6 +660,10 @@ bool FaceDetectorMTCNN::DetectFaces(vector >& o_regions, const cv::Mat img_float; input_img.convertTo(img_float, CV_32FC3); + vector > proposal_boxes_all; + vector scores_all; + vector > proposal_corrections_all; + for (int i = 0; i < num_scales; ++i) { double scale = ((double)face_support / (double)min_face_size)*cv::pow(pyramid_factor, i); @@ -594,19 +673,51 @@ bool FaceDetectorMTCNN::DetectFaces(vector >& o_regions, const cv::Mat normalised_img; cv::resize(img_float, normalised_img, cv::Size(w_pyr, h_pyr)); - + + // Normalize the image normalised_img = (normalised_img - 127.5) * 0.0078125; + // Actual PNet CNN step std::vector > pnet_out = PNet.Inference(normalised_img); - cv::Mat_ out_prob; - cv::exp(pnet_out[0]- pnet_out[1], out_prob); - out_prob = 1.0 / (1.0 + out_prob); + // Extract the probabilities from PNet response + cv::Mat_ prob_heatmap; + cv::exp(pnet_out[0]- pnet_out[1], 
prob_heatmap); + prob_heatmap = 1.0 / (1.0 + prob_heatmap); + + // Extract the bounding box corrections from PNet response + std::vector> corrections_heatmap(pnet_out.begin() + 2, pnet_out.end()); // Grab the detections + vector > proposal_boxes; + vector scores; + vector > proposal_corrections; + generate_bounding_boxes(proposal_boxes, scores, proposal_corrections, prob_heatmap, corrections_heatmap, scale, t1, face_support); + // Perform non-maximum suppression on proposals in this scale + vector to_keep = non_maximum_supression(proposal_boxes, scores, 0.5); + select_subset(to_keep, proposal_boxes, scores, proposal_corrections); + + proposal_boxes_all.insert(proposal_boxes_all.end(), proposal_boxes.begin(), proposal_boxes.end()); + scores_all.insert(scores_all.end(), scores.begin(), scores.end()); + proposal_corrections_all.insert(proposal_corrections_all.end(), proposal_corrections.begin(), proposal_corrections.end()); } + + // Preparation for RNet step + + // Non-maximum suppression across bounding boxes, and their offset correction + vector to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7); + select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all); + + //total_bboxes = apply_correction(total_bboxes, corrections, false); + + //% Making them into rectangles + // total_bboxes(:, 1 : 4) = rectify(total_bboxes(:, 1 : 4)); + + //% Rounding to pixels + // + return true; } diff --git a/matlab_version/face_detection/mtcnn/detect_face_mtcnn.m b/matlab_version/face_detection/mtcnn/detect_face_mtcnn.m index aa69923c..714f8d2b 100644 --- a/matlab_version/face_detection/mtcnn/detect_face_mtcnn.m +++ b/matlab_version/face_detection/mtcnn/detect_face_mtcnn.m @@ -43,7 +43,7 @@ for s = scales w_pyr = ceil(width_orig * s); % Resize the image and normalize to what MTCNN expects it to be - im_data=(imresize(img, [h_pyr w_pyr],'bilinear')-127.5)*0.0078125; + im_data=(imresize(img, [h_pyr w_pyr],'bilinear','AntiAliasing',false)-127.5)*0.0078125; [ 
out_prob, out_correction ] = PNet( im_data, PNet_mlab ); @@ -107,7 +107,7 @@ if num_bbox > 0 tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ... img(start_y_in:end_y_in, start_x_in:end_x_in,:); - proposal_imgs(:,:,:,k) = imresize(tmp, [24 24], 'bilinear'); + proposal_imgs(:,:,:,k) = imresize(tmp, [24 24], 'bilinear','AntiAliasing',false); end % Normalize the proposal images @@ -168,7 +168,7 @@ if num_bbox > 0 tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ... img(start_y_in:end_y_in, start_x_in:end_x_in,:); - proposal_imgs(:,:,:,k) = imresize(tmp, [48 48], 'bilinear'); + proposal_imgs(:,:,:,k) = imresize(tmp, [48 48], 'bilinear','AntiAliasing',false); end % Normalize the proposal images