Helper functions for MTCNN implementation.

This commit is contained in:
Tadas Baltrusaitis
2017-08-14 16:23:44 +01:00
parent 6dbed701bc
commit 0f94d53d9d
2 changed files with 143 additions and 32 deletions

View File

@@ -152,6 +152,7 @@ std::vector<cv::Mat_<float>> CNN::Inference(const cv::Mat& input_img)
cv::Mat channels[3];
cv::split(input_img, channels);
// Flip the BGR order to RGB
vector<cv::Mat_<float> > input_maps;
input_maps.push_back(channels[2]);
input_maps.push_back(channels[1]);
@@ -341,14 +342,6 @@ std::vector<cv::Mat_<float>> CNN::Inference(const cv::Mat& input_img)
// Set the outputs of this layer to inputs of the next
input_maps = outputs;
// TODO rem
//cv::Mat to_vis = input_maps[0];
//cout << to_vis << endl;
//double min, max;
//cv::minMaxIdx(to_vis, &min, &max);
//cv::imshow("image 1", (to_vis - min)/(max-min));
//cv::waitKey(0);
}
@@ -531,32 +524,114 @@ void FaceDetectorMTCNN::Read(string location)
}
}
cv::Mat_<float> generate_bounding_boxes(cv::Mat_<float> heatmap, vector<cv::Mat_<float> > corrections, double scale, double threshold, int face_support)
// Perform non maximum suppression on proposal bounding boxes, prioritizing boxes with a high score/confidence.
// original_bb - candidate boxes; scores - per-box confidence, same size as original_bb;
// thresh - IoU above which the lower-scored of two boxes is discarded.
// Returns the indices (into original_bb/scores) of the boxes to keep.
std::vector<int> non_maximum_supression(const std::vector<cv::Rect_<float> >& original_bb, const std::vector<float>& scores, float thresh)
{
	// Sort the input bounding boxes by the detection score, using the nice trick of multimap always being sorted internally
	std::multimap<float, size_t> idxs;
	for (size_t i = 0; i < original_bb.size(); ++i)
	{
		idxs.insert(std::pair<float, size_t>(scores[i], i));
	}

	std::vector<int> output_ids;

	// keep looping while some indexes still remain in the indexes list
	while (idxs.size() > 0)
	{
		// grab the box with the highest remaining score (last element of the sorted map)
		auto lastElem = --std::end(idxs);
		size_t curr_id = lastElem->second;

		// Bind as a float rect: binding a plain cv::Rect (int) reference here would create a
		// coordinate-truncated temporary and compute the overlap on integer coordinates
		const cv::Rect_<float>& rect1 = original_bb[curr_id];

		idxs.erase(lastElem);

		// Iterate through remaining bounding boxes and choose which ones to remove
		for (auto pos = std::begin(idxs); pos != std::end(idxs); )
		{
			// grab the current rectangle (float, see note above)
			const cv::Rect_<float>& rect2 = original_bb[pos->second];

			// Intersection over union (IoU) of the two boxes
			float intArea = (rect1 & rect2).area();
			float unionArea = rect1.area() + rect2.area() - intArea;
			float overlap = intArea / unionArea;

			// Remove the bounding boxes with less confidence but with significant overlap with the current one
			if (overlap > thresh)
			{
				pos = idxs.erase(pos);
			}
			else
			{
				++pos;
			}
		}
		// explicit narrowing: ids are small, the public return type is vector<int>
		output_ids.push_back((int)curr_id);
	}
	return output_ids;
}
// Helper function for selecting a subset of bounding boxes (and their aligned scores and
// corrections) based on the indices in to_keep. All three vectors are filtered in place
// and remain index-aligned with each other.
void select_subset(const vector<int>& to_keep, vector<cv::Rect_<float> >& bounding_boxes, vector<float>& scores, vector<cv::Rect_<float> >& corrections)
{
	vector<cv::Rect_<float> > bounding_boxes_tmp;
	vector<float> scores_tmp;
	vector<cv::Rect_<float> > corrections_tmp;

	// Reserve up front - avoids repeated reallocations during the push_back loop
	bounding_boxes_tmp.reserve(to_keep.size());
	scores_tmp.reserve(to_keep.size());
	corrections_tmp.reserve(to_keep.size());

	for (int idx : to_keep)
	{
		bounding_boxes_tmp.push_back(bounding_boxes[idx]);
		scores_tmp.push_back(scores[idx]);
		corrections_tmp.push_back(corrections[idx]);
	}

	// Swap instead of copy-assign - no second copy of the kept elements
	bounding_boxes.swap(bounding_boxes_tmp);
	scores.swap(scores_tmp);
	corrections.swap(corrections_tmp);
}
// Use the heatmap generated by PNet to generate bounding boxes in the original image space,
// together with the per-box correction values and detection scores.
// o_bounding_boxes, o_scores, o_corrections - outputs (cleared first, kept index-aligned)
// heatmap - PNet face-probability response at the current pyramid scale
// corrections - 4 response maps: x, y, width and height offsets, same size as heatmap
// scale - pyramid scale factor mapping heatmap coordinates back to the original image
// threshold - minimum probability for a location to become a proposal
// face_support - PNet receptive field size (proposal side length before scaling)
void generate_bounding_boxes(vector<cv::Rect_<float> >& o_bounding_boxes, vector<float>& o_scores, vector<cv::Rect_<float> >& o_corrections, const cv::Mat_<float>& heatmap, const vector<cv::Mat_<float> >& corrections, double scale, double threshold, int face_support)
{
	// Correction for the pooling stride of PNet
	const int stride = 2;

	o_bounding_boxes.clear();
	o_scores.clear();
	o_corrections.clear();

	// Find the parts of the heatmap above the threshold and turn each into a proposal
	for (int x = 0; x < heatmap.cols; ++x)
	{
		for (int y = 0; y < heatmap.rows; ++y)
		{
			float prob = heatmap.at<float>(y, x);
			if (prob >= threshold)
			{
				// Map the heatmap cell back to a box in original image coordinates
				// (int truncation mirrors Matlab's fix(), which the original model used)
				float min_x = int((stride * x + 1) / scale);
				float max_x = int((stride * x + face_support) / scale);
				float min_y = int((stride * y + 1) / scale);
				float max_y = int((stride * y + face_support) / scale);

				o_bounding_boxes.push_back(cv::Rect_<float>(min_x, min_y, max_x - min_x, max_y - min_y));
				o_scores.push_back(prob);

				// Offsets for x, y, width and height of the proposal
				float corr_x = corrections[0].at<float>(y, x);
				float corr_y = corrections[1].at<float>(y, x);
				float corr_width = corrections[2].at<float>(y, x);
				float corr_height = corrections[3].at<float>(y, x);
				o_corrections.push_back(cv::Rect_<float>(corr_x, corr_y, corr_width, corr_height));
			}
		}
	}
}
@@ -585,6 +660,10 @@ bool FaceDetectorMTCNN::DetectFaces(vector<cv::Rect_<double> >& o_regions, const
cv::Mat img_float;
input_img.convertTo(img_float, CV_32FC3);
vector<cv::Rect_<float> > proposal_boxes_all;
vector<float> scores_all;
vector<cv::Rect_<float> > proposal_corrections_all;
for (int i = 0; i < num_scales; ++i)
{
double scale = ((double)face_support / (double)min_face_size)*cv::pow(pyramid_factor, i);
@@ -594,19 +673,51 @@ bool FaceDetectorMTCNN::DetectFaces(vector<cv::Rect_<double> >& o_regions, const
cv::Mat normalised_img;
cv::resize(img_float, normalised_img, cv::Size(w_pyr, h_pyr));
// Normalize the image
normalised_img = (normalised_img - 127.5) * 0.0078125;
// Actual PNet CNN step
std::vector<cv::Mat_<float> > pnet_out = PNet.Inference(normalised_img);
cv::Mat_<float> out_prob;
cv::exp(pnet_out[0]- pnet_out[1], out_prob);
out_prob = 1.0 / (1.0 + out_prob);
// Extract the probabilities from PNet response
cv::Mat_<float> prob_heatmap;
cv::exp(pnet_out[0]- pnet_out[1], prob_heatmap);
prob_heatmap = 1.0 / (1.0 + prob_heatmap);
// Extract the bounding box corrections from PNet response
std::vector<cv::Mat_<float>> corrections_heatmap(pnet_out.begin() + 2, pnet_out.end());
// Grab the detections
vector<cv::Rect_<float> > proposal_boxes;
vector<float> scores;
vector<cv::Rect_<float> > proposal_corrections;
generate_bounding_boxes(proposal_boxes, scores, proposal_corrections, prob_heatmap, corrections_heatmap, scale, t1, face_support);
// Perform non-maximum suppression on proposals in this scale
vector<int> to_keep = non_maximum_supression(proposal_boxes, scores, 0.5);
select_subset(to_keep, proposal_boxes, scores, proposal_corrections);
proposal_boxes_all.insert(proposal_boxes_all.end(), proposal_boxes.begin(), proposal_boxes.end());
scores_all.insert(scores_all.end(), scores.begin(), scores.end());
proposal_corrections_all.insert(proposal_corrections_all.end(), proposal_corrections.begin(), proposal_corrections.end());
}
// Preparation for RNet step
// Non maximum suppression across bounding boxes, and their offset correction
vector<int> to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7);
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
//total_bboxes = apply_correction(total_bboxes, corrections, false);
//% Making them into rectangles
// total_bboxes(:, 1 : 4) = rectify(total_bboxes(:, 1 : 4));
//% Rounding to pixels
//
return true;
}