Helper functions for MTCNN implementation.

This commit is contained in:
Tadas Baltrusaitis
2017-08-14 16:23:44 +01:00
parent 6dbed701bc
commit 0f94d53d9d
2 changed files with 143 additions and 32 deletions

View File

@@ -152,6 +152,7 @@ std::vector<cv::Mat_<float>> CNN::Inference(const cv::Mat& input_img)
cv::Mat channels[3];
cv::split(input_img, channels);
// Flip the BGR order to RGB
vector<cv::Mat_<float> > input_maps;
input_maps.push_back(channels[2]);
input_maps.push_back(channels[1]);
@@ -341,14 +342,6 @@ std::vector<cv::Mat_<float>> CNN::Inference(const cv::Mat& input_img)
// Set the outputs of this layer to inputs of the next
input_maps = outputs;
// TODO rem
//cv::Mat to_vis = input_maps[0];
//cout << to_vis << endl;
//double min, max;
//cv::minMaxIdx(to_vis, &min, &max);
//cv::imshow("image 1", (to_vis - min)/(max-min));
//cv::waitKey(0);
}
@@ -531,32 +524,114 @@ void FaceDetectorMTCNN::Read(string location)
}
}
cv::Mat_<float> generate_bounding_boxes(cv::Mat_<float> heatmap, vector<cv::Mat_<float> > corrections, double scale, double threshold, int face_support)
// Perform non maximum suppression on proposal bounding boxes, prioritizing boxes with a high score/confidence.
// original_bb - candidate boxes; scores - per-box confidence, same size as original_bb;
// thresh - IoU above which the lower-scored of two boxes is discarded.
// Returns the indices (into original_bb/scores) of the boxes to keep.
std::vector<int> non_maximum_supression(const std::vector<cv::Rect_<float> >& original_bb, const std::vector<float>& scores, float thresh)
{
	// Sort the input bounding boxes by the detection score, using the nice trick of multimap always being sorted internally
	std::multimap<float, size_t> idxs;
	for (size_t i = 0; i < original_bb.size(); ++i)
	{
		idxs.insert(std::pair<float, size_t>(scores[i], i));
	}

	std::vector<int> output_ids;

	// keep looping while some indexes still remain in the indexes list
	while (idxs.size() > 0)
	{
		// grab the box with the highest remaining score (last element of the sorted map)
		auto lastElem = --std::end(idxs);
		size_t curr_id = lastElem->second;

		// Bind as a float rect: binding a plain cv::Rect (int) reference here would create a
		// coordinate-truncated temporary and compute the overlap on integer coordinates
		const cv::Rect_<float>& rect1 = original_bb[curr_id];

		idxs.erase(lastElem);

		// Iterate through remaining bounding boxes and choose which ones to remove
		for (auto pos = std::begin(idxs); pos != std::end(idxs); )
		{
			// grab the current rectangle (float, see note above)
			const cv::Rect_<float>& rect2 = original_bb[pos->second];

			// Intersection over union (IoU) of the two boxes
			float intArea = (rect1 & rect2).area();
			float unionArea = rect1.area() + rect2.area() - intArea;
			float overlap = intArea / unionArea;

			// Remove the bounding boxes with less confidence but with significant overlap with the current one
			if (overlap > thresh)
			{
				pos = idxs.erase(pos);
			}
			else
			{
				++pos;
			}
		}
		// explicit narrowing: ids are small, the public return type is vector<int>
		output_ids.push_back((int)curr_id);
	}
	return output_ids;
}
// Helper function for selecting a subset of bounding boxes (and their aligned scores and
// corrections) based on the indices in to_keep. All three vectors are filtered in place
// and remain index-aligned with each other.
void select_subset(const vector<int>& to_keep, vector<cv::Rect_<float> >& bounding_boxes, vector<float>& scores, vector<cv::Rect_<float> >& corrections)
{
	vector<cv::Rect_<float> > bounding_boxes_tmp;
	vector<float> scores_tmp;
	vector<cv::Rect_<float> > corrections_tmp;

	// Reserve up front - avoids repeated reallocations during the push_back loop
	bounding_boxes_tmp.reserve(to_keep.size());
	scores_tmp.reserve(to_keep.size());
	corrections_tmp.reserve(to_keep.size());

	for (int idx : to_keep)
	{
		bounding_boxes_tmp.push_back(bounding_boxes[idx]);
		scores_tmp.push_back(scores[idx]);
		corrections_tmp.push_back(corrections[idx]);
	}

	// Swap instead of copy-assign - no second copy of the kept elements
	bounding_boxes.swap(bounding_boxes_tmp);
	scores.swap(scores_tmp);
	corrections.swap(corrections_tmp);
}
// Use the heatmap generated by PNet to generate bounding boxes in the original image space,
// together with the per-box correction values and detection scores.
// o_bounding_boxes, o_scores, o_corrections - outputs (cleared first, kept index-aligned)
// heatmap - PNet face-probability response at the current pyramid scale
// corrections - 4 response maps: x, y, width and height offsets, same size as heatmap
// scale - pyramid scale factor mapping heatmap coordinates back to the original image
// threshold - minimum probability for a location to become a proposal
// face_support - PNet receptive field size (proposal side length before scaling)
void generate_bounding_boxes(vector<cv::Rect_<float> >& o_bounding_boxes, vector<float>& o_scores, vector<cv::Rect_<float> >& o_corrections, const cv::Mat_<float>& heatmap, const vector<cv::Mat_<float> >& corrections, double scale, double threshold, int face_support)
{
	// Correction for the pooling stride of PNet
	const int stride = 2;

	o_bounding_boxes.clear();
	o_scores.clear();
	o_corrections.clear();

	// Find the parts of the heatmap above the threshold and turn each into a proposal
	for (int x = 0; x < heatmap.cols; ++x)
	{
		for (int y = 0; y < heatmap.rows; ++y)
		{
			float prob = heatmap.at<float>(y, x);
			if (prob >= threshold)
			{
				// Map the heatmap cell back to a box in original image coordinates
				// (int truncation mirrors Matlab's fix(), which the original model used)
				float min_x = int((stride * x + 1) / scale);
				float max_x = int((stride * x + face_support) / scale);
				float min_y = int((stride * y + 1) / scale);
				float max_y = int((stride * y + face_support) / scale);

				o_bounding_boxes.push_back(cv::Rect_<float>(min_x, min_y, max_x - min_x, max_y - min_y));
				o_scores.push_back(prob);

				// Offsets for x, y, width and height of the proposal
				float corr_x = corrections[0].at<float>(y, x);
				float corr_y = corrections[1].at<float>(y, x);
				float corr_width = corrections[2].at<float>(y, x);
				float corr_height = corrections[3].at<float>(y, x);
				o_corrections.push_back(cv::Rect_<float>(corr_x, corr_y, corr_width, corr_height));
			}
		}
	}
}
@@ -585,6 +660,10 @@ bool FaceDetectorMTCNN::DetectFaces(vector<cv::Rect_<double> >& o_regions, const
cv::Mat img_float;
input_img.convertTo(img_float, CV_32FC3);
vector<cv::Rect_<float> > proposal_boxes_all;
vector<float> scores_all;
vector<cv::Rect_<float> > proposal_corrections_all;
for (int i = 0; i < num_scales; ++i)
{
double scale = ((double)face_support / (double)min_face_size)*cv::pow(pyramid_factor, i);
@@ -594,19 +673,51 @@ bool FaceDetectorMTCNN::DetectFaces(vector<cv::Rect_<double> >& o_regions, const
cv::Mat normalised_img;
cv::resize(img_float, normalised_img, cv::Size(w_pyr, h_pyr));
// Normalize the image
normalised_img = (normalised_img - 127.5) * 0.0078125;
// Actual PNet CNN step
std::vector<cv::Mat_<float> > pnet_out = PNet.Inference(normalised_img);
cv::Mat_<float> out_prob;
cv::exp(pnet_out[0]- pnet_out[1], out_prob);
out_prob = 1.0 / (1.0 + out_prob);
// Extract the probabilities from PNet response
cv::Mat_<float> prob_heatmap;
cv::exp(pnet_out[0]- pnet_out[1], prob_heatmap);
prob_heatmap = 1.0 / (1.0 + prob_heatmap);
// Extract the bounding box corrections from PNet response
std::vector<cv::Mat_<float>> corrections_heatmap(pnet_out.begin() + 2, pnet_out.end());
// Grab the detections
vector<cv::Rect_<float> > proposal_boxes;
vector<float> scores;
vector<cv::Rect_<float> > proposal_corrections;
generate_bounding_boxes(proposal_boxes, scores, proposal_corrections, prob_heatmap, corrections_heatmap, scale, t1, face_support);
// Perform non-maximum suppression on proposals in this scale
vector<int> to_keep = non_maximum_supression(proposal_boxes, scores, 0.5);
select_subset(to_keep, proposal_boxes, scores, proposal_corrections);
proposal_boxes_all.insert(proposal_boxes_all.end(), proposal_boxes.begin(), proposal_boxes.end());
scores_all.insert(scores_all.end(), scores.begin(), scores.end());
proposal_corrections_all.insert(proposal_corrections_all.end(), proposal_corrections.begin(), proposal_corrections.end());
}
// Preparation for RNet step
// Non maximum suppression across bounding boxes, and their offset correction
vector<int> to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7);
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
//total_bboxes = apply_correction(total_bboxes, corrections, false);
//% Making them into rectangles
// total_bboxes(:, 1 : 4) = rectify(total_bboxes(:, 1 : 4));
//% Rounding to pixels
//
return true;
}