From 13e42a70cfb0f5a8714c04021618e162cbb71f9e Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Wed, 9 Aug 2017 16:16:31 -0400 Subject: [PATCH] Some more work on MTCNN. --- .../include/FaceDetectorMTCNN.h | 4 +-- .../src/FaceDetectorMTCNN.cpp | 34 ++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h b/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h index 6e07d516..3fe432b1 100644 --- a/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h +++ b/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h @@ -82,7 +82,7 @@ namespace LandmarkDetector CNN(const CNN& other); // Given an image, orientation and detected landmarks output the result of the appropriate regressor - cv::Mat_ Inference(const cv::Mat_& input_img); + cv::Mat_ Inference(const cv::Mat_& input_img); // Reading in the model void Read(string location); @@ -123,7 +123,7 @@ namespace LandmarkDetector FaceDetectorMTCNN(const FaceDetectorMTCNN& other); // Given an image, orientation and detected landmarks output the result of the appropriate regressor - bool DetectFaces(vector >& o_regions, const cv::Mat_& intensity, std::vector& o_confidences, int min_face = 30, double t1 = 0.6, double t2 = 0.7, double t3 = 0.7); + bool DetectFaces(vector >& o_regions, const cv::Mat_& input_img, std::vector& o_confidences, int min_face = 30, double t1 = 0.6, double t2 = 0.7, double t3 = 0.7); // Reading in the model void Read(string location); diff --git a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp index 2cc3cb3b..0594aab3 100644 --- a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp +++ b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp @@ -136,7 +136,7 @@ CNN::CNN(const CNN& other) : cnn_layer_types(other.cnn_layer_types), cnn_max_poo } } -cv::Mat_ CNN::Inference(const cv::Mat_& input_img) +cv::Mat_ CNN::Inference(const cv::Mat_& input_img) { if 
(input_img.channels() == 1) { @@ -490,4 +490,72 @@ void FaceDetectorMTCNN::Read(string location) } }
+// The actual MTCNN face detection step.
+//
+// So far this only builds the image pyramid: each level is the input resized by
+// that level's scale, with values normalised as (x - 127.5) * 0.0078125. No
+// network is run on the levels yet, so o_regions and o_confidences are not
+// written to.
+//
+//   o_regions     - output face regions (not filled in yet)
+//   input_img     - image to search for faces
+//   o_confidences - output confidence per region (not filled in yet)
+//   min_face_size - face size in pixels that maps onto the 12x12 px support region at the largest scale
+//   t1, t2, t3    - currently unused; presumably per-stage thresholds
+//
+// Returns false for an empty image or a non-positive min_face_size, otherwise
+// whether o_regions is non-empty.
+//
+// Defined as a member to match the declaration in FaceDetectorMTCNN.h; the default
+// argument values (30, 0.6, 0.7, 0.7) live on that declaration and cannot be
+// repeated on an out-of-class definition.
+// NOTE(review): template arguments were lost in this copy of the patch;
+// cv::Rect_<double>, cv::Mat_<float> and std::vector<double> are assumed - confirm.
+bool FaceDetectorMTCNN::DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<float>& input_img, std::vector<double>& o_confidences, int min_face_size, double t1, double t2, double t3)
+{
+	// Guard: an empty image has a zero smallest dimension and a non-positive
+	// face size is meaningless; both would divide by zero below
+	if (input_img.empty() || min_face_size <= 0)
+	{
+		return false;
+	}
+
+	const int height_orig = input_img.rows;
+	const int width_orig = input_img.cols;
+
+	// Size ratio of image pyramids
+	const double pyramid_factor = 0.709;
+
+	// Face support region is 12x12 px, so from that can work out the largest
+	// scale (which is 12 / min_face_size), and work down from there to the
+	// smallest scale (no smaller than 12x12 px)
+	const int min_dim = std::min(height_orig, width_orig);
+	const double face_support = 12.0;
+
+	// Both ratios must be computed in floating point: as integer divisions
+	// they truncate to 0 whenever the numerator is the smaller value
+	const double largest_scale = face_support / min_face_size;
+	const double size_ratio = static_cast<double>(min_face_size) / min_dim;
+	const int num_scales = static_cast<int>(std::floor(std::log(size_ratio) / std::log(pyramid_factor))) + 1;
+
+	for (int i = 0; i < num_scales; ++i)
+	{
+		// std::pow, since cv::pow operates on arrays rather than scalars
+		const double scale = largest_scale * std::pow(pyramid_factor, i);
+
+		const int h_pyr = static_cast<int>(std::ceil(height_orig * scale));
+		const int w_pyr = static_cast<int>(std::ceil(width_orig * scale));
+
+		cv::Mat_<float> normalised_img;
+		cv::resize(input_img, normalised_img, cv::Size(w_pyr, h_pyr));
+
+		// 0.0078125 is 1 / 128
+		normalised_img = (normalised_img - 127.5) * 0.0078125;
+
+		// TODO: run the detection networks on normalised_img
+	}
+
+	// Every path of a bool function has to return a value
+	return !o_regions.empty();
+}