2017-08-09 11:00:38 -04:00
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016, Carnegie Mellon University and University of Cambridge,
// all rights reserved.
//
// THIS SOFTWARE IS PROVIDED "AS IS" FOR ACADEMIC USE ONLY AND ANY EXPRESS
// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS
// BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Notwithstanding the license granted herein, Licensee acknowledges that certain components
// of the Software may be covered by so-called "open source" software licenses ("Open Source
// Components"), which means any software licenses approved as open source licenses by the
// Open Source Initiative or any substantially similar licenses, including without limitation any
// license that, as a condition of distribution of the software licensed under such license,
// requires that the distributor make the software available in source code format. Licensor shall
// provide a list of Open Source Components for a particular version of the Software upon
// Licensee's request. Licensee will comply with the applicable terms of such licenses and to
// the extent required by the licenses covering Open Source Components, the terms of such
// licenses will apply in lieu of the terms of this Agreement. To the extent the terms of the
// licenses applicable to Open Source Components prohibit any of the restrictions in this
// License Agreement with respect to such Open Source Component, such restrictions will not
// apply to such Open Source Component. To the extent the terms of the licenses applicable to
// Open Source Components require Licensor to make an offer to provide source code or
// related information in connection with the Software, such offer is hereby made. Any request
// for source code or related information should be directed to cl-face-tracker-distribution@lists.cam.ac.uk
// Licensee acknowledges receipt of notices for the Open Source Components for the initial
// delivery of the Software.
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace: an open source facial behavior analysis toolkit
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency
// in IEEE Winter Conference on Applications of Computer Vision, 2016
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
// Constrained Local Neural Fields for robust facial landmark detection in the wild.
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency.
// in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
///////////////////////////////////////////////////////////////////////////////
# include "stdafx.h"
# include "FaceDetectorMTCNN.h"
// OpenCV includes
# include <opencv2/core/core.hpp>
# include <opencv2/imgproc.hpp>
// TBB includes
# include <tbb/tbb.h>
// System includes
# include <fstream>
// Math includes
# define _USE_MATH_DEFINES
# include <cmath>
2017-08-09 12:01:53 -04:00
// Boost includes
# include <filesystem.hpp>
# include <filesystem/fstream.hpp>
2017-08-09 11:00:38 -04:00
# ifndef M_PI
# define M_PI 3.14159265358979323846
# endif
2017-08-09 15:04:14 -04:00
# include "LandmarkDetectorUtils.h"
2017-08-09 11:00:38 -04:00
using namespace LandmarkDetector ;
2017-08-18 10:03:01 +01:00
// Constructor from model file location; delegates all of the parsing to Read
FaceDetectorMTCNN::FaceDetectorMTCNN(const string& location)
{
	this->Read(location);
}
2017-08-09 11:00:38 -04:00
// Copy constructor: the three cascaded networks (CNN) already deep-copy their
// matrices in their own copy constructor, so member-wise copy is sufficient here
FaceDetectorMTCNN::FaceDetectorMTCNN(const FaceDetectorMTCNN& other)
	: PNet(other.PNet), RNet(other.RNet), ONet(other.ONet)
{
}
2017-08-09 12:01:53 -04:00
// Copy constructor for the CNN. All cv::Mat members are deep-copied with
// clone() so the two networks never share underlying pixel buffers.
CNN::CNN(const CNN& other) : cnn_layer_types(other.cnn_layer_types), cnn_max_pooling_layers(other.cnn_max_pooling_layers), cnn_convolutional_layers_bias(other.cnn_convolutional_layers_bias)
{
	// Combined weight matrices used for direct (im2col) convolution
	this->cnn_convolutional_layers_weights.reserve(other.cnn_convolutional_layers_weights.size());
	for (const auto& weights : other.cnn_convolutional_layers_weights)
	{
		// clone() guarantees the matrix data is copied rather than ref-counted
		this->cnn_convolutional_layers_weights.push_back(weights.clone());
	}

	// Per-layer, per-kernel, per-input-map convolution kernels
	this->cnn_convolutional_layers.resize(other.cnn_convolutional_layers.size());
	for (size_t layer = 0; layer < other.cnn_convolutional_layers.size(); ++layer)
	{
		const auto& src_layer = other.cnn_convolutional_layers[layer];
		auto& dst_layer = this->cnn_convolutional_layers[layer];

		dst_layer.resize(src_layer.size());
		for (size_t kern = 0; kern < src_layer.size(); ++kern)
		{
			dst_layer[kern].reserve(src_layer[kern].size());
			for (const auto& kernel_map : src_layer[kern])
			{
				// Make sure the matrix is copied
				dst_layer[kern].push_back(kernel_map.clone());
			}
		}
	}

	// Fully connected layer weights
	this->cnn_fully_connected_layers_weights.reserve(other.cnn_fully_connected_layers_weights.size());
	for (const auto& weights : other.cnn_fully_connected_layers_weights)
	{
		this->cnn_fully_connected_layers_weights.push_back(weights.clone());
	}

	// Fully connected layer biases
	this->cnn_fully_connected_layers_biases.reserve(other.cnn_fully_connected_layers_biases.size());
	for (const auto& biases : other.cnn_fully_connected_layers_biases)
	{
		this->cnn_fully_connected_layers_biases.push_back(biases.clone());
	}

	// PReLU per-channel slopes
	this->cnn_prelu_layer_weights.reserve(other.cnn_prelu_layer_weights.size());
	for (const auto& prelu : other.cnn_prelu_layer_weights)
	{
		this->cnn_prelu_layer_weights.push_back(prelu.clone());
	}
}
2017-08-23 22:25:28 +01:00
void PReLU ( std : : vector < cv : : Mat_ < float > > & input_output_maps , cv : : Mat_ < float > prelu_weights )
2017-08-15 21:53:25 +01:00
{
2017-08-23 22:25:28 +01:00
if ( input_output_maps . size ( ) > 1 )
2017-08-15 21:53:25 +01:00
{
2017-08-23 22:25:28 +01:00
int h = input_output_maps [ 0 ] . rows ;
int w = input_output_maps [ 0 ] . cols ;
size_t size_in = h * w ;
for ( size_t k = 0 ; k < input_output_maps . size ( ) ; + + k )
2017-08-15 21:53:25 +01:00
{
// Apply the PReLU
2017-08-23 22:25:28 +01:00
auto iter = input_output_maps [ k ] . begin ( ) ;
float neg_mult = prelu_weights . at < float > ( k ) ;
for ( size_t i = 0 ; i < size_in ; + + i )
{
float in_val = * iter ;
2017-08-15 21:53:25 +01:00
2017-08-23 22:25:28 +01:00
// The prelu step
* iter + + = in_val > = 0 ? in_val : in_val * neg_mult ;
}
2017-08-15 21:53:25 +01:00
}
}
else
{
2017-08-23 22:25:28 +01:00
int w = input_output_maps [ 0 ] . cols ;
2017-08-15 21:53:25 +01:00
for ( size_t k = 0 ; k < prelu_weights . rows ; + + k )
{
2017-08-23 22:25:28 +01:00
auto iter = input_output_maps [ 0 ] . row ( k ) . begin ( ) ;
float neg_mult = prelu_weights . at < float > ( k ) ;
for ( size_t i = 0 ; i < w ; + + i )
{
float in_val = * iter ;
// Apply the PReLU
* iter + + = in_val > = 0 ? in_val : in_val * neg_mult ;
}
2017-08-15 21:53:25 +01:00
}
}
}
2017-08-15 18:06:12 +01:00
void fully_connected ( std : : vector < cv : : Mat_ < float > > & outputs , const std : : vector < cv : : Mat_ < float > > & input_maps , cv : : Mat_ < float > weights , cv : : Mat_ < float > biases )
{
2017-08-15 21:53:25 +01:00
outputs . clear ( ) ;
2017-08-15 18:06:12 +01:00
if ( input_maps . size ( ) > 1 )
{
// Concatenate all the maps
cv : : Size orig_size = input_maps [ 0 ] . size ( ) ;
2017-08-24 17:00:32 +01:00
cv : : Mat_ < float > input_concat ( input_maps . size ( ) , input_maps [ 0 ] . cols * input_maps [ 0 ] . rows ) ;
2017-08-15 18:06:12 +01:00
2017-08-24 17:00:32 +01:00
for ( size_t in = 0 ; in < input_maps . size ( ) ; + + in )
2017-08-15 18:06:12 +01:00
{
2017-08-24 17:00:32 +01:00
cv : : Mat_ < float > add = input_maps [ in ] ;
// Reshape if all of the data will be flattened
if ( input_concat . rows ! = weights . cols )
{
add = add . t ( ) ;
}
2017-08-15 18:06:12 +01:00
add = add . reshape ( 0 , 1 ) ;
2017-08-24 17:00:32 +01:00
add . copyTo ( input_concat . row ( in ) ) ;
2017-08-15 18:06:12 +01:00
}
// Treat the input as separate feature maps
2017-08-24 17:00:32 +01:00
if ( input_concat . rows = = weights . cols )
2017-08-15 18:06:12 +01:00
{
2017-08-24 17:00:32 +01:00
input_concat = weights * input_concat ;
2017-08-15 18:06:12 +01:00
// Add biases
for ( size_t k = 0 ; k < biases . rows ; + + k )
{
2017-08-24 17:00:32 +01:00
input_concat . row ( k ) = input_concat . row ( k ) + biases . at < float > ( k ) ;
2017-08-15 18:06:12 +01:00
}
outputs . clear ( ) ;
// Resize and add as output
for ( size_t k = 0 ; k < biases . rows ; + + k )
{
2017-08-24 17:00:32 +01:00
cv : : Mat_ < float > reshaped = input_concat . row ( k ) . clone ( ) ;
reshaped = reshaped . reshape ( 1 , orig_size . height ) ;
2017-08-15 18:06:12 +01:00
outputs . push_back ( reshaped ) ;
}
}
else
{
// Flatten the input
2017-08-24 17:00:32 +01:00
input_concat = input_concat . reshape ( 0 , input_concat . rows * input_concat . cols ) ;
2017-08-15 18:06:12 +01:00
2017-08-24 17:00:32 +01:00
input_concat = weights * input_concat + biases ;
2017-08-15 18:06:12 +01:00
outputs . clear ( ) ;
2017-08-24 17:00:32 +01:00
outputs . push_back ( input_concat ) ;
2017-08-15 18:06:12 +01:00
}
}
else
{
2017-08-24 17:00:32 +01:00
cv : : Mat out = weights * input_maps [ 0 ] + biases ;
2017-08-15 18:06:12 +01:00
outputs . clear ( ) ;
2017-08-24 17:00:32 +01:00
outputs . push_back ( out . t ( ) ) ;
2017-08-15 18:06:12 +01:00
}
}
2017-08-15 17:57:25 +01:00
void max_pooling ( std : : vector < cv : : Mat_ < float > > & outputs , const std : : vector < cv : : Mat_ < float > > & input_maps , int stride_x , int stride_y , int kernel_size_x , int kernel_size_y )
{
vector < cv : : Mat_ < float > > outputs_sub ;
// Iterate over kernel height and width, based on stride
for ( size_t in = 0 ; in < input_maps . size ( ) ; + + in )
{
// Help with rounding up a bit, to match caffe style output
int out_x = round ( ( double ) ( input_maps [ in ] . cols - kernel_size_x ) / ( double ) stride_x ) + 1 ;
int out_y = round ( ( double ) ( input_maps [ in ] . rows - kernel_size_y ) / ( double ) stride_y ) + 1 ;
cv : : Mat_ < float > sub_out ( out_y , out_x , 0.0 ) ;
cv : : Mat_ < float > in_map = input_maps [ in ] ;
for ( int x = 0 ; x < input_maps [ in ] . cols ; x + = stride_x )
{
int max_x = cv : : min ( input_maps [ in ] . cols , x + kernel_size_x ) ;
int x_in_out = floor ( x / stride_x ) ;
if ( x_in_out > = out_x )
continue ;
for ( int y = 0 ; y < input_maps [ in ] . rows ; y + = stride_y )
{
int y_in_out = floor ( y / stride_y ) ;
if ( y_in_out > = out_y )
continue ;
int max_y = cv : : min ( input_maps [ in ] . rows , y + kernel_size_y ) ;
float curr_max = - FLT_MAX ;
for ( int x_in = x ; x_in < max_x ; + + x_in )
{
for ( int y_in = y ; y_in < max_y ; + + y_in )
{
float curr_val = in_map . at < float > ( y_in , x_in ) ;
if ( curr_val > curr_max )
{
curr_max = curr_val ;
}
}
}
sub_out . at < float > ( y_in_out , x_in_out ) = curr_max ;
}
}
outputs_sub . push_back ( sub_out ) ;
}
outputs = outputs_sub ;
}
2017-08-16 17:14:00 +01:00
////////////////////////////////////////////////////////////////////////////////////////////////////////
void convolution_single_kern_fft ( const vector < cv : : Mat_ < float > > & input_imgs , vector < cv : : Mat_ < double > > & img_dfts , const vector < cv : : Mat_ < float > > & _templs , map < int , vector < cv : : Mat_ < double > > > & _templ_dfts , cv : : Mat_ < float > & result )
{
// Assume result is defined properly
if ( result . empty ( ) )
{
cv : : Size corrSize ( input_imgs [ 0 ] . cols - _templs [ 0 ] . cols + 1 , input_imgs [ 0 ] . rows - _templs [ 0 ] . rows + 1 ) ;
result . create ( corrSize ) ;
}
// Our model will always be under min block size so can ignore this
//const double blockScale = 4.5;
//const int minBlockSize = 256;
int maxDepth = CV_64F ;
cv : : Size dftsize ;
dftsize . width = cv : : getOptimalDFTSize ( result . cols + _templs [ 0 ] . cols - 1 ) ;
dftsize . height = cv : : getOptimalDFTSize ( result . rows + _templs [ 0 ] . rows - 1 ) ;
// Compute block size
cv : : Size blocksize ;
blocksize . width = dftsize . width - _templs [ 0 ] . cols + 1 ;
blocksize . width = MIN ( blocksize . width , result . cols ) ;
blocksize . height = dftsize . height - _templs [ 0 ] . rows + 1 ;
blocksize . height = MIN ( blocksize . height , result . rows ) ;
vector < cv : : Mat_ < double > > dftTempl ;
// if this has not been precomputed, precompute it, otherwise use it
if ( _templ_dfts . find ( dftsize . width ) = = _templ_dfts . end ( ) )
{
dftTempl . resize ( _templs . size ( ) ) ;
for ( size_t k = 0 ; k < _templs . size ( ) ; + + k )
{
dftTempl [ k ] . create ( dftsize . height , dftsize . width ) ;
cv : : Mat_ < float > src = _templs [ k ] ;
cv : : Mat_ < double > dst ( dftTempl [ k ] , cv : : Rect ( 0 , 0 , dftsize . width , dftsize . height ) ) ;
cv : : Mat_ < double > dst1 ( dftTempl [ k ] , cv : : Rect ( 0 , 0 , _templs [ k ] . cols , _templs [ k ] . rows ) ) ;
if ( dst1 . data ! = src . data )
src . convertTo ( dst1 , dst1 . depth ( ) ) ;
if ( dst . cols > _templs [ k ] . cols )
{
cv : : Mat_ < double > part ( dst , cv : : Range ( 0 , _templs [ k ] . rows ) , cv : : Range ( _templs [ k ] . cols , dst . cols ) ) ;
part . setTo ( 0 ) ;
}
// Perform DFT of the template
dft ( dst , dst , 0 , _templs [ k ] . rows ) ;
}
_templ_dfts [ dftsize . width ] = dftTempl ;
}
else
{
dftTempl = _templ_dfts [ dftsize . width ] ;
}
cv : : Size bsz ( std : : min ( blocksize . width , result . cols ) , std : : min ( blocksize . height , result . rows ) ) ;
cv : : Mat src ;
cv : : Mat cdst ( result , cv : : Rect ( 0 , 0 , bsz . width , bsz . height ) ) ;
vector < cv : : Mat_ < double > > dftImgs ;
dftImgs . resize ( input_imgs . size ( ) ) ;
if ( img_dfts . empty ( ) )
{
for ( size_t k = 0 ; k < input_imgs . size ( ) ; + + k )
{
dftImgs [ k ] . create ( dftsize ) ;
dftImgs [ k ] . setTo ( 0.0 ) ;
cv : : Size dsz ( bsz . width + _templs [ k ] . cols - 1 , bsz . height + _templs [ k ] . rows - 1 ) ;
int x2 = std : : min ( input_imgs [ k ] . cols , dsz . width ) ;
int y2 = std : : min ( input_imgs [ k ] . rows , dsz . height ) ;
cv : : Mat src0 ( input_imgs [ k ] , cv : : Range ( 0 , y2 ) , cv : : Range ( 0 , x2 ) ) ;
cv : : Mat dst ( dftImgs [ k ] , cv : : Rect ( 0 , 0 , dsz . width , dsz . height ) ) ;
cv : : Mat dst1 ( dftImgs [ k ] , cv : : Rect ( 0 , 0 , x2 , y2 ) ) ;
src = src0 ;
if ( dst1 . data ! = src . data )
src . convertTo ( dst1 , dst1 . depth ( ) ) ;
dft ( dftImgs [ k ] , dftImgs [ k ] , 0 , dsz . height ) ;
img_dfts . push_back ( dftImgs [ k ] . clone ( ) ) ;
}
}
cv : : Mat_ < double > dft_img ( img_dfts [ 0 ] . rows , img_dfts [ 0 ] . cols , 0.0 ) ;
for ( size_t k = 0 ; k < input_imgs . size ( ) ; + + k )
{
cv : : Mat dftTempl1 ( dftTempl [ k ] , cv : : Rect ( 0 , 0 , dftsize . width , dftsize . height ) ) ;
2017-08-17 11:40:57 +01:00
if ( k = = 0 )
{
cv : : mulSpectrums ( img_dfts [ k ] , dftTempl1 , dft_img , 0 , true ) ;
}
else
{
cv : : mulSpectrums ( img_dfts [ k ] , dftTempl1 , dftImgs [ k ] , 0 , true ) ;
dft_img = dft_img + dftImgs [ k ] ;
}
2017-08-16 17:14:00 +01:00
}
cv : : dft ( dft_img , dft_img , cv : : DFT_INVERSE + cv : : DFT_SCALE , bsz . height ) ;
src = dft_img ( cv : : Rect ( 0 , 0 , bsz . width , bsz . height ) ) ;
src . convertTo ( cdst , CV_32F ) ;
}
2017-08-26 13:23:16 +01:00
void im2col_t ( const cv : : Mat_ < float > & input , int width , int height , cv : : Mat_ < float > & output )
2017-08-23 20:00:18 +01:00
{
int m = input . cols ;
int n = input . rows ;
// determine how many blocks there will be with a sliding window of width x height in the input
int yB = m - height + 1 ;
int xB = n - width + 1 ;
// Allocate the output size
if ( output . rows ! = width * height & & output . cols ! = xB * yB )
{
output = cv : : Mat : : ones ( width * height , xB * yB , CV_32F ) ;
}
// Iterate over the whole image
for ( int i = 0 ; i < yB ; i + + )
{
int rowIdx = i ;
for ( int j = 0 ; j < xB ; j + + )
{
//int rowIdx = i; +j*yB;
// iterate over the blocks within the image
for ( unsigned int yy = 0 ; yy < height ; + + yy )
{
// Faster iteration over the image
const float * Mi = input . ptr < float > ( j + yy ) ;
for ( unsigned int xx = 0 ; xx < width ; + + xx )
{
int colIdx = xx * height + yy ;
output . at < float > ( colIdx , rowIdx ) = Mi [ i + xx ] ;
}
}
rowIdx + = yB ;
}
}
}
2017-08-17 11:40:57 +01:00
void convolution_direct ( std : : vector < cv : : Mat_ < float > > & outputs , const std : : vector < cv : : Mat_ < float > > & input_maps , const cv : : Mat_ < float > & weight_matrix , const std : : vector < float > & biases , int height_k , int width_k )
{
outputs . clear ( ) ;
int height_in = input_maps [ 0 ] . rows ;
int width_n = input_maps [ 0 ] . cols ;
// determine how many blocks there will be with a sliding window of width x height in the input
int yB = height_in - height_k + 1 ;
int xB = width_n - width_k + 1 ;
2017-08-23 20:43:19 +01:00
cv : : Mat_ < float > input_matrix ( input_maps . size ( ) * height_k * width_k + 1.0 , yB * xB , 1.0f ) ;
2017-08-17 11:40:57 +01:00
// Comibine im2col accross channels to prepare for matrix multiplication
for ( size_t i = 0 ; i < input_maps . size ( ) ; + + i )
{
2017-08-26 13:23:16 +01:00
im2col_t ( input_maps [ i ] , width_k , height_k , input_matrix ( cv : : Rect ( 0 , i * height_k * width_k , yB * xB , height_k * width_k ) ) ) ;
2017-08-17 11:40:57 +01:00
}
2017-08-23 20:43:19 +01:00
// Actual convolution (through multiplication)
2017-08-23 20:00:18 +01:00
cv : : Mat_ < float > out = weight_matrix * input_matrix ;
2017-08-17 11:40:57 +01:00
// Move back to vectors and reshape accordingly (also add the bias)
2017-08-23 20:00:18 +01:00
for ( size_t k = 0 ; k < out . rows ; + + k )
2017-08-17 11:40:57 +01:00
{
2017-08-23 20:43:19 +01:00
outputs . push_back ( out . row ( k ) . reshape ( 1 , yB ) ) ;
2017-08-17 11:40:57 +01:00
}
}
2017-08-16 17:14:00 +01:00
void convolution_fft2 ( std : : vector < cv : : Mat_ < float > > & outputs , const std : : vector < cv : : Mat_ < float > > & input_maps , const std : : vector < std : : vector < cv : : Mat_ < float > > > & kernels , const std : : vector < float > & biases , vector < map < int , vector < cv : : Mat_ < double > > > > & precomp_dfts )
{
outputs . clear ( ) ;
// Useful precomputed data placeholders for quick correlation (convolution)
vector < cv : : Mat_ < double > > input_image_dft ;
for ( size_t k = 0 ; k < kernels . size ( ) ; + + k )
{
// The convolution (with precomputation)
cv : : Mat_ < float > output ;
convolution_single_kern_fft ( input_maps , input_image_dft , kernels [ k ] , precomp_dfts [ k ] , output ) ;
// Combining the maps
outputs . push_back ( output + biases [ k ] ) ;
}
}
2017-08-15 17:57:25 +01:00
void convolution_fft ( std : : vector < cv : : Mat_ < float > > & outputs , const std : : vector < cv : : Mat_ < float > > & input_maps , const std : : vector < std : : vector < cv : : Mat_ < float > > > & kernels , const std : : vector < float > & biases , vector < vector < pair < int , cv : : Mat_ < double > > > > & precomp_dfts )
{
outputs . clear ( ) ;
for ( size_t in = 0 ; in < input_maps . size ( ) ; + + in )
{
cv : : Mat_ < float > input_image = input_maps [ in ] ;
// Useful precomputed data placeholders for quick correlation (convolution)
cv : : Mat_ < double > input_image_dft ;
cv : : Mat integral_image ;
cv : : Mat integral_image_sq ;
for ( size_t k = 0 ; k < kernels [ in ] . size ( ) ; + + k )
{
cv : : Mat_ < float > kernel = kernels [ in ] [ k ] ;
// The convolution (with precomputation)
cv : : Mat_ < float > output ;
if ( precomp_dfts [ in ] [ k ] . second . empty ( ) )
{
std : : map < int , cv : : Mat_ < double > > precomputed_dft ;
LandmarkDetector : : matchTemplate_m ( input_image , input_image_dft , integral_image , integral_image_sq , kernel , precomputed_dft , output , CV_TM_CCORR ) ;
precomp_dfts [ in ] [ k ] . first = precomputed_dft . begin ( ) - > first ;
precomp_dfts [ in ] [ k ] . second = precomputed_dft . begin ( ) - > second ;
}
else
{
std : : map < int , cv : : Mat_ < double > > precomputed_dft ;
precomputed_dft [ precomp_dfts [ in ] [ k ] . first ] = precomp_dfts [ in ] [ k ] . second ;
LandmarkDetector : : matchTemplate_m ( input_image , input_image_dft , integral_image , integral_image_sq , kernel , precomputed_dft , output , CV_TM_CCORR ) ;
}
// Combining the maps
if ( in = = 0 )
{
outputs . push_back ( output ) ;
}
else
{
outputs [ k ] = outputs [ k ] + output ;
}
}
}
for ( size_t k = 0 ; k < biases . size ( ) ; + + k )
{
outputs [ k ] = outputs [ k ] + biases [ k ] ;
}
}
2017-08-17 11:40:57 +01:00
std : : vector < cv : : Mat_ < float > > CNN : : Inference ( const cv : : Mat & input_img , bool direct )
2017-08-09 15:04:14 -04:00
{
if ( input_img . channels ( ) = = 1 )
{
cv : : cvtColor ( input_img , input_img , cv : : COLOR_GRAY2BGR ) ;
}
int cnn_layer = 0 ;
int fully_connected_layer = 0 ;
int prelu_layer = 0 ;
int max_pool_layer = 0 ;
2017-08-10 15:30:35 -04:00
// Slit a BGR image into three chnels
cv : : Mat channels [ 3 ] ;
cv : : split ( input_img , channels ) ;
2017-08-14 16:23:44 +01:00
// Flip the BGR order to RGB
2017-08-09 15:04:14 -04:00
vector < cv : : Mat_ < float > > input_maps ;
2017-08-10 15:30:35 -04:00
input_maps . push_back ( channels [ 2 ] ) ;
input_maps . push_back ( channels [ 1 ] ) ;
input_maps . push_back ( channels [ 0 ] ) ;
2017-08-09 15:04:14 -04:00
vector < cv : : Mat_ < float > > outputs ;
for ( size_t layer = 0 ; layer < cnn_layer_types . size ( ) ; + + layer )
{
2017-08-10 15:30:35 -04:00
2017-08-09 15:04:14 -04:00
// Determine layer type
int layer_type = cnn_layer_types [ layer ] ;
// Convolutional layer
2017-08-15 17:57:25 +01:00
if ( layer_type = = 0 )
2017-08-09 15:04:14 -04:00
{
2017-08-16 17:14:00 +01:00
2017-08-17 11:40:57 +01:00
// Either perform direct convolution through matrix multiplication or use an FFT optimized version, which one is optimal depends on the kernel and input sizes
if ( direct )
{
2017-08-17 12:35:11 +01:00
convolution_direct ( outputs , input_maps , cnn_convolutional_layers_weights [ cnn_layer ] , cnn_convolutional_layers_bias [ cnn_layer ] , cnn_convolutional_layers [ cnn_layer ] [ 0 ] [ 0 ] . rows , cnn_convolutional_layers [ cnn_layer ] [ 0 ] [ 0 ] . cols ) ;
2017-08-17 11:40:57 +01:00
}
else
{
2017-08-17 12:35:11 +01:00
convolution_fft2 ( outputs , input_maps , cnn_convolutional_layers [ cnn_layer ] , cnn_convolutional_layers_bias [ cnn_layer ] , cnn_convolutional_layers_dft [ cnn_layer ] ) ;
2017-08-17 11:40:57 +01:00
}
2017-08-16 17:14:00 +01:00
//vector<cv::Mat_<float> > outs;
//convolution_fft(outs, input_maps, cnn_convolutional_layers[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft[cnn_layer]);
2017-08-23 20:00:18 +01:00
2017-08-09 15:04:14 -04:00
cnn_layer + + ;
}
if ( layer_type = = 1 )
{
int stride_x = std : : get < 2 > ( cnn_max_pooling_layers [ max_pool_layer ] ) ;
int stride_y = std : : get < 3 > ( cnn_max_pooling_layers [ max_pool_layer ] ) ;
int kernel_size_x = std : : get < 0 > ( cnn_max_pooling_layers [ max_pool_layer ] ) ;
int kernel_size_y = std : : get < 1 > ( cnn_max_pooling_layers [ max_pool_layer ] ) ;
2017-08-15 17:57:25 +01:00
max_pooling ( outputs , input_maps , stride_x , stride_y , kernel_size_x , kernel_size_y ) ;
max_pool_layer + + ;
2017-08-09 15:04:14 -04:00
}
if ( layer_type = = 2 )
{
2017-08-15 18:06:12 +01:00
fully_connected ( outputs , input_maps , cnn_fully_connected_layers_weights [ fully_connected_layer ] , cnn_fully_connected_layers_biases [ fully_connected_layer ] ) ;
2017-08-09 15:04:14 -04:00
fully_connected_layer + + ;
}
2017-08-10 15:30:35 -04:00
if ( layer_type = = 3 ) // PReLU
2017-08-09 15:04:14 -04:00
{
2017-08-23 22:25:28 +01:00
// In place prelu computation
PReLU ( input_maps , cnn_prelu_layer_weights [ prelu_layer ] ) ;
outputs = input_maps ;
2017-08-10 15:30:35 -04:00
prelu_layer + + ;
2017-08-09 15:04:14 -04:00
}
if ( layer_type = = 4 )
{
outputs . clear ( ) ;
for ( size_t k = 0 ; k < input_maps . size ( ) ; + + k )
{
// Apply the sigmoid
cv : : exp ( - input_maps [ k ] , input_maps [ k ] ) ;
input_maps [ k ] = 1.0 / ( 1.0 + input_maps [ k ] ) ;
outputs . push_back ( input_maps [ k ] ) ;
}
}
2017-08-15 18:06:12 +01:00
// Set the outputs of this layer to inputs of the next one
input_maps = outputs ;
2017-08-09 15:04:14 -04:00
}
2017-08-10 15:30:35 -04:00
return outputs ;
2017-08-09 15:04:14 -04:00
}
2017-08-09 11:00:38 -04:00
void ReadMatBin ( std : : ifstream & stream , cv : : Mat & output_mat )
{
// Read in the number of rows, columns and the data type
int row , col , type ;
stream . read ( ( char * ) & row , 4 ) ;
stream . read ( ( char * ) & col , 4 ) ;
stream . read ( ( char * ) & type , 4 ) ;
output_mat = cv : : Mat ( row , col , type ) ;
int size = output_mat . rows * output_mat . cols * output_mat . elemSize ( ) ;
stream . read ( ( char * ) output_mat . data , size ) ;
}
2017-08-17 12:35:11 +01:00
void CNN : : ClearPrecomp ( )
{
for ( size_t k1 = 0 ; k1 < cnn_convolutional_layers_dft . size ( ) ; + + k1 )
{
for ( size_t k2 = 0 ; k2 < cnn_convolutional_layers_dft [ k1 ] . size ( ) ; + + k2 )
{
cnn_convolutional_layers_dft [ k1 ] [ k2 ] . clear ( ) ;
}
}
}
2017-08-18 10:03:01 +01:00
void CNN : : Read ( const string & location )
2017-08-09 11:00:38 -04:00
{
ifstream cnn_stream ( location , ios : : in | ios : : binary ) ;
if ( cnn_stream . is_open ( ) )
{
cnn_stream . seekg ( 0 , ios : : beg ) ;
// Reading in CNNs
int network_depth ;
cnn_stream . read ( ( char * ) & network_depth , 4 ) ;
cnn_layer_types . resize ( network_depth ) ;
for ( int layer = 0 ; layer < network_depth ; + + layer )
{
int layer_type ;
cnn_stream . read ( ( char * ) & layer_type , 4 ) ;
cnn_layer_types [ layer ] = layer_type ;
// convolutional
if ( layer_type = = 0 )
{
// Read the number of input maps
int num_in_maps ;
cnn_stream . read ( ( char * ) & num_in_maps , 4 ) ;
// Read the number of kernels for each input map
int num_kernels ;
cnn_stream . read ( ( char * ) & num_kernels , 4 ) ;
vector < vector < cv : : Mat_ < float > > > kernels ;
kernels . resize ( num_in_maps ) ;
vector < float > biases ;
for ( int k = 0 ; k < num_kernels ; + + k )
{
float bias ;
cnn_stream . read ( ( char * ) & bias , 4 ) ;
biases . push_back ( bias ) ;
}
cnn_convolutional_layers_bias . push_back ( biases ) ;
// For every input map
for ( int in = 0 ; in < num_in_maps ; + + in )
{
kernels [ in ] . resize ( num_kernels ) ;
// For every kernel on that input map
for ( int k = 0 ; k < num_kernels ; + + k )
{
ReadMatBin ( cnn_stream , kernels [ in ] [ k ] ) ;
}
}
2017-08-16 17:14:00 +01:00
// Rearrange the kernels for faster inference with FFT
vector < vector < cv : : Mat_ < float > > > kernels_rearr ;
kernels_rearr . resize ( num_kernels ) ;
// Fill up the rearranged layer
for ( int k = 0 ; k < num_kernels ; + + k )
{
for ( int in = 0 ; in < num_in_maps ; + + in )
{
kernels_rearr [ k ] . push_back ( kernels [ in ] [ k ] ) ;
}
}
2017-08-17 12:35:11 +01:00
cnn_convolutional_layers . push_back ( kernels_rearr ) ;
// Place-holders for DFT precomputation
vector < map < int , vector < cv : : Mat_ < double > > > > cnn_convolutional_layers_dft_curr_layer ;
cnn_convolutional_layers_dft_curr_layer . resize ( num_kernels ) ;
cnn_convolutional_layers_dft . push_back ( cnn_convolutional_layers_dft_curr_layer ) ;
2017-08-16 17:14:00 +01:00
2017-08-17 11:40:57 +01:00
// Rearrange the flattened kernels into weight matrices for direct convolution computation
cv : : Mat_ < float > weight_matrix ( num_in_maps * kernels_rearr [ 0 ] [ 0 ] . rows * kernels_rearr [ 0 ] [ 0 ] . cols , num_kernels ) ;
for ( size_t k = 0 ; k < num_kernels ; + + k )
{
for ( size_t i = 0 ; i < num_in_maps ; + + i )
{
// Flatten the kernel
cv : : Mat_ < float > k_flat = kernels_rearr [ k ] [ i ] . t ( ) ;
k_flat = k_flat . reshape ( 0 , 1 ) . t ( ) ;
k_flat . copyTo ( weight_matrix ( cv : : Rect ( k , i * kernels_rearr [ 0 ] [ 0 ] . rows * kernels_rearr [ 0 ] [ 0 ] . cols , 1 , kernels_rearr [ 0 ] [ 0 ] . rows * kernels_rearr [ 0 ] [ 0 ] . cols ) ) ) ;
}
}
2017-08-23 20:43:19 +01:00
2017-08-23 20:00:18 +01:00
// Transpose the weight matrix for more convenient computation
2017-08-23 20:43:19 +01:00
weight_matrix = weight_matrix . t ( ) ;
// Add a bias term to the weight matrix for efficiency
cv : : Mat_ < float > W ( weight_matrix . rows , weight_matrix . cols + 1 , 1.0 ) ;
for ( size_t k = 0 ; k < weight_matrix . rows ; + + k )
{
W . at < float > ( k , weight_matrix . cols ) = biases [ k ] ;
}
weight_matrix . copyTo ( W ( cv : : Rect ( 0 , 0 , weight_matrix . cols , weight_matrix . rows ) ) ) ;
cnn_convolutional_layers_weights . push_back ( W ) ;
2017-08-17 11:40:57 +01:00
2017-08-09 11:00:38 -04:00
}
else if ( layer_type = = 1 )
{
int kernel_x , kernel_y , stride_x , stride_y ;
cnn_stream . read ( ( char * ) & kernel_x , 4 ) ;
cnn_stream . read ( ( char * ) & kernel_y , 4 ) ;
cnn_stream . read ( ( char * ) & stride_x , 4 ) ;
cnn_stream . read ( ( char * ) & stride_y , 4 ) ;
cnn_max_pooling_layers . push_back ( std : : tuple < int , int , int , int > ( kernel_x , kernel_y , stride_x , stride_y ) ) ;
}
else if ( layer_type = = 2 )
{
cv : : Mat_ < float > biases ;
ReadMatBin ( cnn_stream , biases ) ;
cnn_fully_connected_layers_biases . push_back ( biases ) ;
// Fully connected layer
cv : : Mat_ < float > weights ;
ReadMatBin ( cnn_stream , weights ) ;
2017-08-24 17:00:32 +01:00
cnn_fully_connected_layers_weights . push_back ( weights . t ( ) ) ;
2017-08-09 11:00:38 -04:00
}
2017-08-09 12:01:53 -04:00
else if ( layer_type = = 3 )
2017-08-09 11:00:38 -04:00
{
cv : : Mat_ < float > weights ;
ReadMatBin ( cnn_stream , weights ) ;
cnn_prelu_layer_weights . push_back ( weights ) ;
}
}
}
else
{
cout < < " WARNING: Can't find the CNN location " < < endl ;
}
}
//===========================================================================
// Read in the MTCNN detector
2017-08-18 10:03:01 +01:00
void FaceDetectorMTCNN : : Read ( const string & location )
2017-08-09 11:00:38 -04:00
{
cout < < " Reading the MTCNN face detector from: " < < location < < endl ;
ifstream locations ( location . c_str ( ) , ios_base : : in ) ;
if ( ! locations . is_open ( ) )
{
cout < < " Couldn't open the model file, aborting " < < endl ;
return ;
}
string line ;
// The other module locations should be defined as relative paths from the main model
boost : : filesystem : : path root = boost : : filesystem : : path ( location ) . parent_path ( ) ;
// The main file contains the references to other files
while ( ! locations . eof ( ) )
{
getline ( locations , line ) ;
stringstream lineStream ( line ) ;
string module ;
string location ;
// figure out which module is to be read from which file
lineStream > > module ;
lineStream > > location ;
// remove carriage return at the end for compatibility with unix systems
if ( location . size ( ) > 0 & & location . at ( location . size ( ) - 1 ) = = ' \r ' )
{
location = location . substr ( 0 , location . size ( ) - 1 ) ;
}
// append to root
location = ( root / location ) . string ( ) ;
if ( module . compare ( " PNet " ) = = 0 )
{
cout < < " Reading the PNet module from: " < < location < < endl ;
PNet . Read ( location ) ;
}
else if ( module . compare ( " RNet " ) = = 0 )
{
cout < < " Reading the RNet module from: " < < location < < endl ;
RNet . Read ( location ) ;
}
else if ( module . compare ( " ONet " ) = = 0 )
{
cout < < " Reading the ONet module from: " < < location < < endl ;
ONet . Read ( location ) ;
}
}
}
// 2017-08-14 16:23:44 +01:00  (stray VCS blame line, commented out)
// Perform non maximum supression on proposal bounding boxes prioritizing boxes with high score/confidence
// 2017-08-15 11:44:05 +01:00  (stray VCS blame line, commented out)
std : : vector < int > non_maximum_supression ( const std : : vector < cv : : Rect_ < float > > & original_bb , const std : : vector < float > & scores , float thresh , bool minimum )
2017-08-14 16:23:44 +01:00
{
// Sort the input bounding boxes by the detection score, using the nice trick of multimap always being sorted internally
std : : multimap < float , size_t > idxs ;
for ( size_t i = 0 ; i < original_bb . size ( ) ; + + i )
{
idxs . insert ( std : : pair < float , size_t > ( scores [ i ] , i ) ) ;
}
std : : vector < int > output_ids ;
// keep looping while some indexes still remain in the indexes list
while ( idxs . size ( ) > 0 )
{
// grab the last rectangle
auto lastElem = - - std : : end ( idxs ) ;
size_t curr_id = lastElem - > second ;
const cv : : Rect & rect1 = original_bb [ curr_id ] ;
idxs . erase ( lastElem ) ;
// Iterate through remaining bounding boxes and choose which ones to remove
for ( auto pos = std : : begin ( idxs ) ; pos ! = std : : end ( idxs ) ; )
{
// grab the current rectangle
const cv : : Rect & rect2 = original_bb [ pos - > second ] ;
float intArea = ( rect1 & rect2 ) . area ( ) ;
2017-08-15 11:44:05 +01:00
float unionArea ;
if ( minimum )
{
unionArea = cv : : min ( rect1 . area ( ) , rect2 . area ( ) ) ;
}
else
{
unionArea = rect1 . area ( ) + rect2 . area ( ) - intArea ;
}
2017-08-14 16:23:44 +01:00
float overlap = intArea / unionArea ;
// Remove the bounding boxes with less confidence but with significant overlap with the current one
if ( overlap > thresh )
{
pos = idxs . erase ( pos ) ;
}
else
{
+ + pos ;
}
}
output_ids . push_back ( curr_id ) ;
}
return output_ids ;
}
// Helper function for selecting a subset of bounding boxes based on indices
void select_subset ( const vector < int > & to_keep , vector < cv : : Rect_ < float > > & bounding_boxes , vector < float > & scores , vector < cv : : Rect_ < float > > & corrections )
{
vector < cv : : Rect_ < float > > bounding_boxes_tmp ;
vector < float > scores_tmp ;
vector < cv : : Rect_ < float > > corrections_tmp ;
for ( size_t i = 0 ; i < to_keep . size ( ) ; + + i )
{
bounding_boxes_tmp . push_back ( bounding_boxes [ to_keep [ i ] ] ) ;
scores_tmp . push_back ( scores [ to_keep [ i ] ] ) ;
corrections_tmp . push_back ( corrections [ to_keep [ i ] ] ) ;
}
2017-08-15 21:53:25 +01:00
2017-08-14 16:23:44 +01:00
bounding_boxes = bounding_boxes_tmp ;
scores = scores_tmp ;
corrections = corrections_tmp ;
}
// Use the heatmap generated by PNet to generate bounding boxes in the original image space, also generate the correction values and scores of the bounding boxes as well
void generate_bounding_boxes ( vector < cv : : Rect_ < float > > & o_bounding_boxes , vector < float > & o_scores , vector < cv : : Rect_ < float > > & o_corrections , const cv : : Mat_ < float > & heatmap , const vector < cv : : Mat_ < float > > & corrections , double scale , double threshold , int face_support )
2017-08-11 14:56:45 -04:00
{
// Correction for the pooling
int stride = 2 ;
2017-08-14 16:23:44 +01:00
o_bounding_boxes . clear ( ) ;
o_scores . clear ( ) ;
o_corrections . clear ( ) ;
2017-08-11 14:56:45 -04:00
2017-08-14 16:23:44 +01:00
int counter = 0 ;
for ( int x = 0 ; x < heatmap . cols ; + + x )
{
for ( int y = 0 ; y < heatmap . rows ; + + y )
{
if ( heatmap . at < float > ( y , x ) > = threshold )
{
float min_x = int ( ( stride * x + 1 ) / scale ) ;
float max_x = int ( ( stride * x + face_support ) / scale ) ;
float min_y = int ( ( stride * y + 1 ) / scale ) ;
float max_y = int ( ( stride * y + face_support ) / scale ) ;
2017-08-11 14:56:45 -04:00
2017-08-14 16:23:44 +01:00
o_bounding_boxes . push_back ( cv : : Rect_ < float > ( min_x , min_y , max_x - min_x , max_y - min_y ) ) ;
o_scores . push_back ( heatmap . at < float > ( y , x ) ) ;
2017-08-11 14:56:45 -04:00
2017-08-14 16:23:44 +01:00
float corr_x = corrections [ 0 ] . at < float > ( y , x ) ;
float corr_y = corrections [ 1 ] . at < float > ( y , x ) ;
float corr_width = corrections [ 2 ] . at < float > ( y , x ) ;
float corr_height = corrections [ 3 ] . at < float > ( y , x ) ;
o_corrections . push_back ( cv : : Rect_ < float > ( corr_x , corr_y , corr_width , corr_height ) ) ;
2017-08-11 14:56:45 -04:00
2017-08-14 16:23:44 +01:00
counter + + ;
}
}
}
2017-08-11 14:56:45 -04:00
}
// 2017-08-14 22:07:21 +01:00  (stray VCS blame line, commented out)
// Converting the bounding boxes to squares
void rectify ( vector < cv : : Rect_ < float > > & total_bboxes )
{
// Apply size and location offsets
for ( size_t i = 0 ; i < total_bboxes . size ( ) ; + + i )
{
float height = total_bboxes [ i ] . height ;
float width = total_bboxes [ i ] . width ;
float max_side = max ( width , height ) ;
// Correct the starts based on new size
float new_min_x = total_bboxes [ i ] . x + 0.5 * ( width - max_side ) ;
float new_min_y = total_bboxes [ i ] . y + 0.5 * ( height - max_side ) ;
total_bboxes [ i ] . x = ( int ) new_min_x ;
total_bboxes [ i ] . y = ( int ) new_min_y ;
total_bboxes [ i ] . width = ( int ) max_side ;
total_bboxes [ i ] . height = ( int ) max_side ;
}
}
void apply_correction ( vector < cv : : Rect_ < float > > & total_bboxes , const vector < cv : : Rect_ < float > > corrections , bool add1 )
{
// Apply size and location offsets
for ( size_t i = 0 ; i < total_bboxes . size ( ) ; + + i )
{
cv : : Rect curr_box = total_bboxes [ i ] ;
if ( add1 )
{
curr_box . width + + ;
curr_box . height + + ;
}
float new_min_x = curr_box . x + corrections [ i ] . x * curr_box . width ;
float new_min_y = curr_box . y + corrections [ i ] . y * curr_box . height ;
float new_max_x = curr_box . x + curr_box . width + curr_box . width * corrections [ i ] . width ;
float new_max_y = curr_box . y + curr_box . height + curr_box . height * corrections [ i ] . height ;
total_bboxes [ i ] = cv : : Rect_ < float > ( new_min_x , new_min_y , new_max_x - new_min_x , new_max_y - new_min_y ) ;
}
}
// 2017-08-11 14:56:45 -04:00  (stray VCS blame line, commented out)
// 2017-08-09 16:16:31 -04:00  (stray VCS blame line, commented out)
// The actual MTCNN face detection step
// 2017-08-18 10:03:01 +01:00  (stray VCS blame line, commented out)
bool FaceDetectorMTCNN : : DetectFaces ( vector < cv : : Rect_ < double > > & o_regions , const cv : : Mat & img_in , std : : vector < double > & o_confidences , int min_face_size , double t1 , double t2 , double t3 )
2017-08-09 16:16:31 -04:00
{
2017-08-18 10:03:01 +01:00
int height_orig = img_in . size ( ) . height ;
int width_orig = img_in . size ( ) . width ;
2017-08-09 16:16:31 -04:00
// Size ratio of image pyramids
double pyramid_factor = 0.709 ;
// Face support region is 12x12 px, so from that can work out the largest
// scale(which is 12 / min), and work down from there to smallest scale(no smaller than 12x12px)
int min_dim = std : : min ( height_orig , width_orig ) ;
int face_support = 12 ;
2017-08-10 15:30:35 -04:00
int num_scales = floor ( log ( ( double ) min_face_size / ( double ) min_dim ) / log ( pyramid_factor ) ) + 1 ;
2017-08-18 10:03:01 +01:00
cv : : Mat input_img ;
if ( img_in . channels ( ) = = 1 )
{
cv : : cvtColor ( img_in , input_img , CV_GRAY2RGB ) ;
}
else
2017-08-10 15:30:35 -04:00
{
2017-08-18 10:03:01 +01:00
input_img = img_in ;
2017-08-10 15:30:35 -04:00
}
cv : : Mat img_float ;
input_img . convertTo ( img_float , CV_32FC3 ) ;
2017-08-09 16:16:31 -04:00
2017-08-14 16:23:44 +01:00
vector < cv : : Rect_ < float > > proposal_boxes_all ;
vector < float > scores_all ;
vector < cv : : Rect_ < float > > proposal_corrections_all ;
2017-08-17 14:17:20 +01:00
// As the scales will be done in parallel have some containers for them
vector < vector < cv : : Rect_ < float > > > proposal_boxes_cross_scale ( num_scales ) ;
vector < vector < float > > scores_cross_scale ( num_scales ) ;
vector < vector < cv : : Rect_ < float > > > proposal_corrections_cross_scale ( num_scales ) ;
2017-08-23 20:00:18 +01:00
//tbb::parallel_for(0, (int)num_scales, [&](int i) {
for ( int i = 0 ; i < num_scales ; + + i )
2017-08-09 16:16:31 -04:00
{
2017-08-10 15:30:35 -04:00
double scale = ( ( double ) face_support / ( double ) min_face_size ) * cv : : pow ( pyramid_factor , i ) ;
2017-08-09 16:16:31 -04:00
int h_pyr = ceil ( height_orig * scale ) ;
int w_pyr = ceil ( width_orig * scale ) ;
2017-08-10 15:30:35 -04:00
cv : : Mat normalised_img ;
cv : : resize ( img_float , normalised_img , cv : : Size ( w_pyr , h_pyr ) ) ;
2017-08-14 16:23:44 +01:00
// Normalize the image
2017-08-09 16:16:31 -04:00
normalised_img = ( normalised_img - 127.5 ) * 0.0078125 ;
2017-08-14 16:23:44 +01:00
// Actual PNet CNN step
2017-08-17 11:40:57 +01:00
std : : vector < cv : : Mat_ < float > > pnet_out = PNet . Inference ( normalised_img , true ) ;
2017-08-17 14:17:20 +01:00
// Clear the precomputations, as the image sizes will be different
2017-08-17 12:35:11 +01:00
PNet . ClearPrecomp ( ) ;
2017-08-16 17:14:00 +01:00
2017-08-14 16:23:44 +01:00
// Extract the probabilities from PNet response
cv : : Mat_ < float > prob_heatmap ;
cv : : exp ( pnet_out [ 0 ] - pnet_out [ 1 ] , prob_heatmap ) ;
prob_heatmap = 1.0 / ( 1.0 + prob_heatmap ) ;
// Extract the probabilities from PNet response
std : : vector < cv : : Mat_ < float > > corrections_heatmap ( pnet_out . begin ( ) + 2 , pnet_out . end ( ) ) ;
2017-08-10 15:30:35 -04:00
2017-08-11 14:56:45 -04:00
// Grab the detections
2017-08-14 16:23:44 +01:00
vector < cv : : Rect_ < float > > proposal_boxes ;
vector < float > scores ;
vector < cv : : Rect_ < float > > proposal_corrections ;
generate_bounding_boxes ( proposal_boxes , scores , proposal_corrections , prob_heatmap , corrections_heatmap , scale , t1 , face_support ) ;
2017-08-17 14:17:20 +01:00
proposal_boxes_cross_scale [ i ] = proposal_boxes ;
scores_cross_scale [ i ] = scores ;
proposal_corrections_cross_scale [ i ] = proposal_corrections ;
}
2017-08-23 20:00:18 +01:00
//});
2017-08-11 14:56:45 -04:00
2017-08-17 14:17:20 +01:00
// Perform non-maximum supression on proposals accross scales and combine them
for ( int i = 0 ; i < num_scales ; + + i )
{
vector < int > to_keep = non_maximum_supression ( proposal_boxes_cross_scale [ i ] , scores_cross_scale [ i ] , 0.5 , false ) ;
select_subset ( to_keep , proposal_boxes_cross_scale [ i ] , scores_cross_scale [ i ] , proposal_corrections_cross_scale [ i ] ) ;
proposal_boxes_all . insert ( proposal_boxes_all . end ( ) , proposal_boxes_cross_scale [ i ] . begin ( ) , proposal_boxes_cross_scale [ i ] . end ( ) ) ;
scores_all . insert ( scores_all . end ( ) , scores_cross_scale [ i ] . begin ( ) , scores_cross_scale [ i ] . end ( ) ) ;
proposal_corrections_all . insert ( proposal_corrections_all . end ( ) , proposal_corrections_cross_scale [ i ] . begin ( ) , proposal_corrections_cross_scale [ i ] . end ( ) ) ;
2017-08-09 16:16:31 -04:00
}
2017-08-14 16:23:44 +01:00
// Preparation for RNet step
// Non maximum supression accross bounding boxes, and their offset correction
2017-08-15 11:44:05 +01:00
vector < int > to_keep = non_maximum_supression ( proposal_boxes_all , scores_all , 0.7 , false ) ;
2017-08-14 16:23:44 +01:00
select_subset ( to_keep , proposal_boxes_all , scores_all , proposal_corrections_all ) ;
2017-08-14 22:07:21 +01:00
apply_correction ( proposal_boxes_all , proposal_corrections_all , false ) ;
2017-08-14 16:23:44 +01:00
2017-08-14 22:07:21 +01:00
// Convert to rectangles and round
rectify ( proposal_boxes_all ) ;
2017-08-14 16:23:44 +01:00
2017-08-14 22:07:21 +01:00
// Creating proposal images from previous step detections
2017-08-17 14:17:20 +01:00
vector < bool > above_thresh ( proposal_boxes_all . size ( ) ) ;
2017-08-23 20:00:18 +01:00
//tbb::parallel_for(0, (int)proposal_boxes_all.size(), [&](int k) {
for ( size_t k = 0 ; k < proposal_boxes_all . size ( ) ; + + k )
2017-08-14 22:07:21 +01:00
{
float width_target = proposal_boxes_all [ k ] . width + 1 ;
float height_target = proposal_boxes_all [ k ] . height + 1 ;
// Work out the start and end indices in the original image
int start_x_in = cv : : max ( ( int ) ( proposal_boxes_all [ k ] . x - 1 ) , 0 ) ;
int start_y_in = cv : : max ( ( int ) ( proposal_boxes_all [ k ] . y - 1 ) , 0 ) ;
int end_x_in = cv : : min ( ( int ) ( proposal_boxes_all [ k ] . x + width_target - 1 ) , width_orig ) ;
int end_y_in = cv : : min ( ( int ) ( proposal_boxes_all [ k ] . y + height_target - 1 ) , height_orig ) ;
// Work out the start and end indices in the target image
int start_x_out = cv : : max ( ( int ) ( - proposal_boxes_all [ k ] . x + 1 ) , 0 ) ;
int start_y_out = cv : : max ( ( int ) ( - proposal_boxes_all [ k ] . y + 1 ) , 0 ) ;
int end_x_out = cv : : min ( width_target - ( proposal_boxes_all [ k ] . x + proposal_boxes_all [ k ] . width - width_orig ) , width_target ) ;
int end_y_out = cv : : min ( height_target - ( proposal_boxes_all [ k ] . y + proposal_boxes_all [ k ] . height - height_orig ) , height_target ) ;
2017-08-15 21:53:25 +01:00
cv : : Mat tmp ( height_target , width_target , CV_32FC3 , cv : : Scalar ( 0.0f , 0.0f , 0.0f ) ) ;
2017-08-14 22:07:21 +01:00
img_float ( cv : : Rect ( start_x_in , start_y_in , end_x_in - start_x_in , end_y_in - start_y_in ) ) . copyTo (
tmp ( cv : : Rect ( start_x_out , start_y_out , end_x_out - start_x_out , end_y_out - start_y_out ) ) ) ;
cv : : Mat prop_img ;
cv : : resize ( tmp , prop_img , cv : : Size ( 24 , 24 ) ) ;
prop_img = ( prop_img - 127.5 ) * 0.0078125 ;
2017-08-15 08:55:37 +01:00
// Perform RNet on the proposal image
2017-08-17 11:40:57 +01:00
std : : vector < cv : : Mat_ < float > > rnet_out = RNet . Inference ( prop_img , true ) ;
2017-08-15 10:13:08 +01:00
float prob = 1.0 / ( 1.0 + cv : : exp ( rnet_out [ 0 ] . at < float > ( 0 ) - rnet_out [ 0 ] . at < float > ( 1 ) ) ) ;
scores_all [ k ] = prob ;
proposal_corrections_all [ k ] . x = rnet_out [ 0 ] . at < float > ( 2 ) ;
proposal_corrections_all [ k ] . y = rnet_out [ 0 ] . at < float > ( 3 ) ;
proposal_corrections_all [ k ] . width = rnet_out [ 0 ] . at < float > ( 4 ) ;
proposal_corrections_all [ k ] . height = rnet_out [ 0 ] . at < float > ( 5 ) ;
if ( prob > = t2 )
{
2017-08-17 14:17:20 +01:00
above_thresh [ k ] = true ;
}
else
{
above_thresh [ k ] = false ;
2017-08-15 10:13:08 +01:00
}
2017-08-15 21:53:25 +01:00
2017-08-15 10:13:08 +01:00
}
2017-08-23 20:00:18 +01:00
//});
2017-08-17 14:17:20 +01:00
to_keep . clear ( ) ;
for ( size_t i = 0 ; i < above_thresh . size ( ) ; + + i )
{
if ( above_thresh [ i ] )
{
to_keep . push_back ( i ) ;
}
}
2017-08-15 10:13:08 +01:00
// Pick only the bounding boxes above the threshold
select_subset ( to_keep , proposal_boxes_all , scores_all , proposal_corrections_all ) ;
// Non maximum supression accross bounding boxes, and their offset correction
2017-08-15 11:44:05 +01:00
to_keep = non_maximum_supression ( proposal_boxes_all , scores_all , 0.7 , false ) ;
2017-08-15 10:13:08 +01:00
select_subset ( to_keep , proposal_boxes_all , scores_all , proposal_corrections_all ) ;
apply_correction ( proposal_boxes_all , proposal_corrections_all , false ) ;
// Convert to rectangles and round
rectify ( proposal_boxes_all ) ;
// Preparing for the ONet stage
2017-08-17 14:17:20 +01:00
above_thresh . clear ( ) ;
above_thresh . resize ( proposal_boxes_all . size ( ) ) ;
2017-08-23 20:00:18 +01:00
//tbb::parallel_for(0, (int)proposal_boxes_all.size(), [&](int k) {
for ( size_t k = 0 ; k < proposal_boxes_all . size ( ) ; + + k )
2017-08-15 10:13:08 +01:00
{
float width_target = proposal_boxes_all [ k ] . width + 1 ;
float height_target = proposal_boxes_all [ k ] . height + 1 ;
// Work out the start and end indices in the original image
int start_x_in = cv : : max ( ( int ) ( proposal_boxes_all [ k ] . x - 1 ) , 0 ) ;
int start_y_in = cv : : max ( ( int ) ( proposal_boxes_all [ k ] . y - 1 ) , 0 ) ;
int end_x_in = cv : : min ( ( int ) ( proposal_boxes_all [ k ] . x + width_target - 1 ) , width_orig ) ;
int end_y_in = cv : : min ( ( int ) ( proposal_boxes_all [ k ] . y + height_target - 1 ) , height_orig ) ;
// Work out the start and end indices in the target image
int start_x_out = cv : : max ( ( int ) ( - proposal_boxes_all [ k ] . x + 1 ) , 0 ) ;
int start_y_out = cv : : max ( ( int ) ( - proposal_boxes_all [ k ] . y + 1 ) , 0 ) ;
int end_x_out = cv : : min ( width_target - ( proposal_boxes_all [ k ] . x + proposal_boxes_all [ k ] . width - width_orig ) , width_target ) ;
int end_y_out = cv : : min ( height_target - ( proposal_boxes_all [ k ] . y + proposal_boxes_all [ k ] . height - height_orig ) , height_target ) ;
2017-08-15 21:53:25 +01:00
cv : : Mat tmp ( height_target , width_target , CV_32FC3 , cv : : Scalar ( 0.0f , 0.0f , 0.0f ) ) ;
2017-08-15 10:13:08 +01:00
img_float ( cv : : Rect ( start_x_in , start_y_in , end_x_in - start_x_in , end_y_in - start_y_in ) ) . copyTo (
tmp ( cv : : Rect ( start_x_out , start_y_out , end_x_out - start_x_out , end_y_out - start_y_out ) ) ) ;
cv : : Mat prop_img ;
cv : : resize ( tmp , prop_img , cv : : Size ( 48 , 48 ) ) ;
prop_img = ( prop_img - 127.5 ) * 0.0078125 ;
// Perform RNet on the proposal image
2017-08-17 11:40:57 +01:00
std : : vector < cv : : Mat_ < float > > onet_out = ONet . Inference ( prop_img , true ) ;
2017-08-15 10:13:08 +01:00
float prob = 1.0 / ( 1.0 + cv : : exp ( onet_out [ 0 ] . at < float > ( 0 ) - onet_out [ 0 ] . at < float > ( 1 ) ) ) ;
scores_all [ k ] = prob ;
proposal_corrections_all [ k ] . x = onet_out [ 0 ] . at < float > ( 2 ) ;
proposal_corrections_all [ k ] . y = onet_out [ 0 ] . at < float > ( 3 ) ;
proposal_corrections_all [ k ] . width = onet_out [ 0 ] . at < float > ( 4 ) ;
proposal_corrections_all [ k ] . height = onet_out [ 0 ] . at < float > ( 5 ) ;
if ( prob > = t3 )
{
2017-08-17 14:17:20 +01:00
above_thresh [ k ] = true ;
}
else
{
above_thresh [ k ] = false ;
}
}
2017-08-23 20:00:18 +01:00
//});
2017-08-17 14:17:20 +01:00
to_keep . clear ( ) ;
for ( size_t i = 0 ; i < above_thresh . size ( ) ; + + i )
{
if ( above_thresh [ i ] )
{
to_keep . push_back ( i ) ;
2017-08-15 10:13:08 +01:00
}
2017-08-14 22:07:21 +01:00
}
2017-08-14 16:23:44 +01:00
2017-08-15 10:13:08 +01:00
// Pick only the bounding boxes above the threshold
select_subset ( to_keep , proposal_boxes_all , scores_all , proposal_corrections_all ) ;
apply_correction ( proposal_boxes_all , proposal_corrections_all , true ) ;
// Non maximum supression accross bounding boxes, and their offset correction
2017-08-15 11:44:05 +01:00
to_keep = non_maximum_supression ( proposal_boxes_all , scores_all , 0.7 , true ) ;
2017-08-15 10:13:08 +01:00
select_subset ( to_keep , proposal_boxes_all , scores_all , proposal_corrections_all ) ;
2017-08-15 11:44:05 +01:00
// Correct the box to expectation to be tight around facial landmarks
for ( size_t k = 0 ; k < proposal_boxes_all . size ( ) ; + + k )
{
proposal_boxes_all [ k ] . x = proposal_boxes_all [ k ] . width * - 0.0075 + proposal_boxes_all [ k ] . x ;
proposal_boxes_all [ k ] . y = proposal_boxes_all [ k ] . height * 0.2459 + proposal_boxes_all [ k ] . y ;
proposal_boxes_all [ k ] . width = 1.0323 * proposal_boxes_all [ k ] . width ;
proposal_boxes_all [ k ] . height = 0.7751 * proposal_boxes_all [ k ] . height ;
2017-08-15 10:13:08 +01:00
2017-08-15 11:44:05 +01:00
o_regions . push_back ( cv : : Rect_ < double > ( proposal_boxes_all [ k ] . x , proposal_boxes_all [ k ] . y , proposal_boxes_all [ k ] . width , proposal_boxes_all [ k ] . height ) ) ;
o_confidences . push_back ( scores_all [ k ] ) ;
2017-08-15 10:13:08 +01:00
2017-08-15 17:57:25 +01:00
}
2017-08-09 16:16:31 -04:00
2017-08-15 17:57:25 +01:00
if ( o_regions . size ( ) > 0 )
{
return true ;
}
else
{
return false ;
}
2017-08-09 16:16:31 -04:00
}
// 2017-08-09 11:00:38 -04:00  (stray VCS blame line, commented out)