From 441777c9edff853a5c3da8db77527713d52f6289 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Thu, 17 Aug 2017 12:35:11 +0100 Subject: [PATCH] Code cleanup and rearrangement. --- .../include/FaceDetectorMTCNN.h | 16 ++-- .../src/FaceDetectorMTCNN.cpp | 85 ++++++------------- 2 files changed, 37 insertions(+), 64 deletions(-) diff --git a/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h b/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h index 8ec1c20b..441e0cf9 100644 --- a/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h +++ b/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h @@ -87,26 +87,28 @@ namespace LandmarkDetector // Reading in the model void Read(string location); - // Precomputations for faster convolution - vector > > > > cnn_convolutional_layers_dft; - vector > > > > cnn_convolutional_layers_dft2; + // Clearing precomputed DFTs + void ClearPrecomp(); private: //========================================== // Convolutional Neural Network // CNN layers - // layer -> input maps -> kernels - // Bit ugly with so much nesting, but oh well - vector > > > cnn_convolutional_layers; + // Layer -> Weight matrix vector > cnn_convolutional_layers_weights; - vector > > > cnn_convolutional_layers_rearr; + // Layer -> kernel -> input maps + vector > > > cnn_convolutional_layers; vector > cnn_convolutional_layers_bias; + // Layer matrix + bas vector > cnn_fully_connected_layers_weights; vector > cnn_fully_connected_layers_biases; vector > cnn_prelu_layer_weights; vector > cnn_max_pooling_layers; + // Precomputations for faster convolution + vector > > > > cnn_convolutional_layers_dft; + // CNN: 0 - convolutional, 1 - max pooling, 2 - fully connected, 3 - prelu, 4 - sigmoid vector cnn_layer_types; }; diff --git a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp index ded812bc..91b6b56a 100644 --- a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp +++ b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp @@ -94,6 +94,14 @@ FaceDetectorMTCNN::FaceDetectorMTCNN(const FaceDetectorMTCNN& other) : PNet(othe CNN::CNN(const CNN& other) : cnn_layer_types(other.cnn_layer_types), cnn_max_pooling_layers(other.cnn_max_pooling_layers), cnn_convolutional_layers_bias(other.cnn_convolutional_layers_bias) { + + this->cnn_convolutional_layers_weights.resize(other.cnn_convolutional_layers_weights.size()); + for (size_t l = 0; l < other.cnn_convolutional_layers_weights.size(); ++l) + { + // Make sure the matrix is copied. + this->cnn_convolutional_layers_weights[l] = other.cnn_convolutional_layers_weights[l].clone(); + } + this->cnn_convolutional_layers.resize(other.cnn_convolutional_layers.size()); for (size_t l = 0; l < other.cnn_convolutional_layers.size(); ++l) { @@ -111,30 +119,6 @@ CNN::CNN(const CNN& other) : cnn_layer_types(other.cnn_layer_types), cnn_max_poo } } - this->cnn_convolutional_layers_weights.resize(other.cnn_convolutional_layers_weights.size()); - for (size_t l = 0; l < other.cnn_convolutional_layers_weights.size(); ++l) - { - // Make sure the matrix is copied. - this->cnn_convolutional_layers_weights[l] = other.cnn_convolutional_layers_weights[l].clone(); - } - - this->cnn_convolutional_layers_rearr.resize(other.cnn_convolutional_layers_rearr.size()); - for (size_t l = 0; l < other.cnn_convolutional_layers_rearr.size(); ++l) - { - this->cnn_convolutional_layers_rearr[l].resize(other.cnn_convolutional_layers_rearr[l].size()); - - for (size_t i = 0; i < other.cnn_convolutional_layers_rearr[l].size(); ++i) - { - this->cnn_convolutional_layers_rearr[l][i].resize(other.cnn_convolutional_layers_rearr[l][i].size()); - - for (size_t k = 0; k < other.cnn_convolutional_layers_rearr[l][i].size(); ++k) - { - // Make sure the matrix is copied. - this->cnn_convolutional_layers_rearr[l][i][k] = other.cnn_convolutional_layers_rearr[l][i][k].clone(); - } - } - } - this->cnn_fully_connected_layers_weights.resize(other.cnn_fully_connected_layers_weights.size()); for (size_t l = 0; l < other.cnn_fully_connected_layers_weights.size(); ++l) @@ -642,11 +626,11 @@ std::vector> CNN::Inference(const cv::Mat& input_img, bool direc // Either perform direct convolution through matrix multiplication or use an FFT optimized version, which one is optimal depends on the kernel and input sizes if (direct) { - convolution_direct(outputs, input_maps, cnn_convolutional_layers_weights[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_rearr[cnn_layer][0][0].rows, cnn_convolutional_layers_rearr[cnn_layer][0][0].cols); + convolution_direct(outputs, input_maps, cnn_convolutional_layers_weights[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers[cnn_layer][0][0].rows, cnn_convolutional_layers[cnn_layer][0][0].cols); } else { - convolution_fft2(outputs, input_maps, cnn_convolutional_layers_rearr[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft2[cnn_layer]); + convolution_fft2(outputs, input_maps, cnn_convolutional_layers[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft[cnn_layer]); } //vector > outs; //convolution_fft(outs, input_maps, cnn_convolutional_layers[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft[cnn_layer]); @@ -719,6 +703,17 @@ void ReadMatBin(std::ifstream& stream, cv::Mat &output_mat) } +void CNN::ClearPrecomp() +{ + for (size_t k1 = 0; k1 < cnn_convolutional_layers_dft.size(); ++k1) + { + for (size_t k2 = 0; k2 < cnn_convolutional_layers_dft[k1].size(); ++k2) + { + cnn_convolutional_layers_dft[k1][k2].clear(); + } + } +} + void CNN::Read(string location) { ifstream cnn_stream(location, ios::in | ios::binary); @@ -753,10 +748,8 @@ void CNN::Read(string location) cnn_stream.read((char*)&num_kernels, 4); vector > > kernels; - vector > > > kernel_dfts; kernels.resize(num_in_maps); - kernel_dfts.resize(num_in_maps); vector biases; for (int k = 0; k < num_kernels; ++k) @@ -772,7 +765,6 @@ void CNN::Read(string location) for (int in = 0; in < num_in_maps; ++in) { kernels[in].resize(num_kernels); - kernel_dfts[in].resize(num_kernels); // For every kernel on that input map for (int k = 0; k < num_kernels; ++k) @@ -782,14 +774,6 @@ void CNN::Read(string location) } } - cnn_convolutional_layers.push_back(kernels); - cnn_convolutional_layers_dft.push_back(kernel_dfts); - - - vector > > > cnn_convolutional_layers_dft2_curr_layer; - cnn_convolutional_layers_dft2_curr_layer.resize(num_kernels); - cnn_convolutional_layers_dft2.push_back(cnn_convolutional_layers_dft2_curr_layer); - // Rearrange the kernels for faster inference with FFT vector > > kernels_rearr; kernels_rearr.resize(num_kernels); @@ -803,7 +787,12 @@ void CNN::Read(string location) } } - cnn_convolutional_layers_rearr.push_back(kernels_rearr); + cnn_convolutional_layers.push_back(kernels_rearr); + + // Place-holders for DFT precomputation + vector > > > cnn_convolutional_layers_dft_curr_layer; + cnn_convolutional_layers_dft_curr_layer.resize(num_kernels); + cnn_convolutional_layers_dft.push_back(cnn_convolutional_layers_dft_curr_layer); // Rearrange the flattened kernels into weight matrices for direct convolution computation cv::Mat_ weight_matrix(num_in_maps * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, num_kernels); @@ -1126,25 +1115,7 @@ bool FaceDetectorMTCNN::DetectFaces(vector >& o_regions, const std::vector > pnet_out = PNet.Inference(normalised_img, true); // Clear the precomputations, as the image sizes will be different (TODO could be useful for videos) - for (size_t k1 = 0; k1 < PNet.cnn_convolutional_layers_dft.size(); ++k1) - { - for (size_t k2 = 0; k2 < PNet.cnn_convolutional_layers_dft[k1].size(); ++k2) - { - for (size_t k3 = 0; k3 < PNet.cnn_convolutional_layers_dft[k1][k2].size(); ++k3) - { - PNet.cnn_convolutional_layers_dft[k1][k2][k3].second = cv::Mat_(0, 0, 0.0); - } - } - } - - for (size_t k1 = 0; k1 < PNet.cnn_convolutional_layers_dft2.size(); ++k1) - { - for (size_t k2 = 0; k2 < PNet.cnn_convolutional_layers_dft2[k1].size(); ++k2) - { - PNet.cnn_convolutional_layers_dft2[k1][k2].clear(); - } - } - + PNet.ClearPrecomp(); // Extract the probabilities from PNet response cv::Mat_ prob_heatmap;