From 92dde3b80f077f106ff6a81d173056d6a48e7c22 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Thu, 24 Aug 2017 17:00:32 +0100 Subject: [PATCH] Speeding up the fully connected layer through better data layout --- .../src/FaceDetectorMTCNN.cpp | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp index 516d83f8..9da5c85f 100644 --- a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp +++ b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp @@ -205,52 +205,58 @@ void fully_connected(std::vector >& outputs, const std::vector input_concat = input_maps[0].t(); - input_concat = input_concat.reshape(0, 1); + cv::Mat_ input_concat(input_maps.size(), input_maps[0].cols * input_maps[0].rows); - for (size_t in = 1; in < input_maps.size(); ++in) + for (size_t in = 0; in < input_maps.size(); ++in) { - cv::Mat_ add = input_maps[in].t(); + cv::Mat_ add = input_maps[in]; + + // Reshape if all of the data will be flattened + if (input_concat.rows != weights.cols) + { + add = add.t(); + } + add = add.reshape(0, 1); - cv::vconcat(input_concat, add, input_concat); + add.copyTo(input_concat.row(in)); } // Treat the input as separate feature maps - if (input_concat.rows == weights.rows) + if (input_concat.rows == weights.cols) { - input_concat = input_concat.t() * weights; + input_concat = weights * input_concat; // Add biases for (size_t k = 0; k < biases.rows; ++k) { - input_concat.col(k) = input_concat.col(k) + biases.at(k); + input_concat.row(k) = input_concat.row(k) + biases.at(k); } outputs.clear(); // Resize and add as output for (size_t k = 0; k < biases.rows; ++k) { - cv::Mat_ reshaped = input_concat.col(k).clone(); - reshaped = reshaped.reshape(1, orig_size.width).t(); + cv::Mat_ reshaped = input_concat.row(k).clone(); + reshaped = reshaped.reshape(1, orig_size.height); outputs.push_back(reshaped); } } else { // Flatten the input - input_concat = input_concat.reshape(0, 1); + input_concat = input_concat.reshape(0, input_concat.rows * input_concat.cols); - input_concat = input_concat * weights + biases.t(); + input_concat = weights * input_concat + biases; outputs.clear(); - outputs.push_back(input_concat.t()); + outputs.push_back(input_concat); } } else { - cv::Mat out = input_maps[0].t() * weights + biases.t(); + cv::Mat out = weights * input_maps[0] + biases; outputs.clear(); - outputs.push_back(out); + outputs.push_back(out.t()); } } @@ -892,7 +898,7 @@ void CNN::Read(const string& location) // Fully connected layer cv::Mat_ weights; ReadMatBin(cnn_stream, weights); - cnn_fully_connected_layers_weights.push_back(weights); + cnn_fully_connected_layers_weights.push_back(weights.t()); } else if (layer_type == 3)