From 4d4f0e47e158d9137f2a6966e31fb2ab1d2a50a9 Mon Sep 17 00:00:00 2001
From: Tadas Baltrusaitis
Date: Tue, 29 Aug 2017 08:08:20 +0100
Subject: [PATCH] Another speedup.

---
 lib/local/LandmarkDetector/src/CEN_patch_expert.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
index 743436bc..498c4cbc 100644
--- a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
+++ b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
@@ -459,18 +459,20 @@ void CEN_patch_expert::ResponseSparse(const cv::Mat_<float> &area_of_interest, c
 	// Mean and standard deviation normalization, TODO can combine with above
 	contrastNorm(input_col, response);
 
-	cv::Mat_<float> response_blas = response.clone();
+	//cv::Mat_<float> response_blas;// = response.clone();
 
 	for (size_t layer = 0; layer < activation_function.size(); ++layer)
 	{
 
 		// We are performing response = response * weights[layers], but in OpenBLAS as that is significantly quicker than OpenCV
-		response_blas = response.clone();
-		float* m1 = (float*)response_blas.data;
+		// TODO is the cloning needed
+		//response_blas = response.clone();
+
+		float* m1 = (float*)response.data;
 		float* m2 = (float*)weights[layer].data;
 
-		cv::Mat_<float> resp_blas(response_blas.rows, weights[layer].cols);
+		cv::Mat_<float> resp_blas(response.rows, weights[layer].cols);
 		float* m3 = (float*)resp_blas.data;
 
 		// Perform matrix multiplication