From 53a1881ede7ff2c5f416370199779b4665f3d2f8 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Wed, 30 Aug 2017 15:35:03 +0100 Subject: [PATCH] Some cleanup. --- .../LandmarkDetector/src/CEN_patch_expert.cpp | 67 ++++++++++++------- .../LandmarkDetector/src/Patch_experts.cpp | 5 ++ 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp index 65ee0e08..7ddb6f1f 100644 --- a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp +++ b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp @@ -230,52 +230,69 @@ void CEN_patch_expert::Response(const cv::Mat_ &area_of_interest, cv::Mat // Mean and standard deviation normalization contrastNorm(input_col, response); - cv::Mat_ response_blas = response.clone(); + response = response.t(); for (size_t layer = 0; layer < activation_function.size(); ++layer) { - // We are performing response = response * weights[layers], but in OpenBLAS as that is significantly quicker than OpenCV - response_blas = response.clone(); + // We are performing response = weights[layers] * response(t), but in OpenBLAS as that is significantly quicker than OpenCV + cv::Mat_ resp = response; + float* m1 = (float*)resp.data; + cv::Mat_ weight = weights[layer]; + float* m2 = (float*)weight.data; - float* m1 = (float*)response_blas.data; - float* m2 = (float*)weights[layer].data; - - cv::Mat_ resp_blas(response_blas.rows, weights[layer].cols, 1.0); + cv::Mat_ resp_blas(weight.rows, resp.cols); float* m3 = (float*)resp_blas.data; - cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, weights[layer].cols, response.rows, response.cols, 1, m2, weights[layer].cols, m1, response.cols, 0.0, m3, weights[layer].cols); + // Perform matrix multiplication in OpenBLAS (fortran call) + float alpha1 = 1.0; + float beta1 = 0.0; + sgemm_("N", "N", &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); - 
// TODO check speed - float alpha = 1.0; - float beta = 0.0; - sgemm_("N", "N", &weights[layer].cols, &response.rows, &response.cols, &alpha, m2, &weights[layer].cols, m1, &response.cols, &beta, m3, &weights[layer].cols); - - // TODO correct this with weight transpose + // The above is a faster version of this, by calling the fortran version directly + //cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols); + // Adding the bias (bit ugly, but the fastest way to do this) response = resp_blas; - // TODO bias could be pre-allocated to the window size so that addition could be quicker - for (size_t y = 0; y < response.rows; ++y) + float* data = (float*)response.data; + size_t height = response.rows; + size_t width = response.cols; + float* data_b = (float*)biases[layer].data; + for (size_t y = 0; y < height; ++y) { - response(cv::Rect(0, y, response.cols, 1)) = response(cv::Rect(0, y, response.cols, 1)) + biases[layer]; - } - - // Perform activation - if (activation_function[layer] == 0) // Sigmoid - { - for (cv::MatIterator_ p = response.begin(); p != response.end(); p++) + float bias = data_b[y]; + for (size_t x = 0; x < width; ++x) { - *p = 1.0 / (1.0 + exp(-(*p))); + float in = *data + bias; + *data++ = in; } } - else if(activation_function[layer] == 2)// ReLU + + // Perform activation (the bias has already been added in the loop above) + if (activation_function[layer] == 0) // Sigmoid + { + + size_t resp_size = response.rows * response.cols; + + // Iterate over the data directly + float* data = (float*)response.data; + + for (size_t counter = 0; counter < resp_size; ++counter) + { + float in = *data; + *data++ = 1.0 / (1.0 + exp(-(in))); + } + + } + else if (activation_function[layer] == 2)// ReLU { cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO); } } + response = response.t(); response = response.reshape(1, response_height); response = response.t(); diff --git 
a/lib/local/LandmarkDetector/src/Patch_experts.cpp b/lib/local/LandmarkDetector/src/Patch_experts.cpp index e9bb2988..730c9f13 100644 --- a/lib/local/LandmarkDetector/src/Patch_experts.cpp +++ b/lib/local/LandmarkDetector/src/Patch_experts.cpp @@ -226,7 +226,12 @@ void Patch_experts::Response(vector >& patch_expert_responses, c // Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far) if (!cen_expert_intensity.empty()) { + cen_expert_intensity[scale][view_id][i].ResponseSparse(area_of_interest, patch_expert_responses[i], interp_mat); + + // A slower, but slightly more accurate version + //cen_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]); + } else if (!ccnf_expert_intensity.empty()) {