From f6c29288f0bf7294df5b4f13d2d473e63e1b69cc Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Mon, 7 May 2018 09:11:58 +0100 Subject: [PATCH] Fix to the OpenBLAS calls for not visual studio compilers. --- .../dlib/include/dlib/matrix/matrix_trsm.h | 8 +------- .../LandmarkDetector/src/CCNF_patch_expert.cpp | 8 ++++---- .../LandmarkDetector/src/CEN_patch_expert.cpp | 16 ++++++++-------- lib/local/LandmarkDetector/src/CNN_utils.cpp | 4 ++-- .../src/LandmarkDetectorModel.cpp | 4 ++-- lib/local/LandmarkDetector/src/PDM.cpp | 8 ++++---- 6 files changed, 21 insertions(+), 27 deletions(-) diff --git a/lib/3rdParty/dlib/include/dlib/matrix/matrix_trsm.h b/lib/3rdParty/dlib/include/dlib/matrix/matrix_trsm.h index 25fe70a6..1576c0d9 100644 --- a/lib/3rdParty/dlib/include/dlib/matrix/matrix_trsm.h +++ b/lib/3rdParty/dlib/include/dlib/matrix/matrix_trsm.h @@ -9,10 +9,7 @@ namespace dlib { namespace blas_bindings { - #ifndef __VECLIB__ - #ifdef DLIB_USE_BLAS - #ifndef CBLAS_H - extern "C" + extern "C" { void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, @@ -28,9 +25,6 @@ namespace dlib } // ------------------------------------------------------------------------------------ - #endif // if not CBLAS_H - #endif // if DLIB_USE_BLAS - #endif // if __VECLIB__ /* Purpose */ /* ======= */ diff --git a/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp b/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp index b875a129..e0da3d44 100644 --- a/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp +++ b/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp @@ -455,8 +455,8 @@ void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_ &area_of_interest // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 0.0; - char *not = "N"; - sgemm_(not, not, &normalized_input.cols, &weight_matrix.rows, &weight_matrix.cols, &alpha1, (float*)normalized_input.data, &normalized_input.cols, (float*)weight_matrix.data, &weight_matrix.cols, &beta1, (float*)neuron_resp_full.data, &normalized_input.cols); + char *nT = "N"; + sgemm_(nT, nT, &normalized_input.cols, &weight_matrix.rows, &weight_matrix.cols, &alpha1, (float*)normalized_input.data, &normalized_input.cols, (float*)weight_matrix.data, &weight_matrix.cols, &beta1, (float*)neuron_resp_full.data, &normalized_input.cols); // Above is a faster version of this //cv::Mat_ neuron_resp_full = this->weight_matrix * normalized_input; @@ -500,8 +500,8 @@ void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_ &area_of_interest // Perform matrix multiplication in OpenBLAS (fortran call) alpha1 = 1.0; beta1 = 0.0; - not = "N"; - sgemm_(not, not, &resp_vec_f.cols, &Sigmas[s_to_use].rows, &Sigmas[s_to_use].cols, &alpha1, (float*)resp_vec_f.data, &resp_vec_f.cols, (float*)Sigmas[s_to_use].data, &Sigmas[s_to_use].cols, &beta1, (float*)out.data, &resp_vec_f.cols); + nT = "N"; + sgemm_(nT, nT, &resp_vec_f.cols, &Sigmas[s_to_use].rows, &Sigmas[s_to_use].cols, &alpha1, (float*)resp_vec_f.data, &resp_vec_f.cols, (float*)Sigmas[s_to_use].data, &Sigmas[s_to_use].cols, &beta1, (float*)out.data, &resp_vec_f.cols); // Above is a faster version of this //cv::Mat out = Sigmas[s_to_use] * resp_vec_f; diff --git a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp index e961dcb1..2d0db37f 100644 --- a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp +++ b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp @@ -246,8 +246,8 @@ void CEN_patch_expert::Response(const cv::Mat_ &area_of_interest, cv::Mat // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 0.0; - char *not = "N"; - sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); + char *nT = "N"; + sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); // The above is a faster version of this, by calling the fortran version directly //cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols); @@ -529,8 +529,8 @@ void CEN_patch_expert::ResponseSparse(const cv::Mat_ &area_of_interest, c // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 0.0; - char *not = "N"; - sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); + char *nT = "N"; + sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); // The above is a faster version of this, by calling the fortran version directly //cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols); @@ -610,8 +610,8 @@ void CEN_patch_expert::ResponseSparse_mirror(const cv::Mat_ &area_of_inte // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 0.0; - char *not = "N"; - sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); + char *nT = "N"; + sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); // The above is a faster version of this, by calling the fortran version directly //cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols); @@ -695,8 +695,8 @@ void CEN_patch_expert::ResponseSparse_mirror_joint(const cv::Mat_ &area_o // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 0.0; - char *not = "N"; - sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); + char *nT = "N"; + sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols); // The above is a faster version of this, by calling the fortran version directly //cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols); diff --git a/lib/local/LandmarkDetector/src/CNN_utils.cpp b/lib/local/LandmarkDetector/src/CNN_utils.cpp index c81cdab2..0d841a28 100644 --- a/lib/local/LandmarkDetector/src/CNN_utils.cpp +++ b/lib/local/LandmarkDetector/src/CNN_utils.cpp @@ -563,8 +563,8 @@ namespace LandmarkDetector float alpha = 1.0f; float beta = 0.0f; // Call fortran directly (faster) - char *not = "N"; - sgemm_(not, not, &m2_cols, &num_rows, &pre_alloc_im2col.cols, &alpha, m2, &m2_cols, m1, &pre_alloc_im2col.cols, &beta, m3, &m2_cols); + char *nT = "N"; + sgemm_(nT, nT, &m2_cols, &num_rows, &pre_alloc_im2col.cols, &alpha, m2, &m2_cols, m1, &pre_alloc_im2col.cols, &beta, m3, &m2_cols); // Above is equivalent to out = pre_alloc_im2col * weight_matrix; diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp index a3216bd6..148214b5 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp @@ -1115,8 +1115,8 @@ float CLNF::NU_RLMS(cv::Vec6f& final_global, cv::Mat_& final_local, const // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 1.0; - char *not = "N"; - sgemm_(not, not, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols); + char *nT = "N"; + sgemm_(nT, nT, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols); // Above is a fast (but ugly) version of // cv::Mat_ Hessian = J_w_t * J + regTerm; diff --git a/lib/local/LandmarkDetector/src/PDM.cpp b/lib/local/LandmarkDetector/src/PDM.cpp index 148c755d..188f6598 100644 --- a/lib/local/LandmarkDetector/src/PDM.cpp +++ b/lib/local/LandmarkDetector/src/PDM.cpp @@ -150,8 +150,8 @@ void PDM::CalcShape3D(cv::Mat_& out_shape, const cv::Mat_& p_local int p_local_cols = p_local.cols; int princ_comp_rows = princ_comp.rows; int princ_comp_cols = princ_comp.cols; - char *not = "N"; - sgemm_(not, not, &p_local_cols, &princ_comp_rows, &princ_comp_cols, &alpha1, (float*)p_local.data, &p_local_cols, (float*)princ_comp.data, &princ_comp_cols, &beta1, (float*)out_shape.data, &p_local_cols); + char *nT = "N"; + sgemm_(nT, nT, &p_local_cols, &princ_comp_rows, &princ_comp_cols, &alpha1, (float*)p_local.data, &p_local_cols, (float*)princ_comp.data, &princ_comp_cols, &beta1, (float*)out_shape.data, &p_local_cols); // Above is a fast (but ugly) version of // out_shape = mean_shape + princ_comp * p_local; @@ -639,8 +639,8 @@ void PDM::CalcParams(cv::Vec6f& out_params_global, cv::Mat_& out_params_l // Perform matrix multiplication in OpenBLAS (fortran call) float alpha1 = 1.0; float beta1 = 1.0; - char *not = "N"; - sgemm_(not, not, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols); + char *nT = "N"; + sgemm_(nT, nT, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols); // Above is a fast (but ugly) version of // cv::Mat_ Hessian2 = J_w_t * J + regularisations;