mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2025-12-30 13:02:30 +00:00
Fix the OpenBLAS calls for non-Visual Studio compilers.
This commit is contained in:
@@ -9,10 +9,7 @@ namespace dlib
|
||||
{
|
||||
namespace blas_bindings
|
||||
{
|
||||
#ifndef __VECLIB__
|
||||
#ifdef DLIB_USE_BLAS
|
||||
#ifndef CBLAS_H
|
||||
extern "C"
|
||||
extern "C"
|
||||
{
|
||||
void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
|
||||
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||
@@ -28,9 +25,6 @@ namespace dlib
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
#endif // if not CBLAS_H
|
||||
#endif // if DLIB_USE_BLAS
|
||||
#endif // if __VECLIB__
|
||||
/* Purpose */
|
||||
/* ======= */
|
||||
|
||||
|
||||
@@ -455,8 +455,8 @@ void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_<float> &area_of_interest
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &normalized_input.cols, &weight_matrix.rows, &weight_matrix.cols, &alpha1, (float*)normalized_input.data, &normalized_input.cols, (float*)weight_matrix.data, &weight_matrix.cols, &beta1, (float*)neuron_resp_full.data, &normalized_input.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &normalized_input.cols, &weight_matrix.rows, &weight_matrix.cols, &alpha1, (float*)normalized_input.data, &normalized_input.cols, (float*)weight_matrix.data, &weight_matrix.cols, &beta1, (float*)neuron_resp_full.data, &normalized_input.cols);
|
||||
|
||||
// Above is a faster version of this
|
||||
//cv::Mat_<float> neuron_resp_full = this->weight_matrix * normalized_input;
|
||||
@@ -500,8 +500,8 @@ void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_<float> &area_of_interest
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
alpha1 = 1.0;
|
||||
beta1 = 0.0;
|
||||
not = "N";
|
||||
sgemm_(not, not, &resp_vec_f.cols, &Sigmas[s_to_use].rows, &Sigmas[s_to_use].cols, &alpha1, (float*)resp_vec_f.data, &resp_vec_f.cols, (float*)Sigmas[s_to_use].data, &Sigmas[s_to_use].cols, &beta1, (float*)out.data, &resp_vec_f.cols);
|
||||
nT = "N";
|
||||
sgemm_(nT, nT, &resp_vec_f.cols, &Sigmas[s_to_use].rows, &Sigmas[s_to_use].cols, &alpha1, (float*)resp_vec_f.data, &resp_vec_f.cols, (float*)Sigmas[s_to_use].data, &Sigmas[s_to_use].cols, &beta1, (float*)out.data, &resp_vec_f.cols);
|
||||
|
||||
// Above is a faster version of this
|
||||
//cv::Mat out = Sigmas[s_to_use] * resp_vec_f;
|
||||
|
||||
@@ -246,8 +246,8 @@ void CEN_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
|
||||
// The above is a faster version of this, by calling the fortran version directly
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
|
||||
@@ -529,8 +529,8 @@ void CEN_patch_expert::ResponseSparse(const cv::Mat_<float> &area_of_interest, c
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
|
||||
// The above is a faster version of this, by calling the fortran version directly
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
|
||||
@@ -610,8 +610,8 @@ void CEN_patch_expert::ResponseSparse_mirror(const cv::Mat_<float> &area_of_inte
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
|
||||
// The above is a faster version of this, by calling the fortran version directly
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
|
||||
@@ -695,8 +695,8 @@ void CEN_patch_expert::ResponseSparse_mirror_joint(const cv::Mat_<float> &area_o
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
|
||||
// The above is a faster version of this, by calling the fortran version directly
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
|
||||
|
||||
@@ -563,8 +563,8 @@ namespace LandmarkDetector
|
||||
float alpha = 1.0f;
|
||||
float beta = 0.0f;
|
||||
// Call fortran directly (faster)
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &m2_cols, &num_rows, &pre_alloc_im2col.cols, &alpha, m2, &m2_cols, m1, &pre_alloc_im2col.cols, &beta, m3, &m2_cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &m2_cols, &num_rows, &pre_alloc_im2col.cols, &alpha, m2, &m2_cols, m1, &pre_alloc_im2col.cols, &beta, m3, &m2_cols);
|
||||
|
||||
// Above is equivalent to out = pre_alloc_im2col * weight_matrix;
|
||||
|
||||
|
||||
@@ -1115,8 +1115,8 @@ float CLNF::NU_RLMS(cv::Vec6f& final_global, cv::Mat_<float>& final_local, const
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 1.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols);
|
||||
|
||||
// Above is a fast (but ugly) version of
|
||||
// cv::Mat_<float> Hessian = J_w_t * J + regTerm;
|
||||
|
||||
@@ -150,8 +150,8 @@ void PDM::CalcShape3D(cv::Mat_<float>& out_shape, const cv::Mat_<float>& p_local
|
||||
int p_local_cols = p_local.cols;
|
||||
int princ_comp_rows = princ_comp.rows;
|
||||
int princ_comp_cols = princ_comp.cols;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &p_local_cols, &princ_comp_rows, &princ_comp_cols, &alpha1, (float*)p_local.data, &p_local_cols, (float*)princ_comp.data, &princ_comp_cols, &beta1, (float*)out_shape.data, &p_local_cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &p_local_cols, &princ_comp_rows, &princ_comp_cols, &alpha1, (float*)p_local.data, &p_local_cols, (float*)princ_comp.data, &princ_comp_cols, &beta1, (float*)out_shape.data, &p_local_cols);
|
||||
|
||||
// Above is a fast (but ugly) version of
|
||||
// out_shape = mean_shape + princ_comp * p_local;
|
||||
@@ -639,8 +639,8 @@ void PDM::CalcParams(cv::Vec6f& out_params_global, cv::Mat_<float>& out_params_l
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 1.0;
|
||||
char *not = "N";
|
||||
sgemm_(not, not, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols);
|
||||
char *nT = "N";
|
||||
sgemm_(nT, nT, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols);
|
||||
|
||||
// Above is a fast (but ugly) version of
|
||||
// cv::Mat_<float> Hessian2 = J_w_t * J + regularisations;
|
||||
|
||||
Reference in New Issue
Block a user