From 283bf810db36438e6e3d4151e166709186930974 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Thu, 20 Jul 2017 12:12:13 -0400 Subject: [PATCH] Sparse models in C++, gain in speed --- .gitignore | 4 + .../include/CEN_patch_expert.h | 3 + .../LandmarkDetector/src/CEN_patch_expert.cpp | 173 +++++++++++++++++- .../src/LandmarkDetectorFunc.cpp | 14 +- .../LandmarkDetector/src/Patch_experts.cpp | 11 +- .../results/landmark_detections.txt | 6 +- .../Generate_ceclm_results.m | 19 +- 7 files changed, 212 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 61870668..16bc0392 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,7 @@ matlab_runners/Feature Point Experiments/out_clnf/ matlab_runners/Feature Point Experiments/out_svr/ matlab_runners/Feature Point Experiments/yt_features_ceclm/ matlab_runners/Feature Point Experiments/yt_features_clnf/ +matlab_version/face_validation/CNN_new/data/ +matlab_version/face_validation/CNN_new/vlfeat-0.9.20/ +matlab_version/experiments_300VW/CECLM_res_validation/ +matlab_version/experiments_300VW/CECLM_res_general_v2/ diff --git a/lib/local/LandmarkDetector/include/CEN_patch_expert.h b/lib/local/LandmarkDetector/include/CEN_patch_expert.h index 91af8839..f6074ddb 100644 --- a/lib/local/LandmarkDetector/include/CEN_patch_expert.h +++ b/lib/local/LandmarkDetector/include/CEN_patch_expert.h @@ -102,6 +102,9 @@ namespace LandmarkDetector // The actual response computation from intensity image void Response(const cv::Mat_ &area_of_interest, cv::Mat_ &response); + // Faster version of the response that only considers a subset of the area_of_interest + void ResponseSparse(const cv::Mat_ &area_of_interest, cv::Mat_ &response); + }; } #endif diff --git a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp index cf76f2e2..99c44a72 100644 --- a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp +++ b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp @@ 
-201,12 +201,10 @@ void im2colBias(const cv::Mat_& input, int width, int height, cv::Mat_ &area_of_interest, cv::Mat response = response.t(); }
+
+// Sparse counterpart of im2colBias.
+// Slides a width x height window over input, enumerating the window positions column by column
+// (x outer, y inner), and keeps only the positions with an odd index in that enumeration.
+// Every kept position becomes one row of output: columns 1..width*height hold the window pixels in
+// column-major order, column 0 is never written here (a freshly allocated output has it set to 1).
+//   input  - image region to sample windows from
+//   width  - window width in pixels
+//   height - window height in pixels
+//   output - (number of kept positions) x (width * height + 1) matrix, re-allocated if mis-sized
+void im2colBiasSparse(const cv::Mat_<float>& input, int width, int height, cv::Mat_<float>& output)
+{
+	// Number of sliding-window positions along each axis
+	const int yB = input.rows - height + 1;
+	const int xB = input.cols - width + 1;
+
+	// Only the odd-indexed positions are kept, which is floor(yB * xB / 2) of them
+	const int out_size = (yB * xB) / 2;
+
+	// Re-allocate as soon as either dimension is wrong, otherwise the writes below run out of bounds
+	if (output.rows != out_size || output.cols != width * height + 1)
+	{
+		output = cv::Mat::ones(out_size, width * height + 1, CV_32F);
+	}
+
+	// Iterate over the blocks, skipping every second block (starting with the very first one)
+	int rowIdx = 0;
+	int blockIdx = 0;
+	for (int j = 0; j < xB; j++)
+	{
+		for (int i = 0; i < yB; i++, blockIdx++)
+		{
+			if (blockIdx % 2 == 0)
+			{
+				continue;
+			}
+
+			for (int yy = 0; yy < height; ++yy)
+			{
+				for (int xx = 0; xx < width; ++xx)
+				{
+					// Column-major position inside the window, shifted by one to leave column 0 free
+					int colIdx = xx*height + yy;
+					output.at<float>(rowIdx, colIdx + 1) = input.at<float>(i + yy, j + xx);
+				}
+			}
+			rowIdx++;
+		}
+	}
+}
+
+
+//===========================================================================
+// Faster version of the response that only evaluates the network on the window positions kept by
+// im2colBiasSparse; every skipped position is filled with the mean of its evaluated 4-neighbours.
+//   area_of_interest - image region the patch expert is slid over
+//   response         - output with one value per window position,
+//                      (area_of_interest.rows - height + 1) x (area_of_interest.cols - width + 1)
+// NOTE(review): the interpolation step assumes the last layer produces a single column - confirm.
+void CEN_patch_expert::ResponseSparse(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
+{
+
+	int response_height = area_of_interest.rows - height + 1;
+	int response_width = area_of_interest.cols - width + 1;
+
+	cv::Mat_<float> input_col;
+	// Extract im2col but in a sparse way
+	im2colBiasSparse(area_of_interest, width, height, input_col);
+
+	// Mean and standard deviation normalization
+	contrastNorm(input_col, response);
+
+	for (size_t layer = 0; layer < activation_function.size(); ++layer)
+	{
+
+		// We are performing response = response * weights[layers], but in OpenBLAS as that is significantly quicker than OpenCV
+		cv::Mat_<float> response_blas = response.clone();
+
+		float* m1 = (float*)response_blas.data;
+		float* m2 = (float*)weights[layer].data;
+
+		cv::Mat_<float> resp_blas(response_blas.rows, weights[layer].cols, 1.0);
+		float* m3 = (float*)resp_blas.data;
+
+		cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, weights[layer].cols, response.rows, response.cols, 1, m2, weights[layer].cols, m1, response.cols, 0.0, m3, weights[layer].cols);
+
+		response = resp_blas;
+
+		// TODO bias could be pre-allocated to the window size so that addition could be quicker
+		for (int y = 0; y < response.rows; ++y)
+		{
+			response(cv::Rect(0, y, response.cols, 1)) = response(cv::Rect(0, y, response.cols, 1)) + biases[layer];
+		}
+
+		// Perform activation
+		if (activation_function[layer] == 0) // Sigmoid
+		{
+			for (cv::MatIterator_<float> p = response.begin(); p != response.end(); p++)
+			{
+				*p = 1.0 / (1.0 + exp(-(*p)));
+			}
+		}
+		else if (activation_function[layer] == 2)// ReLU
+		{
+			cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO);
+		}
+
+	}
+
+	// Restructure the output with interpolation: column c of mapMatrix holds the weights with which
+	// the sparse responses are combined into dense position c (c = x * response_height + y)
+	const int num_positions = response_height * response_width;
+	cv::Mat_<float> mapMatrix(response.rows, num_positions, 0.0f);
+
+	// Find a mapping from indices in the computed sparse response and the original full response,
+	// positions that were not evaluated are marked with -1
+	cv::Mat_<int> value_id_matrix(response_width, response_height, -1);
+
+	int ind = 0;
+	for (int k = 1; k < num_positions; k += 2)
+	{
+		value_id_matrix.at<int>(k) = ind;
+		ind++;
+	}
+	value_id_matrix = value_id_matrix.t();
+
+	for (int x = 0; x < response_width; ++x)
+	{
+		for (int y = 0; y < response_height; ++y)
+		{
+			const int mapping_col = x * response_height + y;
+
+			// Evaluated position: copy its value straight through
+			const int own_id = value_id_matrix.at<int>(y, x);
+			if (own_id >= 0)
+			{
+				mapMatrix.at<float>(own_id, mapping_col) = 1;
+				continue;
+			}
+
+			// Skipped position: collect the 4-neighbours that actually carry an evaluated value
+			std::vector<int> val_ids;
+			if (x - 1 >= 0 && value_id_matrix.at<int>(y, x - 1) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y, x - 1));
+			}
+			if (y - 1 >= 0 && value_id_matrix.at<int>(y - 1, x) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y - 1, x));
+			}
+			if (x + 1 < response_width && value_id_matrix.at<int>(y, x + 1) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y, x + 1));
+			}
+			if (y + 1 < response_height && value_id_matrix.at<int>(y + 1, x) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y + 1, x));
+			}
+
+			// Average over the collected neighbours
+			for (size_t k = 0; k < val_ids.size(); ++k)
+			{
+				mapMatrix.at<float>(val_ids[k], mapping_col) = 1.0f / val_ids.size();
+			}
+		}
+	}
+
+	response = response.t() * mapMatrix;
+	response = response.t();
+	// The values are ordered column by column, so lay them out as one row per x and transpose
+	// to end up with a response_height x response_width matrix
+	response = response.reshape(1, response_width);
+	response = response.t();
+}
diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
index a89562ac..dbbe532a 100644
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
@@ -665,10 +665,16 @@ bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat_ &grayscale_i
 		// Try out different orientation initialisations
 		// It is possible to add other orientation hypotheses easilly by just pushing to this vector
 		rotation_hypotheses.push_back(cv::Vec3d(0,0,0));
-		rotation_hypotheses.push_back(cv::Vec3d(0,0.5236,0));
-		rotation_hypotheses.push_back(cv::Vec3d(0,-0.5236,0));
-		rotation_hypotheses.push_back(cv::Vec3d(0,0, 0.5236));
-		rotation_hypotheses.push_back(cv::Vec3d(0,0, -0.5236));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -0.5236, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0.5236,0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -0.96, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0.96, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0, 0.5236));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0, -0.5236));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -1.57, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 1.57, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -1.22, 0.698));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 1.22, -0.698));
 	}
 	else
 	{
diff --git a/lib/local/LandmarkDetector/src/Patch_experts.cpp b/lib/local/LandmarkDetector/src/Patch_experts.cpp
index 
aac8dff6..98bdd450 100644 --- a/lib/local/LandmarkDetector/src/Patch_experts.cpp +++ b/lib/local/LandmarkDetector/src/Patch_experts.cpp @@ -200,7 +200,7 @@ void Patch_experts::Response(vector >& patch_expert_responses, c #pragma omp parallel for #endif tbb::parallel_for(0, (int)n, [&](int i){ -// for(int i = 0; i < n; i++) + //for(int i = 0; i < n; i++) { if (visibilities[scale][view_id].rows == n) @@ -246,7 +246,14 @@ void Patch_experts::Response(vector >& patch_expert_responses, c // Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far) if (!cen_expert_intensity.empty()) { - cen_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]); + if (scale <= 2) + { + cen_expert_intensity[scale][view_id][i].ResponseSparse(area_of_interest, patch_expert_responses[i]); + } + else + { + cen_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]); + } } else if (!ccnf_expert_intensity.empty()) { diff --git a/matlab_runners/Feature Point Experiments/results/landmark_detections.txt b/matlab_runners/Feature Point Experiments/results/landmark_detections.txt index 29a77793..f0e396a1 100644 --- a/matlab_runners/Feature Point Experiments/results/landmark_detections.txt +++ b/matlab_runners/Feature Point Experiments/results/landmark_detections.txt @@ -1,4 +1,4 @@ Type, mean, median -err ce-clm: 0.042883, 0.035016 -err clnf: 0.055150, 0.038256 -err svr: 0.067395, 0.049832 +err ce-clm: 0.043155, 0.034947 +err clnf: 0.054968, 0.038257 +err svr: 0.070022, 0.049925 diff --git a/matlab_version/experiments_300VW/Generate_ceclm_results.m b/matlab_version/experiments_300VW/Generate_ceclm_results.m index 9ed51795..f04f712a 100644 --- a/matlab_version/experiments_300VW/Generate_ceclm_results.m +++ b/matlab_version/experiments_300VW/Generate_ceclm_results.m @@ -13,7 +13,7 @@ labels = zeros(68,2,0); % Load results for i=cat_1 - load(['CECLM_res_general/', 
num2str(i)]); + load(['CECLM_res_general_no_4/', num2str(i)]); ceclm_preds = cat(3, ceclm_preds, preds); labels = cat(3, labels, gt_landmarks); @@ -23,12 +23,14 @@ end labels = labels([1:60,62:64,66:end],:,:); ceclm_preds = ceclm_preds([1:60,62:64,66:end],:,:); ceclm_error_66_cat_1 = compute_error(labels, ceclm_preds); +ceclm_error_66_cat_1_auc = auc(ceclm_error_66_cat_1); % Do the 49 point version labels = labels(18:end,:,:); ceclm_preds = ceclm_preds(18:end,:,:); ceclm_error_49_cat_1 = compute_error(labels, ceclm_preds); +ceclm_error_49_cat_1_auc = auc(ceclm_error_49_cat_1); %% @@ -38,7 +40,7 @@ labels = zeros(68,2,0); % Load results for i=cat_2 - load(['CECLM_res_general/', num2str(i)]); + load(['CECLM_res_general_no_4/', num2str(i)]); ceclm_preds = cat(3, ceclm_preds, preds); labels = cat(3, labels, gt_landmarks); @@ -48,12 +50,15 @@ end labels = labels([1:60,62:64,66:end],:,:); ceclm_preds = ceclm_preds([1:60,62:64,66:end],:,:); ceclm_error_66_cat_2 = compute_error(labels, ceclm_preds); +ceclm_error_66_cat_2_auc = auc(ceclm_error_66_cat_2); % Do the 49 point version labels = labels(18:end,:,:); ceclm_preds = ceclm_preds(18:end,:,:); ceclm_error_49_cat_2 = compute_error(labels, ceclm_preds); +ceclm_error_49_cat_2_auc = auc(ceclm_error_49_cat_2); + %% ceclm_preds = zeros(68,2,0); @@ -61,7 +66,7 @@ labels = zeros(68,2,0); % Load results for i=cat_3 - load(['CECLM_res_general/', num2str(i)]); + load(['CECLM_res_general_no_4/', num2str(i)]); ceclm_preds = cat(3, ceclm_preds, preds); labels = cat(3, labels, gt_landmarks); @@ -71,13 +76,17 @@ end labels = labels([1:60,62:64,66:end],:,:); ceclm_preds = ceclm_preds([1:60,62:64,66:end],:,:); ceclm_error_66_cat_3 = compute_error(labels, ceclm_preds); +ceclm_error_66_cat_3_auc = auc(ceclm_error_66_cat_3); % Do the 49 point version labels = labels(18:end,:,:); ceclm_preds = ceclm_preds(18:end,:,:); ceclm_error_49_cat_3 = compute_error(labels, ceclm_preds); +ceclm_error_49_cat_3_auc = auc(ceclm_error_49_cat_3); %% Save the 
results -save('results/ceclm_errors', 'ceclm_error_66_cat_1', 'ceclm_error_66_cat_2', 'ceclm_error_66_cat_3',... - 'ceclm_error_49_cat_1', 'ceclm_error_49_cat_2', 'ceclm_error_49_cat_3'); +save('results/ceclm_errors_no_4', 'ceclm_error_66_cat_1', 'ceclm_error_66_cat_2', 'ceclm_error_66_cat_3',... + 'ceclm_error_49_cat_1', 'ceclm_error_49_cat_2', 'ceclm_error_49_cat_3',... + 'ceclm_error_66_cat_1_auc', 'ceclm_error_66_cat_2_auc', 'ceclm_error_66_cat_3_auc',... + 'ceclm_error_49_cat_1_auc', 'ceclm_error_49_cat_2_auc', 'ceclm_error_49_cat_3_auc');