From 283bf810db36438e6e3d4151e166709186930974 Mon Sep 17 00:00:00 2001
From: Tadas Baltrusaitis <tadyla@gmail.com>
Date: Thu, 20 Jul 2017 12:12:13 -0400
Subject: [PATCH] Sparse models in C++, gain in speed

---
 .gitignore                                    |   4 +
 .../include/CEN_patch_expert.h                |   3 +
 .../LandmarkDetector/src/CEN_patch_expert.cpp | 173 +++++++++++++++++-
 .../src/LandmarkDetectorFunc.cpp              |  14 +-
 .../LandmarkDetector/src/Patch_experts.cpp    |  11 +-
 .../results/landmark_detections.txt           |   6 +-
 .../Generate_ceclm_results.m                  |  19 +-
 7 files changed, 212 insertions(+), 18 deletions(-)
diff --git a/.gitignore b/.gitignore
index 61870668..16bc0392 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,3 +103,7 @@ matlab_runners/Feature Point Experiments/out_clnf/
 matlab_runners/Feature Point Experiments/out_svr/
 matlab_runners/Feature Point Experiments/yt_features_ceclm/
 matlab_runners/Feature Point Experiments/yt_features_clnf/
+matlab_version/face_validation/CNN_new/data/
+matlab_version/face_validation/CNN_new/vlfeat-0.9.20/
+matlab_version/experiments_300VW/CECLM_res_validation/
+matlab_version/experiments_300VW/CECLM_res_general_v2/
diff --git a/lib/local/LandmarkDetector/include/CEN_patch_expert.h b/lib/local/LandmarkDetector/include/CEN_patch_expert.h
index 91af8839..f6074ddb 100644
--- a/lib/local/LandmarkDetector/include/CEN_patch_expert.h
+++ b/lib/local/LandmarkDetector/include/CEN_patch_expert.h
@@ -102,6 +102,9 @@ namespace LandmarkDetector
 		// The actual response computation from intensity image
 		void Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response);
 
+		// Faster version of the response that only considers a subset of the area_of_interest
+		void ResponseSparse(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response);
+
 	};
 }
 #endif
diff --git a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
index cf76f2e2..99c44a72 100644
--- a/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
+++ b/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
@@ -201,12 +201,10 @@ void im2colBias(const cv::Mat_<float>& input, int width, int height, cv::Mat_<fl
 	}
 
 	// Iterate over the blocks
-	for (int i = 0; i< yB; i++)
+	for (int j = 0; j< xB; j++)
 	{
-		for (int j = 0; j< xB; j++)
+		for (int i = 0; i< yB; i++)
 		{
-			// here yours is in different order than I first thought:
-			//int rowIdx = j + i*xB;    // my intuition how to index the result
 			int rowIdx = i + j*yB;
 
 			for (unsigned int yy = 0; yy < height; ++yy)
@@ -274,3 +272,170 @@ void CEN_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat
 	response = response.t();
 
 }
+
+// Sparse variant of im2colBias: slides a width x height window over input in column-major order
+// (down each column of positions first) and keeps only every second position, those at odd 0-based
+// visit index. Row r of output is a leading 1 (bias term) followed by the r-th kept window, flattened
+// column-major (element (yy, xx) of the window is stored at column 1 + xx * height + yy).
+void im2colBiasSparse(const cv::Mat_<float>& input, int width, int height, cv::Mat_<float>& output)
+{
+	// Number of window positions along each axis
+	const int yB = input.rows - height + 1;
+	const int xB = input.cols - width + 1;
+
+	// The loop below keeps the odd visit indices, i.e. floor(yB * xB / 2) windows. The earlier
+	// (yB * xB - 1) / 2 was one row short for an even window count, so the last write ran past the end.
+	const int out_size = (yB * xB) / 2;
+
+	// Reallocate when either dimension is wrong; ones() also fills the bias column with 1
+	if (output.rows != out_size || output.cols != width * height + 1)
+	{
+		output = cv::Mat::ones(out_size, width * height + 1, CV_32F);
+	}
+
+	int rowIdx = 0;   // next output row to fill
+	int visitIdx = 0; // 0-based index of the window position being visited
+	for (int j = 0; j < xB; j++)
+	{
+		for (int i = 0; i < yB; i++)
+		{
+			// Skip the even visit indices (0, 2, 4, ...)
+			if (visitIdx++ % 2 == 0)
+			{
+				continue;
+			}
+
+			// Copy the window whose top-left corner is (i, j), leaving column 0 for the bias
+			for (int yy = 0; yy < height; ++yy)
+			{
+				for (int xx = 0; xx < width; ++xx)
+				{
+					const int colIdx = xx * height + yy;
+					output.at<float>(rowIdx, colIdx + 1) = input.at<float>(i + yy, j + xx);
+				}
+			}
+			rowIdx++;
+		}
+	}
+}
+
+
+//===========================================================================
+// Faster approximation of Response(): the network is evaluated only on every second sliding-window
+// position (the ones kept by im2colBiasSparse) and each skipped position is filled in with the mean
+// of its directly adjacent (left/up/right/down) evaluated positions.
+void CEN_patch_expert::ResponseSparse(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
+{
+
+	int response_height = area_of_interest.rows - height + 1;
+	int response_width = area_of_interest.cols - width + 1;
+
+	cv::Mat_<float> input_col;
+	// Extract im2col but in a sparse way
+	im2colBiasSparse(area_of_interest, width, height, input_col);
+
+	// Mean and standard deviation normalization
+	contrastNorm(input_col, response);
+
+	for (size_t layer = 0; layer < activation_function.size(); ++layer)
+	{
+		// We are performing response = response * weights[layers], but in OpenBLAS as that is significantly quicker than OpenCV
+		// The clone gives the multiplication its own input buffer, as response is replaced by the product below
+		cv::Mat_<float> response_blas = response.clone();
+
+		float* m1 = (float*)response_blas.data;
+		float* m2 = (float*)weights[layer].data;
+
+		cv::Mat_<float> resp_blas(response_blas.rows, weights[layer].cols, 1.0);
+		float* m3 = (float*)resp_blas.data;
+
+		// Row-major buffers read as column-major are transposes, so this computes (response * weights)^T = weights^T * response^T (assumes continuous storage)
+		cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, weights[layer].cols, response.rows, response.cols, 1, m2, weights[layer].cols, m1, response.cols, 0.0, m3, weights[layer].cols);
+
+		response = resp_blas;
+
+		// TODO bias could be pre-allocated to the window size so that addition could be quicker
+		for (int y = 0; y < response.rows; ++y)
+		{
+			response(cv::Rect(0, y, response.cols, 1)) = response(cv::Rect(0, y, response.cols, 1)) + biases[layer];
+		}
+
+		// Perform activation
+		if (activation_function[layer] == 0) // Sigmoid
+		{
+			for (cv::MatIterator_<float> p = response.begin(); p != response.end(); ++p)
+			{
+				*p = 1.0 / (1.0 + exp(-(*p)));
+			}
+		}
+		else if (activation_function[layer] == 2)// ReLU
+		{
+			cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO);
+		}
+
+	}
+	
+	// Restructure the output with interpolation: column c of mapMatrix holds the weights that build full-response element c
+	cv::Mat_<float> mapMatrix(response.rows, response_height * response_width, 0.0f);
+
+	// Find a mapping from indices in the computed sparse response and the original full response
+	// (-1 marks skipped positions; with a 0 default they were mistaken for sparse row 0 when response_height is even)
+	cv::Mat_<int> value_id_matrix(response_width, response_height, -1);
+
+	int ind = 0;
+	for (int k = 0; k < value_id_matrix.rows * value_id_matrix.cols; ++k)
+	{
+		if(k % 2 != 0)
+		{
+			value_id_matrix.at<int>(k) = ind;
+			ind++;
+		}
+	}
+	value_id_matrix = value_id_matrix.t();
+
+	// Visit the positions in the same column-major order that im2colBiasSparse uses
+	for (int x = 0; x < response_width; ++x)
+	{
+		for (int y = 0; y < response_height; ++y)
+		{
+			int mapping_col = x * response_height + y;
+			int val_id = value_id_matrix.at<int>(y, x);
+			if (val_id >= 0)
+			{
+				// This position was evaluated by the network, pass its value straight through
+				mapMatrix.at<float>(val_id, mapping_col) = 1;
+				continue;
+			}
+
+			// Gather the evaluated 4-neighbours that lie inside the response
+			std::vector<int> val_ids;
+			if (x - 1 >= 0 && value_id_matrix.at<int>(y, x - 1) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y, x - 1));
+			}
+			if (y - 1 >= 0 && value_id_matrix.at<int>(y - 1, x) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y - 1, x));
+			}
+			if (x + 1 < response_width && value_id_matrix.at<int>(y, x + 1) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y, x + 1));
+			}
+			if (y + 1 < response_height && value_id_matrix.at<int>(y + 1, x) >= 0)
+			{
+				val_ids.push_back(value_id_matrix.at<int>(y + 1, x));
+			}
+
+			// Equal weights, so the skipped position becomes the mean of the gathered neighbours
+			for (size_t k = 0; k < val_ids.size(); ++k)
+			{
+				mapMatrix.at<float>(val_ids[k], mapping_col) = 1.0 / static_cast<double>(val_ids.size());
+			}
+		}
+	}
+
+	response = response.t() * mapMatrix;
+	response = response.t();
+	response = response.reshape(1, response_height);
+	response = response.t();
+}
diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
index a89562ac..dbbe532a 100644
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
@@ -665,10 +665,16 @@ bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat_<uchar> &grayscale_i
 		// Try out different orientation initialisations
 		// It is possible to add other orientation hypotheses easilly by just pushing to this vector
 		rotation_hypotheses.push_back(cv::Vec3d(0,0,0));
-		rotation_hypotheses.push_back(cv::Vec3d(0,0.5236,0));
-		rotation_hypotheses.push_back(cv::Vec3d(0,-0.5236,0));
-		rotation_hypotheses.push_back(cv::Vec3d(0,0, 0.5236));
-		rotation_hypotheses.push_back(cv::Vec3d(0,0, -0.5236));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -0.5236, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0.5236,0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -0.96, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0.96, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0, 0.5236));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0, -0.5236));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -1.57, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 1.57, 0));
+		rotation_hypotheses.push_back(cv::Vec3d(0, -1.22, 0.698));
+		rotation_hypotheses.push_back(cv::Vec3d(0, 1.22, -0.698));
 	}
 	else
 	{
diff --git a/lib/local/LandmarkDetector/src/Patch_experts.cpp b/lib/local/LandmarkDetector/src/Patch_experts.cpp
index aac8dff6..98bdd450 100644
--- a/lib/local/LandmarkDetector/src/Patch_experts.cpp
+++ b/lib/local/LandmarkDetector/src/Patch_experts.cpp
@@ -200,7 +200,7 @@ void Patch_experts::Response(vector<cv::Mat_<float> >& patch_expert_responses, c
 #pragma omp parallel for
 #endif
 	tbb::parallel_for(0, (int)n, [&](int i){
-//	for(int i = 0; i < n; i++)
+	//for(int i = 0; i < n; i++)
 	{
 
 		if (visibilities[scale][view_id].rows == n)
@@ -246,7 +246,14 @@ void Patch_experts::Response(vector<cv::Mat_<float> >& patch_expert_responses, c
 				// Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far)
 				if (!cen_expert_intensity.empty())
 				{
-					cen_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]);
+					if (scale <= 2)
+					{
+						cen_expert_intensity[scale][view_id][i].ResponseSparse(area_of_interest, patch_expert_responses[i]);
+					}
+					else
+					{
+						cen_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]);
+					}
 				}
 				else if (!ccnf_expert_intensity.empty())
 				{
diff --git a/matlab_runners/Feature Point Experiments/results/landmark_detections.txt b/matlab_runners/Feature Point Experiments/results/landmark_detections.txt
index 29a77793..f0e396a1 100644
--- a/matlab_runners/Feature Point Experiments/results/landmark_detections.txt	
+++ b/matlab_runners/Feature Point Experiments/results/landmark_detections.txt	
@@ -1,4 +1,4 @@
 Type, mean, median
-err ce-clm: 0.042883, 0.035016
-err clnf: 0.055150, 0.038256
-err svr: 0.067395, 0.049832
+err ce-clm: 0.043155, 0.034947
+err clnf: 0.054968, 0.038257
+err svr: 0.070022, 0.049925
diff --git a/matlab_version/experiments_300VW/Generate_ceclm_results.m b/matlab_version/experiments_300VW/Generate_ceclm_results.m
index 9ed51795..f04f712a 100644
--- a/matlab_version/experiments_300VW/Generate_ceclm_results.m
+++ b/matlab_version/experiments_300VW/Generate_ceclm_results.m
@@ -13,7 +13,7 @@ labels = zeros(68,2,0);
 % Load results
 for i=cat_1
     
-    load(['CECLM_res_general/', num2str(i)]);    
+    load(['CECLM_res_general_no_4/', num2str(i)]);    
     ceclm_preds = cat(3, ceclm_preds, preds);
 
     labels = cat(3, labels, gt_landmarks);    
@@ -23,12 +23,14 @@ end
 labels = labels([1:60,62:64,66:end],:,:);
 ceclm_preds = ceclm_preds([1:60,62:64,66:end],:,:);
 ceclm_error_66_cat_1 = compute_error(labels, ceclm_preds);
+ceclm_error_66_cat_1_auc = auc(ceclm_error_66_cat_1);
 
 % Do the 49 point version
 labels = labels(18:end,:,:);
 ceclm_preds = ceclm_preds(18:end,:,:);
 
 ceclm_error_49_cat_1 = compute_error(labels, ceclm_preds);
+ceclm_error_49_cat_1_auc = auc(ceclm_error_49_cat_1);
 
 %%
 
@@ -38,7 +40,7 @@ labels = zeros(68,2,0);
 % Load results
 for i=cat_2
     
-    load(['CECLM_res_general/', num2str(i)]);    
+    load(['CECLM_res_general_no_4/', num2str(i)]);    
     ceclm_preds = cat(3, ceclm_preds, preds);
 
     labels = cat(3, labels, gt_landmarks);    
@@ -48,12 +50,15 @@ end
 labels = labels([1:60,62:64,66:end],:,:);
 ceclm_preds = ceclm_preds([1:60,62:64,66:end],:,:);
 ceclm_error_66_cat_2 = compute_error(labels, ceclm_preds);
+ceclm_error_66_cat_2_auc = auc(ceclm_error_66_cat_2);
 
 % Do the 49 point version
 labels = labels(18:end,:,:);
 ceclm_preds = ceclm_preds(18:end,:,:);
 
 ceclm_error_49_cat_2 = compute_error(labels, ceclm_preds);
+ceclm_error_49_cat_2_auc = auc(ceclm_error_49_cat_2);
+
 %%
 ceclm_preds = zeros(68,2,0);
 
@@ -61,7 +66,7 @@ labels = zeros(68,2,0);
 % Load results
 for i=cat_3
     
-    load(['CECLM_res_general/', num2str(i)]);    
+    load(['CECLM_res_general_no_4/', num2str(i)]);    
     ceclm_preds = cat(3, ceclm_preds, preds);
 
     labels = cat(3, labels, gt_landmarks);    
@@ -71,13 +76,17 @@ end
 labels = labels([1:60,62:64,66:end],:,:);
 ceclm_preds = ceclm_preds([1:60,62:64,66:end],:,:);
 ceclm_error_66_cat_3 = compute_error(labels, ceclm_preds);
+ceclm_error_66_cat_3_auc = auc(ceclm_error_66_cat_3);
 
 % Do the 49 point version
 labels = labels(18:end,:,:);
 ceclm_preds = ceclm_preds(18:end,:,:);
 
 ceclm_error_49_cat_3 = compute_error(labels, ceclm_preds);
+ceclm_error_49_cat_3_auc = auc(ceclm_error_49_cat_3);
 
 %% Save the results
-save('results/ceclm_errors', 'ceclm_error_66_cat_1', 'ceclm_error_66_cat_2', 'ceclm_error_66_cat_3',...
-    'ceclm_error_49_cat_1', 'ceclm_error_49_cat_2', 'ceclm_error_49_cat_3'); 
+save('results/ceclm_errors_no_4', 'ceclm_error_66_cat_1', 'ceclm_error_66_cat_2', 'ceclm_error_66_cat_3',...
+    'ceclm_error_49_cat_1', 'ceclm_error_49_cat_2', 'ceclm_error_49_cat_3',...
+    'ceclm_error_66_cat_1_auc', 'ceclm_error_66_cat_2_auc', 'ceclm_error_66_cat_3_auc',...
+    'ceclm_error_49_cat_1_auc', 'ceclm_error_49_cat_2_auc', 'ceclm_error_49_cat_3_auc');