From 35ad003071a650c2afbe3d687bf6568f90ae7c2f Mon Sep 17 00:00:00 2001
From: Tadas Baltrusaitis <tadyla@gmail.com>
Date: Tue, 24 Apr 2018 08:19:45 +0100
Subject: [PATCH] Cleanup of the optimization of CLNF

---
 gui/OpenFaceOffline/MainWindow.xaml.cs        |  2 +-
 .../include/CCNF_patch_expert.h               |  4 +-
 .../src/CCNF_patch_expert.cpp                 | 87 ++-----------------
 .../LandmarkDetector/src/Patch_experts.cpp    |  7 +-
 .../Utilities/include/VisualizationUtils.h    |  2 -
 5 files changed, 13 insertions(+), 89 deletions(-)
diff --git a/gui/OpenFaceOffline/MainWindow.xaml.cs b/gui/OpenFaceOffline/MainWindow.xaml.cs
index f3c78334..e4af3856 100644
--- a/gui/OpenFaceOffline/MainWindow.xaml.cs
+++ b/gui/OpenFaceOffline/MainWindow.xaml.cs
@@ -101,7 +101,7 @@ namespace OpenFaceOffline
 
         // For tracking
         FaceDetector face_detector;
-        FaceModelParameters face_model_params; // TODO does this need to be reinitialized every time to deal with model reloading?
+        FaceModelParameters face_model_params;
         CLNF landmark_detector;
 
         // For face analysis
diff --git a/lib/local/LandmarkDetector/include/CCNF_patch_expert.h b/lib/local/LandmarkDetector/include/CCNF_patch_expert.h
index 7765a9d2..4ea40e76 100644
--- a/lib/local/LandmarkDetector/include/CCNF_patch_expert.h
+++ b/lib/local/LandmarkDetector/include/CCNF_patch_expert.h
@@ -118,8 +118,8 @@ public:
 	// actual work (can pass in an image and a potential depth image, if the CCNF is trained with depth)
 	void Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response);
 
-	// TODO rem
-	void ResponseOB(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response);
+	// A more efficient way to compute patch responses using matrix multiplication with OpenBLAS
+	void ResponseOpenBlas(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response);
 
 	// Helper function to compute relevant sigmas
 	void ComputeSigmas(std::vector<cv::Mat_<float> > sigma_components, int window_size);
diff --git a/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp b/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp
index 042842f6..520a5ea7 100644
--- a/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp
+++ b/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp
@@ -139,82 +139,7 @@ void CCNF_neuron::Read(ifstream &stream)
 
 }
 
-
-// Perform im2col, while at the same time doing contrast normalization and adding a bias term (also skip every other region)
-void im2colContrastNorm(const cv::Mat_<float>& input, const int width, const int height, cv::Mat_<float>& output)
-{
-	const int m = input.rows;
-	const int n = input.cols;
-
-	// determine how many blocks there will be with a sliding window of width x height in the input
-	const int yB = m - height + 1;
-	const int xB = n - width + 1;
-
-	// Allocate the output size
-	if (output.rows != xB*yB && output.cols != width * height)
-	{
-		output = cv::Mat::zeros(xB*yB, width * height, CV_32F);
-	}
-
-	// Iterate over the blocks,
-	int rowIdx = 0;
-	for (int j = 0; j< xB; j++)
-	{
-		for (int i = 0; i< yB; i++)
-		{
-
-			float* Mo = output.ptr<float>(rowIdx);
-
-			float sum = 0;
-
-			for (unsigned int yy = 0; yy < height; ++yy)
-			{
-				const float* Mi = input.ptr<float>(i + yy);
-				for (unsigned int xx = 0; xx < width; ++xx)
-				{
-					int colIdx = xx*height + yy;
-					float in = Mi[j + xx];
-					sum += in;
-
-					Mo[colIdx] = in;
-				}
-			}
-
-			// Working out the mean
-			float mean = sum / (float)(width * height);
-
-			float sum_sq = 0;
-
-			// Working out the sum squared and subtracting the mean
-			for (size_t x = 0; x < width*height; ++x)
-			{
-				float in = Mo[x] - mean;
-				Mo[x] = in;
-				sum_sq += in * in;
-			}
-
-			float norm = sqrt(sum_sq);
-
-			// Avoiding division by 0
-			if (norm == 0)
-			{
-				norm = 1;
-			}
-
-			// Flip multiplication to division for speed
-			norm = 1.0 / norm;
-
-			for (size_t x = 0; x < width*height; ++x)
-			{
-				Mo[x] *= norm;
-			}
-
-			rowIdx++;
-		}
-	}
-}
-
-// Perform im2col, while at the same time doing contrast normalization and adding a bias term (also skip every other region)
+// Perform im2col while simultaneously applying contrast normalization and adding a bias term
 void im2colContrastNormBias(const cv::Mat_<float>& input, const int width, const int height, cv::Mat_<float>& output)
 {
 	const int m = input.rows;
@@ -407,6 +332,9 @@ void CCNF_patch_expert::Read(ifstream &stream, std::vector<int> window_sizes, st
 		weight_matrix.at<float>(i, 0) = neurons[i].bias;
 	}
 
+	// In case we are using OpenBLAS, restrict it to a single thread, since we already multi-thread outside of it
+	openblas_set_num_threads(1);
+
 	int n_sigmas = window_sizes.size();
 
 	int n_betas = 0;
@@ -491,7 +419,7 @@ void CCNF_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Ma
 }
 
 //===========================================================================
-void CCNF_patch_expert::ResponseOB(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
+void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
 {
 
 	int response_height = area_of_interest.rows - height + 1;
@@ -540,15 +468,14 @@ void CCNF_patch_expert::ResponseOB(const cv::Mat_<float> &area_of_interest, cv::
 		{
 			cv::MatIterator_<float> p = response.begin();
 
-			cv::Mat_<float> rel_row = neuron_resp_full.row(i);// .clone();
+			cv::Mat_<float> rel_row = neuron_resp_full.row(i);
 			cv::MatIterator_<float> q1 = rel_row.begin(); // respone for each pixel
 			cv::MatIterator_<float> q2 = rel_row.end();
 
 			// the logistic function (sigmoid) applied to the response
 			while (q1 != q2)
 			{
-				//*p++ += (2 * neurons[i].alpha) * 1.0 / (1.0 + exp(-(*q1++ * neurons[i].norm_weights + neurons[i].bias)));
-				*p++ += (2 * neurons[i].alpha) * 1.0 / (1.0 + exp(-*q1++));
+				*p++ += (2.0 * neurons[i].alpha) / (1.0 + exp(-*q1++));
 			}
 		}
 	}
diff --git a/lib/local/LandmarkDetector/src/Patch_experts.cpp b/lib/local/LandmarkDetector/src/Patch_experts.cpp
index 2c9b0c9b..91966486 100644
--- a/lib/local/LandmarkDetector/src/Patch_experts.cpp
+++ b/lib/local/LandmarkDetector/src/Patch_experts.cpp
@@ -312,12 +312,11 @@ void Patch_experts::Response(vector<cv::Mat_<float> >& patch_expert_responses, c
 			// get the correct size response window			
 			patch_expert_responses[ind] = cv::Mat_<float>(window_size, window_size);
 			
-			ccnf_expert_intensity[scale][view_id][ind].ResponseOB(area_of_interest, patch_expert_responses[ind]);
+			ccnf_expert_intensity[scale][view_id][ind].ResponseOpenBlas(area_of_interest, patch_expert_responses[ind]);
 
-			// TODO rem
+			// Alternative way to compute the same response, using FFT instead of OpenBLAS
 			// ccnf_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
-			// cv::Mat_<float> placeholder(window_size, window_size);
-			// cout << cv::norm(placeholder - patch_expert_responses[ind]) << endl;
+
 		}
 		else
 		{				
diff --git a/lib/local/Utilities/include/VisualizationUtils.h b/lib/local/Utilities/include/VisualizationUtils.h
index bb1f7d81..5df09383 100644
--- a/lib/local/Utilities/include/VisualizationUtils.h
+++ b/lib/local/Utilities/include/VisualizationUtils.h
@@ -42,8 +42,6 @@
 namespace Utilities
 {
 
-	// TODO draw AU results
-
 	// Drawing a bounding box around the face in an image
 	void DrawBox(cv::Mat image, cv::Vec6f pose, cv::Scalar color, int thickness, float fx, float fy, float cx, float cy);
 	void DrawBox(const std::vector<std::pair<cv::Point2f, cv::Point2f>>& lines, cv::Mat image, cv::Scalar color, int thickness);