More work on AU - WIP

2026-05-14 11:17:53 +00:00 · 2016-06-14 17:55:16 -04:00
parent 65e91da3a6
commit 146dcd5e5b
676 changed files with 2971 additions and 297 deletions
--- a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp
+++ b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp
@@ -303,6 +303,59 @@ void FaceAnalyser::ExtractCurrentMedians(vector<cv::Mat>& hog_medians, vector<cv
 	}
 }

+// Predicts AUs from a single frame: aligns the face, builds the HOG and geometry descriptors, then runs the
+// intensity and occurrence predictors for the head orientation view selected by GetViewId. No median is
+// computed or updated in this function (presumably why it is the "static" variant - TODO confirm).
+//   frame     - image handed to AlignFaceMask to produce aligned_face
+//   clnf      - landmark detector state; params_global, params_local and pdm.princ_comp are read here
+//   visualise - when true, hog_descriptor_visualisation is refreshed from this frame's HOG descriptor
+// Returns (intensity, occurrence), each a list of (AU name, value); intensities are clamped to [0, 5].
+// Overwrites aligned_face, hog_desc_frame and geom_descriptor_frame.
+std::pair<std::vector<std::pair<string, double>>, std::vector<std::pair<string, double>>> FaceAnalyser::PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise)
+{
+	// First align the face
+	AlignFaceMask(aligned_face, frame, clnf, triangulation, true, align_scale, align_width, align_height);
+
+	// Extract the HOG descriptor from the aligned face and store it as this frame's descriptor
+	cv::Mat_<double> hog_descriptor;
+	Extract_FHOG_descriptor(hog_descriptor, aligned_face, this->num_hog_rows, this->num_hog_cols);
+	hog_desc_frame = hog_descriptor;
+
+	// Select the view to use from the orientation held in params_global[1..3]
+	cv::Vec3d curr_orient(clnf.params_global[1], clnf.params_global[2], clnf.params_global[3]);
+	int orientation_to_use = GetViewId(this->head_orientations, curr_orient);
+
+	// The geometry descriptor starts out as the transposed local parameters (no median is involved here)
+	geom_descriptor_frame = clnf.params_local.t();
+
+	// Stack with the actual feature point locations (without mean); the locations come first
+	cv::Mat_<double> locs = clnf.pdm.princ_comp * geom_descriptor_frame.t();
+	cv::hconcat(locs.t(), geom_descriptor_frame.clone(), geom_descriptor_frame);
+
+	// The aligned face is not converted to a double row vector here: nothing in this function reads it
+
+	// Visualising the HOG of the current frame
+	if (visualise)
+	{
+		FaceAnalysis::Visualise_FHOG(hog_descriptor, num_hog_rows, num_hog_cols, hog_descriptor_visualisation);
+	}
+
+	// Perform AU prediction (NOTE(review): presumably these read the descriptors stored above - confirm)
+	auto AU_predictions_intensity = PredictCurrentAUs(orientation_to_use);
+	auto AU_predictions_occurence = PredictCurrentAUsClass(orientation_to_use);
+
+	// Make sure intensity is within range (0-5)
+	for (auto& au_intensity : AU_predictions_intensity)
+	{
+		if (au_intensity.second < 0)
+			au_intensity.second = 0;
+
+		if (au_intensity.second > 5)
+			au_intensity.second = 5;
+	}
+	return std::pair<std::vector<std::pair<std::string, double>>, std::vector<std::pair<std::string, double>>>(AU_predictions_intensity, AU_predictions_occurence);
+}
+
 void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf_model, double timestamp_seconds, bool online, bool visualise)
 {

@@ -607,7 +660,6 @@ void FaceAnalyser::ExtractAllPredictionsOfflineReg(vector<std::pair<std::string,
 	confidences = this->confidences;
 	successes = this->valid_preds;

-	// TODO only if the video is long enough or there is enough range? Compare stdev of BP4D and this
 	for(auto au_iter = AU_predictions_reg_all_hist.begin(); au_iter != AU_predictions_reg_all_hist.end(); ++au_iter)
 	{
 		vector<double> au_good;
@@ -665,6 +717,28 @@ void FaceAnalyser::ExtractAllPredictionsOfflineReg(vector<std::pair<std::string,
 		}
 	}

+	// Smooth every AU prediction track with a centred 3 frame moving average (the first and last frame keep their value)
+	for (auto au_iter = au_predictions.begin(); au_iter != au_predictions.end(); ++au_iter)
+	{
+		// One frame on either side of the current one gives the 3 frame window
+		const int half_window = 1;
+		const int window_size = 2 * half_window + 1;
+
+		// Average over an unsmoothed copy so that already smoothed frames do not leak into their neighbours
+		const vector<double> au_vals_tmp = au_iter->second;
+		const int num_frames = static_cast<int>(au_vals_tmp.size());
+
+		// Signed bounds: an empty track or one shorter than the window is skipped instead of wrapping around
+		for (int i = half_window; i < num_frames - half_window; ++i)
+		{
+			double sum = 0;
+			for (int w = -half_window; w <= half_window; ++w) // inclusive, so all window_size frames contribute
+				sum += au_vals_tmp[i + w];
+
+			au_iter->second[i] = sum / window_size;
+		}
+	}
+

 }