Initial commit with PAW alignment

2025-12-30 04:52:29 +00:00 · 2018-10-14 14:54:32 +01:00
parent 40af2cb623
commit 110f5f78ba
8 changed files with 86 additions and 28 deletions
--- a/lib/local/FaceAnalyser/include/FaceAnalyser.h
+++ b/lib/local/FaceAnalyser/include/FaceAnalyser.h
@@ -227,6 +227,7 @@ private:
 	int align_height_au;

 	bool align_mask;
+	bool align_paw;
 	double align_scale_out;
 	int align_width_out;
 	int align_height_out;
--- a/lib/local/FaceAnalyser/include/FaceAnalyserParameters.h
+++ b/lib/local/FaceAnalyser/include/FaceAnalyserParameters.h
@@ -64,12 +64,13 @@ public:
 	bool grayscale;

 	// Use getters and setters for these as they might need to reload models and make sure the scale and size ratio makes sense
-	void setAlignedOutput(int output_size, double scale=-1, bool masked = true);
+	void setAlignedOutput(int output_size, double scale=-1, bool masked = true, bool paw = false);
 	// This will also change the model location
 	void OptimizeForVideos();
 	void OptimizeForImages();

 	bool getAlignMask() const { return sim_align_face_mask; }
+	bool getAlignPAW() const { return align_paw; }
 	double getSimScaleOut() const { return sim_scale_out; }
 	int getSimSizeOut() const { return sim_size_out; }
 	bool getDynamic() const { return dynamic; }
@@ -87,6 +88,9 @@ private:
 	// Should aligned face be masked out from background
 	bool sim_align_face_mask;

+	// Should aligned face be PAW aligned, rather than similarity aligned
+	bool align_paw;
+
 	// Should a video stream be assumed
 	bool dynamic;

--- a/lib/local/FaceAnalyser/include/Face_utils.h
+++ b/lib/local/FaceAnalyser/include/Face_utils.h
@@ -47,8 +47,15 @@ namespace FaceAnalysis
 	// Defining a set of useful utility functions to be used within FaceAnalyser

 	// Aligning a face to a common reference frame
-	void AlignFace(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid = true, double scale = 0.7, int width = 96, int height = 96);
-	void AlignFaceMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_<int>& triangulation, bool rigid = true, double scale = 0.7, int width = 96, int height = 96);
+
+	// Aligning a face to a common reference frame using a similarity transform
+	void AlignFaceSimilarity(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid = true, double scale = 0.7, int width = 96, int height = 96);
+
+	// Aligning a face to a common reference frame using a similarity transform and masking out non-face area
+	void AlignFaceSimilarityMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_<int>& triangulation, bool rigid = true, double scale = 0.7, int width = 96, int height = 96);
+
+	// Align a face to a common reference frame using Piece-wise affine warping on triangles
+	void AlignFacePAW(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, const LandmarkDetector::PDM& pdm, const cv::Mat_<int> triangulation, double scale = 0.7, int width = 96, int height = 96);

 	void Extract_FHOG_descriptor(cv::Mat_<double>& descriptor, const cv::Mat& image, int& num_rows, int& num_cols, int cell_size = 8);

--- a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp
+++ b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp
@@ -64,6 +64,7 @@ FaceAnalyser::FaceAnalyser(const FaceAnalysis::FaceAnalyserParameters& face_anal
 	this->Read(face_analyser_params.getModelLoc());
 		
 	align_mask = face_analyser_params.getAlignMask();
+	align_paw = face_analyser_params.getAlignPAW();
 	align_scale_out = face_analyser_params.getSimScaleOut();
 	align_width_out = face_analyser_params.getSimSizeOut();
 	align_height_out = face_analyser_params.getSimSizeOut();
@@ -258,22 +259,26 @@ void FaceAnalyser::PredictStaticAUsAndComputeFeatures(const cv::Mat& frame, cons
 	pdm.CalcParams(params_global, params_local, detected_landmarks);

 	// The aligned face requirement for AUs
-	AlignFaceMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au);
+	AlignFaceSimilarityMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au);

 	// If the aligned face for AU matches the output requested one, just reuse it, else compute it
-	if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask)
+	if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask && !align_paw)
 	{
 		aligned_face_for_output = aligned_face_for_au.clone();
 	}
 	else
 	{
-		if (align_mask)
+		if (align_mask && !align_paw)
 		{
-			AlignFaceMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out);
+			AlignFaceSimilarityMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out);
+		}
+		if (align_paw)
+		{
+			AlignFacePAW(aligned_face_for_output, frame, detected_landmarks, pdm, triangulation, align_scale_out, align_width_out, align_height_out);
 		}
 		else
 		{
-			AlignFace(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out);
+			AlignFaceSimilarity(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out);
 		}
 	}

@@ -340,22 +345,26 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const cv::Mat_<float>& det
 		pdm.CalcParams(params_global, params_local, detected_landmarks);

 		// The aligned face requirement for AUs
-		AlignFaceMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au);
+		AlignFaceSimilarityMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au);

 		// If the aligned face for AU matches the output requested one, just reuse it, else compute it
-		if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask)
+		if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask && !align_paw)
 		{
 			aligned_face_for_output = aligned_face_for_au.clone();
 		}
 		else
 		{
-			if (align_mask)
+			if (align_mask && !align_paw)
 			{
-				AlignFaceMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out);
+				AlignFaceSimilarityMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out);
+			}
+			if (align_paw)
+			{
+				AlignFacePAW(aligned_face_for_output, frame, detected_landmarks, pdm, triangulation, align_scale_out, align_width_out, align_height_out);
 			}
 			else
 			{
-				AlignFace(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out);
+				AlignFaceSimilarity(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out);
 			}
 		}
 	}
--- a/lib/local/FaceAnalyser/src/FaceAnalyserParameters.cpp
+++ b/lib/local/FaceAnalyser/src/FaceAnalyserParameters.cpp
@@ -93,6 +93,11 @@ FaceAnalyserParameters::FaceAnalyserParameters(vector<string> &arguments):root()
 			sim_align_face_mask = false;
 			valid[i] = false;
 		}
+		else if (arguments[i].compare("-align_paw") == 0)
+		{
+			align_paw = true;
+			valid[i] = false;
+		}
 		else if (arguments[i].compare("-simscale") == 0)
 		{
 			sim_scale_out = stod(arguments[i + 1]);
@@ -161,6 +166,7 @@ void FaceAnalyserParameters::init()
 	this->sim_scale_out = 0.7;
 	this->sim_size_out = 112;
 	this->sim_align_face_mask = true;
+	this->align_paw = false;

 	this->model_location = "AU_predictors/main_dynamic_svms.txt";

@@ -190,7 +196,7 @@ void FaceAnalyserParameters::init()
 }

 // Use getters and setters for these as they might need to reload models and make sure the scale and size ratio makes sense
-void FaceAnalyserParameters::setAlignedOutput(int output_size, double scale, bool masked)
+void FaceAnalyserParameters::setAlignedOutput(int output_size, double scale, bool masked, bool paw)
 {
 	this->sim_size_out = output_size;
 	// If we set the size but not the scale, adapt the scale to the right size
@@ -204,6 +210,7 @@ void FaceAnalyserParameters::setAlignedOutput(int output_size, double scale, boo
 	}

 	this->sim_align_face_mask = masked;
+	this->align_paw = paw;

 }
 // This will also change the model location
--- a/lib/local/FaceAnalyser/src/Face_utils.cpp
+++ b/lib/local/FaceAnalyser/src/Face_utils.cpp
@@ -115,8 +115,8 @@ namespace FaceAnalysis
 		}
 	}

-	// Aligning a face to a common reference frame
-	void AlignFace(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid, double sim_scale, int out_width, int out_height)
+	// Aligning a face to a common reference frame using a similarity transform
+	void AlignFaceSimilarity(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid, double sim_scale, int out_width, int out_height)
 	{
 		// Will warp to scaled mean shape
 		cv::Mat_<float> similarity_normalised_shape = pdm.mean_shape * sim_scale;
@@ -154,8 +154,8 @@ namespace FaceAnalysis
 		cv::warpAffine(frame, aligned_face, warp_matrix, cv::Size(out_width, out_height), cv::INTER_LINEAR);
 	}

-	// Aligning a face to a common reference frame
-	void AlignFaceMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_<int>& triangulation, bool rigid, double sim_scale, int out_width, int out_height)
+	// Aligning a face to a common reference frame using a similarity transform and masking out non-face area
+	void AlignFaceSimilarityMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_<int>& triangulation, bool rigid, double sim_scale, int out_width, int out_height)
 	{
 		// Will warp to scaled mean shape
 		cv::Mat_<float> similarity_normalised_shape = pdm.mean_shape * sim_scale;
@@ -217,7 +217,7 @@ namespace FaceAnalysis

 		destination_landmarks = cv::Mat(destination_landmarks.t()).reshape(1, 1).t();

-		LandmarkDetector::PAW paw(destination_landmarks, triangulation, 0, 0, aligned_face.cols-1, aligned_face.rows-1);
+		LandmarkDetector::PAW paw(destination_landmarks, triangulation, 0,0, aligned_face.cols-1, aligned_face.rows-1, aligned_face.cols, aligned_face.rows);
 		
 		// Mask each of the channels (a bit of a roundabout way, but OpenCV 3.1 in debug mode doesn't seem to be able to handle a more direct way using split and merge)
 		vector<cv::Mat> aligned_face_channels(aligned_face.channels());
@@ -243,6 +243,39 @@ namespace FaceAnalysis
 		}
 	}

+	// Align a face to a common reference frame using Piece-wise affine warping on triangles
+	void AlignFacePAW(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_<float>& source_landmarks, const LandmarkDetector::PDM& pdm, const cv::Mat_<int> triangulation, double scale, int width, int height)
+	{
+		// Will warp to scaled mean shape
+		cv::Mat_<float> similarity_normalised_shape = pdm.mean_shape * scale;
+
+		int num_verts = pdm.NumberOfPoints();
+
+		// Discard the z component
+		similarity_normalised_shape = similarity_normalised_shape(cv::Rect(0, 0, 1, 2 * num_verts)).clone();
+
+		// Center around output image
+		double min_x;
+		double max_x;
+		cv::minMaxLoc(similarity_normalised_shape(cv::Rect(0, 0, 1, num_verts)), &min_x, &max_x);
+
+		double min_y;
+		double max_y;
+		cv::minMaxLoc(similarity_normalised_shape(cv::Rect(0, num_verts, 1, num_verts)), &min_y, &max_y);
+
+		float add_x = width / 2.0f - (max_x + min_x) / 2.0f;
+		float add_y = height / 2.0f - (max_x + min_x) / 2.0f;
+
+		similarity_normalised_shape(cv::Rect(0, 0, 1, num_verts)) += add_x;
+		similarity_normalised_shape(cv::Rect(0, num_verts, 1, num_verts)) += add_y;
+
+		LandmarkDetector::PAW paw(similarity_normalised_shape, triangulation, 0, 0, width, height, width, height);
+		cv::Mat warped;
+
+		paw.Warp(frame, aligned_face, source_landmarks);
+
+	}
+
 	// Create a row vector Felzenszwalb HOG descriptor from a given image
 	void Extract_FHOG_descriptor(cv::Mat_<double>& descriptor, const cv::Mat& image, int& num_rows, int& num_cols, int cell_size)
 	{
--- a/lib/local/LandmarkDetector/include/PAW.h
+++ b/lib/local/LandmarkDetector/include/PAW.h
@@ -102,8 +102,8 @@ namespace LandmarkDetector
 		PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation);

 		// The final optional argument allows for optimisation if the triangle indices from previous frame are known (for tracking in video)
-		PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y);
-
+		PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y, int out_width, int out_height);
+		
 		// Copy constructor
 		PAW(const PAW& other);

--- a/lib/local/LandmarkDetector/src/PAW.cpp
+++ b/lib/local/LandmarkDetector/src/PAW.cpp
@@ -166,7 +166,7 @@ PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangul
 }

 // Manually define min and max values
-PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y)
+PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y, int out_width, int out_height)
 {
 	// Initialise some variables directly
 	this->destination_landmarks = destination_shape;
@@ -242,14 +242,11 @@ PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangul

 	max_x = in_max_x;
 	max_y = in_max_y;
-
-	int w = (int)(max_x - min_x + 1.5);
-	int h = (int)(max_y - min_y + 1.5);
-
+	
 	// Round the min_x and min_y for simplicity?

-	pixel_mask = cv::Mat_<uchar>(h, w, (uchar)0);
-	triangle_id = cv::Mat_<int>(h, w, -1);
+	pixel_mask = cv::Mat_<uchar>(out_height, out_width, (uchar)0);
+	triangle_id = cv::Mat_<int>(out_height, out_width, -1);

 	int curr_tri = -1;