From 110f5f78ba72691676069090143f26cd5d4088f9 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Sun, 14 Oct 2018 14:54:32 +0100 Subject: [PATCH] Initial commit with PAW alignment --- lib/local/FaceAnalyser/include/FaceAnalyser.h | 1 + .../include/FaceAnalyserParameters.h | 6 ++- lib/local/FaceAnalyser/include/Face_utils.h | 11 ++++- lib/local/FaceAnalyser/src/FaceAnalyser.cpp | 29 ++++++++----- .../src/FaceAnalyserParameters.cpp | 9 +++- lib/local/FaceAnalyser/src/Face_utils.cpp | 43 ++++++++++++++++--- lib/local/LandmarkDetector/include/PAW.h | 4 +- lib/local/LandmarkDetector/src/PAW.cpp | 11 ++--- 8 files changed, 86 insertions(+), 28 deletions(-) diff --git a/lib/local/FaceAnalyser/include/FaceAnalyser.h b/lib/local/FaceAnalyser/include/FaceAnalyser.h index 5e5c6844..10b75c12 100644 --- a/lib/local/FaceAnalyser/include/FaceAnalyser.h +++ b/lib/local/FaceAnalyser/include/FaceAnalyser.h @@ -227,6 +227,7 @@ private: int align_height_au; bool align_mask; + bool align_paw; double align_scale_out; int align_width_out; int align_height_out; diff --git a/lib/local/FaceAnalyser/include/FaceAnalyserParameters.h b/lib/local/FaceAnalyser/include/FaceAnalyserParameters.h index 4bc4f1ff..0fe66741 100644 --- a/lib/local/FaceAnalyser/include/FaceAnalyserParameters.h +++ b/lib/local/FaceAnalyser/include/FaceAnalyserParameters.h @@ -64,12 +64,13 @@ public: bool grayscale; // Use getters and setters for these as they might need to reload models and make sure the scale and size ratio makes sense - void setAlignedOutput(int output_size, double scale=-1, bool masked = true); + void setAlignedOutput(int output_size, double scale=-1, bool masked = true, bool paw = false); // This will also change the model location void OptimizeForVideos(); void OptimizeForImages(); bool getAlignMask() const { return sim_align_face_mask; } + bool getAlignPAW() const { return align_paw; } double getSimScaleOut() const { return sim_scale_out; } int getSimSizeOut() const { return sim_size_out; } 
bool getDynamic() const { return dynamic; } @@ -87,6 +88,9 @@ private: // Should aligned face be masked out from background bool sim_align_face_mask; + // Should aligned face be PAW aligned, rather than similarity aligned + bool align_paw; + // Should a video stream be assumed bool dynamic; diff --git a/lib/local/FaceAnalyser/include/Face_utils.h b/lib/local/FaceAnalyser/include/Face_utils.h index 6922acf4..d65f11d0 100644 --- a/lib/local/FaceAnalyser/include/Face_utils.h +++ b/lib/local/FaceAnalyser/include/Face_utils.h @@ -47,8 +47,15 @@ namespace FaceAnalysis // Defining a set of useful utility functions to be used within FaceAnalyser // Aligning a face to a common reference frame - void AlignFace(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid = true, double scale = 0.7, int width = 96, int height = 96); - void AlignFaceMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_& triangulation, bool rigid = true, double scale = 0.7, int width = 96, int height = 96); + + // Aligning a face to a common reference frame using a similarity transform + void AlignFaceSimilarity(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid = true, double scale = 0.7, int width = 96, int height = 96); + + // Aligning a face to a common reference frame using a similarity transform and masking out non-face area + void AlignFaceSimilarityMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_& triangulation, bool rigid = true, double scale = 0.7, int width = 96, int height = 96); + + // Align a face to a common reference frame using Piece-wise affine warping on triangles + void 
AlignFacePAW(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, const LandmarkDetector::PDM& pdm, const cv::Mat_ triangulation, double scale = 0.7, int width = 96, int height = 96); void Extract_FHOG_descriptor(cv::Mat_& descriptor, const cv::Mat& image, int& num_rows, int& num_cols, int cell_size = 8); diff --git a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp index 8fa93d9b..9a4c2472 100644 --- a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp +++ b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp @@ -64,6 +64,7 @@ FaceAnalyser::FaceAnalyser(const FaceAnalysis::FaceAnalyserParameters& face_anal this->Read(face_analyser_params.getModelLoc()); align_mask = face_analyser_params.getAlignMask(); + align_paw = face_analyser_params.getAlignPAW(); align_scale_out = face_analyser_params.getSimScaleOut(); align_width_out = face_analyser_params.getSimSizeOut(); align_height_out = face_analyser_params.getSimSizeOut(); @@ -258,22 +259,26 @@ void FaceAnalyser::PredictStaticAUsAndComputeFeatures(const cv::Mat& frame, cons pdm.CalcParams(params_global, params_local, detected_landmarks); // The aligned face requirement for AUs - AlignFaceMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au); + AlignFaceSimilarityMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au); // If the aligned face for AU matches the output requested one, just reuse it, else compute it - if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask) + if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask && !align_paw) { aligned_face_for_output = aligned_face_for_au.clone(); } else { - if (align_mask) + if (align_mask && 
!align_paw) { - AlignFaceMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out); + AlignFaceSimilarityMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out); + } + else if (align_paw) + { + AlignFacePAW(aligned_face_for_output, frame, detected_landmarks, pdm, triangulation, align_scale_out, align_width_out, align_height_out); } else { - AlignFace(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out); + AlignFaceSimilarity(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out); } } @@ -340,22 +345,26 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const cv::Mat_& det pdm.CalcParams(params_global, params_local, detected_landmarks); // The aligned face requirement for AUs - AlignFaceMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au); + AlignFaceSimilarityMask(aligned_face_for_au, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_au, align_width_au, align_height_au); // If the aligned face for AU matches the output requested one, just reuse it, else compute it - if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask) + if (align_scale_out == align_scale_au && align_width_out == align_width_au && align_height_out == align_height_au && align_mask && !align_paw) { aligned_face_for_output = aligned_face_for_au.clone(); } else { - if (align_mask) + if (align_mask && !align_paw) { - AlignFaceMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out); +
AlignFaceSimilarityMask(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, triangulation, true, align_scale_out, align_width_out, align_height_out); + } + else if (align_paw) + { + AlignFacePAW(aligned_face_for_output, frame, detected_landmarks, pdm, triangulation, align_scale_out, align_width_out, align_height_out); } else { - AlignFace(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out); + AlignFaceSimilarity(aligned_face_for_output, frame, detected_landmarks, params_global, pdm, true, align_scale_out, align_width_out, align_height_out); } } } diff --git a/lib/local/FaceAnalyser/src/FaceAnalyserParameters.cpp b/lib/local/FaceAnalyser/src/FaceAnalyserParameters.cpp index c5d3219b..01a16546 100644 --- a/lib/local/FaceAnalyser/src/FaceAnalyserParameters.cpp +++ b/lib/local/FaceAnalyser/src/FaceAnalyserParameters.cpp @@ -93,6 +93,11 @@ FaceAnalyserParameters::FaceAnalyserParameters(vector &arguments):root() sim_align_face_mask = false; valid[i] = false; } + else if (arguments[i].compare("-align_paw") == 0) + { + align_paw = true; + valid[i] = false; + } else if (arguments[i].compare("-simscale") == 0) { sim_scale_out = stod(arguments[i + 1]); @@ -161,6 +166,7 @@ void FaceAnalyserParameters::init() this->sim_scale_out = 0.7; this->sim_size_out = 112; this->sim_align_face_mask = true; + this->align_paw = false; this->model_location = "AU_predictors/main_dynamic_svms.txt"; @@ -190,7 +196,7 @@ void FaceAnalyserParameters::init() } // Use getters and setters for these as they might need to reload models and make sure the scale and size ratio makes sense -void FaceAnalyserParameters::setAlignedOutput(int output_size, double scale, bool masked) +void FaceAnalyserParameters::setAlignedOutput(int output_size, double scale, bool masked, bool paw) { this->sim_size_out = output_size; // If we set the size but not the scale, adapt the scale to the right size @@ -204,6 +210,7 @@ void
FaceAnalyserParameters::setAlignedOutput(int output_size, double scale, boo } this->sim_align_face_mask = masked; + this->align_paw = paw; } // This will also change the model location diff --git a/lib/local/FaceAnalyser/src/Face_utils.cpp b/lib/local/FaceAnalyser/src/Face_utils.cpp index 9a924d0e..33c360eb 100644 --- a/lib/local/FaceAnalyser/src/Face_utils.cpp +++ b/lib/local/FaceAnalyser/src/Face_utils.cpp @@ -115,8 +115,8 @@ namespace FaceAnalysis } } - // Aligning a face to a common reference frame - void AlignFace(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid, double sim_scale, int out_width, int out_height) + // Aligning a face to a common reference frame using a similarity transform + void AlignFaceSimilarity(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, bool rigid, double sim_scale, int out_width, int out_height) { // Will warp to scaled mean shape cv::Mat_ similarity_normalised_shape = pdm.mean_shape * sim_scale; @@ -154,8 +154,8 @@ namespace FaceAnalysis cv::warpAffine(frame, aligned_face, warp_matrix, cv::Size(out_width, out_height), cv::INTER_LINEAR); } - // Aligning a face to a common reference frame - void AlignFaceMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_& triangulation, bool rigid, double sim_scale, int out_width, int out_height) + // Aligning a face to a common reference frame using a similarity transform and masking out non-face area + void AlignFaceSimilarityMask(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& detected_landmarks, cv::Vec6f params_global, const LandmarkDetector::PDM& pdm, const cv::Mat_& triangulation, bool rigid, double sim_scale, int out_width, int out_height) { // Will warp to scaled mean shape cv::Mat_ 
similarity_normalised_shape = pdm.mean_shape * sim_scale; @@ -217,7 +217,7 @@ namespace FaceAnalysis destination_landmarks = cv::Mat(destination_landmarks.t()).reshape(1, 1).t(); - LandmarkDetector::PAW paw(destination_landmarks, triangulation, 0, 0, aligned_face.cols-1, aligned_face.rows-1); + LandmarkDetector::PAW paw(destination_landmarks, triangulation, 0,0, aligned_face.cols-1, aligned_face.rows-1, aligned_face.cols, aligned_face.rows); // Mask each of the channels (a bit of a roundabout way, but OpenCV 3.1 in debug mode doesn't seem to be able to handle a more direct way using split and merge) vector aligned_face_channels(aligned_face.channels()); @@ -243,6 +243,39 @@ namespace FaceAnalysis } } + // Align a face to a common reference frame using Piece-wise affine warping on triangles + void AlignFacePAW(cv::Mat& aligned_face, const cv::Mat& frame, const cv::Mat_& source_landmarks, const LandmarkDetector::PDM& pdm, const cv::Mat_ triangulation, double scale, int width, int height) + { + // Will warp to scaled mean shape + cv::Mat_ similarity_normalised_shape = pdm.mean_shape * scale; + + int num_verts = pdm.NumberOfPoints(); + + // Discard the z component + similarity_normalised_shape = similarity_normalised_shape(cv::Rect(0, 0, 1, 2 * num_verts)).clone(); + + // Center around output image + double min_x; + double max_x; + cv::minMaxLoc(similarity_normalised_shape(cv::Rect(0, 0, 1, num_verts)), &min_x, &max_x); + + double min_y; + double max_y; + cv::minMaxLoc(similarity_normalised_shape(cv::Rect(0, num_verts, 1, num_verts)), &min_y, &max_y); + + float add_x = width / 2.0f - (max_x + min_x) / 2.0f; + float add_y = height / 2.0f - (max_y + min_y) / 2.0f; + + similarity_normalised_shape(cv::Rect(0, 0, 1, num_verts)) += add_x; + similarity_normalised_shape(cv::Rect(0, num_verts, 1, num_verts)) += add_y; + + LandmarkDetector::PAW paw(similarity_normalised_shape, triangulation, 0, 0, width, height, width, height); + cv::Mat warped; + + paw.Warp(frame, aligned_face,
source_landmarks); + + } + // Create a row vector Felzenszwalb HOG descriptor from a given image void Extract_FHOG_descriptor(cv::Mat_& descriptor, const cv::Mat& image, int& num_rows, int& num_cols, int cell_size) { diff --git a/lib/local/LandmarkDetector/include/PAW.h b/lib/local/LandmarkDetector/include/PAW.h index e4b24f81..9145f6d1 100644 --- a/lib/local/LandmarkDetector/include/PAW.h +++ b/lib/local/LandmarkDetector/include/PAW.h @@ -102,8 +102,8 @@ namespace LandmarkDetector PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangulation); // The final optional argument allows for optimisation if the triangle indices from previous frame are known (for tracking in video) - PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y); - + PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y, int out_width, int out_height); + // Copy constructor PAW(const PAW& other); diff --git a/lib/local/LandmarkDetector/src/PAW.cpp b/lib/local/LandmarkDetector/src/PAW.cpp index 974184b3..015a7bd2 100644 --- a/lib/local/LandmarkDetector/src/PAW.cpp +++ b/lib/local/LandmarkDetector/src/PAW.cpp @@ -166,7 +166,7 @@ PAW::PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangul } // Manually define min and max values -PAW::PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y) +PAW::PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y, int out_width, int out_height) { // Initialise some variables directly this->destination_landmarks = destination_shape; @@ -242,14 +242,11 @@ PAW::PAW(const cv::Mat_& destination_shape, const cv::Mat_& triangul max_x = in_max_x; max_y = in_max_y; - - int w = (int)(max_x - min_x + 1.5); - int h = (int)(max_y - min_y + 1.5); - + // 
Round the min_x and min_y for simplicity? - pixel_mask = cv::Mat_(h, w, (uchar)0); - triangle_id = cv::Mat_(h, w, -1); + pixel_mask = cv::Mat_(out_height, out_width, (uchar)0); + triangle_id = cv::Mat_(out_height, out_width, -1); int curr_tri = -1;