From aa2b0c9def120c78bfdc1bbdcf15b2206b7aff0e Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Tue, 27 Feb 2018 08:08:02 +0000 Subject: [PATCH] Adapting eye gaze script, moving to new multi-face interface, a bug fix in rotation conversion --- exe/FaceLandmarkImg/FaceLandmarkImg.cpp | 13 ++- exe/FaceLandmarkVid/FaceLandmarkVid.cpp | 42 +++---- .../FaceLandmarkVidMulti.cpp | 104 ++++++++++++++---- .../FaceLandmarkVidMulti.vcxproj | 14 ++- exe/FeatureExtraction/FeatureExtraction.cpp | 24 +++- lib/local/Utilities/include/RecorderCSV.h | 4 +- .../Utilities/include/RecorderOpenFace.h | 13 ++- .../include/RecorderOpenFaceParameters.h | 2 + lib/local/Utilities/include/RotationHelpers.h | 5 +- lib/local/Utilities/include/SequenceCapture.h | 2 + lib/local/Utilities/src/RecorderCSV.cpp | 9 +- lib/local/Utilities/src/RecorderOpenFace.cpp | 29 +++-- .../src/RecorderOpenFaceParameters.cpp | 54 ++++----- lib/local/Utilities/src/SequenceCapture.cpp | 21 +++- .../Gaze Experiments/extract_mpii_gaze_test.m | 61 +++++----- .../Gaze Experiments/mpii_1500_errs.txt | 2 +- 16 files changed, 266 insertions(+), 133 deletions(-) diff --git a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp index d2a44cdb..7b4e3657 100644 --- a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp +++ b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp @@ -85,6 +85,14 @@ int main(int argc, char **argv) //Convert arguments to more convenient vector form vector arguments = get_arguments(argc, argv); + // no arguments: output usage + if (arguments.size() == 1) + { + cout << "For command line arguments see:" << endl; + cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments"; + return 0; + } + // Prepare for image reading Utilities::ImageCapture image_reader; @@ -119,11 +127,11 @@ int main(int argc, char **argv) cv::Mat rgb_image; rgb_image = image_reader.GetNextImage(); - + cout << "Starting tracking" << endl; while (!rgb_image.empty()) { - + Utilities::RecorderOpenFaceParameters recording_params(arguments, false, false, image_reader.fx, image_reader.fy, image_reader.cx, image_reader.cy); Utilities::RecorderOpenFace open_face_rec(image_reader.name, recording_params, arguments); @@ -211,6 +219,7 @@ int main(int argc, char **argv) open_face_rec.SetObservationPose(pose_estimate); open_face_rec.SetObservationGaze(gaze_direction0, gaze_direction1, gaze_angle, LandmarkDetector::CalculateAllEyeLandmarks(face_model), LandmarkDetector::Calculate3DEyeLandmarks(face_model, image_reader.fx, image_reader.fy, image_reader.cx, image_reader.cy)); open_face_rec.SetObservationFaceAlign(sim_warped_img); + open_face_rec.SetObservationFaceID(face); open_face_rec.WriteObservation(); } diff --git a/exe/FaceLandmarkVid/FaceLandmarkVid.cpp b/exe/FaceLandmarkVid/FaceLandmarkVid.cpp index 9c436336..11d85933 100644 --- a/exe/FaceLandmarkVid/FaceLandmarkVid.cpp +++ b/exe/FaceLandmarkVid/FaceLandmarkVid.cpp @@ -91,6 +91,14 @@ int main(int argc, char **argv) vector arguments = get_arguments(argc, argv); + // no arguments: output usage + if (arguments.size() == 1) + { + cout << "For command line arguments see:" << endl; + cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments"; + return 0; + } + LandmarkDetector::FaceModelParameters det_parameters(arguments); // The modules that are being used for tracking @@ -113,37 +121,21 @@ int main(int argc, char **argv) // The sequence reader chooses what to open based on command line arguments provided if (!sequence_reader.Open(arguments)) - { - // If failed to open because no input files specified, attempt to open a webcam - if (sequence_reader.no_input_specified && sequence_number == 0) - { - // If that fails, revert to webcam - INFO_STREAM("No input specified, attempting to open a webcam 0 at 640 x 480px"); - if (!sequence_reader.OpenWebcam(0, 640, 480)) - { - ERROR_STREAM("Failed to open the webcam"); - break; - } - } - else - { - // Either reached the end of sequences provided or failed to open them - break; - } - } + break; + INFO_STREAM("Device or file opened"); - cv::Mat rgb_image = sequence_reader.GetNextFrame(); + cv::Mat captured_image = sequence_reader.GetNextFrame(); INFO_STREAM("Starting tracking"); - while (!rgb_image.empty()) // this is not a for loop as we might also be reading from a webcam + while (!captured_image.empty()) // this is not a for loop as we might also be reading from a webcam { - // Reading the grayscale image as well (face detection is done in RGB, landmark detection in grayscale) + // Reading the images cv::Mat_ grayscale_image = sequence_reader.GetGrayFrame(); // The actual facial landmark detection / tracking - bool detection_success = LandmarkDetector::DetectLandmarksInVideo(rgb_image, face_model, det_parameters, grayscale_image); + bool detection_success = LandmarkDetector::DetectLandmarksInVideo(grayscale_image, face_model, det_parameters); // Gaze tracking, absolute gaze direction cv::Point3f gazeDirection0(0, 0, -1); @@ -157,13 +149,13 @@ int main(int argc, char **argv) } // Work out the pose of the head from the tracked model - cv::Vec6f pose_estimate = LandmarkDetector::GetPose(face_model, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); + cv::Vec6d pose_estimate = LandmarkDetector::GetPose(face_model, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); // Keeping track of FPS fps_tracker.AddFrame(); // Displaying the tracking visualizations - visualizer.SetImage(rgb_image, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); + visualizer.SetImage(captured_image, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); visualizer.SetObservationLandmarks(face_model.detected_landmarks, face_model.detection_certainty, face_model.GetVisibilities()); visualizer.SetObservationPose(pose_estimate, face_model.detection_certainty); visualizer.SetObservationGaze(gazeDirection0, gazeDirection1, LandmarkDetector::CalculateAllEyeLandmarks(face_model), LandmarkDetector::Calculate3DEyeLandmarks(face_model, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy), face_model.detection_certainty); @@ -183,7 +175,7 @@ int main(int argc, char **argv) } // Grabbing the next frame in the sequence - rgb_image = sequence_reader.GetNextFrame(); + captured_image = sequence_reader.GetNextFrame(); } diff --git a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp index cf72623a..c76dcfb2 100644 --- a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp +++ b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp @@ -39,6 +39,10 @@ #include "VisualizationUtils.h" #include "Visualizer.h" #include "SequenceCapture.h" +#include +#include +#include +#include #include #include @@ -110,6 +114,13 @@ int main(int argc, char **argv) vector arguments = get_arguments(argc, argv); + // no arguments: output usage + if (arguments.size() == 1) + { + cout << "For command line arguments see:" << endl; + cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments"; + return 0; + } LandmarkDetector::FaceModelParameters det_params(arguments); // This is so that the model would not try re-initialising itself @@ -146,6 +157,11 @@ int main(int argc, char **argv) det_parameters.push_back(det_params); } + // Load facial feature extractor and AU analyser (make sure it is static, as we don't reidentify faces) + FaceAnalysis::FaceAnalyserParameters face_analysis_params(arguments); + face_analysis_params.OptimizeForImages(); + FaceAnalysis::FaceAnalyser face_analyser(face_analysis_params); + // Open a sequence Utilities::SequenceCapture sequence_reader; @@ -160,32 +176,36 @@ int main(int argc, char **argv) while (true) // this is not a for loop as we might also be reading from a webcam { - // The sequence reader chooses what to open based on command line arguments provided if (!sequence_reader.Open(arguments)) - { - // If failed to open because no input files specified, attempt to open a webcam - if (sequence_reader.no_input_specified && sequence_number == 0) - { - // If that fails, revert to webcam - INFO_STREAM("No input specified, attempting to open a webcam 0"); - if (!sequence_reader.OpenWebcam(0)) - { - ERROR_STREAM("Failed to open the webcam"); - break; - } - } - else - { - break; - } - } + break; + INFO_STREAM("Device or file opened"); cv::Mat rgb_image = sequence_reader.GetNextFrame(); int frame_count = 0; + Utilities::RecorderOpenFaceParameters recording_params(arguments, true, sequence_reader.IsWebcam(), + sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy, sequence_reader.fps); + // Do not do AU detection on multi-face case as it is not supported + recording_params.setOutputAUs(false); + Utilities::RecorderOpenFace open_face_rec(sequence_reader.name, recording_params, arguments); + + if (recording_params.outputGaze() && !face_model.eye_model) + cout << "WARNING: no eye model defined, but outputting gaze" << endl; + + if (sequence_reader.IsWebcam()) + { + INFO_STREAM("WARNING: using a webcam in feature extraction, forcing visualization of tracking to allow quitting the application (press q)"); + visualizer.vis_track = true; + } + + if (recording_params.outputAUs()) + { + INFO_STREAM("WARNING: using a AU detection in multiple face mode, it might not be as accurate and is experimental"); + } + INFO_STREAM("Starting tracking"); while (!rgb_image.empty()) { @@ -282,14 +302,60 @@ int main(int argc, char **argv) visualizer.SetImage(rgb_image, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); - // Go through every model and visualise the results + // Go through every model and detect eye gaze, record results and visualise the results for (size_t model = 0; model < face_models.size(); ++model) { // Visualising the results if (active_models[model]) { + + // Estimate head pose and eye gaze + cv::Vec6d pose_estimate = LandmarkDetector::GetPose(face_models[model], sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); + + cv::Point3f gaze_direction0(0, 0, 0); cv::Point3f gaze_direction1(0, 0, 0); cv::Vec2d gaze_angle(0, 0); + + // Detect eye gazes + if (face_models[model].detection_success && face_model.eye_model) + { + GazeAnalysis::EstimateGaze(face_models[model], gaze_direction0, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy, true); + GazeAnalysis::EstimateGaze(face_models[model], gaze_direction1, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy, false); + gaze_angle = GazeAnalysis::GetGazeAngle(gaze_direction0, gaze_direction1); + } + + // Face analysis step + cv::Mat sim_warped_img; + cv::Mat_ hog_descriptor; int num_hog_rows = 0, num_hog_cols = 0; + + // Perform AU detection and HOG feature extraction, as this can be expensive only compute it if needed by output or visualization + if (recording_params.outputAlignedFaces() || recording_params.outputHOG() || recording_params.outputAUs() || visualizer.vis_align || visualizer.vis_hog) + { + face_analyser.PredictStaticAUsAndComputeFeatures(rgb_image, face_models[model].detected_landmarks); + face_analyser.GetLatestAlignedFace(sim_warped_img); + face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols); + } + + // Visualize the features + visualizer.SetObservationFaceAlign(sim_warped_img); + visualizer.SetObservationHOG(hog_descriptor, num_hog_rows, num_hog_cols); visualizer.SetObservationLandmarks(face_models[model].detected_landmarks, face_models[model].detection_certainty); visualizer.SetObservationPose(LandmarkDetector::GetPose(face_models[model], sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy), face_models[model].detection_certainty); + visualizer.SetObservationGaze(gaze_direction0, gaze_direction1, LandmarkDetector::CalculateAllEyeLandmarks(face_models[model]), LandmarkDetector::Calculate3DEyeLandmarks(face_models[model], sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy), face_models[model].detection_certainty); + + // Output features + open_face_rec.SetObservationHOG(face_models[model].detection_success, hog_descriptor, num_hog_rows, num_hog_cols, 31); // The number of channels in HOG is fixed at the moment, as using FHOG + open_face_rec.SetObservationVisualization(visualizer.GetVisImage()); + open_face_rec.SetObservationActionUnits(face_analyser.GetCurrentAUsReg(), face_analyser.GetCurrentAUsClass()); + open_face_rec.SetObservationLandmarks(face_models[model].detected_landmarks, face_models[model].GetShape(sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy), + face_models[model].params_global, face_models[model].params_local, face_models[model].detection_certainty, face_models[model].detection_success); + open_face_rec.SetObservationPose(pose_estimate); + open_face_rec.SetObservationGaze(gaze_direction0, gaze_direction1, gaze_angle, LandmarkDetector::CalculateAllEyeLandmarks(face_models[model]), LandmarkDetector::Calculate3DEyeLandmarks(face_models[model], sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy)); + open_face_rec.SetObservationFaceAlign(sim_warped_img); + open_face_rec.SetObservationFaceID(model); + open_face_rec.SetObservationTimestamp(sequence_reader.time_stamp); + open_face_rec.SetObservationFrameNumber(sequence_reader.GetFrameNumber()); + open_face_rec.WriteObservation(); + + } } visualizer.SetFps(fps_tracker.GetFPS()); diff --git a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj index dddc313e..da0802d1 100644 --- a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj +++ b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj @@ -105,7 +105,7 @@ Level3 Disabled - $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;%(AdditionalIncludeDirectories) + $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;$(SolutionDir)\lib\local\FaceAnalyser\include;$(SolutionDir)\lib\local\GazeAnalyser\include;%(AdditionalIncludeDirectories) StreamingSIMDExtensions2 true @@ -117,7 +117,7 @@ Level3 Disabled - $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;%(AdditionalIncludeDirectories) + $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;$(SolutionDir)\lib\local\FaceAnalyser\include;$(SolutionDir)\lib\local\GazeAnalyser\include;%(AdditionalIncludeDirectories) AdvancedVectorExtensions true @@ -131,7 +131,7 @@ MaxSpeed true true - $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;%(AdditionalIncludeDirectories) + $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;$(SolutionDir)\lib\local\FaceAnalyser\include;$(SolutionDir)\lib\local\GazeAnalyser\include;%(AdditionalIncludeDirectories) StreamingSIMDExtensions2 MultiThreadedDLL true @@ -148,7 +148,7 @@ MaxSpeed true true - $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;%(AdditionalIncludeDirectories) + $(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;$(SolutionDir)\lib\local\FaceAnalyser\include;$(SolutionDir)\lib\local\GazeAnalyser\include;%(AdditionalIncludeDirectories) AdvancedVectorExtensions MultiThreadedDLL true @@ -163,6 +163,12 @@ + + {0e7fc556-0e80-45ea-a876-dde4c2fedcd7} + + + {5f915541-f531-434f-9c81-79f5db58012b} + {bdc1d107-de17-4705-8e7b-cdde8bfb2bf8} diff --git a/exe/FeatureExtraction/FeatureExtraction.cpp b/exe/FeatureExtraction/FeatureExtraction.cpp index 1d0710a5..0b0f43a7 100644 --- a/exe/FeatureExtraction/FeatureExtraction.cpp +++ b/exe/FeatureExtraction/FeatureExtraction.cpp @@ -103,6 +103,14 @@ int main(int argc, char **argv) vector arguments = get_arguments(argc, argv); + // no arguments: output usage + if (arguments.size() == 1) + { + cout << "For command line arguments see:" << endl; + cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments"; + return 0; + } + // Load the modules that are being used for tracking and face analysis // Load face landmark detector LandmarkDetector::FaceModelParameters det_parameters(arguments); @@ -138,6 +146,8 @@ int main(int argc, char **argv) visualizer.vis_track = true; } + cv::Mat captured_image; + Utilities::RecorderOpenFaceParameters recording_params(arguments, true, sequence_reader.IsWebcam(), sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy, sequence_reader.fps); Utilities::RecorderOpenFace open_face_rec(sequence_reader.name, recording_params, arguments); @@ -145,20 +155,20 @@ int main(int argc, char **argv) if (recording_params.outputGaze() && !face_model.eye_model) cout << "WARNING: no eye model defined, but outputting gaze" << endl; - cv::Mat rgb_image = sequence_reader.GetNextFrame(); + captured_image = sequence_reader.GetNextFrame(); // For reporting progress double reported_completion = 0; INFO_STREAM("Starting tracking"); - while (!rgb_image.empty()) + while (!captured_image.empty()) { // Converting to grayscale cv::Mat_ grayscale_image = sequence_reader.GetGrayFrame(); // The actual facial landmark detection / tracking - bool detection_success = LandmarkDetector::DetectLandmarksInVideo(rgb_image, face_model, det_parameters, grayscale_image); + bool detection_success = LandmarkDetector::DetectLandmarksInVideo(grayscale_image, face_model, det_parameters); // Gaze tracking, absolute gaze direction cv::Point3f gazeDirection0(0, 0, 0); cv::Point3f gazeDirection1(0, 0, 0); cv::Vec2d gazeAngle(0, 0); @@ -177,7 +187,7 @@ int main(int argc, char **argv) // Perform AU detection and HOG feature extraction, as this can be expensive only compute it if needed by output or visualization if (recording_params.outputAlignedFaces() || recording_params.outputHOG() || recording_params.outputAUs() || visualizer.vis_align || visualizer.vis_hog) { - face_analyser.AddNextFrame(rgb_image, face_model.detected_landmarks, face_model.detection_success, sequence_reader.time_stamp, sequence_reader.IsWebcam()); + face_analyser.AddNextFrame(captured_image, face_model.detected_landmarks, face_model.detection_success, sequence_reader.time_stamp, sequence_reader.IsWebcam()); face_analyser.GetLatestAlignedFace(sim_warped_img); face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols); } @@ -189,7 +199,7 @@ int main(int argc, char **argv) fps_tracker.AddFrame(); // Displaying the tracking visualizations - visualizer.SetImage(rgb_image, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); + visualizer.SetImage(captured_image, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy); visualizer.SetObservationFaceAlign(sim_warped_img); visualizer.SetObservationHOG(hog_descriptor, num_hog_rows, num_hog_cols); visualizer.SetObservationLandmarks(face_model.detected_landmarks, face_model.detection_certainty, face_model.GetVisibilities()); @@ -215,6 +225,8 @@ int main(int argc, char **argv) open_face_rec.SetObservationPose(pose_estimate); open_face_rec.SetObservationGaze(gazeDirection0, gazeDirection1, gazeAngle, LandmarkDetector::CalculateAllEyeLandmarks(face_model), LandmarkDetector::Calculate3DEyeLandmarks(face_model, sequence_reader.fx, sequence_reader.fy, sequence_reader.cx, sequence_reader.cy)); open_face_rec.SetObservationTimestamp(sequence_reader.time_stamp); + open_face_rec.SetObservationFaceID(0); + open_face_rec.SetObservationFrameNumber(sequence_reader.GetFrameNumber()); open_face_rec.SetObservationFaceAlign(sim_warped_img); open_face_rec.WriteObservation(); @@ -230,7 +242,7 @@ int main(int argc, char **argv) } // Grabbing the next frame in the sequence - rgb_image = sequence_reader.GetNextFrame(); + captured_image = sequence_reader.GetNextFrame(); } diff --git a/lib/local/Utilities/include/RecorderCSV.h b/lib/local/Utilities/include/RecorderCSV.h index 1052e4f6..a388a69f 100644 --- a/lib/local/Utilities/include/RecorderCSV.h +++ b/lib/local/Utilities/include/RecorderCSV.h @@ -60,10 +60,12 @@ namespace Utilities bool Open(std::string output_file_name, bool is_sequence, bool output_2D_landmarks, bool output_3D_landmarks, bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze, int num_face_landmarks, int num_model_modes, int num_eye_landmarks, const std::vector& au_names_class, const std::vector& au_names_reg); + bool isOpen() const { return output_file.is_open(); } + // Closing the file and cleaning up void Close(); - void WriteLine(int observation_count, double time_stamp, bool landmark_detection_success, float landmark_confidence, + void WriteLine(int face_id, int frame_num, double time_stamp, bool landmark_detection_success, double landmark_confidence, const cv::Mat_& landmarks_2D, const cv::Mat_& landmarks_3D, const cv::Mat_& pdm_model_params, const cv::Vec6f& rigid_shape_params, cv::Vec6f& pose_estimate, const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2f& gaze_angle, const std::vector& eye_landmarks2d, const std::vector& eye_landmarks3d, const std::vector >& au_intensities, const std::vector >& au_occurences); diff --git a/lib/local/Utilities/include/RecorderOpenFace.h b/lib/local/Utilities/include/RecorderOpenFace.h index a04adda1..69ccb386 100644 --- a/lib/local/Utilities/include/RecorderOpenFace.h +++ b/lib/local/Utilities/include/RecorderOpenFace.h @@ -70,9 +70,15 @@ namespace Utilities // Required observations for video/image-sequence void SetObservationTimestamp(double timestamp); + // Required observations for video/image-sequence + void SetObservationFrameNumber(int frame_number); + + // If in multiple face mode, identifying which face was tracked + void SetObservationFaceID(int face_id); + // All observations relevant to facial landmarks void SetObservationLandmarks(const cv::Mat_& landmarks_2D, const cv::Mat_& landmarks_3D, - const cv::Vec6f& params_global, const cv::Mat_& params_local, float confidence, bool success); + const cv::Vec6f& params_global, const cv::Mat_& params_local, double confidence, bool success); // Pose related observations void SetObservationPose(const cv::Vec6f& pose); @@ -123,7 +129,10 @@ namespace Utilities RecorderHOG hog_recorder; // The actual temporary storage for the observations + double timestamp; + int face_id; + int frame_number; // Facial landmark related observations cv::Mat_ landmarks_2D; @@ -147,8 +156,6 @@ namespace Utilities std::vector eye_landmarks2D; std::vector eye_landmarks3D; - int observation_count; - // For video writing cv::VideoWriter video_writer; std::string media_filename; diff --git a/lib/local/Utilities/include/RecorderOpenFaceParameters.h b/lib/local/Utilities/include/RecorderOpenFaceParameters.h index 2f791e7a..8d6b1760 100644 --- a/lib/local/Utilities/include/RecorderOpenFaceParameters.h +++ b/lib/local/Utilities/include/RecorderOpenFaceParameters.h @@ -77,6 +77,8 @@ namespace Utilities float getCx() const { return cx; } float getCy() const { return cy; } + void setOutputAUs(bool output_AUs) { this->output_AUs = output_AUs; } + private: // If we are recording results from a sequence each row refers to a frame, if we are recording an image each row is a face diff --git a/lib/local/Utilities/include/RotationHelpers.h b/lib/local/Utilities/include/RotationHelpers.h index 34829a57..e5dc2323 100644 --- a/lib/local/Utilities/include/RotationHelpers.h +++ b/lib/local/Utilities/include/RotationHelpers.h @@ -77,9 +77,12 @@ namespace Utilities float q2 = (rotation_matrix(0, 2) - rotation_matrix(2, 0)) / (4.0f*q0); float q3 = (rotation_matrix(1, 0) - rotation_matrix(0, 1)) / (4.0f*q0); + // Slower, but dealing with degenerate cases due to precision float t1 = 2.0f * (q0*q2 + q1*q3); + if (t1 > 1) t1 = 1.0f; + if (t1 < -1) t1 = -1.0f; - float yaw = asin(2.0 * (q0*q2 + q1*q3)); + float yaw = asin(t1); float pitch = atan2(2.0 * (q0*q1 - q2*q3), q0*q0 - q1*q1 - q2*q2 + q3*q3); float roll = atan2(2.0 * (q0*q3 - q1*q2), q0*q0 + q1*q1 - q2*q2 - q3*q3); diff --git a/lib/local/Utilities/include/SequenceCapture.h b/lib/local/Utilities/include/SequenceCapture.h index 422faffc..daafbacf 100644 --- a/lib/local/Utilities/include/SequenceCapture.h +++ b/lib/local/Utilities/include/SequenceCapture.h @@ -85,6 +85,8 @@ namespace Utilities // Parameters describing the sequence and it's progress double GetProgress(); + int GetFrameNumber() { return frame_num; } + bool IsOpened(); void Close(); diff --git a/lib/local/Utilities/src/RecorderCSV.cpp b/lib/local/Utilities/src/RecorderCSV.cpp index c3e52c71..2f190e11 100644 --- a/lib/local/Utilities/src/RecorderCSV.cpp +++ b/lib/local/Utilities/src/RecorderCSV.cpp @@ -78,7 +78,7 @@ bool RecorderCSV::Open(std::string output_file_name, bool is_sequence, bool outp // Different headers if we are writing out the results on a sequence or an individual image if(this->is_sequence) { - output_file << "frame, timestamp, confidence, success"; + output_file << "frame, face_id, timestamp, confidence, success"; } else { @@ -176,7 +176,7 @@ bool RecorderCSV::Open(std::string output_file_name, bool is_sequence, bool outp } -void RecorderCSV::WriteLine(int observation_count, double time_stamp, bool landmark_detection_success, float landmark_confidence, +void RecorderCSV::WriteLine(int face_id, int frame_num, double time_stamp, bool landmark_detection_success, double landmark_confidence, const cv::Mat_& landmarks_2D, const cv::Mat_& landmarks_3D, const cv::Mat_& pdm_model_params, const cv::Vec6f& rigid_shape_params, cv::Vec6f& pose_estimate, const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2f& gaze_angle, const std::vector& eye_landmarks2d, const std::vector& eye_landmarks3d, const std::vector >& au_intensities, const std::vector >& au_occurences) @@ -193,8 +193,9 @@ void RecorderCSV::WriteLine(int observation_count, double time_stamp, bool landm output_file << std::noshowpoint; if(is_sequence) { + output_file << std::setprecision(3); - output_file << observation_count << ", " << time_stamp; + output_file << frame_num << ", " << face_id << ", " << time_stamp; output_file << std::setprecision(2); output_file << ", " << landmark_confidence; output_file << std::setprecision(0); @@ -203,7 +204,7 @@ void RecorderCSV::WriteLine(int observation_count, double time_stamp, bool landm else { output_file << std::setprecision(3); - output_file << observation_count << ", " << landmark_confidence; + output_file << face_id << ", " << landmark_confidence; } // Output the estimated gaze if (output_gaze) diff --git a/lib/local/Utilities/src/RecorderOpenFace.cpp b/lib/local/Utilities/src/RecorderOpenFace.cpp index faa4586a..3d279009 100644 --- a/lib/local/Utilities/src/RecorderOpenFace.cpp +++ b/lib/local/Utilities/src/RecorderOpenFace.cpp @@ -153,7 +153,7 @@ void RecorderOpenFace::PrepareRecording(const std::string& in_filename) CreateDirectory(aligned_output_directory); } - observation_count = 0; + this->frame_number = 0; } @@ -275,11 +275,9 @@ void RecorderOpenFace::SetObservationVisualization(const cv::Mat &vis_track) void RecorderOpenFace::WriteObservation() { - observation_count++; - // Write out the CSV file (it will always be there, even if not outputting anything more but frame/face numbers) - - if(observation_count == 1) + // Write out the CSV file (it will always be there, even if not outputting anything more but frame/face numbers) + if(!csv_recorder.isOpen()) { // As we are writing out the header, work out some things like number of landmarks, names of AUs etc. int num_face_landmarks = landmarks_2D.rows / 2; @@ -315,7 +313,7 @@ void RecorderOpenFace::WriteObservation() params.outputAUs(), params.outputGaze(), num_face_landmarks, num_model_modes, num_eye_landmarks, au_names_class, au_names_reg); } - this->csv_recorder.WriteLine(observation_count, timestamp, landmark_detection_success, + this->csv_recorder.WriteLine(face_id, frame_number, timestamp, landmark_detection_success, landmark_detection_confidence, landmarks_2D, landmarks_3D, pdm_params_local, pdm_params_global, head_pose, gaze_direction0, gaze_direction1, gaze_angle, eye_landmarks2D, eye_landmarks3D, au_intensities, au_occurences); @@ -331,9 +329,9 @@ void RecorderOpenFace::WriteObservation() // Filename is based on frame number if(params.isSequence()) - std::sprintf(name, "frame_det_%06d.bmp", observation_count); + std::sprintf(name, "frame_det_%02d_%06d.bmp", face_id, frame_number); else - std::sprintf(name, "face_det_%06d.bmp", observation_count); + std::sprintf(name, "face_det_%06d.bmp", face_id); // Construct the output filename boost::filesystem::path slash("/"); @@ -387,8 +385,21 @@ void RecorderOpenFace::SetObservationTimestamp(double timestamp) this->timestamp = timestamp; } +// Required observations for video/image-sequence +void RecorderOpenFace::SetObservationFrameNumber(int frame_number) +{ + this->frame_number = frame_number; +} + +// If in multiple face mode, identifying which face was tracked +void RecorderOpenFace::SetObservationFaceID(int face_id) +{ + this->face_id = face_id; +} + + void RecorderOpenFace::SetObservationLandmarks(const cv::Mat_& landmarks_2D, const cv::Mat_& landmarks_3D, - const cv::Vec6f& pdm_params_global, const cv::Mat_& pdm_params_local, float confidence, bool success) + const cv::Vec6f& pdm_params_global, const cv::Mat_& pdm_params_local, double confidence, bool success) { this->landmarks_2D = landmarks_2D; this->landmarks_3D = landmarks_3D; diff --git a/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp b/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp index 2afa45ee..259cabd8 100644 --- a/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp +++ b/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp @@ -60,61 +60,61 @@ RecorderOpenFaceParameters::RecorderOpenFaceParameters(std::vector bool output_set = false; - output_2D_landmarks = false; - output_3D_landmarks = false; - output_model_params = false; - output_pose = false; - output_AUs = false; - output_gaze = false; - output_hog = false; - output_tracked = false; - output_aligned_faces = false; + this->output_2D_landmarks = false; + this->output_3D_landmarks = false; + this->output_model_params = false; + this->output_pose = false; + this->output_AUs = false; + this->output_gaze = false; + this->output_hog = false; + this->output_tracked = false; + this->output_aligned_faces = false; for (size_t i = 0; i < arguments.size(); ++i) { if (arguments[i].compare("-simalign") == 0) { - output_aligned_faces = true; + this->output_aligned_faces = true; output_set = true; } else if (arguments[i].compare("-hogalign") == 0) { - output_hog = true; + this->output_hog = true; output_set = true; } else if (arguments[i].compare("-2Dfp") == 0) { - output_2D_landmarks = true; + this->output_2D_landmarks = true; output_set = true; } else if (arguments[i].compare("-3Dfp") == 0) { - output_3D_landmarks = true; + this->output_3D_landmarks = true; output_set = true; } else if (arguments[i].compare("-pdmparams") == 0) { - output_model_params = true; + this->output_model_params = true; output_set = true; } else if (arguments[i].compare("-pose") == 0) { - output_pose = true; + this->output_pose = true; output_set = true; } else if (arguments[i].compare("-aus") == 0) { - output_AUs = true; + this->output_AUs = true; output_set = true; } else if (arguments[i].compare("-gaze") == 0) { - output_gaze = true; + this->output_gaze = true; output_set = true; } else if (arguments[i].compare("-tracked") == 0) { - output_tracked = true; + this->output_tracked = true; output_set = true; } } @@ -123,15 +123,15 @@ RecorderOpenFaceParameters::RecorderOpenFaceParameters(std::vector if (!output_set) { - output_2D_landmarks = true; - output_3D_landmarks = true; - output_model_params = true; - output_pose = true; - output_AUs = true; - output_gaze = true; - output_hog = true; - output_tracked = true; - output_aligned_faces = true; + this->output_2D_landmarks = true; + this->output_3D_landmarks = true; + this->output_model_params = true; + this->output_pose = true; + this->output_AUs = true; + this->output_gaze = true; + this->output_hog = true; + this->output_tracked = true; + this->output_aligned_faces = true; } } diff --git a/lib/local/Utilities/src/SequenceCapture.cpp b/lib/local/Utilities/src/SequenceCapture.cpp index ab272300..a0a7b1e6 100644 --- a/lib/local/Utilities/src/SequenceCapture.cpp +++ b/lib/local/Utilities/src/SequenceCapture.cpp @@ -94,6 +94,8 @@ bool SequenceCapture::Open(std::vector& arguments) std::string input_video_file; std::string input_sequence_directory; int device = -1; + int cam_width = 640; + int cam_height = 480; bool file_found = false; @@ -147,6 +149,22 @@ bool SequenceCapture::Open(std::vector& arguments) valid[i + 1] = false; i++; } + else if (arguments[i].compare("-cam_width") == 0) + { + std::stringstream data(arguments[i + 1]); + data >> cam_width; + valid[i] = false; + valid[i + 1] = false; + i++; + } + else if (arguments[i].compare("-cam_height") == 0) + { + std::stringstream data(arguments[i + 1]); + data >> cam_height; + valid[i] = false; + valid[i + 1] = false; + i++; + } } for (int i = (int)arguments.size() - 1; i >= 0; --i) @@ -162,8 +180,7 @@ bool SequenceCapture::Open(std::vector& arguments) // Based on what was read in open the sequence if (device != -1) { - // TODO allow to specify webcam resolution - return OpenWebcam(device, 640, 480, fx, fy, cx, cy); + return OpenWebcam(device, cam_width, cam_height, fx, fy, cx, cy); } if (!input_video_file.empty()) { diff --git a/matlab_runners/Gaze Experiments/extract_mpii_gaze_test.m b/matlab_runners/Gaze Experiments/extract_mpii_gaze_test.m index 734d28d2..8521173e 100644 --- a/matlab_runners/Gaze Experiments/extract_mpii_gaze_test.m +++ b/matlab_runners/Gaze Experiments/extract_mpii_gaze_test.m @@ -11,15 +11,12 @@ elseif(exist('F:\Dropbox/AAM/eye_clm/mpii_data/', 'file')) database_root = 'F:\Dropbox/AAM/eye_clm/mpii_data/'; elseif(exist('/multicomp/datasets/mpii_gaze/mpii_data/', 'file')) database_root = '/multicomp/datasets/mpii_gaze/mpii_data/'; +elseif(exist('/media/tadas/5E08AE0D08ADE3ED/Dropbox/AAM/eye_clm/mpii_data/', 'file')) + database_root = '/media/tadas/5E08AE0D08ADE3ED/Dropbox/AAM/eye_clm/mpii_data/'; else fprintf('MPII gaze dataset not found\n'); end -output_loc = './gaze_estimates_MPII/'; -if(~exist(output_loc, 'dir')) - mkdir(output_loc); -end - output = './mpii_out/'; %% Perform actual gaze predictions @@ -29,22 +26,21 @@ else executable = '"../../x64/Release/FaceLandmarkImg.exe"'; end -command = sprintf('%s -fx 1028 -fy 1028 -gaze ', executable); +command = sprintf('%s -fx 1028 -fy 1028 ', executable); p_dirs = dir([database_root, 'p*']); parfor p=1:numel(p_dirs) tic - input_loc = ['-fdir "', [database_root, p_dirs(p).name], '" ']; - out_img_loc = ['-oidir "', [output, p_dirs(p).name], '" ']; - out_p_loc = ['-opdir "', [output, p_dirs(p).name], '" ']; - command_c = cat(2, command, input_loc, out_img_loc, out_p_loc); + input_loc = ['-gaze -fdir "', [database_root, p_dirs(p).name], '" ']; + out_img_loc = ['-out_dir "', [output, p_dirs(p).name], '" ']; + command_c = cat(2, command, input_loc, out_img_loc); if(isunix) unix(command_c, '-echo'); else dos(command_c); - end; + end end %% @@ -65,29 +61,36 @@ for p=1:numel(p_dirs) for i=1:size(filenames, 1) - fname = sprintf('%s/%s/%d_%d_%d_%d_%d_%d_%d_det_0.pose', output, p_dirs(p).name,... + fname = sprintf('%s/%s/%d_%d_%d_%d_%d_%d_%d.csv', output, p_dirs(p).name,... filenames(i,1), filenames(i,2), filenames(i,3), filenames(i,4),... filenames(i,5), filenames(i,6), filenames(i,7)); - try - A = dlmread(fname, ' ', 'A79..F79'); - valid = true; - catch - A = zeros(1,6); - A(1,3) = -1; - A(1,6) = -1; - valid = false; + + if(p==1 && i==1) + % First read in the column names + tab = readtable(fname); + column_names = tab.Properties.VariableNames; + + gaze_0_ids = cellfun(@(x) ~isempty(x) && x==1, strfind(column_names, 'gaze_0_')); + gaze_1_ids = cellfun(@(x) ~isempty(x) && x==1, strfind(column_names, 'gaze_1_')); + end + + if(exist(fname, 'file')) + all_params = dlmread(fname, ',', 1, 0); + else + all_params = []; + end + + % If there was a face detected + if(size(all_params,1)>0) + predictions_r(curr,:) = all_params(1,gaze_0_ids); + predictions_l(curr,:) = all_params(1,gaze_1_ids); + else + predictions_r(curr,:) = [0,0,-1]; + predictions_l(curr,:) = [0,0,-1]; end head_rot = headpose(i,1:3); - - predictions_r(curr,:) = A(1:3); - predictions_l(curr,:) = A(4:6); - - if(~valid) - predictions_r(curr,:) = [0,0,-1]; - predictions_l(curr,:) = [0,0,-1]; - end - + gt_r(curr,:) = data.right.gaze(i,:)'; gt_r(curr,:) = gt_r(curr,:) / norm(gt_r(curr,:)); gt_l(curr,:) = data.left.gaze(i,:)'; diff --git a/matlab_runners/Gaze Experiments/mpii_1500_errs.txt b/matlab_runners/Gaze Experiments/mpii_1500_errs.txt index 416b1fe0..b1c97033 100644 --- a/matlab_runners/Gaze Experiments/mpii_1500_errs.txt +++ b/matlab_runners/Gaze Experiments/mpii_1500_errs.txt @@ -1,2 +1,2 @@ Mean error, median error -9.100, 8.468 +9.099, 8.469