diff --git a/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h b/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h new file mode 100644 index 00000000..27e1ce40 --- /dev/null +++ b/lib/local/LandmarkDetector/include/FaceDetectorMTCNN.h @@ -0,0 +1,142 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016, Carnegie Mellon University and University of Cambridge, +// all rights reserved. +// +// THIS SOFTWARE IS PROVIDED “AS IS” FOR ACADEMIC USE ONLY AND ANY EXPRESS +// OR IMPLIED WARRANTIES WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS +// BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY. +// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Notwithstanding the license granted herein, Licensee acknowledges that certain components +// of the Software may be covered by so-called “open source” software licenses (“Open Source +// Components”), which means any software licenses approved as open source licenses by the +// Open Source Initiative or any substantially similar licenses, including without limitation any +// license that, as a condition of distribution of the software licensed under such license, +// requires that the distributor make the software available in source code format. Licensor shall +// provide a list of Open Source Components for a particular version of the Software upon +// Licensee’s request. 
Licensee will comply with the applicable terms of such licenses and to +// the extent required by the licenses covering Open Source Components, the terms of such +// licenses will apply in lieu of the terms of this Agreement. To the extent the terms of the +// licenses applicable to Open Source Components prohibit any of the restrictions in this +// License Agreement with respect to such Open Source Component, such restrictions will not +// apply to such Open Source Component. To the extent the terms of the licenses applicable to +// Open Source Components require Licensor to make an offer to provide source code or +// related information in connection with the Software, such offer is hereby made. Any request +// for source code or related information should be directed to cl-face-tracker-distribution@lists.cam.ac.uk +// Licensee acknowledges receipt of notices for the Open Source Components for the initial +// delivery of the Software. + +// * Any publications arising from the use of this software, including but +// not limited to academic journal and conference publications, technical +// reports and manuals, must cite at least one of the following works: +// +// OpenFace: an open source facial behavior analysis toolkit +// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency +// in IEEE Winter Conference on Applications of Computer Vision, 2016 +// +// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation +// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling +// in IEEE International. 
// Conference on Computer Vision (ICCV), 2015
//
//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
//       in Facial Expression Recognition and Analysis Challenge,
//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
//       Constrained Local Neural Fields for robust facial landmark detection in the wild.
//       Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency.
//       in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
///////////////////////////////////////////////////////////////////////////////

// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; consider renaming this include guard.
#ifndef __FACE_DETECTOR_MTCNN_h_
#define __FACE_DETECTOR_MTCNN_h_

// NOTE(review): in the reviewed text the targets of both #include directives
// and every template argument list in this header (cv::Mat_, vector,
// std::vector) are missing. The code below is kept exactly as it was shown;
// restore the missing parts from the original file.

// OpenCV includes
#include 

// System includes
#include 

// NOTE(review): a using-directive in a header leaks into every file that
// includes it; the unqualified string / vector below rely on it.
using namespace std;

namespace LandmarkDetector
{
	// Parameters of one convolutional neural network, filled from a binary
	// model file by Read(). FaceDetectorMTCNN below owns three of these.
	class CNN
	{
	public:

		//==========================================

		// Default constructor, leaves every layer container empty
		CNN() { ; }

		// Copy constructor
		// NOTE(review): only declared here. No definition appears in
		// FaceDetectorMTCNN.cpp in this change although the copy constructor of
		// FaceDetectorMTCNN uses it - confirm it is defined elsewhere.
		CNN(const CNN& other);

		// Presumably applies the network to intensity_img and returns its output
		// NOTE(review): no definition is visible in this change - confirm.
		cv::Mat_ Inference(const cv::Mat_& intensity_img);

		// Reading in the model: loads the layers stored in the binary file at
		// location into the containers below
		void Read(string location);

	private:
		//==========================================
		// Convolutional Neural Network

		// CNN layers
		// layer -> input maps -> kernels
		// (Read adds one entry per convolutional layer)
		vector > > > cnn_convolutional_layers;
		// Bit ugly with so much nesting, but oh well
		// Same layer -> input maps -> kernels indexing as above. Read only sizes
		// this container and leaves its elements default-constructed;
		// presumably a cache for the DFTs of the kernels - confirm.
		vector > > > > cnn_convolutional_layers_dft;
		// One bias per kernel for every convolutional layer
		vector > cnn_convolutional_layers_bias;
		// One weight matrix and one bias matrix per fully connected layer
		vector > cnn_fully_connected_layers_weights;
		vector > cnn_fully_connected_layers_biases;
		// One weight matrix per prelu layer
		vector > cnn_prelu_layer_weights;
		// (kernel_x, kernel_y, stride_x, stride_y) for every max pooling layer,
		// in the order Read stores them
		vector > cnn_max_pooling_layers;

		// Type code of every layer, in file order
		// CNN: 0 - convolutional, 1 - max pooling, 2 - fully connected, 3 - prelu, 4 - sigmoid
		vector cnn_layer_types;
	};
	//===========================================================================
	//
	// MTCNN face detector
	// Holds the three networks of the model (PNet, RNet and ONet), which
	// Read() loads from the files listed in a model description file
	//===========================================================================
	class FaceDetectorMTCNN
	{

	public:

		// Default constructor, leaves the three networks empty
		FaceDetectorMTCNN() { ; }

		// Copy constructor, copies the three networks of other
		FaceDetectorMTCNN(const FaceDetectorMTCNN& other);

		// Detects faces in the intensity image. o_regions and o_confidences are
		// taken by non-const reference and are the outputs; min_face, t1, t2 and
		// t3 have the defaults shown.
		// NOTE(review): no definition is visible in this change. Presumably
		// min_face is the smallest face size searched for and t1..t3 are the
		// acceptance thresholds of PNet, RNet and ONet - confirm.
		bool DetectFaces(vector >& o_regions, const cv::Mat_& intensity, std::vector& o_confidences, int min_face = 30, double t1 = 0.6, double t2 = 0.7, double t3 = 0.7);

		// Reading in the model: location is a text file whose lines name a module
		// (PNet, RNet or ONet) and the file to load it from, given relative to
		// the directory of location
		void Read(string location);

	private:
		//==========================================
		// Components of the model

		CNN PNet;
		CNN RNet;
		CNN ONet;

	};

}
#endif
+// +// THIS SOFTWARE IS PROVIDED “AS IS” FOR ACADEMIC USE ONLY AND ANY EXPRESS +// OR IMPLIED WARRANTIES WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS +// BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY. +// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Notwithstanding the license granted herein, Licensee acknowledges that certain components +// of the Software may be covered by so-called “open source” software licenses (“Open Source +// Components”), which means any software licenses approved as open source licenses by the +// Open Source Initiative or any substantially similar licenses, including without limitation any +// license that, as a condition of distribution of the software licensed under such license, +// requires that the distributor make the software available in source code format. Licensor shall +// provide a list of Open Source Components for a particular version of the Software upon +// Licensee’s request. Licensee will comply with the applicable terms of such licenses and to +// the extent required by the licenses covering Open Source Components, the terms of such +// licenses will apply in lieu of the terms of this Agreement. To the extent the terms of the +// licenses applicable to Open Source Components prohibit any of the restrictions in this +// License Agreement with respect to such Open Source Component, such restrictions will not +// apply to such Open Source Component. 
To the extent the terms of the licenses applicable to +// Open Source Components require Licensor to make an offer to provide source code or +// related information in connection with the Software, such offer is hereby made. Any request +// for source code or related information should be directed to cl-face-tracker-distribution@lists.cam.ac.uk +// Licensee acknowledges receipt of notices for the Open Source Components for the initial +// delivery of the Software. + +// * Any publications arising from the use of this software, including but +// not limited to academic journal and conference publications, technical +// reports and manuals, must cite at least one of the following works: +// +// OpenFace: an open source facial behavior analysis toolkit +// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency +// in IEEE Winter Conference on Applications of Computer Vision, 2016 +// +// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation +// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling +// in IEEE International Conference on Computer Vision (ICCV), 2015 +// +// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection +// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson +// in Facial Expression Recognition and Analysis Challenge, +// IEEE International Conference on Automatic Face and Gesture Recognition, 2015 +// +// Constrained Local Neural Fields for robust facial landmark detection in the wild. +// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency. +// in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" + +#include "FaceDetectorMTCNN.h" + +// OpenCV includes +#include +#include + +// TBB includes +#include + +// System includes +#include + +// Math includes +#define _USE_MATH_DEFINES +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +using namespace LandmarkDetector; + +// Copy constructor +FaceDetectorMTCNN::FaceDetectorMTCNN(const FaceDetectorMTCNN& other) : PNet(other.PNet), RNet(other.RNet), ONet(other.ONet) +{ +} + +void ReadMatBin(std::ifstream& stream, cv::Mat &output_mat) +{ + // Read in the number of rows, columns and the data type + int row, col, type; + + stream.read((char*)&row, 4); + stream.read((char*)&col, 4); + stream.read((char*)&type, 4); + + output_mat = cv::Mat(row, col, type); + int size = output_mat.rows * output_mat.cols * output_mat.elemSize(); + stream.read((char *)output_mat.data, size); + +} + +void CNN::Read(string location) +{ + ifstream cnn_stream(location, ios::in | ios::binary); + if (cnn_stream.is_open()) + { + cnn_stream.seekg(0, ios::beg); + + // Reading in CNNs + + int network_depth; + cnn_stream.read((char*)&network_depth, 4); + + cnn_layer_types.resize(network_depth); + + for (int layer = 0; layer < network_depth; ++layer) + { + + int layer_type; + cnn_stream.read((char*)&layer_type, 4); + cnn_layer_types[layer] = layer_type; + + // convolutional + if (layer_type == 0) + { + + // Read the number of input maps + int num_in_maps; + cnn_stream.read((char*)&num_in_maps, 4); + + // Read the number of kernels for each input map + int num_kernels; + cnn_stream.read((char*)&num_kernels, 4); + + vector > > kernels; + vector > > > kernel_dfts; + + kernels.resize(num_in_maps); + kernel_dfts.resize(num_in_maps); + + vector biases; + for (int k = 0; k < num_kernels; ++k) + { + float bias; + cnn_stream.read((char*)&bias, 4); + biases.push_back(bias); + } + + cnn_convolutional_layers_bias.push_back(biases); + + 
// For every input map + for (int in = 0; in < num_in_maps; ++in) + { + kernels[in].resize(num_kernels); + kernel_dfts[in].resize(num_kernels); + + // For every kernel on that input map + for (int k = 0; k < num_kernels; ++k) + { + ReadMatBin(cnn_stream, kernels[in][k]); + + } + } + + cnn_convolutional_layers.push_back(kernels); + cnn_convolutional_layers_dft.push_back(kernel_dfts); + } + else if (layer_type == 1) + { + int kernel_x, kernel_y, stride_x, stride_y; + cnn_stream.read((char*)&kernel_x, 4); + cnn_stream.read((char*)&kernel_y, 4); + cnn_stream.read((char*)&stride_x, 4); + cnn_stream.read((char*)&stride_y, 4); + cnn_max_pooling_layers.push_back(std::tuple(kernel_x, kernel_y, stride_x, stride_y)); + } + else if (layer_type == 2) + { + cv::Mat_ biases; + ReadMatBin(cnn_stream, biases); + cnn_fully_connected_layers_biases.push_back(biases); + + // Fully connected layer + cv::Mat_ weights; + ReadMatBin(cnn_stream, weights); + cnn_fully_connected_layers_weights.push_back(weights); + } + + else if (layer_type == 4) + { + cv::Mat_ weights; + ReadMatBin(cnn_stream, weights); + cnn_prelu_layer_weights.push_back(weights); + } + } + } + else + { + cout << "WARNING: Can't find the CNN location" << endl; + } +} + +//=========================================================================== +// Read in the MTCNN detector +void FaceDetectorMTCNN::Read(string location) +{ + + cout << "Reading the MTCNN face detector from: " << location << endl; + + ifstream locations(location.c_str(), ios_base::in); + if (!locations.is_open()) + { + cout << "Couldn't open the model file, aborting" << endl; + return; + } + string line; + + // The other module locations should be defined as relative paths from the main model + boost::filesystem::path root = boost::filesystem::path(location).parent_path(); + + // The main file contains the references to other files + while (!locations.eof()) + { + getline(locations, line); + + stringstream lineStream(line); + + string module; + string 
location; + + // figure out which module is to be read from which file + lineStream >> module; + + lineStream >> location; + + // remove carriage return at the end for compatibility with unix systems + if (location.size() > 0 && location.at(location.size() - 1) == '\r') + { + location = location.substr(0, location.size() - 1); + } + + // append to root + location = (root / location).string(); + if (module.compare("PNet") == 0) + { + cout << "Reading the PNet module from: " << location << endl; + PNet.Read(location); + } + else if(module.compare("RNet") == 0) + { + cout << "Reading the RNet module from: " << location << endl; + RNet.Read(location); + } + else if (module.compare("ONet") == 0) + { + cout << "Reading the ONet module from: " << location << endl; + ONet.Read(location); + } + } +} + + diff --git a/matlab_version/face_detection/mtcnn/convert_to_cpp/PNet.dat b/matlab_version/face_detection/mtcnn/convert_to_cpp/PNet.dat new file mode 100644 index 00000000..9550d39a Binary files /dev/null and b/matlab_version/face_detection/mtcnn/convert_to_cpp/PNet.dat differ diff --git a/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_CNN_to_binary.m b/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_CNN_to_binary.m new file mode 100644 index 00000000..12106e88 --- /dev/null +++ b/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_CNN_to_binary.m @@ -0,0 +1,70 @@ +function Write_CNN_to_binary(location_binary, cnn) + + addpath('../../../PDM_helpers/'); + + % use little-endian + cnn_binary_file = fopen(location_binary, 'w', 'l'); + + num_layers = size(cnn.layers,2); + + % Get the number of layers + fwrite(cnn_binary_file, num_layers, 'uint'); % 4 bytes + + for layers=1:num_layers + + % write layer type: 0 - convolutional, 1 - max pooling, 2 - + % fully connected, 3 - prelu, 4 - sigmoid + if(strcmp(cnn.layers{layers}.type, 'conv')) + + % write the type (convolutional) + fwrite(cnn_binary_file, 0, 'uint'); % 4 bytes + + num_in_map = 
function Write_CNN_to_binary(location_binary, cnn)
% WRITE_CNN_TO_BINARY Write a CNN description to a little-endian binary file.
%
%   Write_CNN_to_binary(location_binary, cnn) writes the layers in the cell
%   array cnn.layers to the file location_binary.
%
%   File layout: the number of layers (uint, 4 bytes), then for every layer a
%   type code (uint, 4 bytes) followed by the parameters of the layer:
%     0 - 'conv'        number of input maps, number of kernels, one float32
%                       bias per kernel (weights{2}), then one kernel matrix
%                       per (input map, kernel) taken from weights{1}
%     1 - 'max_pooling' kernel_size_x, kernel_size_y, stride_x, stride_y
%     2 - 'fc'          bias matrix (weights{2}), then weight matrix (weights{1})
%     3 - 'prelu'       weight matrix (weights{1})
%   Code 4 (sigmoid) is reserved but no such layer can be written here.
%
%   An error is raised, before any file is created, if a layer has a type
%   other than the four above, and also if the file cannot be opened.

    % writeMatrixBin is expected on this path
    addpath('../../../PDM_helpers/');

    % numel also counts correctly when the layers are stored as a column
    num_layers = numel(cnn.layers);

    % A layer that cannot be written would still be counted in the header
    % and make the file unreadable, so reject it up front
    known_types = {'conv', 'max_pooling', 'fc', 'prelu'};
    for layer = 1:num_layers
        if ~any(strcmp(cnn.layers{layer}.type, known_types))
            error('Write_CNN_to_binary:unknownLayer', ...
                'Layer %d has an unsupported type', layer);
        end
    end

    % use little-endian
    cnn_binary_file = fopen(location_binary, 'w', 'l');
    if cnn_binary_file == -1
        error('Write_CNN_to_binary:fileOpen', ...
            'Could not open %s for writing', location_binary);
    end

    % Closes the file on every exit path, including errors raised below
    close_file = onCleanup(@() fclose(cnn_binary_file));

    % Write the number of layers
    fwrite(cnn_binary_file, num_layers, 'uint'); % 4 bytes

    for layer = 1:num_layers

        current = cnn.layers{layer};

        % The last argument of writeMatrixBin is the matrix type code
        % NOTE(review): 5 is presumably OpenCV's 32-bit float type - confirm
        switch current.type
            case 'conv'
                % write the type (convolutional)
                fwrite(cnn_binary_file, 0, 'uint'); % 4 bytes

                num_in_map = size(current.weights{1}, 3);
                num_out_kerns = size(current.weights{1}, 4);

                % write the number of input maps
                fwrite(cnn_binary_file, num_in_map, 'uint'); % 4 bytes

                % write the number of kernels applied to each input map
                fwrite(cnn_binary_file, num_out_kerns, 'uint'); % 4 bytes

                % Write the bias term of every kernel, 4 bytes each
                fwrite(cnn_binary_file, current.weights{2}(1:num_out_kerns), 'float32');

                for k = 1:num_in_map
                    for k2 = 1:num_out_kerns
                        % Write out the kernel
                        W = squeeze(current.weights{1}(:,:,k,k2));
                        writeMatrixBin(cnn_binary_file, W, 5);
                    end
                end

            case 'fc'
                % This is the fully connected layer
                fwrite(cnn_binary_file, 2, 'uint'); % 4 bytes

                % the bias term
                writeMatrixBin(cnn_binary_file, current.weights{2}, 5);
                % the weights
                writeMatrixBin(cnn_binary_file, current.weights{1}, 5);

            case 'max_pooling'
                fwrite(cnn_binary_file, 1, 'uint'); % 4 bytes, indicate max pooling layer
                % params kernel and stride size
                fwrite(cnn_binary_file, current.kernel_size_x, 'uint'); % 4 bytes
                fwrite(cnn_binary_file, current.kernel_size_y, 'uint'); % 4 bytes
                fwrite(cnn_binary_file, current.stride_x, 'uint'); % 4 bytes
                fwrite(cnn_binary_file, current.stride_y, 'uint'); % 4 bytes

            case 'prelu'
                fwrite(cnn_binary_file, 3, 'uint'); % 4 bytes, indicate a parametric relu layer
                writeMatrixBin(cnn_binary_file, current.weights{1}, 5);
        end
    end

end
% Export the PNet network to PNet.dat

% PNet_mlab holds the convolution, prelu and fully connected parameters
load('../PNet_mlab.mat');

% Eight layers: conv, prelu, max pooling, conv, prelu, conv, prelu, fc.
% The double braces make struct() store the cell array itself in the field.
cnn = struct('layers', {cell(1, 8)});

cnn.layers{1} = struct('type', 'conv', ...
    'weights', {{PNet_mlab.weights_conv1, PNet_mlab.biases_conv1}});

cnn.layers{2} = struct('type', 'prelu', ...
    'weights', {{PNet_mlab.prelu_weights_1}});

% 2x2 max pooling with a stride of 2, no weights
cnn.layers{3} = struct('type', 'max_pooling', ...
    'weights', {{}}, ...
    'stride_x', 2, ...
    'stride_y', 2, ...
    'kernel_size_x', 2, ...
    'kernel_size_y', 2);

cnn.layers{4} = struct('type', 'conv', ...
    'weights', {{PNet_mlab.weights_conv2, PNet_mlab.biases_conv2}});

cnn.layers{5} = struct('type', 'prelu', ...
    'weights', {{PNet_mlab.prelu_weights_2}});

cnn.layers{6} = struct('type', 'conv', ...
    'weights', {{PNet_mlab.weights_conv3, PNet_mlab.biases_conv3}});

cnn.layers{7} = struct('type', 'prelu', ...
    'weights', {{PNet_mlab.prelu_weights_3}});

% The fully connected layer stores its weights first and its biases second
cnn.layers{8} = struct('type', 'fc', ...
    'weights', {{PNet_mlab.w, PNet_mlab.b}});

Write_CNN_to_binary('PNet.dat', cnn);