// Copyright (C) 2008  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_REDUCEd_TRAINERS_
#define DLIB_REDUCEd_TRAINERS_

#include "reduced_abstract.h"
#include "../matrix.h"
#include "../algs.h"
#include "function.h"
#include "kernel.h"
#include "kcentroid.h"
#include "linearly_independent_subset_finder.h"
#include "../optimization.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename trainer_type
        >
    class reduced_decision_function_trainer
    {
    public:
        typedef typename trainer_type::kernel_type kernel_type;
        typedef typename trainer_type::scalar_type scalar_type;
        typedef typename trainer_type::sample_type sample_type;
        typedef typename trainer_type::mem_manager_type mem_manager_type;
        typedef typename trainer_type::trained_function_type trained_function_type;

        reduced_decision_function_trainer (
        ) : num_bv(0) {}
        reduced_decision_function_trainer (
            const trainer_type& trainer_,
            const unsigned long num_bv_
        ) :
            trainer(trainer_),
            num_bv(num_bv_)
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(num_bv > 0,
                        "\t reduced_decision_function_trainer()"
                        << "\n\t you have given invalid arguments to this function"
                        << "\n\t num_bv: " << num_bv
            );
        }
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(num_bv > 0,
                        "\t reduced_decision_function_trainer::train(x,y)"
                        << "\n\t You have tried to use an uninitialized version of this object"
                        << "\n\t num_bv: " << num_bv );
            return do_train(mat(x), mat(y));
        }

    private:

    // ------------------------------------------------------------------------------------

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            // get the decision function object we are going to try and approximate
            const decision_function<kernel_type>& dec_funct = trainer.train(x,y);

            // now find a linearly independent subset of the training points of num_bv points.
            linearly_independent_subset_finder<kernel_type> lisf(dec_funct.kernel_function, num_bv);
            fill_lisf(lisf, x);
            // The next few statements just find the best weights with which to approximate
            // the dec_funct object with the smaller set of vectors in the lisf dictionary.  This
            // is really just a simple application of some linear algebra.  For the details
            // see page 554 of Learning with kernels by Scholkopf and Smola where they talk
            // about "Optimal Expansion Coefficients."
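            //
            // Concretely: if the original function is f(x) = sum_j alpha(j)*k(x_j,x) and
            // z_1,...,z_m is the reduced basis chosen by the lisf, then the weights that
            // minimize the RKHS distance to f are
            //
            //    alpha_new = inv(K_zz) * K_zx * alpha
            //
            // where K_zz(i,j) = k(z_i,z_j) and K_zx(i,j) = k(z_i,x_j).  That is exactly
            // what the alpha assignment below computes.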
            const kernel_type kern(dec_funct.kernel_function);

            matrix<scalar_type,0,1,mem_manager_type> alpha;

            alpha = lisf.get_inv_kernel_marix()*(kernel_matrix(kern,lisf,dec_funct.basis_vectors)*dec_funct.alpha);

            decision_function<kernel_type> new_df(alpha,
                                                  0,
                                                  kern,
                                                  lisf.get_dictionary());

            // now we have to figure out what the new bias should be.  It might be a little
            // different since we just messed with all the weights and vectors.
            double bias = 0;
            for (long i = 0; i < x.nr(); ++i)
            {
                bias += new_df(x(i)) - dec_funct(x(i));
            }

            new_df.b = bias/x.nr();

            return new_df;
        }

    // ------------------------------------------------------------------------------------

        trainer_type trainer;
        unsigned long num_bv;

    }; // end of class reduced_decision_function_trainer

    template <typename trainer_type>
    const reduced_decision_function_trainer<trainer_type> reduced (
        const trainer_type& trainer,
        const unsigned long num_bv
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(num_bv > 0,
                    "\tconst reduced_decision_function_trainer reduced()"
                    << "\n\t you have given invalid arguments to this function"
                    << "\n\t num_bv: " << num_bv
            );

        return reduced_decision_function_trainer<trainer_type>(trainer, num_bv);
    }
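
    // Example usage of reduced() (a minimal sketch; it assumes one of dlib's batch
    // trainers, such as svm_nu_trainer, together with a radial_basis_kernel, but any
    // compatible trainer/kernel pair works the same way):
    //
    //    typedef matrix<double,0,1> sample_type;
    //    typedef radial_basis_kernel<sample_type> kernel_type;
    //
    //    std::vector<sample_type> samples;
    //    std::vector<double> labels;
    //    ... // fill samples and labels
    //
    //    svm_nu_trainer<kernel_type> trainer;
    //    trainer.set_kernel(kernel_type(0.1));
    //
    //    // train, then approximate the result with at most 10 basis vectors
    //    decision_function<kernel_type> df = reduced(trainer,10).train(samples, labels);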

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    namespace red_impl
    {

    // ------------------------------------------------------------------------------------

        template <typename kernel_type>
        class objective
        {
            /*
                This object represents the objective function we will try to
                minimize in approximate_distance_function().

                The objective is the distance, in kernel induced feature space, between
                the original distance function and the approximated version.
            */
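            /*
                Concretely, writing the approximation as g(x) = sum_i b(i)*k(z_i,x) and
                the target as f(x) = sum_j alpha(j)*k(x_j,x), the squared RKHS distance
                expands to

                    ||g - f||^2 =   sum_{i,j} b(i)*b(j)*k(z_i,z_j)
                                  - 2*sum_{i,j} b(i)*alpha(j)*k(z_i,x_j)
                                  + ||f||^2

                which is exactly what operator() below computes, with ||f||^2 supplied
                by dist_funct.get_squared_norm().
            */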
            typedef typename kernel_type::scalar_type scalar_type;
            typedef typename kernel_type::sample_type sample_type;
            typedef typename kernel_type::mem_manager_type mem_manager_type;
        public:
            objective(
                const distance_function<kernel_type>& dist_funct_,
                matrix<scalar_type,0,1,mem_manager_type>& b_,
                matrix<sample_type,0,1,mem_manager_type>& out_vectors_
            ) :
                dist_funct(dist_funct_),
                b(b_),
                out_vectors(out_vectors_)
            {
            }

            const matrix<scalar_type, 0, 1, mem_manager_type> state_to_vector (
            ) const
            /*!
                ensures
                    - returns a vector that contains all the information necessary to
                      reproduce the current state of the approximated distance function
            !*/
            {
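                // The state is packed as z = [ b ; out_vectors(0) ; out_vectors(1) ; ... ],
                // i.e. all the b weights first, then the elements of each basis vector in
                // turn.  vector_to_state() unpacks using the same layout.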
                matrix<scalar_type, 0, 1, mem_manager_type> z(b.nr() + out_vectors.size()*out_vectors(0).nr());
                long i = 0;
                for (long j = 0; j < b.nr(); ++j)
                {
                    z(i) = b(j);
                    ++i;
                }

                for (long j = 0; j < out_vectors.size(); ++j)
                {
                    for (long k = 0; k < out_vectors(j).size(); ++k)
                    {
                        z(i) = out_vectors(j)(k);
                        ++i;
                    }
                }
                return z;
            }

            void vector_to_state (
                const matrix<scalar_type, 0, 1, mem_manager_type>& z
            ) const
            /*!
                requires
                    - z came from the state_to_vector() function or has a compatible format
                ensures
                    - loads the vector z into the state variables of the approximate
                      distance function (i.e. b and out_vectors)
            !*/
            {
                long i = 0;
                for (long j = 0; j < b.nr(); ++j)
                {
                    b(j) = z(i);
                    ++i;
                }

                for (long j = 0; j < out_vectors.size(); ++j)
                {
                    for (long k = 0; k < out_vectors(j).size(); ++k)
                    {
                        out_vectors(j)(k) = z(i);
                        ++i;
                    }
                }
            }

            double operator() (
                const matrix<scalar_type, 0, 1, mem_manager_type>& z
            ) const
            /*!
                ensures
                    - loads the current approximate distance function with z
                    - returns the distance between the original distance function
                      and the approximate one.
            !*/
            {
                vector_to_state(z);
                const kernel_type k(dist_funct.get_kernel());

                double temp = 0;
                for (long i = 0; i < out_vectors.size(); ++i)
                {
                    for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j)
                    {
                        temp -= b(i)*dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j));
                    }
                }

                temp *= 2;

                for (long i = 0; i < out_vectors.size(); ++i)
                {
                    for (long j = 0; j < out_vectors.size(); ++j)
                    {
                        temp += b(i)*b(j)*k(out_vectors(i), out_vectors(j));
                    }
                }

                return temp + dist_funct.get_squared_norm();
            }

        private:

            const distance_function<kernel_type>& dist_funct;
            matrix<scalar_type,0,1,mem_manager_type>& b;
            matrix<sample_type,0,1,mem_manager_type>& out_vectors;

        };

    // ------------------------------------------------------------------------------------

        template <typename kernel_type>
        class objective_derivative
        {
            /*!
                This object represents the derivative of the objective object
            !*/
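            /*
                The returned gradient has two parts, stored in the same packed layout
                that objective::state_to_vector() uses: first the partial derivatives
                with respect to the b weights, then the partial derivatives with
                respect to the elements of each basis vector.
            */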
            typedef typename kernel_type::scalar_type scalar_type;
            typedef typename kernel_type::sample_type sample_type;
            typedef typename kernel_type::mem_manager_type mem_manager_type;
        public:

            objective_derivative(
                const distance_function<kernel_type>& dist_funct_,
                matrix<scalar_type,0,1,mem_manager_type>& b_,
                matrix<sample_type,0,1,mem_manager_type>& out_vectors_
            ) :
                dist_funct(dist_funct_),
                b(b_),
                out_vectors(out_vectors_)
            {
            }

            void vector_to_state (
                const matrix<scalar_type, 0, 1, mem_manager_type>& z
            ) const
            /*!
                requires
                    - z came from the state_to_vector() function or has a compatible format
                ensures
                    - loads the vector z into the state variables of the approximate
                      distance function (i.e. b and out_vectors)
            !*/
            {
                long i = 0;
                for (long j = 0; j < b.nr(); ++j)
                {
                    b(j) = z(i);
                    ++i;
                }

                for (long j = 0; j < out_vectors.size(); ++j)
                {
                    for (long k = 0; k < out_vectors(j).size(); ++k)
                    {
                        out_vectors(j)(k) = z(i);
                        ++i;
                    }
                }
            }

            const matrix<scalar_type,0,1,mem_manager_type>& operator() (
                const matrix<scalar_type, 0, 1, mem_manager_type>& z
            ) const
            /*!
                ensures
                    - loads the current approximate distance function with z
                    - returns the derivative of the distance between the original
                      distance function and the approximate one.
            !*/
            {
                vector_to_state(z);
                res.set_size(z.nr());
                set_all_elements(res,0);
                const kernel_type k(dist_funct.get_kernel());
                const kernel_derivative<kernel_type> K_der(k);

                // first compute the gradient for the beta weights
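                //   dE/db(i) = 2*[ sum_j b(j)*k(z_i,z_j) - sum_j alpha(j)*k(z_i,x_j) ]
                // (the common factor of 2 is applied to all of res at the very end)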
                for (long i = 0; i < out_vectors.size(); ++i)
                {
                    for (long j = 0; j < out_vectors.size(); ++j)
                    {
                        res(i) += b(j)*k(out_vectors(i), out_vectors(j));
                    }
                }
                for (long i = 0; i < out_vectors.size(); ++i)
                {
                    for (long j = 0; j < dist_funct.get_basis_vectors().size(); ++j)
                    {
                        res(i) -= dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j));
                    }
                }

                // now compute the gradient of the actual vectors that go into the kernel functions
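                //   dE/dz_i = 2*b(i)*[ sum_j b(j)*d/dz_i k(z_j,z_i) - sum_j alpha(j)*d/dz_i k(x_j,z_i) ]
                // where, as used here, K_der(a,c) evaluates the derivative of k(a,c) with
                // respect to its second argument c.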
                long pos = out_vectors.size();
                const long num = out_vectors(0).nr();
                temp.set_size(num,1);
                for (long i = 0; i < out_vectors.size(); ++i)
                {
                    set_all_elements(temp,0);
                    for (long j = 0; j < out_vectors.size(); ++j)
                    {
                        temp += b(j)*K_der(out_vectors(j), out_vectors(i));
                    }
                    for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j)
                    {
                        temp -= dist_funct.get_alpha()(j)*K_der(dist_funct.get_basis_vectors()(j), out_vectors(i) );
                    }

                    // store the gradient for out_vectors(i) into result in the proper spot
                    set_subm(res,pos,0,num,1) = b(i)*temp;
                    pos += num;
                }

                res *= 2;
                return res;
            }

        private:

            mutable matrix<scalar_type, 0, 1, mem_manager_type> res;
            mutable sample_type temp;

            const distance_function<kernel_type>& dist_funct;
            matrix<scalar_type,0,1,mem_manager_type>& b;
            matrix<sample_type,0,1,mem_manager_type>& out_vectors;

        };

    // ------------------------------------------------------------------------------------

    }

    template <
        typename K,
        typename stop_strategy_type,
        typename T
        >
    distance_function<K> approximate_distance_function (
        stop_strategy_type stop_strategy,
        const distance_function<K>& target,
        const T& starting_basis
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(target.get_basis_vectors().size() > 0 &&
                    starting_basis.size() > 0,
                    "\t distance_function approximate_distance_function()"
                    << "\n\t Invalid inputs were given to this function."
                    << "\n\t target.get_basis_vectors().size(): " << target.get_basis_vectors().size()
                    << "\n\t starting_basis.size():             " << starting_basis.size()
            );

        using namespace red_impl;
        // The next few statements just find the best weights with which to approximate
        // the target object with the set of basis vectors in starting_basis.  This
        // is really just a simple application of some linear algebra.  For the details
        // see page 554 of Learning with kernels by Scholkopf and Smola where they talk
        // about "Optimal Expansion Coefficients."

        const K kern(target.get_kernel());
        typedef typename K::scalar_type scalar_type;
        typedef typename K::sample_type sample_type;
        typedef typename K::mem_manager_type mem_manager_type;

        matrix<scalar_type,0,1,mem_manager_type> beta;
        // Now we compute the first approximate distance function.
        beta = pinv(kernel_matrix(kern,starting_basis)) *
            (kernel_matrix(kern,starting_basis,target.get_basis_vectors())*target.get_alpha());
        matrix<sample_type,0,1,mem_manager_type> out_vectors(mat(starting_basis));

        // Now setup to do a global optimization of all the parameters in the approximate
        // distance function.
        const objective<K> obj(target, beta, out_vectors);
        const objective_derivative<K> obj_der(target, beta, out_vectors);
        matrix<scalar_type,0,1,mem_manager_type> opt_starting_point(obj.state_to_vector());

        // perform a full optimization of all the parameters (i.e. both beta and the basis vectors together)
        find_min(lbfgs_search_strategy(20),
                 stop_strategy,
                 obj, obj_der, opt_starting_point, 0);

        // now make sure that the final optimized value is loaded into the beta and
        // out_vectors matrices
        obj.vector_to_state(opt_starting_point);

        // Do a final reoptimization of beta just to make sure it is optimal given the new
        // set of basis vectors.
        beta = pinv(kernel_matrix(kern,out_vectors))*(kernel_matrix(kern,out_vectors,target.get_basis_vectors())*target.get_alpha());

        // It is possible that some of the beta weights will be very close to zero.  Let's
        // remove the basis vectors with these essentially zero weights.
        const scalar_type eps = max(abs(beta))*std::numeric_limits<scalar_type>::epsilon();
        for (long i = 0; i < beta.size(); ++i)
        {
            // if beta(i) is zero (but leave at least one beta no matter what)
            if (std::abs(beta(i)) < eps && beta.size() > 1)
            {
                beta = remove_row(beta, i);
                out_vectors = remove_row(out_vectors, i);
                --i;
            }
        }

        return distance_function<K>(beta, kern, out_vectors);
    }
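
    // Example usage of approximate_distance_function() (a minimal sketch; it assumes
    // you already have a distance_function named target and a std::vector of candidate
    // basis samples named basis):
    //
    //    distance_function<kernel_type> approx =
    //        approximate_distance_function(objective_delta_stop_strategy(1e-3), target, basis);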

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename trainer_type
        >
    class reduced_decision_function_trainer2
    {
    public:
        typedef typename trainer_type::kernel_type kernel_type;
        typedef typename trainer_type::scalar_type scalar_type;
        typedef typename trainer_type::sample_type sample_type;
        typedef typename trainer_type::mem_manager_type mem_manager_type;
        typedef typename trainer_type::trained_function_type trained_function_type;
        // note: eps is initialized here too so a default constructed object has no
        // indeterminate members.
        reduced_decision_function_trainer2 () : num_bv(0), eps(1e-3) {}

        reduced_decision_function_trainer2 (
            const trainer_type& trainer_,
            const long num_bv_,
            const double eps_ = 1e-3
        ) :
            trainer(trainer_),
            num_bv(num_bv_),
            eps(eps_)
        {
            COMPILE_TIME_ASSERT(is_matrix<sample_type>::value);

            // make sure requires clause is not broken
            DLIB_ASSERT(num_bv > 0 && eps > 0,
                        "\t reduced_decision_function_trainer2()"
                        << "\n\t you have given invalid arguments to this function"
                        << "\n\t num_bv: " << num_bv
                        << "\n\t eps:    " << eps
            );
        }
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(num_bv > 0,
                        "\t reduced_decision_function_trainer2::train(x,y)"
                        << "\n\t You have tried to use an uninitialized version of this object"
                        << "\n\t num_bv: " << num_bv );
            return do_train(mat(x), mat(y));
        }

    private:

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            // get the decision function object we are going to try and approximate
            const decision_function<kernel_type>& dec_funct = trainer.train(x,y);
            const kernel_type kern(dec_funct.kernel_function);

            // now find a linearly independent subset of the training points of num_bv points.
            linearly_independent_subset_finder<kernel_type> lisf(kern, num_bv);
            fill_lisf(lisf,x);

            distance_function<kernel_type> approx, target;
            target = dec_funct;
            approx = approximate_distance_function(objective_delta_stop_strategy(eps), target, lisf);

            decision_function<kernel_type> new_df(approx.get_alpha(),
                                                  0,
                                                  kern,
                                                  approx.get_basis_vectors());

            // now we have to figure out what the new bias should be.  It might be a little
            // different since we just messed with all the weights and vectors.
            double bias = 0;
            for (long i = 0; i < x.nr(); ++i)
            {
                bias += new_df(x(i)) - dec_funct(x(i));
            }

            new_df.b = bias/x.nr();

            return new_df;
        }

    // ------------------------------------------------------------------------------------

        trainer_type trainer;
        long num_bv;
        double eps;

    }; // end of class reduced_decision_function_trainer2

    template <typename trainer_type>
    const reduced_decision_function_trainer2<trainer_type> reduced2 (
        const trainer_type& trainer,
        const long num_bv,
        double eps = 1e-3
    )
    {
        COMPILE_TIME_ASSERT(is_matrix<typename trainer_type::sample_type>::value);

        // make sure requires clause is not broken
        DLIB_ASSERT(num_bv > 0 && eps > 0,
                    "\tconst reduced_decision_function_trainer2 reduced2()"
                    << "\n\t you have given invalid arguments to this function"
                    << "\n\t num_bv: " << num_bv
                    << "\n\t eps:    " << eps
            );

        return reduced_decision_function_trainer2<trainer_type>(trainer, num_bv, eps);
    }
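
    // Example usage of reduced2() (a minimal sketch, reusing the trainer/kernel setup
    // from the reduced() example above; reduced2() differs from reduced() in that it
    // additionally refines the basis vectors themselves with the L-BFGS optimization
    // performed by approximate_distance_function()):
    //
    //    // train, then simplify the result down to at most 10 basis vectors
    //    decision_function<kernel_type> df = reduced2(trainer, 10).train(samples, labels);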

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

}

#endif // DLIB_REDUCEd_TRAINERS_