// Copyright (C) 2008  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
#define DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_

#include <cmath>
#include <limits>
#include <vector>
#include "../matrix.h"
#include "../algs.h"
#include "optimization_search_strategies_abstract.h"
#include "../sequence.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class cg_search_strategy
    {
    public:
        cg_search_strategy() : been_used(false) {}

        double get_wolfe_rho (
        ) const { return 0.001; }

        double get_wolfe_sigma (
        ) const { return 0.01; }

        unsigned long get_max_line_search_iterations (
        ) const { return 100; }

        template <typename T>
        const matrix<double,0,1>& get_next_direction (
            const T& ,
            const double ,
            const T& funct_derivative
        )
        {
            if (been_used == false)
            {
                been_used = true;
                prev_direction = -funct_derivative;
            }
            else
            {
                // Use the Polak-Ribiere (4.1.12) conjugate gradient described by Fletcher on page 83.
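                // The new direction is d = -g_new + b*d_prev, where the Polak-Ribiere
                // coefficient is b = g_new'*(g_new - g_old)/(g_old'*g_old).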
                const double temp = trans(prev_derivative)*prev_derivative;
                // If this value hits zero then just use the direction of steepest descent.
                if (std::abs(temp) < std::numeric_limits<double>::epsilon())
                {
                    prev_derivative = funct_derivative;
                    prev_direction = -funct_derivative;
                    return prev_direction;
                }

                double b = trans(funct_derivative-prev_derivative)*funct_derivative/(temp);
                prev_direction = -funct_derivative + b*prev_direction;
            }

            prev_derivative = funct_derivative;
            return prev_direction;
        }

    private:
        bool been_used;
        matrix<double,0,1> prev_derivative;
        matrix<double,0,1> prev_direction;
    };

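    /*
        A minimal usage sketch (not part of this header; it assumes dlib's
        find_min() and objective_delta_stop_strategy from the optimization
        tools, and hypothetical my_function/my_function_gradient callables):

            matrix<double,0,1> x = uniform_matrix<double>(5,1,1.0);
            find_min(cg_search_strategy(),
                     objective_delta_stop_strategy(1e-7),
                     my_function, my_function_gradient, x, -1);

        find_min() calls get_next_direction() once per iteration and runs a
        line search satisfying the Wolfe conditions given by get_wolfe_rho()
        and get_wolfe_sigma().
    */
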
// ----------------------------------------------------------------------------------------
    class bfgs_search_strategy
    {
    public:
        bfgs_search_strategy() : been_used(false), been_used_twice(false) {}

        double get_wolfe_rho (
        ) const { return 0.01; }

        double get_wolfe_sigma (
        ) const { return 0.9; }

        unsigned long get_max_line_search_iterations (
        ) const { return 100; }

        template <typename T>
        const matrix<double,0,1>& get_next_direction (
            const T& x,
            const double ,
            const T& funct_derivative
        )
        {
            if (been_used == false)
            {
                been_used = true;
                H = identity_matrix<double>(x.size());
            }
            else
            {
                // Update H with the BFGS formula from (3.2.12) on page 55 of Fletcher.
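                // With delta = x - prev_x and gamma = g - g_prev, the inverse Hessian
                // estimate is updated as
                //   H += (1 + gHg/dg)*delta*delta'/dg - (delta*(gamma'H) + H*gamma*delta')/dg
                // where dg = delta'*gamma and gHg = gamma'*H*gamma.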
                delta = (x-prev_x);
                gamma = funct_derivative-prev_derivative;

                double dg = dot(delta,gamma);

                // Try to set the initial value of the H matrix to something reasonable if we are still
                // in the early stages of figuring out what it is. The formula below is what is suggested
                // in the book Numerical Optimization by Nocedal and Wright in the chapter on Quasi-Newton methods.
                if (been_used_twice == false)
                {
                    double gg = trans(gamma)*gamma;
                    if (std::abs(gg) > std::numeric_limits<double>::epsilon())
                    {
                        const double temp = put_in_range(0.01, 100, dg/gg);
                        H = diagm(uniform_matrix<double>(x.size(),1, temp));
                        been_used_twice = true;
                    }
                }

                Hg = H*gamma;
                gH = trans(trans(gamma)*H);
                double gHg = trans(gamma)*H*gamma;
                if (gHg < std::numeric_limits<double>::infinity() && dg < std::numeric_limits<double>::infinity() &&
                    dg != 0)
                {
                    H += (1 + gHg/dg)*delta*trans(delta)/(dg) - (delta*trans(gH) + Hg*trans(delta))/(dg);
                }
                else
                {
                    // The update isn't numerically sound (infinite values or dg == 0),
                    // so reset the Hessian estimate and start over.
                    H = identity_matrix<double>(H.nr());
                    been_used_twice = false;
                }
            }

            prev_x = x;
            prev_direction = -H*funct_derivative;
            prev_derivative = funct_derivative;
            return prev_direction;
        }

    private:
        bool been_used;
        bool been_used_twice;
        matrix<double,0,1> prev_x;
        matrix<double,0,1> prev_derivative;
        matrix<double,0,1> prev_direction;
        matrix<double> H;
        matrix<double,0,1> delta, gamma, Hg, gH;
    };

// ----------------------------------------------------------------------------------------

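    // Unlike bfgs_search_strategy above, which stores a dense n-by-n inverse
    // Hessian estimate H, this limited memory version keeps only the last
    // max_size (s,y) update pairs and reconstructs the product H*g on the fly,
    // making it suitable for problems with many variables.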
    class lbfgs_search_strategy
    {
    public:
        explicit lbfgs_search_strategy(unsigned long max_size_) : max_size(max_size_), been_used(false)
        {
            DLIB_ASSERT (
                max_size > 0,
                "\t lbfgs_search_strategy(max_size)"
                << "\n\t max_size can't be zero"
            );
        }

        lbfgs_search_strategy(const lbfgs_search_strategy& item)
        {
            max_size = item.max_size;
            been_used = item.been_used;
            prev_x = item.prev_x;
            prev_derivative = item.prev_derivative;
            prev_direction = item.prev_direction;
            alpha = item.alpha;
            dh_temp = item.dh_temp;
            // Note that the data sequence is not copied, so a copied object
            // starts with an empty update history.
        }

        double get_wolfe_rho (
        ) const { return 0.01; }

        double get_wolfe_sigma (
        ) const { return 0.9; }

        unsigned long get_max_line_search_iterations (
        ) const { return 100; }

        template <typename T>
        const matrix<double,0,1>& get_next_direction (
            const T& x,
            const double ,
            const T& funct_derivative
        )
        {
            prev_direction = -funct_derivative;

            if (been_used == false)
            {
                been_used = true;
            }
            else
            {
                // add an element into the stored data sequence
                dh_temp.s = x - prev_x;
                dh_temp.y = funct_derivative - prev_derivative;
                double temp = dot(dh_temp.s, dh_temp.y);
                // only accept this bit of data if temp isn't zero
                if (std::abs(temp) > std::numeric_limits<double>::epsilon())
                {
                    dh_temp.rho = 1/temp;
                    data.add(data.size(), dh_temp);
                }
                else
                {
                    // a near-zero curvature value makes the secant update
                    // unusable, so discard the stored history
                    data.clear();
                }

                if (data.size() > 0)
                {
                    // This block of code is from algorithm 7.4 in the Nocedal book.
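                    // The two-loop recursion below implicitly applies an approximate
                    // inverse Hessian, built from the stored (s,y) pairs, to the
                    // gradient without ever forming the matrix explicitly.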
                    alpha.resize(data.size());
                    // Loop from data.size()-1 down to 0. Note that i is unsigned, so
                    // decrementing it past zero wraps around to a huge value and the
                    // i < data.size() test terminates the loop.
                    for (unsigned long i = data.size()-1; i < data.size(); --i)
                    {
                        alpha[i] = data[i].rho*dot(data[i].s, prev_direction);
                        prev_direction -= alpha[i]*data[i].y;
                    }

                    // Take a guess at what the first H matrix should be. The formula below is what is
                    // suggested in the book Numerical Optimization by Nocedal and Wright in the chapter
                    // on Large Scale Unconstrained Optimization (in the L-BFGS section).
                    double H_0 = 1.0/data[data.size()-1].rho/dot(data[data.size()-1].y, data[data.size()-1].y);
                    H_0 = put_in_range(0.001, 1000.0, H_0);
                    prev_direction *= H_0;

                    for (unsigned long i = 0; i < data.size(); ++i)
                    {
                        double beta = data[i].rho*dot(data[i].y, prev_direction);
                        prev_direction += data[i].s * (alpha[i] - beta);
                    }
                }
            }

            if (data.size() > max_size)
            {
                // remove the oldest element in the data sequence
                data.remove(0, dh_temp);
            }

            prev_x = x;
            prev_derivative = funct_derivative;
            return prev_direction;
        }

    private:

        struct data_helper
        {
            matrix<double,0,1> s;
            matrix<double,0,1> y;
            double rho;

            friend void swap(data_helper& a, data_helper& b)
            {
                a.s.swap(b.s);
                a.y.swap(b.y);
                std::swap(a.rho, b.rho);
            }
        };
        sequence<data_helper>::kernel_2a data;

        unsigned long max_size;
        bool been_used;
        matrix<double,0,1> prev_x;
        matrix<double,0,1> prev_derivative;
        matrix<double,0,1> prev_direction;
        std::vector<double> alpha;

        data_helper dh_temp;
    };
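
    /*
        A minimal usage sketch (mirroring the cg_search_strategy example above):
        the max_size argument sets how many update pairs are remembered, and a
        value around 10 is a common choice.

            find_min(lbfgs_search_strategy(10),
                     objective_delta_stop_strategy(1e-7),
                     my_function, my_function_gradient, x, -1);
    */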

// ----------------------------------------------------------------------------------------

    template <typename hessian_funct>
    class newton_search_strategy_obj
    {
    public:
        explicit newton_search_strategy_obj(
            const hessian_funct& hess
        ) : hessian(hess) {}

        double get_wolfe_rho (
        ) const { return 0.01; }

        double get_wolfe_sigma (
        ) const { return 0.9; }

        unsigned long get_max_line_search_iterations (
        ) const { return 100; }

        template <typename T>
        const matrix<double,0,1> get_next_direction (
            const T& x,
            const double ,
            const T& funct_derivative
        )
        {
            // Take the full Newton step d = -inv(H(x))*g.
            return -inv(hessian(x))*funct_derivative;
        }

    private:
        hessian_funct hessian;
    };
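
    // Note that the Newton direction -inv(H(x))*g is only guaranteed to be a
    // descent direction when the Hessian H(x) is positive definite. Far from a
    // minimum this can fail, in which case the quasi-Newton strategies above
    // are generally the safer choice.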
    // Convenience function that deduces hessian_funct so callers don't have to
    // spell out the template argument of newton_search_strategy_obj.
    template <typename hessian_funct>
    newton_search_strategy_obj<hessian_funct> newton_search_strategy (
        hessian_funct hessian
    ) { return newton_search_strategy_obj<hessian_funct>(hessian); }
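
    /*
        A usage sketch, with my_hessian as a hypothetical callable returning the
        Hessian matrix at a point:

            find_min(newton_search_strategy(my_hessian),
                     objective_delta_stop_strategy(1e-7),
                     my_function, my_function_gradient, x, -1);
    */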

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
