mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2026-05-16 04:08:00 +00:00
523 lines
17 KiB
C++
523 lines
17 KiB
C++
// Copyright (C) 2008 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_KKMEANs_
|
|
#define DLIB_KKMEANs_
|
|
|
|
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "../matrix/matrix_abstract.h"
#include "../algs.h"
#include "../serialize.h"
#include "kernel.h"
#include "../array.h"
#include "kcentroid.h"
#include "kkmeans_abstract.h"
#include "../noncopyable.h"
#include "../smart_pointers.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
template <
|
|
typename kernel_type
|
|
>
|
|
class kkmeans : public noncopyable
|
|
{
|
|
public:
|
|
typedef typename kernel_type::scalar_type scalar_type;
|
|
typedef typename kernel_type::sample_type sample_type;
|
|
typedef typename kernel_type::mem_manager_type mem_manager_type;
|
|
|
|
kkmeans (
|
|
const kcentroid<kernel_type>& kc_
|
|
):
|
|
kc(kc_),
|
|
min_change(0.01)
|
|
{
|
|
set_number_of_centers(1);
|
|
}
|
|
|
|
~kkmeans()
|
|
{
|
|
}
|
|
|
|
const kernel_type& get_kernel (
|
|
) const
|
|
{
|
|
return kc.get_kernel();
|
|
}
|
|
|
|
void set_kcentroid (
|
|
const kcentroid<kernel_type>& kc_
|
|
)
|
|
{
|
|
kc = kc_;
|
|
set_number_of_centers(number_of_centers());
|
|
}
|
|
|
|
const kcentroid<kernel_type>& get_kcentroid (
|
|
unsigned long i
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(i < number_of_centers(),
|
|
"\tkcentroid kkmeans::get_kcentroid(i)"
|
|
<< "\n\tYou have given an invalid value for i"
|
|
<< "\n\ti: " << i
|
|
<< "\n\tnumber_of_centers(): " << number_of_centers()
|
|
<< "\n\tthis: " << this
|
|
);
|
|
|
|
return *centers[i];
|
|
}
|
|
|
|
void set_number_of_centers (
|
|
unsigned long num
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(num > 0,
|
|
"\tvoid kkmeans::set_number_of_centers()"
|
|
<< "\n\tYou can't set the number of centers to zero"
|
|
<< "\n\tthis: " << this
|
|
);
|
|
|
|
centers.set_max_size(num);
|
|
centers.set_size(num);
|
|
|
|
for (unsigned long i = 0; i < centers.size(); ++i)
|
|
{
|
|
centers[i].reset(new kcentroid<kernel_type>(kc));
|
|
}
|
|
}
|
|
|
|
unsigned long number_of_centers (
|
|
) const
|
|
{
|
|
return centers.size();
|
|
}
|
|
|
|
template <typename T, typename U>
|
|
void train (
|
|
const T& samples,
|
|
const U& initial_centers,
|
|
long max_iter = 1000
|
|
)
|
|
{
|
|
do_train(mat(samples),mat(initial_centers),max_iter);
|
|
}
|
|
|
|
unsigned long operator() (
|
|
const sample_type& sample
|
|
) const
|
|
{
|
|
unsigned long label = 0;
|
|
scalar_type best_score = (*centers[0])(sample);
|
|
|
|
// figure out which center the given sample is closest too
|
|
for (unsigned long i = 1; i < centers.size(); ++i)
|
|
{
|
|
scalar_type temp = (*centers[i])(sample);
|
|
if (temp < best_score)
|
|
{
|
|
label = i;
|
|
best_score = temp;
|
|
}
|
|
}
|
|
|
|
return label;
|
|
}
|
|
|
|
void set_min_change (
|
|
scalar_type min_change_
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT( 0 <= min_change_ < 1,
|
|
"\tvoid kkmeans::set_min_change()"
|
|
<< "\n\tInvalid arguments to this function"
|
|
<< "\n\tthis: " << this
|
|
<< "\n\tmin_change_: " << min_change_
|
|
);
|
|
min_change = min_change_;
|
|
}
|
|
|
|
const scalar_type get_min_change (
|
|
) const
|
|
{
|
|
return min_change;
|
|
}
|
|
|
|
void swap (
|
|
kkmeans& item
|
|
)
|
|
{
|
|
centers.swap(item.centers);
|
|
kc.swap(item.kc);
|
|
assignments.swap(item.assignments);
|
|
exchange(min_change, item.min_change);
|
|
}
|
|
|
|
friend void serialize(const kkmeans& item, std::ostream& out)
|
|
{
|
|
serialize(item.centers.size(),out);
|
|
for (unsigned long i = 0; i < item.centers.size(); ++i)
|
|
{
|
|
serialize(*item.centers[i], out);
|
|
}
|
|
serialize(item.kc, out);
|
|
serialize(item.min_change, out);
|
|
}
|
|
|
|
friend void deserialize(kkmeans& item, std::istream& in)
|
|
{
|
|
unsigned long num;
|
|
deserialize(num, in);
|
|
item.centers.resize(num);
|
|
for (unsigned long i = 0; i < item.centers.size(); ++i)
|
|
{
|
|
scoped_ptr<kcentroid<kernel_type> > temp(new kcentroid<kernel_type>(kernel_type()));
|
|
deserialize(*temp, in);
|
|
item.centers[i].swap(temp);
|
|
}
|
|
|
|
deserialize(item.kc, in);
|
|
deserialize(item.min_change, in);
|
|
}
|
|
|
|
private:
|
|
|
|
template <typename matrix_type, typename matrix_type2>
|
|
void do_train (
|
|
const matrix_type& samples,
|
|
const matrix_type2& initial_centers,
|
|
long max_iter = 1000
|
|
)
|
|
{
|
|
COMPILE_TIME_ASSERT((is_same_type<typename matrix_type::type, sample_type>::value));
|
|
COMPILE_TIME_ASSERT((is_same_type<typename matrix_type2::type, sample_type>::value));
|
|
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(samples.nc() == 1 && initial_centers.nc() == 1 &&
|
|
initial_centers.nr() == static_cast<long>(number_of_centers()),
|
|
"\tvoid kkmeans::train()"
|
|
<< "\n\tInvalid arguments to this function"
|
|
<< "\n\tthis: " << this
|
|
<< "\n\tsamples.nc(): " << samples.nc()
|
|
<< "\n\tinitial_centers.nc(): " << initial_centers.nc()
|
|
<< "\n\tinitial_centers.nr(): " << initial_centers.nr()
|
|
);
|
|
|
|
// clear out the old data and initialize the centers
|
|
for (unsigned long i = 0; i < centers.size(); ++i)
|
|
{
|
|
centers[i]->clear_dictionary();
|
|
centers[i]->train(initial_centers(i));
|
|
}
|
|
|
|
assignments.resize(samples.size());
|
|
|
|
bool assignment_changed = true;
|
|
|
|
// loop until the centers stabilize
|
|
long count = 0;
|
|
const unsigned long min_num_change = static_cast<unsigned long>(min_change*samples.size());
|
|
unsigned long num_changed = min_num_change;
|
|
while (assignment_changed && count < max_iter && num_changed >= min_num_change)
|
|
{
|
|
++count;
|
|
assignment_changed = false;
|
|
num_changed = 0;
|
|
|
|
// loop over all the samples and assign them to their closest centers
|
|
for (long i = 0; i < samples.size(); ++i)
|
|
{
|
|
// find the best center
|
|
unsigned long best_center = 0;
|
|
scalar_type best_score = (*centers[0])(samples(i));
|
|
for (unsigned long c = 1; c < centers.size(); ++c)
|
|
{
|
|
scalar_type temp = (*centers[c])(samples(i));
|
|
if (temp < best_score)
|
|
{
|
|
best_score = temp;
|
|
best_center = c;
|
|
}
|
|
}
|
|
|
|
// if the current sample changed centers then make note of that
|
|
if (assignments[i] != best_center)
|
|
{
|
|
assignments[i] = best_center;
|
|
assignment_changed = true;
|
|
++num_changed;
|
|
}
|
|
}
|
|
|
|
if (assignment_changed)
|
|
{
|
|
// now clear out the old data
|
|
for (unsigned long i = 0; i < centers.size(); ++i)
|
|
centers[i]->clear_dictionary();
|
|
|
|
// recalculate the cluster centers
|
|
for (unsigned long i = 0; i < assignments.size(); ++i)
|
|
centers[assignments[i]]->train(samples(i));
|
|
}
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
array<scoped_ptr<kcentroid<kernel_type> > > centers;
|
|
kcentroid<kernel_type> kc;
|
|
scalar_type min_change;
|
|
|
|
// temp variables
|
|
array<unsigned long> assignments;
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename kernel_type>
|
|
void swap(kkmeans<kernel_type>& a, kkmeans<kernel_type>& b)
|
|
{ a.swap(b); }
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
// Bookkeeping record used by pick_initial_centers(): pairs a sample index with a
// distance value.  Records order by distance alone, and a default constructed
// record carries a very large distance (1e200) and index 0.
struct dlib_pick_initial_centers_data
{
    dlib_pick_initial_centers_data()
        : idx(0),
          dist(1e200)
    {
    }

    long idx;     // index of the sample this record describes
    double dist;  // distance associated with that sample

    // Orders records by ascending distance; the index takes no part in it.
    bool operator< (const dlib_pick_initial_centers_data& d) const
    {
        return d.dist > dist;
    }
};
|
|
|
|
template <
|
|
typename vector_type1,
|
|
typename vector_type2,
|
|
typename kernel_type
|
|
>
|
|
void pick_initial_centers(
|
|
long num_centers,
|
|
vector_type1& centers,
|
|
const vector_type2& samples,
|
|
const kernel_type& k,
|
|
double percentile = 0.01
|
|
)
|
|
{
|
|
/*
|
|
This function is basically just a non-randomized version of the kmeans++ algorithm
|
|
described in the paper:
|
|
kmeans++: The Advantages of Careful Seeding by Arthur and Vassilvitskii
|
|
|
|
*/
|
|
|
|
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(num_centers > 1 && 0 <= percentile && percentile < 1 && samples.size() > 1,
|
|
"\tvoid pick_initial_centers()"
|
|
<< "\n\tYou passed invalid arguments to this function"
|
|
<< "\n\tnum_centers: " << num_centers
|
|
<< "\n\tpercentile: " << percentile
|
|
<< "\n\tsamples.size(): " << samples.size()
|
|
);
|
|
|
|
std::vector<dlib_pick_initial_centers_data> scores(samples.size());
|
|
std::vector<dlib_pick_initial_centers_data> scores_sorted(samples.size());
|
|
centers.clear();
|
|
|
|
// pick the first sample as one of the centers
|
|
centers.push_back(samples[0]);
|
|
|
|
const long best_idx = static_cast<long>(samples.size() - samples.size()*percentile - 1);
|
|
|
|
// pick the next center
|
|
for (long i = 0; i < num_centers-1; ++i)
|
|
{
|
|
// Loop over the samples and compare them to the most recent center. Store
|
|
// the distance from each sample to its closest center in scores.
|
|
const double k_cc = k(centers[i], centers[i]);
|
|
for (unsigned long s = 0; s < samples.size(); ++s)
|
|
{
|
|
// compute the distance between this sample and the current center
|
|
const double dist = k_cc + k(samples[s],samples[s]) - 2*k(samples[s], centers[i]);
|
|
|
|
if (dist < scores[s].dist)
|
|
{
|
|
scores[s].dist = dist;
|
|
scores[s].idx = s;
|
|
}
|
|
}
|
|
|
|
scores_sorted = scores;
|
|
|
|
// now find the winning center and add it to centers. It is the one that is
|
|
// far away from all the other centers.
|
|
sort(scores_sorted.begin(), scores_sorted.end());
|
|
centers.push_back(samples[scores_sorted[best_idx].idx]);
|
|
}
|
|
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename vector_type1,
|
|
typename vector_type2
|
|
>
|
|
void pick_initial_centers(
|
|
long num_centers,
|
|
vector_type1& centers,
|
|
const vector_type2& samples,
|
|
double percentile = 0.01
|
|
)
|
|
{
|
|
typedef typename vector_type1::value_type sample_type;
|
|
linear_kernel<sample_type> kern;
|
|
pick_initial_centers(num_centers, centers, samples, kern, percentile);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename array_type,
|
|
typename sample_type,
|
|
typename alloc
|
|
>
|
|
void find_clusters_using_kmeans (
|
|
const array_type& samples,
|
|
std::vector<sample_type, alloc>& centers,
|
|
unsigned long max_iter = 1000
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(samples.size() > 0 && centers.size() > 0,
|
|
"\tvoid find_clusters_using_kmeans()"
|
|
<< "\n\tYou passed invalid arguments to this function"
|
|
<< "\n\t samples.size(): " << samples.size()
|
|
<< "\n\t centers.size(): " << centers.size()
|
|
);
|
|
|
|
#ifdef ENABLE_ASSERTS
|
|
{
|
|
const long nr = samples[0].nr();
|
|
const long nc = samples[0].nc();
|
|
for (unsigned long i = 0; i < samples.size(); ++i)
|
|
{
|
|
DLIB_ASSERT(is_vector(samples[i]) && samples[i].nr() == nr && samples[i].nc() == nc,
|
|
"\tvoid find_clusters_using_kmeans()"
|
|
<< "\n\t You passed invalid arguments to this function"
|
|
<< "\n\t is_vector(samples[i]): " << is_vector(samples[i])
|
|
<< "\n\t samples[i].nr(): " << samples[i].nr()
|
|
<< "\n\t nr: " << nr
|
|
<< "\n\t samples[i].nc(): " << samples[i].nc()
|
|
<< "\n\t nc: " << nc
|
|
<< "\n\t i: " << i
|
|
);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
typedef typename sample_type::type scalar_type;
|
|
|
|
sample_type zero(centers[0]);
|
|
set_all_elements(zero, 0);
|
|
|
|
std::vector<unsigned long, alloc> center_element_count;
|
|
|
|
// tells which center a sample belongs to
|
|
std::vector<unsigned long, alloc> assignments(samples.size(), samples.size());
|
|
|
|
|
|
unsigned long iter = 0;
|
|
bool centers_changed = true;
|
|
while (centers_changed && iter < max_iter)
|
|
{
|
|
++iter;
|
|
centers_changed = false;
|
|
center_element_count.assign(centers.size(), 0);
|
|
|
|
// loop over each sample and see which center it is closest to
|
|
for (unsigned long i = 0; i < samples.size(); ++i)
|
|
{
|
|
// find the best center for sample[i]
|
|
scalar_type best_dist = std::numeric_limits<scalar_type>::max();
|
|
unsigned long best_center = 0;
|
|
for (unsigned long j = 0; j < centers.size(); ++j)
|
|
{
|
|
scalar_type dist = length(centers[j] - samples[i]);
|
|
if (dist < best_dist)
|
|
{
|
|
best_dist = dist;
|
|
best_center = j;
|
|
}
|
|
}
|
|
|
|
if (assignments[i] != best_center)
|
|
{
|
|
centers_changed = true;
|
|
assignments[i] = best_center;
|
|
}
|
|
|
|
center_element_count[best_center] += 1;
|
|
}
|
|
|
|
// now update all the centers
|
|
centers.assign(centers.size(), zero);
|
|
for (unsigned long i = 0; i < samples.size(); ++i)
|
|
{
|
|
centers[assignments[i]] += samples[i];
|
|
}
|
|
for (unsigned long i = 0; i < centers.size(); ++i)
|
|
{
|
|
if (center_element_count[i] != 0)
|
|
centers[i] /= center_element_count[i];
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename array_type,
|
|
typename EXP
|
|
>
|
|
unsigned long nearest_center (
|
|
const array_type& centers,
|
|
const matrix_exp<EXP>& sample
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(centers.size() > 0 && sample.size() > 0 && is_vector(sample),
|
|
"\t unsigned long nearest_center()"
|
|
<< "\n\t You have given invalid inputs to this function."
|
|
<< "\n\t centers.size(): " << centers.size()
|
|
<< "\n\t sample.size(): " << sample.size()
|
|
<< "\n\t is_vector(sample): " << is_vector(sample)
|
|
);
|
|
|
|
double best_dist = length_squared(centers[0] - sample);
|
|
unsigned long best_idx = 0;
|
|
for (unsigned long i = 1; i < centers.size(); ++i)
|
|
{
|
|
const double dist = length_squared(centers[i] - sample);
|
|
if (dist < best_dist)
|
|
{
|
|
best_dist = dist;
|
|
best_idx = i;
|
|
}
|
|
}
|
|
return best_idx;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_KKMEANs_
|
|
|
|
|