mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2026-05-23 15:47:53 +00:00
Master commit of OpenFace.
This commit is contained in:
515
lib/3rdParty/dlib/include/dlib/clustering/modularity_clustering.h
vendored
Normal file
515
lib/3rdParty/dlib/include/dlib/clustering/modularity_clustering.h
vendored
Normal file
@@ -0,0 +1,515 @@
|
||||
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_MODULARITY_ClUSTERING__H__
|
||||
#define DLIB_MODULARITY_ClUSTERING__H__
|
||||
|
||||
#include "modularity_clustering_abstract.h"
|
||||
#include "../sparse_vector.h"
|
||||
#include "../graph_utils/edge_list_graphs.h"
|
||||
#include "../matrix.h"
|
||||
#include "../rand.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// -----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
inline double newman_cluster_split (
|
||||
dlib::rand& rnd,
|
||||
const std::vector<ordered_sample_pair>& edges,
|
||||
const matrix<double,0,1>& node_degrees, // k from the Newman paper
|
||||
const matrix<double,0,1>& Bdiag, // diag(B) from the Newman paper
|
||||
const double& edge_sum, // m from the Newman paper
|
||||
matrix<double,0,1>& labels,
|
||||
const double eps,
|
||||
const unsigned long max_iterations
|
||||
)
|
||||
/*!
|
||||
requires
|
||||
- node_degrees.size() == max_index_plus_one(edges)
|
||||
- Bdiag.size() == max_index_plus_one(edges)
|
||||
- edges must be sorted according to order_by_index()
|
||||
ensures
|
||||
- This routine splits a graph into two subgraphs using the Newman
|
||||
clustering method.
|
||||
- returns the modularity obtained when the graph is split according
|
||||
to the contents of #labels.
|
||||
- #labels.size() == node_degrees.size()
|
||||
- for all valid i: #labels(i) == -1 or +1
|
||||
- if (this function returns 0) then
|
||||
- all the labels are equal, i.e. the graph is not split.
|
||||
!*/
|
||||
{
|
||||
// Scale epsilon so that it is relative to the expected value of an element of a
|
||||
// unit vector of length node_degrees.size().
|
||||
const double power_iter_eps = eps * std::sqrt(1.0/node_degrees.size());
|
||||
|
||||
// Make a random unit vector and put in labels.
|
||||
labels.set_size(node_degrees.size());
|
||||
for (long i = 0; i < labels.size(); ++i)
|
||||
labels(i) = rnd.get_random_gaussian();
|
||||
labels /= length(labels);
|
||||
|
||||
matrix<double,0,1> Bv, Bv_unit;
|
||||
|
||||
// Do the power iteration for a while.
|
||||
double eig = -1;
|
||||
double offset = 0;
|
||||
while (eig < 0)
|
||||
{
|
||||
|
||||
// any number larger than power_iter_eps
|
||||
double iteration_change = power_iter_eps*2+1;
|
||||
for (unsigned long i = 0; i < max_iterations && iteration_change > power_iter_eps; ++i)
|
||||
{
|
||||
sparse_matrix_vector_multiply(edges, labels, Bv);
|
||||
Bv -= dot(node_degrees, labels)/(2*edge_sum) * node_degrees;
|
||||
|
||||
if (offset != 0)
|
||||
{
|
||||
Bv -= offset*labels;
|
||||
}
|
||||
|
||||
|
||||
const double len = length(Bv);
|
||||
if (len != 0)
|
||||
{
|
||||
Bv_unit = Bv/len;
|
||||
iteration_change = max(abs(labels-Bv_unit));
|
||||
labels.swap(Bv_unit);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Had a bad time, pick another random vector and try it with the
|
||||
// power iteration.
|
||||
for (long i = 0; i < labels.size(); ++i)
|
||||
labels(i) = rnd.get_random_gaussian();
|
||||
}
|
||||
}
|
||||
|
||||
eig = dot(Bv,labels);
|
||||
// we will repeat this loop if the largest eigenvalue is negative
|
||||
offset = eig;
|
||||
}
|
||||
|
||||
|
||||
for (long i = 0; i < labels.size(); ++i)
|
||||
{
|
||||
if (labels(i) > 0)
|
||||
labels(i) = 1;
|
||||
else
|
||||
labels(i) = -1;
|
||||
}
|
||||
|
||||
|
||||
// compute B*labels, store result in Bv.
|
||||
sparse_matrix_vector_multiply(edges, labels, Bv);
|
||||
Bv -= dot(node_degrees, labels)/(2*edge_sum) * node_degrees;
|
||||
|
||||
// Do some label refinement. In this step we swap labels if it
|
||||
// improves the modularity score.
|
||||
bool flipped_label = true;
|
||||
while(flipped_label)
|
||||
{
|
||||
flipped_label = false;
|
||||
unsigned long idx = 0;
|
||||
for (long i = 0; i < labels.size(); ++i)
|
||||
{
|
||||
const double val = -2*labels(i);
|
||||
const double increase = 4*Bdiag(i) + 2*val*Bv(i);
|
||||
|
||||
// if there is an increase in modularity for swapping this label
|
||||
if (increase > 0)
|
||||
{
|
||||
labels(i) *= -1;
|
||||
while (idx < edges.size() && edges[idx].index1() == (unsigned long)i)
|
||||
{
|
||||
const long j = edges[idx].index2();
|
||||
Bv(j) += val*edges[idx].distance();
|
||||
++idx;
|
||||
}
|
||||
|
||||
Bv -= (val*node_degrees(i)/(2*edge_sum))*node_degrees;
|
||||
|
||||
flipped_label = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (idx < edges.size() && edges[idx].index1() == (unsigned long)i)
|
||||
{
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const double modularity = dot(Bv, labels)/(4*edge_sum);
|
||||
|
||||
return modularity;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
inline unsigned long newman_cluster_helper (
|
||||
dlib::rand& rnd,
|
||||
const std::vector<ordered_sample_pair>& edges,
|
||||
const matrix<double,0,1>& node_degrees, // k from the Newman paper
|
||||
const matrix<double,0,1>& Bdiag, // diag(B) from the Newman paper
|
||||
const double& edge_sum, // m from the Newman paper
|
||||
std::vector<unsigned long>& labels,
|
||||
double modularity_threshold,
|
||||
const double eps,
|
||||
const unsigned long max_iterations
|
||||
)
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of clusters the data was split into
|
||||
!*/
|
||||
{
|
||||
matrix<double,0,1> l;
|
||||
const double modularity = newman_cluster_split(rnd,edges,node_degrees,Bdiag,edge_sum,l,eps,max_iterations);
|
||||
|
||||
|
||||
// We need to collapse the node index values down to contiguous values. So
|
||||
// we use the following two vectors to contain the mappings from input index
|
||||
// values to their corresponding index values in each split.
|
||||
std::vector<unsigned long> left_idx_map(node_degrees.size());
|
||||
std::vector<unsigned long> right_idx_map(node_degrees.size());
|
||||
|
||||
// figure out how many nodes went into each side of the split.
|
||||
unsigned long num_left_split = 0;
|
||||
unsigned long num_right_split = 0;
|
||||
for (long i = 0; i < l.size(); ++i)
|
||||
{
|
||||
if (l(i) > 0)
|
||||
{
|
||||
left_idx_map[i] = num_left_split;
|
||||
++num_left_split;
|
||||
}
|
||||
else
|
||||
{
|
||||
right_idx_map[i] = num_right_split;
|
||||
++num_right_split;
|
||||
}
|
||||
}
|
||||
|
||||
// do a recursive split if it will improve the modularity.
|
||||
if (modularity > modularity_threshold && num_left_split > 0 && num_right_split > 0)
|
||||
{
|
||||
|
||||
// split the node_degrees and Bdiag matrices into left and right split parts
|
||||
matrix<double,0,1> left_node_degrees(num_left_split);
|
||||
matrix<double,0,1> right_node_degrees(num_right_split);
|
||||
matrix<double,0,1> left_Bdiag(num_left_split);
|
||||
matrix<double,0,1> right_Bdiag(num_right_split);
|
||||
for (long i = 0; i < l.size(); ++i)
|
||||
{
|
||||
if (l(i) > 0)
|
||||
{
|
||||
left_node_degrees(left_idx_map[i]) = node_degrees(i);
|
||||
left_Bdiag(left_idx_map[i]) = Bdiag(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
right_node_degrees(right_idx_map[i]) = node_degrees(i);
|
||||
right_Bdiag(right_idx_map[i]) = Bdiag(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// put the edges from one side of the split into split_edges
|
||||
std::vector<ordered_sample_pair> split_edges;
|
||||
modularity_threshold = 0;
|
||||
for (unsigned long k = 0; k < edges.size(); ++k)
|
||||
{
|
||||
const unsigned long i = edges[k].index1();
|
||||
const unsigned long j = edges[k].index2();
|
||||
const double d = edges[k].distance();
|
||||
if (l(i) > 0 && l(j) > 0)
|
||||
{
|
||||
split_edges.push_back(ordered_sample_pair(left_idx_map[i], left_idx_map[j], d));
|
||||
modularity_threshold += d;
|
||||
}
|
||||
}
|
||||
modularity_threshold -= sum(left_node_degrees*sum(left_node_degrees))/(2*edge_sum);
|
||||
modularity_threshold /= 4*edge_sum;
|
||||
|
||||
unsigned long num_left_clusters;
|
||||
std::vector<unsigned long> left_labels;
|
||||
num_left_clusters = newman_cluster_helper(rnd,split_edges,left_node_degrees,left_Bdiag,
|
||||
edge_sum,left_labels,modularity_threshold,
|
||||
eps, max_iterations);
|
||||
|
||||
// now load the other side into split_edges and cluster it as well
|
||||
split_edges.clear();
|
||||
modularity_threshold = 0;
|
||||
for (unsigned long k = 0; k < edges.size(); ++k)
|
||||
{
|
||||
const unsigned long i = edges[k].index1();
|
||||
const unsigned long j = edges[k].index2();
|
||||
const double d = edges[k].distance();
|
||||
if (l(i) < 0 && l(j) < 0)
|
||||
{
|
||||
split_edges.push_back(ordered_sample_pair(right_idx_map[i], right_idx_map[j], d));
|
||||
modularity_threshold += d;
|
||||
}
|
||||
}
|
||||
modularity_threshold -= sum(right_node_degrees*sum(right_node_degrees))/(2*edge_sum);
|
||||
modularity_threshold /= 4*edge_sum;
|
||||
|
||||
unsigned long num_right_clusters;
|
||||
std::vector<unsigned long> right_labels;
|
||||
num_right_clusters = newman_cluster_helper(rnd,split_edges,right_node_degrees,right_Bdiag,
|
||||
edge_sum,right_labels,modularity_threshold,
|
||||
eps, max_iterations);
|
||||
|
||||
// Now merge the labels from the two splits.
|
||||
labels.resize(node_degrees.size());
|
||||
for (unsigned long i = 0; i < labels.size(); ++i)
|
||||
{
|
||||
// if this node was in the left split
|
||||
if (l(i) > 0)
|
||||
{
|
||||
labels[i] = left_labels[left_idx_map[i]];
|
||||
}
|
||||
else // if this node was in the right split
|
||||
{
|
||||
labels[i] = right_labels[right_idx_map[i]] + num_left_clusters;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return num_left_clusters + num_right_clusters;
|
||||
}
|
||||
else
|
||||
{
|
||||
labels.assign(node_degrees.size(),0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline unsigned long newman_cluster (
|
||||
const std::vector<ordered_sample_pair>& edges,
|
||||
std::vector<unsigned long>& labels,
|
||||
const double eps = 1e-4,
|
||||
const unsigned long max_iterations = 2000
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_ordered_by_index(edges),
|
||||
"\t unsigned long newman_cluster()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
);
|
||||
|
||||
labels.clear();
|
||||
if (edges.size() == 0)
|
||||
return 0;
|
||||
|
||||
const unsigned long num_nodes = max_index_plus_one(edges);
|
||||
|
||||
// compute the node_degrees vector, edge_sum value, and diag(B).
|
||||
matrix<double,0,1> node_degrees(num_nodes);
|
||||
matrix<double,0,1> Bdiag(num_nodes);
|
||||
Bdiag = 0;
|
||||
double edge_sum = 0;
|
||||
node_degrees = 0;
|
||||
for (unsigned long i = 0; i < edges.size(); ++i)
|
||||
{
|
||||
node_degrees(edges[i].index1()) += edges[i].distance();
|
||||
edge_sum += edges[i].distance();
|
||||
if (edges[i].index1() == edges[i].index2())
|
||||
Bdiag(edges[i].index1()) += edges[i].distance();
|
||||
}
|
||||
edge_sum /= 2;
|
||||
Bdiag -= squared(node_degrees)/(2*edge_sum);
|
||||
|
||||
|
||||
dlib::rand rnd;
|
||||
return impl::newman_cluster_helper(rnd,edges,node_degrees,Bdiag,edge_sum,labels,0,eps,max_iterations);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline unsigned long newman_cluster (
|
||||
const std::vector<sample_pair>& edges,
|
||||
std::vector<unsigned long>& labels,
|
||||
const double eps = 1e-4,
|
||||
const unsigned long max_iterations = 2000
|
||||
)
|
||||
{
|
||||
std::vector<ordered_sample_pair> oedges;
|
||||
convert_unordered_to_ordered(edges, oedges);
|
||||
std::sort(oedges.begin(), oedges.end(), &order_by_index<ordered_sample_pair>);
|
||||
|
||||
return newman_cluster(oedges, labels, eps, max_iterations);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
inline std::vector<unsigned long> remap_labels (
|
||||
const std::vector<unsigned long>& labels,
|
||||
unsigned long& num_labels
|
||||
)
|
||||
/*!
|
||||
ensures
|
||||
- This function takes labels and produces a mapping which maps elements of
|
||||
labels into the most compact range in [0, max] as possible. In particular,
|
||||
there won't be any unused integers in the mapped range.
|
||||
- #num_labels == the number of distinct values in labels.
|
||||
- returns a vector V such that:
|
||||
- V.size() == labels.size()
|
||||
- max(mat(V))+1 == num_labels.
|
||||
- for all valid i,j:
|
||||
- if (labels[i] == labels[j]) then
|
||||
- V[i] == V[j]
|
||||
- else
|
||||
- V[i] != V[j]
|
||||
!*/
|
||||
{
|
||||
std::map<unsigned long, unsigned long> temp;
|
||||
for (unsigned long i = 0; i < labels.size(); ++i)
|
||||
{
|
||||
if (temp.count(labels[i]) == 0)
|
||||
{
|
||||
const unsigned long next = temp.size();
|
||||
temp[labels[i]] = next;
|
||||
}
|
||||
}
|
||||
|
||||
num_labels = temp.size();
|
||||
|
||||
std::vector<unsigned long> result(labels.size());
|
||||
for (unsigned long i = 0; i < labels.size(); ++i)
|
||||
{
|
||||
result[i] = temp[labels[i]];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline double modularity (
|
||||
const std::vector<sample_pair>& edges,
|
||||
const std::vector<unsigned long>& labels
|
||||
)
|
||||
{
|
||||
const unsigned long num_nodes = max_index_plus_one(edges);
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(labels.size() == num_nodes,
|
||||
"\t double modularity()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
);
|
||||
|
||||
unsigned long num_labels;
|
||||
const std::vector<unsigned long>& labels_ = dlib::impl::remap_labels(labels,num_labels);
|
||||
|
||||
std::vector<double> cluster_sums(num_labels,0);
|
||||
std::vector<double> k(num_nodes,0);
|
||||
|
||||
double Q = 0;
|
||||
double m = 0;
|
||||
for (unsigned long i = 0; i < edges.size(); ++i)
|
||||
{
|
||||
const unsigned long n1 = edges[i].index1();
|
||||
const unsigned long n2 = edges[i].index2();
|
||||
k[n1] += edges[i].distance();
|
||||
if (n1 != n2)
|
||||
k[n2] += edges[i].distance();
|
||||
|
||||
if (n1 != n2)
|
||||
m += edges[i].distance();
|
||||
else
|
||||
m += edges[i].distance()/2;
|
||||
|
||||
if (labels_[n1] == labels_[n2])
|
||||
{
|
||||
if (n1 != n2)
|
||||
Q += 2*edges[i].distance();
|
||||
else
|
||||
Q += edges[i].distance();
|
||||
}
|
||||
}
|
||||
|
||||
if (m == 0)
|
||||
return 0;
|
||||
|
||||
for (unsigned long i = 0; i < labels_.size(); ++i)
|
||||
{
|
||||
cluster_sums[labels_[i]] += k[i];
|
||||
}
|
||||
|
||||
for (unsigned long i = 0; i < labels_.size(); ++i)
|
||||
{
|
||||
Q -= k[i]*cluster_sums[labels_[i]]/(2*m);
|
||||
}
|
||||
|
||||
return 1.0/(2*m)*Q;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline double modularity (
|
||||
const std::vector<ordered_sample_pair>& edges,
|
||||
const std::vector<unsigned long>& labels
|
||||
)
|
||||
{
|
||||
const unsigned long num_nodes = max_index_plus_one(edges);
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(labels.size() == num_nodes,
|
||||
"\t double modularity()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
);
|
||||
|
||||
|
||||
unsigned long num_labels;
|
||||
const std::vector<unsigned long>& labels_ = dlib::impl::remap_labels(labels,num_labels);
|
||||
|
||||
std::vector<double> cluster_sums(num_labels,0);
|
||||
std::vector<double> k(num_nodes,0);
|
||||
|
||||
double Q = 0;
|
||||
double m = 0;
|
||||
for (unsigned long i = 0; i < edges.size(); ++i)
|
||||
{
|
||||
const unsigned long n1 = edges[i].index1();
|
||||
const unsigned long n2 = edges[i].index2();
|
||||
k[n1] += edges[i].distance();
|
||||
m += edges[i].distance();
|
||||
if (labels_[n1] == labels_[n2])
|
||||
{
|
||||
Q += edges[i].distance();
|
||||
}
|
||||
}
|
||||
|
||||
if (m == 0)
|
||||
return 0;
|
||||
|
||||
for (unsigned long i = 0; i < labels_.size(); ++i)
|
||||
{
|
||||
cluster_sums[labels_[i]] += k[i];
|
||||
}
|
||||
|
||||
for (unsigned long i = 0; i < labels_.size(); ++i)
|
||||
{
|
||||
Q -= k[i]*cluster_sums[labels_[i]]/m;
|
||||
}
|
||||
|
||||
return 1.0/m*Q;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_MODULARITY_ClUSTERING__H__
|
||||
|
||||
Reference in New Issue
Block a user