Master commit of OpenFace.

This commit is contained in:
unknown
2016-04-28 15:40:36 -04:00
parent 5346d303ab
commit 57e58a6949
4406 changed files with 1441342 additions and 0 deletions

View File

@@ -0,0 +1,593 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_EDGE_LIST_GrAPHS_Hh_
#define DLIB_EDGE_LIST_GrAPHS_Hh_
#include "edge_list_graphs_abstract.h"
#include <limits>
#include <vector>
#include "../string.h"
#include "../rand.h"
#include <algorithm>
#include "sample_pair.h"
#include "ordered_sample_pair.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
void remove_duplicate_edges (
    vector_type& pairs
)
/*!
    ensures
        - pairs is sorted with order_by_index() and every element that compares
          equal (operator!= is false) to the element directly before it in that
          sorted order is dropped.
        - an empty container is left untouched.
!*/
{
    typedef typename vector_type::value_type edge_type;

    // nothing to sort or filter
    if (pairs.size() == 0)
        return;

    // sorting is what makes equal edges sit next to each other
    std::sort(pairs.begin(), pairs.end(), &order_by_index<edge_type>);

    // collect the survivors in a scratch container of the same type
    vector_type survivors;
    survivors.reserve(pairs.size());

    // the first element can never be a repeat of a predecessor
    survivors.push_back(pairs[0]);

    unsigned long pos = 1;
    while (pos < pairs.size())
    {
        const bool differs_from_previous = (pairs[pos] != pairs[pos-1]);
        if (differs_from_previous)
            survivors.push_back(pairs[pos]);
        ++pos;
    }

    // hand the filtered contents back to the caller
    survivors.swap(pairs);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
    template <typename iterator>
    iterator iterator_of_worst (
        iterator begin,
        const iterator& end
    )
    /*!
        ensures
            - if [begin, end) is empty then
                - returns end (nothing is dereferenced)
            - else
                - returns an iterator that points to the element in the given range
                  that has the biggest distance.  If several elements share the
                  biggest distance, the first of them is returned.
    !*/
    {
        iterator worst = begin;

        // Guard against an empty range: the first element must not be touched
        // unless it actually exists.
        if (begin == end)
            return worst;

        double dist = begin->distance();

        // The first element is already the running answer, so start at the second.
        for (++begin; begin != end; ++begin)
        {
            // strict comparison keeps the earliest element on ties
            if (begin->distance() > dist)
            {
                dist = begin->distance();
                worst = begin;
            }
        }
        return worst;
    }
}
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc,
typename T
>
void find_percent_shortest_edges_randomly (
const vector_type& samples,
const distance_function_type& dist_funct,
const double percent,
const unsigned long num,
const T& random_seed,
std::vector<sample_pair, alloc>& out
)
{
// make sure requires clause is not broken
DLIB_ASSERT( 0 < percent && percent <= 1 &&
num > 0,
"\t void find_percent_shortest_edges_randomly()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t samples.size(): " << samples.size()
<< "\n\t percent: " << percent
<< "\n\t num: " << num
);
out.clear();
if (samples.size() <= 1)
{
return;
}
std::vector<sample_pair, alloc> edges;
edges.reserve(num);
dlib::rand rnd;
rnd.set_seed(cast_to_string(random_seed));
// randomly sample a bunch of edges
for (unsigned long i = 0; i < num; ++i)
{
const unsigned long idx1 = rnd.get_random_32bit_number()%samples.size();
const unsigned long idx2 = rnd.get_random_32bit_number()%samples.size();
if (idx1 != idx2)
{
const double dist = dist_funct(samples[idx1], samples[idx2]);
if (dist < std::numeric_limits<double>::infinity())
{
edges.push_back(sample_pair(idx1, idx2, dist));
}
}
}
// now put edges into out while avoiding duplicates
if (edges.size() > 0)
{
remove_duplicate_edges(edges);
// now sort all the edges by distance and take the percent with the smallest distance
std::sort(edges.begin(), edges.end(), &order_by_distance<sample_pair>);
const unsigned long out_size = std::min<unsigned long>((unsigned long)(num*percent), edges.size());
out.assign(edges.begin(), edges.begin() + out_size);
}
}
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc,
typename T
>
void find_approximate_k_nearest_neighbors (
const vector_type& samples,
const distance_function_type& dist_funct,
const unsigned long k,
unsigned long num,
const T& random_seed,
std::vector<sample_pair, alloc>& out
)
/*!
    Approximate k-nearest-neighbors by random sampling.  Random index pairs are
    drawn from samples, each accepted pair is recorded in both directions as an
    ordered_sample_pair, the directed edges are grouped by index1(), and up to k
    edges are kept per group.  The kept edges are de-duplicated and swapped into
    out.  out is cleared first and stays empty when samples has fewer than two
    elements.
!*/
{
// make sure requires clause is not broken
DLIB_ASSERT( num > 0 && k > 0,
"\t void find_approximate_k_nearest_neighbors()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t samples.size(): " << samples.size()
<< "\n\t k: " << k
<< "\n\t num: " << num
);
out.clear();
// fewer than two samples means there is no pair to connect
if (samples.size() <= 1)
{
return;
}
// we add each edge twice in the following loop. So multiply num by 2 to account for that.
// NOTE(review): the doubled num is also used as the loop bound below, so twice as many
// pairs are drawn as the caller's num and up to 2*num (doubled) edges can be pushed,
// which is more than the reserve() calls set aside.  Confirm whether that is intended.
num *= 2;
std::vector<ordered_sample_pair> edges;
edges.reserve(num);
std::vector<sample_pair, alloc> temp;
temp.reserve(num);
dlib::rand rnd;
rnd.set_seed(cast_to_string(random_seed));
// randomly sample a bunch of edges
for (unsigned long i = 0; i < num; ++i)
{
const unsigned long idx1 = rnd.get_random_32bit_number()%samples.size();
const unsigned long idx2 = rnd.get_random_32bit_number()%samples.size();
// skip self pairs
if (idx1 != idx2)
{
const double dist = dist_funct(samples[idx1], samples[idx2]);
// only distances strictly below infinity are recorded
if (dist < std::numeric_limits<double>::infinity())
{
// store both directions so each endpoint sees this edge in its own group
edges.push_back(ordered_sample_pair(idx1, idx2, dist));
edges.push_back(ordered_sample_pair(idx2, idx1, dist));
}
}
}
// the grouping loop below relies on this sort placing edges with equal index1()
// next to each other (order_by_index is defined elsewhere -- presumably it orders
// by index1() then index2())
std::sort(edges.begin(), edges.end(), &order_by_index<ordered_sample_pair>);
std::vector<ordered_sample_pair>::iterator beg, itr;
// now copy edges into temp when they aren't duplicates and also only move in the k shortest for
// each index.
itr = edges.begin();
while (itr != edges.end())
{
// first find the bounding range for all the edges connected to node itr->index1()
beg = itr;
while (itr != edges.end() && itr->index1() == beg->index1())
++itr;
// If the node has more than k edges then sort them by distance so that
// we will end up with the k best.
if (static_cast<unsigned long>(itr - beg) > k)
{
std::sort(beg, itr, &order_by_distance_and_index<ordered_sample_pair>);
}
// take the k best unique edges from the range [beg,itr)
// The range is never empty here, so its first edge is always taken.
temp.push_back(sample_pair(beg->index1(), beg->index2(), beg->distance()));
unsigned long prev_index2 = beg->index2();
++beg;
// count is the number of edges taken for this node so far
unsigned long count = 1;
for (; beg != itr && count < k; ++beg)
{
// an edge whose index2() equals that of the edge just before it is treated
// as a repeat and skipped
if (beg->index2() != prev_index2)
{
temp.push_back(sample_pair(beg->index1(), beg->index2(), beg->distance()));
++count;
}
prev_index2 = beg->index2();
}
}
// the same undirected edge can have been taken from both endpoint groups
remove_duplicate_edges(temp);
temp.swap(out);
}
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc
>
void find_k_nearest_neighbors (
const vector_type& samples,
const distance_function_type& dist_funct,
const unsigned long k,
std::vector<sample_pair, alloc>& out
)
/*!
    Exhaustive k-nearest-neighbors.  Every pair (i,j) with i < j is measured once
    with dist_funct.  Each sample owns k consecutive slots in a scratch vector and
    a measured pair overwrites the worst slot of sample i and/or sample j when it
    beats that sample's current worst distance.  Afterwards the slots are sorted
    and copied into out without placeholder or duplicate entries.  out is cleared
    first and stays empty when samples has fewer than two elements.
!*/
{
// make sure requires clause is not broken
DLIB_ASSERT(k > 0,
"\t void find_k_nearest_neighbors()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t samples.size(): " << samples.size()
<< "\n\t k: " << k
);
out.clear();
// fewer than two samples means there is no pair to connect
if (samples.size() <= 1)
{
return;
}
// brings iterator_of_worst() into scope
using namespace impl;
std::vector<sample_pair> edges;
// Initialize all the edges to an edge with an invalid index
// (index samples.size() is one past the last valid sample, distance is infinity)
edges.resize(samples.size()*k,
sample_pair(samples.size(),samples.size(),std::numeric_limits<double>::infinity()));
// Hold the length for the longest edge for each node. Initially they are all infinity.
// Because the test below is a strict '<', a pair with infinite distance never
// replaces a slot.
std::vector<double> worst_dists(samples.size(), std::numeric_limits<double>::infinity());
std::vector<sample_pair>::iterator begin_i, end_i, begin_j, end_j, itr;
begin_i = edges.begin();
end_i = begin_i + k;
// Loop over all combinations of samples. We will maintain the iterator ranges so that
// within the inner for loop we have:
// [begin_i, end_i) == the range in edges that contains neighbors of samples[i]
// [begin_j, end_j) == the range in edges that contains neighbors of samples[j]
for (unsigned long i = 0; i+1 < samples.size(); ++i)
{
// start j's window at i's window; it is advanced by k before first use so that
// it lines up with j == i+1
begin_j = begin_i;
end_j = end_i;
for (unsigned long j = i+1; j < samples.size(); ++j)
{
begin_j += k;
end_j += k;
const double dist = dist_funct(samples[i], samples[j]);
if (dist < worst_dists[i])
{
// overwrite i's current worst slot, then refresh i's worst distance
*iterator_of_worst(begin_i, end_i) = sample_pair(i, j, dist);
worst_dists[i] = iterator_of_worst(begin_i, end_i)->distance();
}
if (dist < worst_dists[j])
{
// same for j; the edge is stored with the same (i, j) arguments in both windows
*iterator_of_worst(begin_j, end_j) = sample_pair(i, j, dist);
worst_dists[j] = iterator_of_worst(begin_j, end_j)->distance();
}
}
// move i's window forward to the next sample's slots
begin_i += k;
end_i += k;
}
// sort the edges so that duplicate edges will be adjacent
// NOTE(review): the early exit below assumes this ordering also places the
// placeholder edges (index1() == samples.size()) after all valid ones -- confirm
// against order_by_index.
std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
// if the first edge is valid
if (edges[0].index1() < samples.size())
{
// now put edges into out while avoiding duplicates and any remaining invalid edges.
out.reserve(edges.size());
out.push_back(edges[0]);
for (unsigned long i = 1; i < edges.size(); ++i)
{
// if we hit an invalid edge then we can stop
if (edges[i].index1() >= samples.size())
break;
// if this isn't a duplicate edge
if (edges[i] != edges[i-1])
{
out.push_back(edges[i]);
}
}
}
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
bool contains_duplicate_pairs (
    const vector_type& pairs
)
/*!
    ensures
        - returns true if, after sorting a copy of pairs with order_by_index(),
          two neighbouring elements compare equal with operator==.
        - returns false otherwise.  pairs itself is not modified.
!*/
{
    typedef typename vector_type::value_type edge_type;

    // sort a private copy so the caller's ordering is left alone
    vector_type sorted_copy(pairs);
    std::sort(sorted_copy.begin(), sorted_copy.end(), &order_by_index<edge_type>);

    // walk neighbouring elements and stop at the first equal pair
    unsigned long next = 1;
    while (next < sorted_copy.size())
    {
        if (sorted_copy[next-1] == sorted_copy[next])
            return true;
        ++next;
    }

    return false;
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
typename enable_if_c<(is_same_type<sample_pair, typename vector_type::value_type>::value ||
                      is_same_type<ordered_sample_pair, typename vector_type::value_type>::value),
                      unsigned long>::type
max_index_plus_one (
    const vector_type& pairs
)
/*!
    ensures
        - if (pairs.size() == 0) then
            - returns 0
        - else
            - returns one more than the largest index1() or index2() value found
              in pairs
!*/
{
    // an empty edge list has no nodes at all
    if (pairs.size() == 0)
        return 0;

    unsigned long largest = 0;
    for (unsigned long pos = 0; pos < pairs.size(); ++pos)
    {
        // the larger endpoint of this edge
        const unsigned long first = pairs[pos].index1();
        const unsigned long second = pairs[pos].index2();
        const unsigned long bigger = (second > first) ? second : first;

        if (bigger > largest)
            largest = bigger;
    }

    return largest + 1;
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
void remove_long_edges (
    vector_type& pairs,
    double distance_threshold
)
/*!
    ensures
        - keeps, in their original relative order, exactly those elements of pairs
          whose distance() is <= distance_threshold.  All other elements are
          dropped.
!*/
{
    // survivors are gathered in a scratch container of the same type
    vector_type kept;
    kept.reserve(pairs.size());

    unsigned long pos = 0;
    while (pos < pairs.size())
    {
        // an edge survives when it is no longer than the threshold
        const bool short_enough = (pairs[pos].distance() <= distance_threshold);
        if (short_enough)
            kept.push_back(pairs[pos]);
        ++pos;
    }

    // give the filtered contents back to the caller
    kept.swap(pairs);
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
void remove_short_edges (
    vector_type& pairs,
    double distance_threshold
)
/*!
    ensures
        - keeps, in their original relative order, exactly those elements of pairs
          whose distance() is >= distance_threshold.  All other elements are
          dropped.
!*/
{
    // survivors are gathered in a scratch container of the same type
    vector_type kept;
    kept.reserve(pairs.size());

    unsigned long pos = 0;
    while (pos < pairs.size())
    {
        // an edge survives when it is at least as long as the threshold
        const bool long_enough = (pairs[pos].distance() >= distance_threshold);
        if (long_enough)
            kept.push_back(pairs[pos]);
        ++pos;
    }

    // give the filtered contents back to the caller
    kept.swap(pairs);
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
void remove_percent_longest_edges (
    vector_type& pairs,
    double percent
)
/*!
    ensures
        - pairs is sorted with order_by_distance() and then truncated to its first
          static_cast<unsigned long>((1.0-percent)*pairs.size()) elements.
!*/
{
    // make sure requires clause is not broken
    DLIB_ASSERT( 0 <= percent && percent < 1,
        "\t void remove_percent_longest_edges()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t percent: " << percent
        );

    typedef typename vector_type::value_type edge_type;

    // put the edges into order_by_distance() order (front of the container is kept)
    std::sort(pairs.begin(), pairs.end(), &order_by_distance<edge_type>);

    // how many leading edges survive; the fractional part is truncated
    const unsigned long num = static_cast<unsigned long>((1.0-percent)*pairs.size());

    // copy the surviving prefix and swap it into place
    vector_type kept(pairs.begin(), pairs.begin() + num);
    kept.swap(pairs);
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
void remove_percent_shortest_edges (
    vector_type& pairs,
    double percent
)
/*!
    ensures
        - pairs is sorted with order_by_distance() through reverse iterators, so
          the container ends up in the reverse of that order, and is then
          truncated to its first
          static_cast<unsigned long>((1.0-percent)*pairs.size()) elements.
!*/
{
    // make sure requires clause is not broken
    DLIB_ASSERT( 0 <= percent && percent < 1,
        "\t void remove_percent_shortest_edges()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t percent: " << percent
        );

    typedef typename vector_type::value_type edge_type;

    // Sorting the reversed view leaves the forward view in reverse
    // order_by_distance() order, i.e. presumably longest edges first.
    std::sort(pairs.rbegin(), pairs.rend(), &order_by_distance<edge_type>);

    // how many leading edges survive; the fractional part is truncated
    const unsigned long num = static_cast<unsigned long>((1.0-percent)*pairs.size());

    // The kept prefix is the front of the reversed ordering, so it is the short
    // edges at the back that get dropped.
    vector_type kept(pairs.begin(), pairs.begin() + num);
    kept.swap(pairs);
}
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
bool is_ordered_by_index (
    const vector_type& edges
)
/*!
    ensures
        - returns false if some element of edges satisfies
          order_by_index(edges[i], edges[i-1]), i.e. it orders strictly before its
          predecessor.
        - returns true otherwise (including for containers with fewer than two
          elements).
!*/
{
    // stays true until the first out-of-order neighbour pair is seen
    bool in_order = true;

    for (unsigned long next = 1; in_order && next < edges.size(); ++next)
        in_order = !order_by_index(edges[next], edges[next-1]);

    return in_order;
}
// ----------------------------------------------------------------------------------------
template <
    typename alloc1,
    typename alloc2
    >
void find_neighbor_ranges (
    const std::vector<ordered_sample_pair,alloc1>& edges,
    std::vector<std::pair<unsigned long, unsigned long>,alloc2>& neighbors
)
/*!
    ensures
        - neighbors gets max_index_plus_one(edges) entries, all initially (0,0).
        - edges is scanned once; each time index1() changes, the half open range
          [first, second) of positions just scanned is stored in the entry of the
          node that owned that run.  The last run is stored after the scan.
!*/
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_ordered_by_index(edges),
        "\t void find_neighbor_ranges()"
        << "\n\t Invalid inputs were given to this function"
        );

    typedef std::pair<unsigned long, unsigned long> range_type;

    // one entry per node; nodes without edges keep the empty range (0,0)
    neighbors.assign(max_index_plus_one(edges), range_type(0,0));

    unsigned long node = 0;         // node whose run of edges is being scanned
    unsigned long run_begin = 0;    // position in edges where that run started

    for (unsigned long pos = 0; pos < edges.size(); ++pos)
    {
        // still inside the current node's run
        if (edges[pos].index1() == node)
            continue;

        // the run ended just before pos: record it and start the next one
        neighbors[node] = range_type(run_begin, pos);
        run_begin = pos;
        node = edges[pos].index1();
    }

    // close the final run, unless there were no nodes at all
    if (neighbors.size() != 0)
        neighbors[node] = range_type(run_begin, (unsigned long)edges.size());
}
// ----------------------------------------------------------------------------------------
template <
typename alloc1,
typename alloc2
>
void convert_unordered_to_ordered (
const std::vector<sample_pair,alloc1>& edges,
std::vector<ordered_sample_pair,alloc2>& out_edges
)
{
out_edges.clear();
out_edges.reserve(edges.size()*2);
for (unsigned long i = 0; i < edges.size(); ++i)
{
out_edges.push_back(ordered_sample_pair(edges[i].index1(), edges[i].index2(), edges[i].distance()));
if (edges[i].index1() != edges[i].index2())
out_edges.push_back(ordered_sample_pair(edges[i].index2(), edges[i].index1(), edges[i].distance()));
}
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_EDGE_LIST_GrAPHS_Hh_

View File

@@ -0,0 +1,358 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_
#ifdef DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_
#include <vector>
#include "../string.h"
#include "sample_pair_abstract.h"
#include "ordered_sample_pair_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc,
typename T
>
void find_percent_shortest_edges_randomly (
const vector_type& samples,
const distance_function_type& dist_funct,
const double percent,
const unsigned long num,
const T& random_seed,
std::vector<sample_pair, alloc>& out
);
/*!
requires
- 0 < percent <= 1
- num > 0
- random_seed must be convertible to a string by dlib::cast_to_string()
- dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
to a floating point number
ensures
- This function randomly samples the space of pairs of integers between
0 and samples.size()-1 inclusive. For each of these pairs, (i,j), a
sample_pair is created as follows:
sample_pair(i, j, dist_funct(samples[i], samples[j]))
num such sample_pair objects are generated, duplicates and pairs with distance
values == infinity are removed, and then the top percent of them with the
smallest distance are stored into out.
- #out.size() <= num*percent
- contains_duplicate_pairs(#out) == false
- for all valid i:
- #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
- #out[i].distance() < std::numeric_limits<double>::infinity()
- random_seed is used to seed the random number generator used by this
function.
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc,
typename T
>
void find_approximate_k_nearest_neighbors (
const vector_type& samples,
const distance_function_type& dist_funct,
const unsigned long k,
const unsigned long num,
const T& random_seed,
std::vector<sample_pair, alloc>& out
);
/*!
requires
- k > 0
- num > 0
- random_seed must be convertible to a string by dlib::cast_to_string()
- dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
to a floating point number
ensures
- This function computes an approximate form of k nearest neighbors. As num grows
larger the output of this function converges to the output of the
find_k_nearest_neighbors() function defined below.
- Specifically, this function randomly samples the space of pairs of integers between
0 and samples.size()-1 inclusive. For each of these pairs, (i,j), a
sample_pair is created as follows:
sample_pair(i, j, dist_funct(samples[i], samples[j]))
num such sample_pair objects are generated and then exact k-nearest-neighbors
is performed amongst these sample_pairs and the results are stored into #out.
Note that samples with an infinite distance between them are considered to
be not connected at all.
- contains_duplicate_pairs(#out) == false
- for all valid i:
- #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
- #out[i].distance() < std::numeric_limits<double>::infinity()
- random_seed is used to seed the random number generator used by this
function.
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc
>
void find_k_nearest_neighbors (
const vector_type& samples,
const distance_function_type& dist_funct,
const unsigned long k,
std::vector<sample_pair, alloc>& out
);
/*!
requires
- k > 0
- dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
to a floating point number
ensures
- #out == a set of sample_pair objects that represent all the k nearest
neighbors in samples according to the given distance function dist_funct.
Note that samples with an infinite distance between them are considered to
be not connected at all.
- for all valid i:
- #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
- #out[i].distance() < std::numeric_limits<double>::infinity()
- contains_duplicate_pairs(#out) == false
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
bool contains_duplicate_pairs (
const vector_type& pairs
);
/*!
requires
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- if (pairs contains any elements that are equal according to operator==) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
unsigned long max_index_plus_one (
const vector_type& pairs
);
/*!
requires
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- if (pairs.size() == 0) then
- returns 0
- else
- returns a number N such that:
- for all i: pairs[i].index1() < N && pairs[i].index2() < N
- for some j: pairs[j].index1()+1 == N || pairs[j].index2()+1 == N
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_long_edges (
vector_type& pairs,
double distance_threshold
);
/*!
requires
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- Removes all elements of pairs that have a distance value greater than the
given threshold.
- #pairs.size() <= pairs.size()
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_short_edges (
vector_type& pairs,
double distance_threshold
);
/*!
requires
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- Removes all elements of pairs that have a distance value less than the
given threshold.
- #pairs.size() <= pairs.size()
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_percent_longest_edges (
vector_type& pairs,
double percent
);
/*!
requires
- 0 <= percent < 1
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- Removes the given upper percentage of the longest edges in pairs. I.e.
this function removes the long edges from pairs.
- #pairs.size() == (1-percent)*pairs.size()
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_percent_shortest_edges (
vector_type& pairs,
double percent
);
/*!
requires
- 0 <= percent < 1
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- Removes the given upper percentage of the shortest edges in pairs. I.e.
this function removes the short edges from pairs.
- #pairs.size() == (1-percent)*pairs.size()
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_duplicate_edges (
vector_type& pairs
);
/*!
requires
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- Removes any duplicate edges from pairs. That is, for all elements of pairs,
A and B, such that A == B, only one of A or B will be in pairs after this
function terminates.
- #pairs.size() <= pairs.size()
- is_ordered_by_index(#pairs) == true
- contains_duplicate_pairs(#pairs) == false
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
bool is_ordered_by_index (
const vector_type& edges
);
/*!
requires
- vector_type == a type with an interface compatible with std::vector and it
must in turn contain objects with an interface compatible with
dlib::sample_pair or dlib::ordered_sample_pair.
ensures
- returns true if and only if the contents of edges are in sorted order
according to order_by_index(). That is, we return true if calling
std::stable_sort(edges.begin(), edges.end(), &order_by_index<T>) would not
change the ordering of elements of edges.
!*/
// ----------------------------------------------------------------------------------------
template <
typename alloc1,
typename alloc2
>
void find_neighbor_ranges (
const std::vector<ordered_sample_pair,alloc1>& edges,
std::vector<std::pair<unsigned long, unsigned long>,alloc2>& neighbors
);
/*!
requires
- is_ordered_by_index(edges) == true
(i.e. edges is sorted so that all the edges for a particular node are grouped
together)
ensures
- This function takes a graph, represented by its list of edges, and finds the
ranges that contain the edges for each node in the graph. In particular,
#neighbors[i] will tell you which edges correspond to the ith node in the
graph.
- #neighbors.size() == max_index_plus_one(edges)
(i.e. neighbors will have an entry for each node in the graph defined by the
list of edges)
- for all valid i:
- all elements of edges such that their index1() value == i are in the
range [neighbors[i].first, neighbors[i].second). That is, for all k such
that neighbors[i].first <= k < neighbors[i].second:
- edges[k].index1() == i.
- all edges outside this range have an index1() value != i
!*/
// ----------------------------------------------------------------------------------------
template <
typename alloc1,
typename alloc2
>
void convert_unordered_to_ordered (
const std::vector<sample_pair,alloc1>& edges,
std::vector<ordered_sample_pair,alloc2>& out_edges
);
/*!
ensures
- interprets edges as defining an undirected graph.
- This function populates out_edges with a directed graph that represents the
same graph as the one in edges. In particular, this means that for all valid
i we have the following:
- if (edges[i].index1() != edges[i].index2()) then
- #out_edges contains two edges corresponding to edges[i]. They
represent the two directions of this edge. The distance value from
edges[i] is also copied into the output edges.
- else
- #out_edges contains one edge corresponding to edges[i] since this is
a self edge. The distance value from edges[i] is also copied into
the output edge.
- max_index_plus_one(edges) == max_index_plus_one(#out_edges)
(i.e. both graphs have the same number of nodes)
- In all but the most trivial cases, we will have is_ordered_by_index(#out_edges) == false
- contains_duplicate_pairs(#out_edges) == contains_duplicate_pairs(edges)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_

View File

@@ -0,0 +1,217 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_FIND_K_NEAREST_NEIGHBOrS_LSH_Hh_
#define DLIB_FIND_K_NEAREST_NEIGHBOrS_LSH_Hh_
#include "find_k_nearest_neighbors_lsh_abstract.h"
#include "../threads.h"
#include "../lsh/hashes.h"
#include <vector>
#include <queue>
#include "sample_pair.h"
#include "edge_list_graphs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
struct compare_sample_pair_with_distance
{
inline bool operator() (const sample_pair& a, const sample_pair& b) const
{
return a.distance() < b.distance();
}
};
template <
    typename vector_type,
    typename hash_function_type
    >
class hash_block
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A function object that fills in one slot of a hash vector per call:
            invoking it with index i stores hash_funct(samples[i]) into hashes[i].
            It holds only references to the three objects given to the constructor.
    !*/
public:

    const vector_type& samples;
    const hash_function_type& hash_funct;
    std::vector<typename hash_function_type::result_type>& hashes;

    hash_block(
        const vector_type& samples_in,
        const hash_function_type& hash_funct_in,
        std::vector<typename hash_function_type::result_type>& hashes_in
    ) :
        samples(samples_in),
        hash_funct(hash_funct_in),
        hashes(hashes_in)
    {
    }

    // Hash the i-th sample into the i-th output slot.  hashes must already be
    // large enough; nothing is resized here.
    void operator() (long i) const
    {
        hashes[i] = hash_funct(samples[i]);
    }
};
template <
    typename vector_type,
    typename distance_function_type,
    typename hash_function_type,
    typename alloc
    >
class scan_find_k_nearest_neighbors_lsh
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A function object that finds neighbors for one sample per call.  For
            sample i it first picks the k*k_oversample samples whose hashes are
            closest to hashes[i] according to hash_funct.distance(), then keeps the
            k of those with the smallest dist_funct() value (ignoring distances
            that are not below infinity) and appends them to the shared edges
            vector while holding the mutex m.

            The constructor clears edges and reserves samples.size()*k/2 slots.
            Only references to the constructor arguments are stored.
    !*/
public:
    scan_find_k_nearest_neighbors_lsh (
        const vector_type& samples_,
        const distance_function_type& dist_funct_,
        const hash_function_type& hash_funct_,
        const unsigned long k_,
        std::vector<sample_pair, alloc>& edges_,
        const unsigned long k_oversample_,
        const std::vector<typename hash_function_type::result_type>& hashes_
    ) :
        samples(samples_),
        dist_funct(dist_funct_),
        hash_funct(hash_funct_),
        k(k_),
        edges(edges_),
        k_oversample(k_oversample_),
        hashes(hashes_)
    {
        edges.clear();
        edges.reserve(samples.size()*k/2);
    }

    mutex m;    // guards appends to edges in operator()
    const vector_type& samples;
    const distance_function_type& dist_funct;
    const hash_function_type& hash_funct;
    const unsigned long k;
    std::vector<sample_pair, alloc>& edges;
    const unsigned long k_oversample;
    const std::vector<typename hash_function_type::result_type>& hashes;

    void operator() (unsigned long i) const
    /*!
        ensures
            - appends to edges up to k sample_pair(i, j, dist) objects, chosen as
              described in the class comment above.
    !*/
    {
        // number of hash-space candidates to gather before the exact distance check
        const unsigned long k_hash = k*k_oversample;

        // Both queues are max-heaps: top() is the current worst (largest) entry.
        std::priority_queue<std::pair<unsigned long, unsigned long> > best_hashes;
        std::priority_queue<sample_pair, std::vector<sample_pair>, dlib::impl::compare_sample_pair_with_distance> best_samples;

        unsigned long worst_distance = std::numeric_limits<unsigned long>::max();

        // scan over the hashes and find the best matches for hashes[i]
        for (unsigned long j = 0; j < hashes.size(); ++j)
        {
            if (i == j)
                continue;

            const unsigned long dist = hash_funct.distance(hashes[i], hashes[j]);

            // accept while the queue is still filling, or when this beats the worst
            if (dist < worst_distance || best_hashes.size() < k_hash)
            {
                if (best_hashes.size() >= k_hash)
                    best_hashes.pop();
                best_hashes.push(std::make_pair(dist, j));
                worst_distance = best_hashes.top().first;
            }
        }

        // Now figure out which of the best_hashes are actually the k best matches
        // according to dist_funct()
        while (best_hashes.size() != 0)
        {
            const unsigned long j = best_hashes.top().second;
            best_hashes.pop();

            const double dist = dist_funct(samples[i], samples[j]);
            if (dist < std::numeric_limits<double>::infinity())
            {
                // Insert first and only then evict the worst.  Evicting before
                // inserting would throw away a good edge to make room for a
                // candidate that was never compared against it, so the last
                // candidate processed would always survive regardless of its
                // distance.
                best_samples.push(sample_pair(i,j,dist));
                if (best_samples.size() > k)
                    best_samples.pop();
            }
        }

        // Finally, now put the k best matches according to dist_funct() into edges.
        // edges is shared between calls, so the appends happen under the lock.
        auto_mutex lock(m);
        while (best_samples.size() != 0)
        {
            edges.push_back(best_samples.top());
            best_samples.pop();
        }
    }
};
}
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename hash_function_type
>
void hash_samples (
const vector_type& samples,
const hash_function_type& hash_funct,
const unsigned long num_threads,
std::vector<typename hash_function_type::result_type>& hashes
)
{
hashes.resize(samples.size());
typedef impl::hash_block<vector_type,hash_function_type> block_type;
block_type temp(samples, hash_funct, hashes);
parallel_for(num_threads, 0, samples.size(), temp);
}
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename hash_function_type,
typename alloc
>
void find_k_nearest_neighbors_lsh (
const vector_type& samples,
const distance_function_type& dist_funct,
const hash_function_type& hash_funct,
const unsigned long k,
const unsigned long num_threads,
std::vector<sample_pair, alloc>& edges,
const unsigned long k_oversample = 20
)
{
// make sure requires clause is not broken
DLIB_ASSERT(k > 0 && k_oversample > 0,
"\t void find_k_nearest_neighbors_lsh()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t samples.size(): " << samples.size()
<< "\n\t k: " << k
<< "\n\t k_oversample: " << k_oversample
);
edges.clear();
if (samples.size() <= 1)
{
return;
}
typedef typename hash_function_type::result_type hash_type;
std::vector<hash_type> hashes;
hash_samples(samples, hash_funct, num_threads, hashes);
typedef impl::scan_find_k_nearest_neighbors_lsh<vector_type, distance_function_type,hash_function_type,alloc> scan_type;
scan_type temp(samples, dist_funct, hash_funct, k, edges, k_oversample, hashes);
parallel_for(num_threads, 0, hashes.size(), temp);
remove_duplicate_edges(edges);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_FIND_K_NEAREST_NEIGHBOrS_LSH_Hh_

View File

@@ -0,0 +1,102 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_FIND_K_NEAREST_NEIGHBOrS_LSH_ABSTRACT_Hh_
#ifdef DLIB_FIND_K_NEAREST_NEIGHBOrS_LSH_ABSTRACT_Hh_
#include "../lsh/hashes_abstract.h"
#include "sample_pair_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename hash_function_type
>
void hash_samples (
const vector_type& samples,
const hash_function_type& hash_funct,
const unsigned long num_threads,
std::vector<typename hash_function_type::result_type>& hashes
);
/*!
requires
- hash_funct() is threadsafe. This means that it must be safe for multiple
threads to invoke the member functions of hash_funct() at the same time.
- vector_type is any container that looks like a std::vector or dlib::array.
- hash_funct must be a function object with an interface compatible with the
objects defined in dlib/lsh/hashes_abstract.h. In particular, hash_funct
must be capable of hashing the elements in the samples vector.
ensures
- This function hashes all the elements in samples and stores the results in
hashes. It will also use num_threads concurrent threads to do this. You
should set this value equal to the number of processing cores on your
computer for maximum speed.
- #hashes.size() == samples.size()
- for all valid i:
- #hashes[i] = hash_funct(samples[i])
(i.e. #hashes[i] will contain the hash of samples[i])
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename hash_function_type,
typename alloc
>
void find_k_nearest_neighbors_lsh (
const vector_type& samples,
const distance_function_type& dist_funct,
const hash_function_type& hash_funct,
const unsigned long k,
const unsigned long num_threads,
std::vector<sample_pair, alloc>& edges,
const unsigned long k_oversample = 20
);
/*!
requires
- hash_funct and dist_funct are threadsafe. This means that it must be safe
for multiple threads to invoke the member functions of these objects at the
same time.
- k > 0
- k_oversample > 0
- dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
to a floating point number
- vector_type is any container that looks like a std::vector or dlib::array.
- hash_funct must be a function object with an interface compatible with the
objects defined in dlib/lsh/hashes_abstract.h. In particular, hash_funct
must be capable of hashing the elements in the samples vector.
ensures
- This function computes an approximate form of a k nearest neighbors graph of
the elements in samples. In particular, the way it works is that it first
hashes all elements in samples using the provided locality sensitive hash
function hash_funct(). Then it performs an exact k nearest neighbors on the
hashes which can be done very quickly. For each of these neighbors we
compute the true distance using dist_funct() and the k nearest neighbors for
each sample are stored into #edges.
- Note that samples with an infinite distance between them are considered to be
not connected at all. Therefore, we exclude edges with such distances from
being output.
- for all valid i:
- #edges[i].distance() == dist_funct(samples[#edges[i].index1()], samples[#edges[i].index2()])
- #edges[i].distance() < std::numeric_limits<double>::infinity()
- contains_duplicate_pairs(#edges) == false
- This function will use num_threads concurrent threads of processing. You
should set this value equal to the number of processing cores on your
computer for maximum speed.
- The hash based k nearest neighbor step is approximate, however, you can
improve the output accuracy by using a larger k value for this first step.
Therefore, this function finds k*k_oversample nearest neighbors during the
first hashing based step.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_FIND_K_NEAREST_NEIGHBOrS_LSH_ABSTRACT_Hh_

View File

@@ -0,0 +1,129 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_MR_FUNCTION_ObJECTS_Hh_
#define DLIB_MR_FUNCTION_ObJECTS_Hh_
#include "function_objects_abstract.h"
#include "../matrix.h"
#include "../svm/sparse_vector.h"
#include <cmath>
#include <limits>
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct squared_euclidean_distance
{
    /*!
        Function object that measures length_squared(a-b) and reports it only
        when it falls inside the closed range [lower, upper].  Any value
        outside that range is reported as an infinite distance.
    !*/

    // Default range: lower == 0 and upper == +infinity.
    squared_euclidean_distance (
    ) :
        lower(0),
        upper(std::numeric_limits<double>::infinity())
    {}

    // Explicit range [l, u].
    squared_euclidean_distance (
        const double l,
        const double u
    ) :
        lower(l),
        upper(u)
    {}

    const double lower;
    const double upper;

    template <typename sample_type>
    double operator() (
        const sample_type& a,
        const sample_type& b
    ) const
    {
        const double sq_len = length_squared(a-b);

        // Both comparisons must hold for the value to be accepted.
        const bool in_range = (lower <= sq_len) && (sq_len <= upper);
        return in_range ? sq_len : std::numeric_limits<double>::infinity();
    }
};
// ----------------------------------------------------------------------------------------
struct cosine_distance
{
    /*!
        Function object returning 1 - dot(a,b)/(length(a)*length(b)).
        When the product of the two lengths is exactly 0 the result is 0.
    !*/

    template <typename sample_type>
    double operator() (
        const sample_type& a,
        const sample_type& b
    ) const
    {
        const double norm_product = length(a)*length(b);

        // Only divide when the denominator is non-zero.
        if (norm_product != 0)
            return 1-dot(a,b)/norm_product;

        return 0;
    }
};
// ----------------------------------------------------------------------------------------
struct negative_dot_product_distance
{
    /*!
        Function object returning the negated dot product of its two
        arguments, i.e. -dot(a,b).
    !*/

    template <typename sample_type>
    double operator() (const sample_type& a, const sample_type& b) const
    {
        // The negation is applied to whatever dot() returns, before the
        // conversion to double.
        return -dot(a,b);
    }
};
// ----------------------------------------------------------------------------------------
struct use_weights_of_one
{
    /*!
        Function object that ignores its argument and always yields a
        weight of 1.
    !*/

    template <typename edge_type>
    double operator() (const edge_type& /*unused*/) const
    {
        return 1.0;
    }
};
// ----------------------------------------------------------------------------------------
struct use_gaussian_weights
{
    /*!
        Function object that converts an edge's distance() into the weight
        std::exp(-gamma*distance).
    !*/

    // Default: gamma == 0.1
    use_gaussian_weights (
    ) :
        gamma(0.1)
    {}

    // Caller supplied gamma.
    use_gaussian_weights (
        double g
    ) :
        gamma(g)
    {}

    double gamma;

    template <typename edge_type>
    double operator() (const edge_type& e) const
    {
        return std::exp(-gamma*e.distance());
    }
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_MR_FUNCTION_ObJECTS_Hh_

View File

@@ -0,0 +1,209 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_MR_FUNCTION_ObJECTS_ABSTRACT_Hh_
#ifdef DLIB_MR_FUNCTION_ObJECTS_ABSTRACT_Hh_
#include "../matrix.h"
#include <cmath>
#include "../svm/sparse_vector_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct squared_euclidean_distance
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a simple function object that computes squared euclidean distance
between two dlib::matrix objects.
THREAD SAFETY
This object has no mutable members. Therefore, it is safe to call
operator() on a single instance of this object simultaneously from multiple
threads.
!*/
squared_euclidean_distance (
);
/*!
ensures
- #lower == 0
- #upper == std::numeric_limits<double>::infinity()
!*/
squared_euclidean_distance (
const double l,
const double u
);
/*!
ensures
- #lower == l
- #upper == u
!*/
const double lower;
const double upper;
template <typename sample_type>
double operator() (
const sample_type& a,
const sample_type& b
) const;
/*!
requires
- sample_type should be a kind of dlib::matrix
ensures
- let LEN = length_squared(a-b)
- if (lower <= LEN <= upper) then
- returns LEN
- else
- returns std::numeric_limits<double>::infinity()
!*/
};
// ----------------------------------------------------------------------------------------
struct cosine_distance
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a simple function object that computes the cosine of the angle
between two vectors and returns 1 - this quantity. Moreover, this object
works for both sparse and dense vectors.
THREAD SAFETY
This object has no mutable members. Therefore, it is safe to call
operator() on a single instance of this object simultaneously from multiple
threads.
!*/
template <typename sample_type>
double operator() (
const sample_type& a,
const sample_type& b
) const;
/*!
requires
- sample_type is a dense vector (e.g. a dlib::matrix) or a sparse
vector as defined at the top of dlib/svm/sparse_vector_abstract.h
ensures
- let theta = the angle between a and b.
- returns 1 - cos(theta)
(e.g. this function returns 0 when a and b have an angle of 0 between
each other, 1 if they have a 90 degree angle, and a maximum of 2 if the
vectors have a 180 degree angle between each other).
- zero length vectors are considered to have angles of 0 between all other
vectors.
!*/
};
// ----------------------------------------------------------------------------------------
struct negative_dot_product_distance
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a simple function object that computes the dot product between two
vectors and returns the negation of this value. Moreover, this object
works for both sparse and dense vectors.
THREAD SAFETY
This object has no mutable members. Therefore, it is safe to call
operator() on a single instance of this object simultaneously from multiple
threads.
!*/
template <typename sample_type>
double operator() (
const sample_type& a,
const sample_type& b
) const;
/*!
requires
- sample_type is a dense vector (e.g. a dlib::matrix) or a sparse
vector as defined at the top of dlib/svm/sparse_vector_abstract.h
ensures
- returns -dot(a,b)
!*/
};
// ----------------------------------------------------------------------------------------
struct use_weights_of_one
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a simple function object that takes a single argument
and always returns 1
THREAD SAFETY
This object has no mutable members. Therefore, it is safe to call
operator() on a single instance of this object simultaneously from multiple
threads.
!*/
template <typename edge_type>
double operator() (
const edge_type&
) const;
/*!
ensures
- returns 1
!*/
};
// ----------------------------------------------------------------------------------------
struct use_gaussian_weights
{
/*!
WHAT THIS OBJECT REPRESENTS
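This is a simple function object that takes a single argument
which should be an object similar to dlib::sample_pair and returns
the gaussian weight std::exp(-gamma*distance) computed from that
argument's distance() value.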
THREAD SAFETY
This object has no mutable members. Therefore, it is safe to call
operator() on a single instance of this object simultaneously from multiple
threads.
!*/
use_gaussian_weights (
);
/*!
ensures
- #gamma == 0.1
!*/
use_gaussian_weights (
double g
);
/*!
ensures
- #gamma == g
!*/
double gamma;
template <typename edge_type>
double operator() (
const edge_type& e
) const;
/*!
requires
- e.distance() must be a valid expression that returns a number
(e.g. edge_type might be dlib::sample_pair)
ensures
- returns std::exp(-gamma*e.distance());
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_MR_FUNCTION_ObJECTS_ABSTRACT_Hh_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,452 @@
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_GRAPH_UTILs_ABSTRACT_
#ifdef DLIB_GRAPH_UTILs_ABSTRACT_
#include "../directed_graph.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::edge_type& edge(
T& g,
unsigned long i,
unsigned long j
);
/*!
requires
- T is an implementation of graph/graph_kernel_abstract.h
- g.has_edge(i,j)
ensures
- returns a reference to the edge data for the edge connecting nodes i and j
(i.e. returns g.node(i).edge(x) such that g.node(i).neighbor(x).index() == j)
!*/
template <
    typename T
    >
const typename T::edge_type& edge(
    const T& g,
    unsigned long i,
    unsigned long j
);
/*!
    requires
        - T is an implementation of graph/graph_kernel_abstract.h
        - g.has_edge(i,j)
    ensures
        - returns a const reference to the edge data for the edge connecting nodes i and j
          (i.e. returns g.node(i).edge(x) such that g.node(i).neighbor(x).index() == j)
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::edge_type& edge(
T& g,
unsigned long parent_idx,
unsigned long child_idx
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
- g.has_edge(parent_idx,child_idx)
ensures
- returns a reference to the edge data for the directed edge connecting parent
node g.node(parent_idx) to child node g.node(child_idx).
!*/
template <
    typename T
    >
const typename T::edge_type& edge(
    const T& g,
    unsigned long parent_idx,
    unsigned long child_idx
);
/*!
    requires
        - T is an implementation of directed_graph/directed_graph_kernel_abstract.h
        - g.has_edge(parent_idx,child_idx)
    ensures
        - returns a const reference to the edge data for the directed edge connecting
          parent node g.node(parent_idx) to child node g.node(child_idx).
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
bool graph_has_symmetric_edges (
const T& graph
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
ensures
- if (All nodes have either 0 edges between them or 2 edges between them.
That is, if there is an edge pointing from node A to node B then there is
also an edge from B to A) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
bool graph_contains_directed_cycle (
const T& graph
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
ensures
- if (there is a directed cycle in the given graph) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
bool graph_contains_undirected_cycle (
const T& graph
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h or
T is an implementation of graph/graph_kernel_abstract.h
ensures
- if (there is an undirected cycle in the given graph) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
bool graph_contains_length_one_cycle (
const T& graph
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h or
T is an implementation of graph/graph_kernel_abstract.h
ensures
- if (it is the case that graph.has_edge(i,i) == true for some i) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename src_type,
typename dest_type
>
void copy_graph_structure (
const src_type& src,
dest_type& dest
);
/*!
requires
- src_type is an implementation of directed_graph/directed_graph_kernel_abstract.h or
src_type is an implementation of graph/graph_kernel_abstract.h
- dest_type is an implementation of directed_graph/directed_graph_kernel_abstract.h or
dest_type is an implementation of graph/graph_kernel_abstract.h
- dest_type is not a directed_graph when src_type is a graph
ensures
- this function copies the graph structure from src into dest
- #dest.number_of_nodes() == src.number_of_nodes()
- for all valid i: #dest.node(i).item has an initial value for its type
- for all valid i and j:
- if (src.has_edge(i,j) == true) then
- #dest.has_edge(i,j) == true
!*/
// ----------------------------------------------------------------------------------------
template <
typename src_type,
typename dest_type
>
void copy_graph (
const src_type& src,
dest_type& dest
);
/*!
requires
- src_type is an implementation of directed_graph/directed_graph_kernel_abstract.h or
src_type is an implementation of graph/graph_kernel_abstract.h
- dest_type is an implementation of directed_graph/directed_graph_kernel_abstract.h or
dest_type is an implementation of graph/graph_kernel_abstract.h
- src_type and dest_type are both the same kind of graph. That is, they
are either both directed or both undirected.
- the node and edge data in the graphs are copyable via operator=().
ensures
- #dest is a complete duplicate of src.
!*/
// ----------------------------------------------------------------------------------------
template <
typename directed_graph_type,
typename graph_type
>
void create_moral_graph (
const directed_graph_type& g,
graph_type& moral_graph
);
/*!
requires
- directed_graph_type is an implementation of directed_graph/directed_graph_kernel_abstract.h
- graph_type is an implementation of graph/graph_kernel_abstract.h
- graph_contains_directed_cycle(g) == false
ensures
- #moral_graph == the moralized version of the directed graph g
- #moral_graph.number_of_nodes() == g.number_of_nodes()
- for all valid i and j:
- if (g.has_edge(i,j) == true) then
- #moral_graph.has_edge(i,j) == true
(i.e. all the edges that are in g are also in moral_graph)
- for all valid i:
- for all pairs p1 and p2 such that p1 != p2 and g.node(p1) and g.node(p2) are both
parents of node g.node(i):
- #moral_graph.has_edge(p1,p2) == true
(i.e. all the parents of a node are connected in the moral graph)
!*/
// ----------------------------------------------------------------------------------------
template <
typename T,
typename S
>
void find_connected_nodes (
const T& n,
S& visited
);
/*!
requires
- T is a node_type from an implementation of directed_graph/directed_graph_kernel_abstract.h or
T is a node_type from an implementation of graph/graph_kernel_abstract.h
- S is an implementation of set/set_kernel_abstract.h
ensures
- let G be the graph that contains node n
- #visited.is_member(n.index()) == true
- for all i such that there is an undirected path from n to G.node(i):
- #visited.is_member(i) == true
- for all i such that visited.is_member(i):
- #visited.is_member(i) == true
(i.e. this function doesn't remove anything from visited. So if
it contains stuff when you call this function then it will still
contain those things once the function ends)
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
bool graph_is_connected (
const T& g
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h or
T is an implementation of graph/graph_kernel_abstract.h
ensures
- if (every node in g has an undirected path to every other node in g.
I.e. g is a connected graph) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename graph_type,
typename sets_of_int
>
bool is_clique (
const graph_type& g,
const sets_of_int& clique
);
/*!
requires
- graph_type is an implementation of graph/graph_kernel_abstract.h
- sets_of_int is an implementation of set/set_kernel_abstract.h
and it contains unsigned long objects.
- graph_contains_length_one_cycle(g) == false
- for all x such that clique.is_member(x):
- x < g.number_of_nodes()
ensures
- if (it is true that for all i and j such that clique.is_member(i) and
clique.is_member(j) and i != j then g.has_edge(i,j) == true) then
- returns true
- else
- returns false
- note that if (clique.size() == 0) then the above condition holds trivially,
so this function returns true for an empty clique.
!*/
// ----------------------------------------------------------------------------------------
template <
typename graph_type,
typename sets_of_int
>
bool is_maximal_clique (
const graph_type& g,
const sets_of_int& clique
);
/*!
requires
- graph_type is an implementation of graph/graph_kernel_abstract.h
- sets_of_int is an implementation of set/set_kernel_abstract.h
and it contains unsigned long objects.
- graph_contains_length_one_cycle(g) == false
- for all x such that clique.is_member(x):
- x < g.number_of_nodes()
- is_clique(g,clique) == true
ensures
- if (there is no x such that clique.is_member(x) == false
and g.has_edge(i,x) for all i such that clique.is_member(i)) then
- returns true
- else
- returns false
- if (clique.size() == 0) then
- returns true
(this is just a special case of the above condition)
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename graph_type,
typename set_of_sets_of_int
>
void triangulate_graph_and_find_cliques (
graph_type& g,
set_of_sets_of_int& cliques
);
/*!
requires
- graph_type is an implementation of graph/graph_kernel_abstract.h
- set_of_sets_of_int is an implementation of set/set_kernel_abstract.h
and it contains another set object which is comparable by operator< and
itself contains unsigned long objects.
(e.g. set<set<unsigned long>::compare_1a>::kernel_1a)
- graph_contains_length_one_cycle(g) == false
- graph_is_connected(g) == true
ensures
- #g.number_of_nodes() == g.number_of_nodes()
- all this function does to g is add edges to it until g becomes a
chordal graph where a chordal graph is a graph where each cycle
in the graph of 4 or more nodes has an edge joining two nodes
that are not adjacent in the cycle.
- #cliques.size() == the number of maximal cliques in the graph #g
- for all valid sets S such that #cliques.is_member(S):
- for all valid integers i and j such that S.is_member(i) == true
and S.is_member(j) == true and i != j:
- #g.has_edge(i,j) == true
!*/
// ----------------------------------------------------------------------------------------
template <
typename graph_type,
typename join_tree_type
>
bool is_join_tree (
const graph_type& g,
const join_tree_type& join_tree
);
/*!
requires
- graph_type is an implementation of directed_graph/directed_graph_kernel_abstract.h or
graph_type is an implementation of graph/graph_kernel_abstract.h
- join_tree_type is an implementation of graph/graph_kernel_abstract.h
- join_tree_type::type is an implementation of set/set_compare_abstract.h and
this set type contains unsigned long objects.
- join_tree_type::edge_type is an implementation of set/set_compare_abstract.h and
this set type contains unsigned long objects.
- graph_contains_length_one_cycle(g) == false
- graph_is_connected(g) == true
ensures
- if (join_tree is a valid join tree of graph g. That is, join_tree is a
tree decomposition of g) then
- returns true
- else
- returns false
- a join tree of graph g is defined as follows:
- graph_contains_undirected_cycle(join_tree) == false
- graph_is_connected(join_tree) == true
- for all valid i:
- join_tree.node(i).item == a non-empty set containing node indexes
from g. That is, this set contains all the nodes from g that are
in this cluster in the join tree
- for all valid i and j such that i and j are both < join_tree.number_of_nodes()
- let X be the set of numbers that is contained in both join_tree.node(i).item
and join_tree.node(j).item
- It is the case that all nodes on the unique path between join_tree.node(i)
and join_tree.node(j) contain the numbers from X in their sets.
- edge(join_tree,i,j) == a set containing the intersection of
join_tree.node(i).item and join_tree.node(j).item
- the node index for every node in g appears in some node in join_tree at
least once.
!*/
// ----------------------------------------------------------------------------------------
template <
typename graph_type,
typename join_tree_type
>
void create_join_tree (
const graph_type& g,
join_tree_type& join_tree
);
/*!
requires
- graph_type is an implementation of graph/graph_kernel_abstract.h
- join_tree_type is an implementation of graph/graph_kernel_abstract.h
- join_tree_type::type is an implementation of set/set_compare_abstract.h and
this set type contains unsigned long objects.
- join_tree_type::edge_type is an implementation of set/set_compare_abstract.h and
this set type contains unsigned long objects.
- graph_contains_length_one_cycle(g) == false
- graph_is_connected(g) == true
ensures
- #is_join_tree(g, join_tree) == true
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_GRAPH_UTILs_ABSTRACT_

View File

@@ -0,0 +1,125 @@
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_ORDERED_SAMPLE_PaIR_Hh_
#define DLIB_ORDERED_SAMPLE_PaIR_Hh_
#include "ordered_sample_pair_abstract.h"
#include <limits>
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class ordered_sample_pair
{
    /*!
        Holds two indices, stored exactly in the order they were given,
        together with a double valued distance.  The distance is 1 unless a
        value is supplied to the three argument constructor.
    !*/
public:

    // Default: both indices are 0 and the distance is 1.
    ordered_sample_pair(
    ) :
        _index1(0),
        _index2(0),
        _distance(1)
    {}

    // Indices given, distance defaults to 1.
    ordered_sample_pair (
        const unsigned long idx1,
        const unsigned long idx2
    ) :
        _index1(idx1),
        _index2(idx2),
        _distance(1)
    {}

    // Indices and distance given.
    ordered_sample_pair (
        const unsigned long idx1,
        const unsigned long idx2,
        const double dist
    ) :
        _index1(idx1),
        _index2(idx2),
        _distance(dist)
    {}

    const unsigned long& index1 () const { return _index1; }
    const unsigned long& index2 () const { return _index2; }
    const double& distance () const { return _distance; }

private:
    unsigned long _index1;
    unsigned long _index2;
    double _distance;
};
// ----------------------------------------------------------------------------------------
inline bool operator == (
    const ordered_sample_pair& a,
    const ordered_sample_pair& b
)
/*!
    Two ordered_sample_pair objects are equal when both of their indices
    match.  The distance value does not take part in the comparison.
!*/
{
    if (a.index1() != b.index1())
        return false;
    return a.index2() == b.index2();
}
inline bool operator != (
    const ordered_sample_pair& a,
    const ordered_sample_pair& b
)
/*!
    Logical negation of operator== for ordered_sample_pair.
!*/
{
    const bool same_pair = (a == b);
    return !same_pair;
}
// ----------------------------------------------------------------------------------------
inline void serialize (
const ordered_sample_pair& item,
std::ostream& out
)
{
try
{
serialize(item.index1(),out);
serialize(item.index2(),out);
serialize(item.distance(),out);
}
catch (serialization_error& e)
{
throw serialization_error(e.info + "\n while serializing object of type ordered_sample_pair");
}
}
inline void deserialize (
    ordered_sample_pair& item,
    std::istream& in
)
/*!
    Reads index1, index2 and distance from in (in that order) and assigns an
    ordered_sample_pair built from them to item.  A serialization_error
    raised while reading is rethrown with the name of this type appended to
    its message.
!*/
{
    try
    {
        unsigned long first;
        unsigned long second;
        double stored_distance;

        deserialize(first, in);
        deserialize(second, in);
        deserialize(stored_distance, in);

        // item is only modified once all three fields were read
        item = ordered_sample_pair(first, second, stored_distance);
    }
    catch (serialization_error& err)
    {
        throw serialization_error(err.info + "\n while deserializing object of type ordered_sample_pair");
    }
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_ORDERED_SAMPLE_PaIR_Hh_

View File

@@ -0,0 +1,128 @@
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_ORDERED_SAMPLE_PaIR_ABSTRACT_Hh_
#ifdef DLIB_ORDERED_SAMPLE_PaIR_ABSTRACT_Hh_
#include <limits>
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class ordered_sample_pair
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is intended to represent an edge in a directed graph which has
data samples at its vertices. So it contains two integers (index1 and
index2) which represent the identifying indices of the samples at the ends
of an edge.
This object also contains a double which can be used for any purpose.
!*/
public:
ordered_sample_pair(
);
/*!
ensures
- #index1() == 0
- #index2() == 0
- #distance() == 1
!*/
ordered_sample_pair (
const unsigned long idx1,
const unsigned long idx2
);
/*!
ensures
- #index1() == idx1
- #index2() == idx2
- #distance() == 1
!*/
ordered_sample_pair (
const unsigned long idx1,
const unsigned long idx2,
const double dist
);
/*!
ensures
- #index1() == idx1
- #index2() == idx2
- #distance() == dist
!*/
const unsigned long& index1 (
) const;
/*!
ensures
- returns the first index value stored in this object
!*/
const unsigned long& index2 (
) const;
/*!
ensures
- returns the second index value stored in this object
!*/
const double& distance (
) const;
/*!
ensures
- returns the floating point number stored in this object
!*/
};
// ----------------------------------------------------------------------------------------
inline bool operator == (
const ordered_sample_pair& a,
const ordered_sample_pair& b
);
/*!
ensures
- returns a.index1() == b.index1() && a.index2() == b.index2();
I.e. returns true if a and b both represent the same pair and false otherwise.
Note that the distance field is not involved in this comparison.
!*/
inline bool operator != (
const ordered_sample_pair& a,
const ordered_sample_pair& b
);
/*!
ensures
- returns !(a == b)
!*/
// ----------------------------------------------------------------------------------------
inline void serialize (
const ordered_sample_pair& item,
std::ostream& out
);
/*!
provides serialization support
!*/
inline void deserialize (
ordered_sample_pair& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_ORDERED_SAMPLE_PaIR_ABSTRACT_Hh_

View File

@@ -0,0 +1,179 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SAMPLE_PaIR_Hh_
#define DLIB_SAMPLE_PaIR_Hh_
#include "sample_pair_abstract.h"
#include <limits>
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class sample_pair
{
    /*!
        Holds two indices plus a double valued distance.  The constructors
        that take indices always store the smaller one as index1 and the
        larger one as index2, so sample_pair(i,j) and sample_pair(j,i) hold
        the same indices.  The distance is 1 unless a value is supplied.
    !*/
public:

    // Default: both indices are 0 and the distance is 1.
    sample_pair(
    ) :
        _index1(0),
        _index2(0),
        _distance(1)
    {}

    // Indices given (in any order), distance defaults to 1.
    sample_pair (
        const unsigned long idx1,
        const unsigned long idx2
    ) :
        _index1(idx1 < idx2 ? idx1 : idx2),
        _index2(idx1 < idx2 ? idx2 : idx1),
        _distance(1)
    {}

    // Indices (in any order) and distance given.
    sample_pair (
        const unsigned long idx1,
        const unsigned long idx2,
        const double dist
    ) :
        _index1(idx1 < idx2 ? idx1 : idx2),
        _index2(idx1 < idx2 ? idx2 : idx1),
        _distance(dist)
    {}

    const unsigned long& index1 () const { return _index1; }
    const unsigned long& index2 () const { return _index2; }
    const double& distance () const { return _distance; }

private:
    unsigned long _index1;
    unsigned long _index2;
    double _distance;
};
// ----------------------------------------------------------------------------------------
template <typename T>
inline bool order_by_index (
    const T& a,
    const T& b
)
/*!
    Lexicographic "less than" on (index1, index2): index1 decides, and
    index2 is only consulted when the two index1 values are equal.
!*/
{
    if (a.index1() < b.index1())
        return true;

    if (a.index1() == b.index1())
        return a.index2() < b.index2();

    return false;
}
template <typename T>
inline bool order_by_distance (
    const T& a,
    const T& b
)
/*!
    "Less than" on distance(): true when a's distance is strictly smaller
    than b's.
!*/
{
    const bool a_is_closer = a.distance() < b.distance();
    return a_is_closer;
}
template <typename T>
inline bool order_by_descending_distance (
    const T& a,
    const T& b
)
/*!
    "Greater than" on distance(): true when a's distance is strictly larger
    than b's.
!*/
{
    const bool a_is_farther = a.distance() > b.distance();
    return a_is_farther;
}
template <typename T>
bool order_by_distance_and_index (
    const T& a,
    const T& b
)
/*!
    Orders primarily by distance().  When the two distances compare equal,
    order_by_index() breaks the tie.
!*/
{
    if (a.distance() < b.distance())
        return true;

    if (a.distance() == b.distance())
        return order_by_index(a,b);

    return false;
}
// ----------------------------------------------------------------------------------------
inline bool operator == (
    const sample_pair& a,
    const sample_pair& b
)
/*!
    Two sample_pair objects are equal when both of their indices match.
    The distance value does not take part in the comparison.
!*/
{
    if (a.index1() != b.index1())
        return false;
    return a.index2() == b.index2();
}
inline bool operator != (
    const sample_pair& a,
    const sample_pair& b
)
/*!
    Logical negation of operator== for sample_pair.
!*/
{
    const bool same_pair = (a == b);
    return !same_pair;
}
// ----------------------------------------------------------------------------------------
inline void serialize (
const sample_pair& item,
std::ostream& out
)
{
try
{
serialize(item.index1(),out);
serialize(item.index2(),out);
serialize(item.distance(),out);
}
catch (serialization_error& e)
{
throw serialization_error(e.info + "\n while serializing object of type sample_pair");
}
}
inline void deserialize (
    sample_pair& item,
    std::istream& in
)
/*!
    Reads index1, index2 and distance from in (in that order) and assigns a
    sample_pair built from them to item.  A serialization_error raised while
    reading is rethrown with the name of this type appended to its message.
!*/
{
    try
    {
        unsigned long first;
        unsigned long second;
        double stored_distance;

        deserialize(first, in);
        deserialize(second, in);
        deserialize(stored_distance, in);

        // item is only modified once all three fields were read
        item = sample_pair(first, second, stored_distance);
    }
    catch (serialization_error& err)
    {
        throw serialization_error(err.info + "\n while deserializing object of type sample_pair");
    }
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SAMPLE_PaIR_Hh_

View File

@@ -0,0 +1,192 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SAMPLE_PaIR_ABSTRACT_Hh_
#ifdef DLIB_SAMPLE_PaIR_ABSTRACT_Hh_
#include <limits>
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class sample_pair
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is intended to represent an edge in an undirected graph
which has data samples at its vertices. So it contains two integers
(index1 and index2) which represent the identifying indices of
the samples at the ends of an edge. Note that this object enforces
the constraint that index1 <= index2. This has the effect of
making the edges undirected since a sample_pair is incapable
of representing a single edge in more than one way. That is,
sample_pair(i,j) == sample_pair(j,i) for any value of i and j.

This object also contains a double, accessed via distance(), which can
be used for any purpose. It is 1 unless a value is given to the
constructor, and it is not considered when two sample_pair objects
are compared with operator==.
!*/
public:
sample_pair(
);
/*!
ensures
- #index1() == 0
- #index2() == 0
- #distance() == 1
!*/
sample_pair (
const unsigned long idx1,
const unsigned long idx2
);
/*!
ensures
- #index1() == min(idx1,idx2)
- #index2() == max(idx1,idx2)
- #distance() == 1
(i.e. the two indices may be given in either order)
!*/
sample_pair (
const unsigned long idx1,
const unsigned long idx2,
const double dist
);
/*!
ensures
- #index1() == min(idx1,idx2)
- #index2() == max(idx1,idx2)
- #distance() == dist
(i.e. the two indices may be given in either order)
!*/
const unsigned long& index1 (
) const;
/*!
ensures
- returns the first index value stored in this object. This is the
smaller of the two indices given to the constructor, so
index1() <= index2().
!*/
const unsigned long& index2 (
) const;
/*!
ensures
- returns the second index value stored in this object. This is the
larger of the two indices given to the constructor, so
index1() <= index2().
!*/
const double& distance (
) const;
/*!
ensures
- returns the floating point number stored in this object. This is the
dist value given to the constructor, or 1 if no dist was given.
!*/
};
// ----------------------------------------------------------------------------------------
template <typename T>
bool order_by_index (
const T& a,
const T& b
) { return a.index1() < b.index1() || (a.index1() == b.index1() && a.index2() < b.index2()); }
/*!
requires
- T is a type with an interface compatible with sample_pair.
ensures
- returns true if a comes before b when the two are compared
lexicographically, first by index1() and then by index2(). The
distance() values are not consulted.
- provides a total ordering of sample_pair objects that will cause pairs that are
equal to be adjacent when sorted. So for example, this function can be used
with std::sort() to first sort a sequence of sample_pair objects and then
find duplicate edges.
!*/
template <typename T>
bool order_by_distance (
const T& a,
const T& b
) { return a.distance() < b.distance(); }
/*!
requires
- T is a type with an interface compatible with sample_pair.
ensures
- returns true if and only if a.distance() < b.distance(). The index
values are not consulted.
- provides an ordering of sample_pair objects that causes pairs with
smallest distance to be the first in a sorted list. Pairs with equal
distance() values compare as equivalent, so this function alone does not
determine their relative order. This function can be used with std::sort().
!*/
template <typename T>
bool order_by_descending_distance (
const T& a,
const T& b
) { return a.distance() > b.distance(); }
/*!
requires
- T is a type with an interface compatible with sample_pair.
ensures
- returns true if and only if a.distance() > b.distance(). The index
values are not consulted.
- provides an ordering of sample_pair objects that causes pairs with
largest distance to be the first in a sorted list. Pairs with equal
distance() values compare as equivalent, so this function alone does not
determine their relative order. This function can be used with std::sort().
!*/
template <typename T>
bool order_by_distance_and_index (
const T& a,
const T& b
) { return a.distance() < b.distance() || (a.distance() == b.distance() && order_by_index(a,b)); }
/*!
requires
- T is a type with an interface compatible with sample_pair.
ensures
- returns true if a.distance() < b.distance(). If the two distances are
equal then returns order_by_index(a,b). Returns false otherwise.
- provides a total ordering of sample_pair objects that causes pairs with
smallest distance to be the first in a sorted list but also orders samples
with equal distances according to order_by_index(). This function can be
used with std::sort().
!*/
// ----------------------------------------------------------------------------------------
inline bool operator == (
const sample_pair& a,
const sample_pair& b
);
/*!
ensures
- returns a.index1() == b.index1() && a.index2() == b.index2()
I.e. returns true if a and b both represent the same pair and false otherwise.
Note that the distance field is not involved in this comparison, so two
sample_pair objects with the same indices but different distance() values
are considered equal.
!*/
inline bool operator != (
const sample_pair& a,
const sample_pair& b
);
/*!
ensures
- returns !(a == b)
I.e. returns true if a and b represent different pairs and false otherwise.
As with operator==, the distance field is not involved in this comparison.
!*/
// ----------------------------------------------------------------------------------------
inline void serialize (
const sample_pair& item,
std::ostream& out
);
/*!
provides serialization support
ensures
- writes item.index1(), item.index2(), and item.distance() to out, in
that order.
throws
- serialization_error
Thrown if writing any of the three values fails with a
serialization_error. The error message is extended with a note saying
the failure happened while serializing a sample_pair.
!*/
inline void deserialize (
sample_pair& item,
std::istream& in
);
/*!
provides deserialization support
ensures
- reads two unsigned long index values and then a double from in (the
order used by serialize()) and assigns a sample_pair constructed from
them to item.
throws
- serialization_error
Thrown if reading any of the three values fails with a
serialization_error. The error message is extended with a note saying
the failure happened while deserializing a sample_pair. In this case
item is not assigned to.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SAMPLE_PaIR_ABSTRACT_Hh_