mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2026-05-16 04:08:00 +00:00
76 lines
3.4 KiB
C++
76 lines
3.4 KiB
C++
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
|
|
// The guard macro is explicitly undefined immediately before it is tested, so
// the #ifdef below is always false and everything up to the matching #endif is
// skipped by the preprocessor.  The declarations in this file therefore serve
// as reference documentation only and are never compiled.
#undef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
|
|
#ifdef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
|
|
|
|
#include "svm_c_linear_dcd_trainer_abstract.h"
|
|
#include <vector>
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
enum active_learning_mode
{
    max_min_margin,
    ratio_margin
};
/*!
    WHAT THIS ENUM REPRESENTS
        Selects which sample selection strategy rank_unlabeled_training_samples()
        uses when ranking unlabeled samples:
            - max_min_margin: the MaxMin Margin strategy.  This is the default
              value of the mode argument of rank_unlabeled_training_samples().
            - ratio_margin:   the Ratio Margin strategy.
        Both strategies are described in the paper:
            Support Vector Machine Active Learning with Applications to Text Classification
            by Simon Tong and Daphne Koller
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename kernel_type,
|
|
typename in_sample_vector_type,
|
|
typename in_scalar_vector_type,
|
|
typename in_sample_vector_type2
|
|
>
|
|
std::vector<unsigned long> rank_unlabeled_training_samples (
|
|
const svm_c_linear_dcd_trainer<kernel_type>& trainer,
|
|
const in_sample_vector_type& samples,
|
|
const in_scalar_vector_type& labels,
|
|
const in_sample_vector_type2& unlabeled_samples,
|
|
const active_learning_mode mode = max_min_margin
|
|
);
|
|
/*!
|
|
requires
|
|
- if (samples.size() != 0) then
|
|
- it must be legal to call trainer.train(samples, labels)
|
|
- is_learning_problem(samples, labels) == true
|
|
- unlabeled_samples must contain the same kind of vectors as samples.
|
|
- unlabeled_samples, samples, and labels must be matrices or types of
|
|
objects convertible to a matrix via mat().
|
|
- is_vector(unlabeled_samples) == true
|
|
ensures
|
|
- Suppose that we wish to learn a binary classifier by calling
|
|
trainer.train(samples, labels) but we are also interested in selecting one of
|
|
the elements of unlabeled_samples to add to our training data. Since doing
|
|
this requires us to find out the label of the sample, a potentially tedious
|
|
or expensive process, we would like to select the "best" element from
|
|
unlabeled_samples for labeling. The rank_unlabeled_training_samples()
|
|
attempts to find this "best" element. In particular, this function returns a
|
|
ranked list of all the elements in unlabeled_samples such that that the
|
|
"best" elements come first.
|
|
- The method used by this function is described in the paper:
|
|
Support Vector Machine Active Learning with Applications to Text Classification
|
|
by Simon Tong and Daphne Koller
|
|
In particular, this function implements the MaxMin Margin and Ratio Margin
|
|
selection strategies described in the paper. Moreover, the mode argument
|
|
to this function selects which of these strategies is used.
|
|
- returns a std::vector V such that:
|
|
- V contains a list of all the indices from unlabeled_samples. Moreover,
|
|
they are ordered so that the most useful samples come first.
|
|
- V.size() == unlabeled_samples.size()
|
|
- unlabeled_samples[V[0]] == The best sample to add into the training set.
|
|
- unlabeled_samples[V[1]] == The second best sample to add into the training set.
|
|
- unlabeled_samples[V[i]] == The i-th best sample to add into the training set.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
|
|
|
|
|