Master commit of OpenFace.

This commit is contained in:
unknown
2016-04-28 15:40:36 -04:00
parent 5346d303ab
commit 57e58a6949
4406 changed files with 1441342 additions and 0 deletions

View File

@@ -0,0 +1,168 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_BOX_OVERlAP_TESTING_Hh_
#define DLIB_BOX_OVERlAP_TESTING_Hh_
#include "box_overlap_testing_abstract.h"
#include "../geometry.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class test_box_overlap
{
    /*!
        Function object that decides whether two rectangles overlap "enough".
        The decision is driven by two thresholds:
            - match_thresh:   compared against intersection area / (a+b).area()
            - overlap_thresh: compared against intersection area / a.area() and
                              intersection area / b.area()
    !*/
public:

    // Default tester: match threshold 0.5, overlap threshold 1.0.
    test_box_overlap (
    ) : match_thresh(0.5), overlap_thresh(1.0)
    {}

    // Builds a tester from explicit thresholds.  Both values must lie in [0,1];
    // this is only checked when DLIB_ASSERT is active.
    explicit test_box_overlap (
        double match_thresh_,
        double overlap_thresh_ = 1.0
    ) : match_thresh(match_thresh_), overlap_thresh(overlap_thresh_)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(0 <= match_thresh && match_thresh <= 1 &&
                    0 <= overlap_thresh && overlap_thresh <= 1,
            "\t test_box_overlap::test_box_overlap(match_thresh, overlap_thresh)"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t match_thresh: " << match_thresh
            << "\n\t overlap_thresh: " << overlap_thresh
            << "\n\t this: " << this
            );
    }

    // Returns true when a and b overlap by more than either threshold allows.
    bool operator() (
        const dlib::rectangle& a,
        const dlib::rectangle& b
    ) const
    {
        const double shared_area = a.intersect(b).area();

        // Rectangles with an empty intersection never count as overlapping.
        if (shared_area == 0)
            return false;

        const double combined_area = (a+b).area();

        // Any one of the three ratios exceeding its threshold is enough.
        return shared_area/combined_area > match_thresh ||
               shared_area/a.area() > overlap_thresh ||
               shared_area/b.area() > overlap_thresh;
    }

    // Threshold applied to the two "fraction of a rectangle covered" ratios.
    double get_overlap_thresh (
    ) const
    {
        return overlap_thresh;
    }

    // Threshold applied to the intersection / (a+b) ratio.
    double get_match_thresh (
    ) const
    {
        return match_thresh;
    }

public:
    // NOTE(review): these members are public, so they can be written directly,
    // which bypasses the range check performed by the two-argument constructor.
    double match_thresh;
    double overlap_thresh;
};
// ----------------------------------------------------------------------------------------
inline void serialize (
    const test_box_overlap& item,
    std::ostream& out
)
{
    // Stream layout: match threshold first, overlap threshold second.  The
    // matching deserialize() reads the two values back in this same order.
    const double match_value = item.get_match_thresh();
    const double overlap_value = item.get_overlap_thresh();
    serialize(match_value, out);
    serialize(overlap_value, out);
}
inline void deserialize (
    test_box_overlap& item,
    std::istream& in
)
{
    // Read the thresholds in the order serialize() wrote them: match, then overlap.
    double stored_match;
    deserialize(stored_match, in);

    double stored_overlap;
    deserialize(stored_overlap, in);

    // Rebuild through the constructor so its argument check (when asserts are
    // enabled) also applies to values coming from a stream.
    item = test_box_overlap(stored_match, stored_overlap);
}
// ----------------------------------------------------------------------------------------
inline test_box_overlap find_tight_overlap_tester (
    const std::vector<std::vector<rectangle> >& rects
)
{
    // Largest match score and largest coverage fraction seen over every pair of
    // rectangles that belong to the same inner vector.
    double largest_match = 0;
    double largest_overlap = 0;

    for (unsigned long set_idx = 0; set_idx < rects.size(); ++set_idx)
    {
        const std::vector<rectangle>& boxes = rects[set_idx];
        for (unsigned long first = 0; first < boxes.size(); ++first)
        {
            for (unsigned long second = first+1; second < boxes.size(); ++second)
            {
                const rectangle& a = boxes[first];
                const rectangle& b = boxes[second];

                // The intersection area is shared by all three ratios.
                const double shared_area = a.intersect(b).area();
                const double match_score = shared_area/(double)(a+b).area();
                const double covered_a = shared_area/(double)(a).area();
                const double covered_b = shared_area/(double)(b).area();

                if (match_score > largest_match)
                    largest_match = match_score;
                if (covered_a > largest_overlap)
                    largest_overlap = covered_a;
                if (covered_b > largest_overlap)
                    largest_overlap = covered_b;
            }
        }
    }

    // Relax these thresholds very slightly.  We do this because on some systems the
    // boxes that generated the max values erroneously trigger a box overlap match
    // even though their overlap and match values are *equal* to the thresholds but not
    // greater.  That is, sometimes when double values get moved around they change
    // their values slightly, so this avoids the problems that can create.
    const double relax_factor = 1.0000001;
    largest_match = std::min(relax_factor*largest_match, 1.0);
    largest_overlap = std::min(relax_factor*largest_overlap, 1.0);

    return test_box_overlap(largest_match, largest_overlap);
}
// ----------------------------------------------------------------------------------------
inline bool overlaps_any_box (
const test_box_overlap& tester,
const std::vector<rectangle>& rects,
const rectangle& rect
)
{
for (unsigned long i = 0; i < rects.size(); ++i)
{
if (tester(rects[i],rect))
return true;
}
return false;
}
// ----------------------------------------------------------------------------------------
inline bool overlaps_any_box (
const std::vector<rectangle>& rects,
const rectangle& rect
)
{
return overlaps_any_box(test_box_overlap(),rects,rect);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_BOX_OVERlAP_TESTING_Hh_

View File

@@ -0,0 +1,150 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_BOX_OVERlAP_TESTING_ABSTRACT_Hh_
#ifdef DLIB_BOX_OVERlAP_TESTING_ABSTRACT_Hh_
#include "../geometry.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class test_box_overlap
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object is a simple function object for determining if two rectangles
        overlap.

    THREAD SAFETY
        Concurrent access to an instance of this object is safe provided that
        only const member functions are invoked.  Otherwise, access must be
        protected by a mutex lock.
!*/
public:
test_box_overlap (
);
/*!
    ensures
        - #get_match_thresh() == 0.5
        - #get_overlap_thresh() == 1.0
!*/
explicit test_box_overlap (
double match_thresh,
double overlap_thresh = 1.0
);
/*!
    requires
        - 0 <= match_thresh <= 1
        - 0 <= overlap_thresh <= 1
    ensures
        - #get_match_thresh() == match_thresh
        - #get_overlap_thresh() == overlap_thresh
!*/
bool operator() (
const dlib::rectangle& a,
const dlib::rectangle& b
) const;
/*!
    ensures
        - returns true if a and b overlap "enough".  This is defined precisely below.
        - if (a.intersect(b).area()/(a+b).area() > get_match_thresh() ||
              a.intersect(b).area()/a.area() > get_overlap_thresh() ||
              a.intersect(b).area()/b.area() > get_overlap_thresh() ) then
            - returns true
        - else
            - returns false
        - if a.intersect(b).area() == 0 then the result is always false.
!*/
double get_match_thresh (
) const;
/*!
    ensures
        - returns the threshold used to determine if two rectangles match.
          Note that the match score varies from 0 to 1 and only becomes 1
          when two rectangles are identical.
!*/
double get_overlap_thresh (
) const;
/*!
    ensures
        - returns the threshold used to determine if two rectangles overlap.  This
          value is the fraction (between 0 and 1) of a rectangle's area that is
          covered by another rectangle.
!*/
};
// ----------------------------------------------------------------------------------------
void serialize (
const test_box_overlap& item,
std::ostream& out
);
/*!
    provides serialization support
        - writes item.get_match_thresh() followed by item.get_overlap_thresh()
          to out.
!*/
void deserialize (
test_box_overlap& item,
std::istream& in
);
/*!
    provides deserialization support
        - reads a match threshold and then an overlap threshold from in and
          assigns item a test_box_overlap constructed from those two values.
!*/
// ----------------------------------------------------------------------------------------
test_box_overlap find_tight_overlap_tester (
const std::vector<std::vector<rectangle> >& rects
);
/*!
    ensures
        - This function finds the most restrictive test_box_overlap object possible
          that is consistent with the given set of sets of rectangles.
        - To be precise, this function finds and returns a test_box_overlap object
          TBO such that:
            - TBO.get_match_thresh() and TBO.get_overlap_thresh() are as small
              as possible such that the following conditions are satisfied.
            - for all valid i:
                - for all distinct rectangles A and B in rects[i]:
                    - TBO(A,B) == false
        - The implementation multiplies both thresholds by 1.0000001 (capped at 1.0)
          so that pairs whose scores equal the maximum are not reported as
          overlapping because of floating point rounding.
!*/
// ----------------------------------------------------------------------------------------
bool overlaps_any_box (
const test_box_overlap& tester,
const std::vector<rectangle>& rects,
const rectangle& rect
);
/*!
    ensures
        - returns true if rect overlaps any box in rects and false otherwise.  Overlap
          is determined based on the given tester object.  That is, returns true if
          and only if tester(rects[i], rect) == true for some valid i.
        - returns false if rects is empty.
!*/
// ----------------------------------------------------------------------------------------
bool overlaps_any_box (
const std::vector<rectangle>& rects,
const rectangle& rect
);
/*!
    ensures
        - returns overlaps_any_box(test_box_overlap(), rects, rect)
          (i.e. uses the default thresholds: match 0.5, overlap 1.0)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_BOX_OVERlAP_TESTING_ABSTRACT_Hh_

View File

@@ -0,0 +1,113 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DETECTION_TEMPlATE_TOOLS_Hh_
#define DLIB_DETECTION_TEMPlATE_TOOLS_Hh_
#include "detection_template_tools_abstract.h"
#include "../geometry.h"
#include "../matrix.h"
#include <utility>
#include <vector>
#include <cmath>
namespace dlib
{
// ----------------------------------------------------------------------------------------
inline rectangle compute_box_dimensions (
    const double width_to_height_ratio,
    const double area
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(width_to_height_ratio > 0 && area > 0,
        "\t rectangle compute_box_dimensions()"
        << "\n\t Invalid arguments were given to this function. "
        << "\n\t width_to_height_ratio: " << width_to_height_ratio
        << "\n\t area: " << area
        );

    /*
        width*height == area
        width/height == width_to_height_ratio
        => height == sqrt(area/width_to_height_ratio)
    */
    int height = (int)std::floor(std::sqrt(area/width_to_height_ratio) + 0.5);

    // When area/width_to_height_ratio < 0.25 the rounded height is 0.  Dividing by
    // it below would produce infinity, and converting infinity to int is undefined
    // behaviour, so use the smallest usable height instead.
    if (height < 1)
        height = 1;

    // Pick the width that best reproduces the requested area for this height.
    const int width = (int)std::floor(area/height + 0.5);

    // The box is centred on the origin.
    return centered_rect(0,0,width,height);
}
// ----------------------------------------------------------------------------------------
inline std::vector<rectangle> create_single_box_detection_template (
    const rectangle& object_box
)
{
    // The template is a vector holding exactly one element: a copy of object_box.
    return std::vector<rectangle>(1, object_box);
}
// ----------------------------------------------------------------------------------------
inline std::vector<rectangle> create_overlapped_2x2_detection_template (
const rectangle& object_box
)
{
std::vector<rectangle> result;
const point c = center(object_box);
result.push_back(rectangle() + c + object_box.tl_corner() + object_box.tr_corner());
result.push_back(rectangle() + c + object_box.bl_corner() + object_box.br_corner());
result.push_back(rectangle() + c + object_box.tl_corner() + object_box.bl_corner());
result.push_back(rectangle() + c + object_box.tr_corner() + object_box.br_corner());
return result;
}
// ----------------------------------------------------------------------------------------
inline std::vector<rectangle> create_grid_detection_template (
const rectangle& object_box,
unsigned int cells_x,
unsigned int cells_y
)
{
// make sure requires clause is not broken
DLIB_ASSERT(cells_x > 0 && cells_y > 0,
"\t std::vector<rectangle> create_grid_detection_template()"
<< "\n\t The number of cells along a dimension can't be zero. "
<< "\n\t cells_x: " << cells_x
<< "\n\t cells_y: " << cells_y
);
std::vector<rectangle> result;
const matrix<double,1> x = linspace(object_box.left(), object_box.right(), cells_x+1);
const matrix<double,1> y = linspace(object_box.top(), object_box.bottom(), cells_y+1);
for (long j = 0; j+1 < y.size(); ++j)
{
for (long i = 0; i+1 < x.size(); ++i)
{
const dlib::vector<double,2> tl(x(i),y(j));
const dlib::vector<double,2> br(x(i+1),y(j+1));
result.push_back(rectangle(tl,br));
}
}
return result;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DETECTION_TEMPlATE_TOOLS_Hh_

View File

@@ -0,0 +1,95 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DETECTION_TEMPlATE_TOOLS_ABSTRACT_Hh_
#ifdef DLIB_DETECTION_TEMPlATE_TOOLS_ABSTRACT_Hh_
#include "../geometry.h"
#include <utility>
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
rectangle compute_box_dimensions (
const double width_to_height_ratio,
const double area
);
/*!
    requires
        - area > 0
        - width_to_height_ratio > 0
    ensures
        - returns a rectangle with the given area and width_to_height_ratio.
        - In particular, returns a rectangle R such that:
            - R.area() == area (to within integer precision)
            - R.width()/R.height() == width_to_height_ratio (to within integer precision)
            - center(R) == point(0,0)
        - The height is computed first, as sqrt(area/width_to_height_ratio) rounded
          to the nearest integer, and the width is then area/height rounded to the
          nearest integer.
!*/
// ----------------------------------------------------------------------------------------
std::vector<rectangle> create_single_box_detection_template (
const rectangle& object_box
);
/*!
    ensures
        - returns a vector that contains only object_box.
        - In particular, returns a vector V such that:
            - V.size() == 1
            - V[0] == object_box
!*/
// ----------------------------------------------------------------------------------------
std::vector<rectangle> create_overlapped_2x2_detection_template (
const rectangle& object_box
);
/*!
    ensures
        - Divides object_box up into four overlapping regions, the
          top half, bottom half, left half, and right half.  These
          four rectangles are returned inside a std::vector.
        - In particular, returns a vector V such that:
            - V.size() == 4
            - V[0] == top half of object_box
            - V[1] == bottom half of object_box
            - V[2] == left half of object_box
            - V[3] == right half of object_box
            - for all valid i: object_box.contains(V[i]) == true
        - Each half is built from center(object_box) and the two corners of
          object_box on the corresponding side.
!*/
// ----------------------------------------------------------------------------------------
std::vector<rectangle> create_grid_detection_template (
const rectangle& object_box,
unsigned int cells_x,
unsigned int cells_y
);
/*!
    requires
        - cells_x > 0
        - cells_y > 0
    ensures
        - Divides object_box up into a grid and returns a vector
          containing all the rectangles corresponding to elements
          of the grid.  Moreover, the grid will be cells_x elements
          wide and cells_y elements tall.
        - In particular, returns a vector V such that:
            - V.size() == cells_x*cells_y
            - for all valid i:
                - object_box.contains(V[i]) == true
                - V[i] == The rectangle corresponding to the ith grid
                  element.
        - Grid elements are stored row by row, left to right within a row, so
          the element in grid row r and grid column c is V[r*cells_x + c].
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DETECTION_TEMPlATE_TOOLS_ABSTRACT_Hh_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,25 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_FRONTAL_FACE_DETECTOr_ABSTRACT_Hh_
#ifdef DLIB_FRONTAL_FACE_DETECTOr_ABSTRACT_Hh_
#include "object_detector_abstract.h"
#include "scan_fhog_pyramid_abstract.h"
#include "../image_transforms/image_pyramid_abstract.h"
namespace dlib
{
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;
/*!
    A frontal_face_detector is an object_detector whose image scanner type is
    scan_fhog_pyramid<pyramid_down<6> >.
!*/
frontal_face_detector get_frontal_face_detector(
);
/*!
    ensures
        - returns an object_detector that is configured to find human faces that are
          looking more or less towards the camera.
!*/
}
#endif // DLIB_FRONTAL_FACE_DETECTOr_ABSTRACT_Hh_

View File

@@ -0,0 +1,103 @@
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_FULL_OBJECT_DeTECTION_Hh_
#define DLIB_FULL_OBJECT_DeTECTION_Hh_
#include "../geometry.h"
#include "full_object_detection_abstract.h"
#include <vector>
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Sentinel point (both coordinates 0x7FFFFFFF) that marks an object part as
// "not present".  all_parts_in_rect() ignores parts equal to this value.
const static point OBJECT_PART_NOT_PRESENT(0x7FFFFFFF,
0x7FFFFFFF);
// ----------------------------------------------------------------------------------------
class full_object_detection
{
    /*!
        Stores a rectangle for an object together with a list of points, one per
        object part.
    !*/
public:

    // Default state: default-constructed rectangle and no parts.
    full_object_detection(){}

    // A detection that has a rectangle but no parts.
    explicit full_object_detection(
        const rectangle& rect_
    ) : rect(rect_) {}

    // A detection with a rectangle and a copy of the given part locations.
    full_object_detection(
        const rectangle& rect_,
        const std::vector<point>& parts_
    ) : rect(rect_), parts(parts_) {}

    // The rectangle given at construction (or read by deserialize()).
    const rectangle& get_rect() const { return rect; }

    // Number of stored part locations.
    unsigned long num_parts() const { return parts.size(); }

    // Location of the idx-th part.  Requires idx < num_parts(); this is only
    // checked when DLIB_ASSERT is active.
    const point& part(
        unsigned long idx
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(idx < num_parts(),
            "\t point full_object_detection::part()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t idx: " << idx
            << "\n\t num_parts(): " << num_parts()
            << "\n\t this: " << this
            );
        return parts[idx];
    }

    // Stream layout: format version (1), then the rectangle, then the parts.
    friend void serialize (
        const full_object_detection& item,
        std::ostream& out
    )
    {
        int format_version = 1;
        serialize(format_version, out);
        serialize(item.rect, out);
        serialize(item.parts, out);
    }

    // Reads what serialize() wrote.  Throws serialization_error when the stored
    // version is not 1; in that case item is left untouched.
    friend void deserialize (
        full_object_detection& item,
        std::istream& in
    )
    {
        int stored_version = 0;
        deserialize(stored_version, in);
        if (stored_version != 1)
            throw serialization_error("Unexpected version encountered while deserializing dlib::full_object_detection.");

        deserialize(item.rect, in);
        deserialize(item.parts, in);
    }

private:
    rectangle rect;
    std::vector<point> parts;
};
// ----------------------------------------------------------------------------------------
inline bool all_parts_in_rect (
const full_object_detection& obj
)
{
for (unsigned long i = 0; i < obj.num_parts(); ++i)
{
if (obj.get_rect().contains(obj.part(i)) == false &&
obj.part(i) != OBJECT_PART_NOT_PRESENT)
return false;
}
return true;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_FULL_OBJECT_DeTECTION_Hh_

View File

@@ -0,0 +1,125 @@
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_Hh_
#ifdef DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_Hh_
#include <vector>
#include "../geometry.h"
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Special point value used to indicate that an object part is "not present".
// Both of its coordinates are 0x7FFFFFFF.
const static point OBJECT_PART_NOT_PRESENT(0x7FFFFFFF,
0x7FFFFFFF);
// ----------------------------------------------------------------------------------------
class full_object_detection
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents the location of an object in an image along with the
        positions of each of its constituent parts.
!*/
public:
full_object_detection(
const rectangle& rect,
const std::vector<point>& parts
);
/*!
    ensures
        - #get_rect() == rect
        - #num_parts() == parts.size()
        - for all valid i:
            - #part(i) == parts[i]
!*/
full_object_detection(
);
/*!
    ensures
        - #get_rect().is_empty() == true
        - #num_parts() == 0
!*/
explicit full_object_detection(
const rectangle& rect
);
/*!
    ensures
        - #get_rect() == rect
        - #num_parts() == 0
!*/
const rectangle& get_rect(
) const;
/*!
    ensures
        - returns the rectangle that indicates where this object is.  In general,
          this should be the bounding box for the object.
!*/
unsigned long num_parts(
) const;
/*!
    ensures
        - returns the number of parts in this object.
!*/
const point& part(
unsigned long idx
) const;
/*!
    requires
        - idx < num_parts()
    ensures
        - returns the location of the center of the idx-th part of this object.
          Note that it is valid for a part to be "not present".  This is indicated
          when the return value of part() is equal to OBJECT_PART_NOT_PRESENT.
          This is useful for modeling object parts that are not always observed.
!*/
};
// ----------------------------------------------------------------------------------------
void serialize (
const full_object_detection& item,
std::ostream& out
);
/*!
    provides serialization support
        - writes a format version number (1), then item's rectangle, then its
          parts to out.
!*/
void deserialize (
full_object_detection& item,
std::istream& in
);
/*!
    provides deserialization support
        - throws serialization_error if the version number read from in is not 1.
!*/
// ----------------------------------------------------------------------------------------
bool all_parts_in_rect (
const full_object_detection& obj
);
/*!
    ensures
        - returns true if all the parts in obj are contained within obj.get_rect().
          That is, returns true if and only if, for all valid i, the following is
          always true:
            obj.get_rect().contains(obj.part(i)) == true || obj.part(i) == OBJECT_PART_NOT_PRESENT
        - returns true if obj.num_parts() == 0.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_Hh_

View File

@@ -0,0 +1,431 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_GeNERIC_IMAGE_Hh_
#define DLIB_GeNERIC_IMAGE_Hh_
#include "../assert.h"
namespace dlib
{
/*!
In dlib, an "image" is any object that implements the generic image interface. In
particular, this simply means that an image type (let's refer to it as image_type
from here on) has the following seven global functions defined for it:
- long num_rows (const image_type& img)
- long num_columns (const image_type& img)
- void set_image_size( image_type& img, long rows, long cols)
- void* image_data ( image_type& img)
- const void* image_data (const image_type& img)
- long width_step (const image_type& img)
- void swap ( image_type& a, image_type& b)
And also provides a specialization of the image_traits template that looks like:
namespace dlib
{
template <>
struct image_traits<image_type>
{
typedef the_type_of_pixel_used_in_image_type pixel_type;
};
}
Additionally, an image object must be default constructible. This means that
expressions of the form:
image_type img;
Must be legal.
Finally, the type of pixel in image_type must have a pixel_traits specialization.
That is, pixel_traits<typename image_traits<image_type>::pixel_type> must be one of
the specializations of pixel_traits.
To be very precise, the seven functions defined above are defined thusly:
long num_rows(
const image_type& img
);
/!*
ensures
- returns the number of rows in the given image
*!/
long num_columns(
const image_type& img
);
/!*
ensures
- returns the number of columns in the given image
*!/
void set_image_size(
image_type& img,
long rows,
long cols
);
/!*
requires
- rows >= 0 && cols >= 0
ensures
- num_rows(#img) == rows
- num_columns(#img) == cols
*!/
void* image_data(
image_type& img
);
/!*
ensures
- returns a non-const pointer to the pixel at row and column position 0,0
in the given image. Or if the image has zero rows or columns in it
then this function returns NULL.
- The image lays pixels down in row major order. However, there might
be padding at the end of each row. The amount of padding is given by
width_step(img).
*!/
const void* image_data(
const image_type& img
);
/!*
ensures
- returns a const pointer to the pixel at row and column position 0,0 in
the given image. Or if the image has zero rows or columns in it then
this function returns NULL.
- The image lays pixels down in row major order. However, there might
be padding at the end of each row. The amount of padding is given by
width_step(img).
*!/
long width_step(
const image_type& img
);
/!*
ensures
- returns the size of one row of the image, in bytes. More precisely,
return a number N such that: (char*)image_data(img) + N*R == a
pointer to the first pixel in the R-th row of the image. This means
that the image must lay its pixels down in row major order.
*!/
void swap(
image_type& a,
image_type& b
);
/!*
ensures
- swaps the state of a and b
*!/
!*/
// ----------------------------------------------------------------------------------------
template <typename image_type>
struct image_traits;
/*!
    WHAT THIS OBJECT REPRESENTS
        This is a traits class for generic image objects.  You can use it to find out
        the pixel type contained within an image via an expression of the form:
            image_traits<image_type>::pixel_type

        Only the primary template is declared here (it is never defined), so each
        image type must supply its own specialization, as described at the top of
        this file.
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// UTILITIES TO MAKE ACCESSING IMAGE PIXELS SIMPLER
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename image_type
>
class image_view
{
/*!
    REQUIREMENTS ON image_type
        image_type must be an image object as defined at the top of this file.

    WHAT THIS OBJECT REPRESENTS
        This object takes an image object and wraps it with an interface that makes
        it look like a dlib::array2d.  That is, it makes it look similar to a
        regular 2-dimensional C style array, making code which operates on the
        pixels simple to read.

        Note that an image_view instance is valid until the image given to its
        constructor is modified through an interface other than the image_view
        instance.  This is because, for example, someone might cause the underlying
        image object to reallocate its memory, thus invalidating the pointer to its
        pixel data stored in the image_view.

        As an aside, the reason why this object stores a pointer to the image
        object's data and uses that pointer instead of calling image_data() each
        time a pixel is accessed is to allow for image objects to implement
        complex, and possibly slow, image_data() functions.  For example, an image
        object might perform some kind of synchronization between a GPU and the
        host memory during a call to image_data().  Therefore, we call image_data()
        only in image_view's constructor to avoid the performance penalty of
        calling it for each pixel access.
!*/
public:
typedef typename image_traits<image_type>::pixel_type pixel_type;
image_view(
image_type& img
) :
_data((char*)image_data(img)),
_width_step(width_step(img)),
_nr(num_rows(img)),
_nc(num_columns(img)),
_img(&img)
{}
/*!
    ensures
        - caches img's data pointer, width_step, number of rows and number of
          columns, and stores the address of img so set_size() can resize it.
        - this constructor is not explicit, so an image_type object converts
          implicitly to an image_view.  set_size() relies on that conversion.
!*/
long nr() const { return _nr; }
/*!
    ensures
        - returns the number of rows in this image.
!*/
long nc() const { return _nc; }
/*!
    ensures
        - returns the number of columns in this image.
!*/
unsigned long size() const { return static_cast<unsigned long>(nr()*nc()); }
/*!
    ensures
        - returns the number of pixels in this image.
!*/
#ifndef ENABLE_ASSERTS
pixel_type* operator[] (long row) { return (pixel_type*)(_data+_width_step*row); }
/*!
    requires
        - 0 <= row < nr()
    ensures
        - returns a pointer to the first pixel in the row-th row.  Therefore, the
          pixel at row and column position r,c can be accessed via (*this)[r][c].
!*/
const pixel_type* operator[] (long row) const { return (const pixel_type*)(_data+_width_step*row); }
/*!
    requires
        - 0 <= row < nr()
    ensures
        - returns a const pointer to the first pixel in the row-th row.  Therefore,
          the pixel at row and column position r,c can be accessed via
          (*this)[r][c].
!*/
#else
// If asserts are enabled then we need to return a proxy class so we can make sure
// the column accesses don't go out of bounds.
struct pix_row
{
// data_ points at the first pixel of one row, nc_ is the number of columns in it.
pix_row(pixel_type* data_, long nc_) : data(data_),_nc(nc_) {}
// Range-checked read access to the pixel in column col.
const pixel_type& operator[] (long col) const
{
DLIB_ASSERT(0 <= col && col < _nc,
"\t The given column index is out of range."
<< "\n\t col: " << col
<< "\n\t _nc: " << _nc);
return data[col];
}
// Range-checked read/write access to the pixel in column col.
pixel_type& operator[] (long col)
{
DLIB_ASSERT(0 <= col && col < _nc,
"\t The given column index is out of range."
<< "\n\t col: " << col
<< "\n\t _nc: " << _nc);
return data[col];
}
private:
pixel_type* const data;
const long _nc;
};
// Range-checked row access; the returned proxy then checks the column index.
pix_row operator[] (long row)
{
DLIB_ASSERT(0 <= row && row < _nr,
"\t The given row index is out of range."
<< "\n\t row: " << row
<< "\n\t _nr: " << _nr);
return pix_row((pixel_type*)(_data+_width_step*row), _nc);
}
// Const overload.  The proxy is returned as a const object, so only its const
// (read-only) operator[] can be called on it.
const pix_row operator[] (long row) const
{
DLIB_ASSERT(0 <= row && row < _nr,
"\t The given row index is out of range."
<< "\n\t row: " << row
<< "\n\t _nr: " << _nr);
return pix_row((pixel_type*)(_data+_width_step*row), _nc);
}
#endif
void set_size(long rows, long cols)
/*!
    requires
        - rows >= 0 && cols >= 0
    ensures
        - Tells the underlying image to resize itself to have the given number of
          rows and columns.
        - #nr() == rows
        - #nc() == cols
!*/
{
DLIB_ASSERT((cols >= 0 && rows >= 0),
"\t image_view::set_size(long rows, long cols)"
<< "\n\t The images can't have negative rows or columns."
<< "\n\t cols: " << cols
<< "\n\t rows: " << rows
);
// Resize the underlying image, then rebuild this view from it (through the
// converting constructor) so the cached pointer and dimensions are refreshed.
set_image_size(*_img, rows, cols); *this = *_img;
}
void clear() { set_size(0,0); }
/*!
    ensures
        - sets the image to have 0 pixels in it.
!*/
private:
char* _data;        // image_data(img) as seen at construction; base for row addressing
long _width_step;   // width_step(img): offset in bytes from one row to the next
long _nr;           // num_rows(img) as seen at construction
long _nc;           // num_columns(img) as seen at construction
image_type* _img;   // the wrapped image; used by set_size()
};
// ----------------------------------------------------------------------------------------
template <typename image_type>
class const_image_view
{
/*!
    REQUIREMENTS ON image_type
        image_type must be an image object as defined at the top of this file.

    WHAT THIS OBJECT REPRESENTS
        This object is just like the image_view except that it provides a "const"
        view into an image.  That is, it has the same interface as image_view
        except that you can't modify the image through a const_image_view.
        Accordingly it has no set_size() or clear() and keeps no pointer to the
        image object itself, only to its pixel data.
!*/
public:
typedef typename image_traits<image_type>::pixel_type pixel_type;
// Caches img's data pointer, width_step, row count and column count.
const_image_view(
const image_type& img
) :
_data((char*)image_data(img)),
_width_step(width_step(img)),
_nr(num_rows(img)),
_nc(num_columns(img))
{}
// Number of rows, as cached at construction.
long nr() const { return _nr; }
// Number of columns, as cached at construction.
long nc() const { return _nc; }
// Number of pixels: nr()*nc().
unsigned long size() const { return static_cast<unsigned long>(nr()*nc()); }
#ifndef ENABLE_ASSERTS
// Requires 0 <= row < nr().  Returns a const pointer to the first pixel of the
// row-th row, so a pixel can be read via (*this)[r][c].
const pixel_type* operator[] (long row) const { return (const pixel_type*)(_data+_width_step*row); }
#else
// If asserts are enabled then we need to return a proxy class so we can make sure
// the column accesses don't go out of bounds.
struct pix_row
{
// data_ points at the first pixel of one row, nc_ is the number of columns in it.
pix_row(pixel_type* data_, long nc_) : data(data_),_nc(nc_) {}
// Range-checked read access to the pixel in column col.  This is the only
// accessor, so pixels cannot be modified through the proxy.
const pixel_type& operator[] (long col) const
{
DLIB_ASSERT(0 <= col && col < _nc,
"\t The given column index is out of range."
<< "\n\t col: " << col
<< "\n\t _nc: " << _nc);
return data[col];
}
private:
pixel_type* const data;
const long _nc;
};
// Range-checked row access.  The C-style cast drops the const of _data because
// pix_row stores a non-const pointer; pix_row still only hands out const references.
const pix_row operator[] (long row) const
{
DLIB_ASSERT(0 <= row && row < _nr,
"\t The given row index is out of range."
<< "\n\t row: " << row
<< "\n\t _nr: " << _nr);
return pix_row((pixel_type*)(_data+_width_step*row), _nc);
}
#endif
private:
const char* _data;  // image_data(img) as seen at construction; base for row addressing
long _width_step;   // width_step(img): offset in bytes from one row to the next
long _nr;           // num_rows(img) as seen at construction
long _nc;           // num_columns(img) as seen at construction
};
// ----------------------------------------------------------------------------------------
template <typename image_type>
image_view<image_type> make_image_view ( image_type& img)
{
    image_view<image_type> view(img);
    return view;
}
/*!
    requires
        - image_type == an image object that implements the interface defined at the
          top of this file.
    ensures
        - returns image_view<image_type>(img), i.e. a mutable view of img.  The
          template argument is deduced, so callers need not spell it out.
!*/
template <typename image_type>
const_image_view<image_type> make_image_view (const image_type& img)
{
    const_image_view<image_type> view(img);
    return view;
}
/*!
    requires
        - image_type == an image object that implements the interface defined at the
          top of this file.
    ensures
        - returns const_image_view<image_type>(img), i.e. a read-only view of img.
          This overload is the one selected when img is const.
!*/
// ----------------------------------------------------------------------------------------
template <typename image_type>
inline unsigned long image_size(
    const image_type& img
)
{
    const long cols = num_columns(img);
    const long rows = num_rows(img);
    return cols*rows;
}
/*!
    requires
        - image_type == an image object that implements the interface defined at the
          top of this file.
    ensures
        - returns the number of pixels in the given image, computed as
          num_columns(img)*num_rows(img).
!*/
// ----------------------------------------------------------------------------------------
template <typename image_type>
inline long num_rows(
    const image_type& img
)
{
    const long row_count = img.nr();
    return row_count;
}
/*!
    ensures
        - Default implementation: forwards to the member function img.nr().  As
          stated at the top of this file, an image type without a suitable .nr()
          member should provide its own overload of num_rows().
!*/
template <typename image_type>
inline long num_columns(
    const image_type& img
)
{
    const long column_count = img.nc();
    return column_count;
}
/*!
    ensures
        - Default implementation: forwards to the member function img.nc().  As
          stated at the top of this file, an image type without a suitable .nc()
          member should provide its own overload of num_columns().
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_GeNERIC_IMAGE_Hh_

View File

@@ -0,0 +1,628 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_OBJECT_DeTECTOR_Hh_
#define DLIB_OBJECT_DeTECTOR_Hh_
#include "object_detector_abstract.h"
#include "../geometry.h"
#include <vector>
#include "box_overlap_testing.h"
#include "full_object_detection.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct rect_detection
{
    // Orders detections by detection_confidence only; weight_index and rect do
    // not take part in the comparison.
    bool operator<(const rect_detection& item) const
    {
        return item.detection_confidence > detection_confidence;
    }

    double detection_confidence;
    unsigned long weight_index;   // presumably identifies the weight vector that fired -- TODO confirm
    rectangle rect;
};
struct full_detection
{
    // Orders detections by detection_confidence only; weight_index and rect do
    // not take part in the comparison.
    bool operator<(const full_detection& item) const
    {
        return item.detection_confidence > detection_confidence;
    }

    double detection_confidence;
    unsigned long weight_index;   // presumably identifies the weight vector that fired -- TODO confirm
    full_object_detection rect;   // despite the name, this is a full_object_detection
};
// ----------------------------------------------------------------------------------------
template <typename image_scanner_type>
struct processed_weight_vector
{
    typedef typename image_scanner_type::feature_vector_type feature_vector_type;

    processed_weight_vector() {}

    void init (
        const image_scanner_type&
    )
    /*!
        requires
            - w has already been assigned its value.
        notes
            - In this generic version init() does nothing.  The function exists so
              that an image scanner can overload the processed_weight_vector template
              and hand back a different kind of object from get_detect_argument().
              For example, the scan_fhog_pyramid object uses an overload that causes
              get_detect_argument() to return the special fhog_filterbank object
              instead of a feature_vector_type.  That avoids constructing the
              fhog_filterbank during each call to detect and therefore speeds up
              detection.
    !*/
    {
    }

    // Returns what is passed as the first argument to image_scanner_type::detect().
    // In this generic version that is simply the stored weight vector.
    const feature_vector_type& get_detect_argument() const
    {
        return this->w;
    }

    feature_vector_type w;
};
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type_
    >
class object_detector
{
    // Bundles an image scanner, one or more weight vectors, and a test_box_overlap
    // object used for non-max suppression of the scanner's output.
public:
    typedef image_scanner_type_ image_scanner_type;
    typedef typename image_scanner_type::feature_vector_type feature_vector_type;

    // ---- construction ----

    object_detector (
    );

    object_detector (
        const object_detector& item
    );

    object_detector (
        const image_scanner_type& scanner_,
        const test_box_overlap& overlap_tester_,
        const feature_vector_type& w_
    );

    object_detector (
        const image_scanner_type& scanner_,
        const test_box_overlap& overlap_tester_,
        const std::vector<feature_vector_type>& w_
    );

    explicit object_detector (
        const std::vector<object_detector>& detectors
    );

    // ---- accessors ----

    // Number of weight vectors held by this object.
    unsigned long num_detectors (
    ) const
    {
        return w.size();
    }

    // The raw idx-th weight vector.  No bounds checking is performed here.
    const feature_vector_type& get_w (
        unsigned long idx = 0
    ) const
    {
        const processed_weight_vector<image_scanner_type>& entry = w[idx];
        return entry.w;
    }

    // The idx-th weight vector together with whatever init() derived from it.
    // No bounds checking is performed here.
    const processed_weight_vector<image_scanner_type>& get_processed_w (
        unsigned long idx = 0
    ) const
    {
        return w[idx];
    }

    const test_box_overlap& get_overlap_tester (
    ) const;

    const image_scanner_type& get_scanner (
    ) const;

    object_detector& operator= (
        const object_detector& item
    );

    // ---- detection ----

    template <
        typename image_type
        >
    std::vector<rectangle> operator() (
        const image_type& img,
        double adjust_threshold = 0
    );

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<std::pair<double, rectangle> >& final_dets,
        double adjust_threshold = 0
    );

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<std::pair<double, full_object_detection> >& final_dets,
        double adjust_threshold = 0
    );

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<full_object_detection>& final_dets,
        double adjust_threshold = 0
    );

    // These typedefs are here for backwards compatibility with previous versions of
    // dlib.
    typedef ::dlib::rect_detection rect_detection;
    typedef ::dlib::full_detection full_detection;

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<rect_detection>& final_dets,
        double adjust_threshold = 0
    );

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<full_detection>& final_dets,
        double adjust_threshold = 0
    );

    // ---- serialization hooks ----

    template <typename T>
    friend void serialize (
        const object_detector<T>& item,
        std::ostream& out
    );

    template <typename T>
    friend void deserialize (
        object_detector<T>& item,
        std::istream& in
    );

    // The helper and the data members below are declared public in this file.
public:

    // True if rect overlaps, according to boxes_overlap, any box already in rects.
    bool overlaps_any_box (
        const std::vector<rect_detection>& rects,
        const dlib::rectangle& rect
    ) const
    {
        unsigned long idx = 0;
        while (idx < rects.size())
        {
            if (boxes_overlap(rects[idx].rect, rect))
                return true;
            ++idx;
        }
        return false;
    }

    test_box_overlap boxes_overlap;
    std::vector<processed_weight_vector<image_scanner_type> > w;
    image_scanner_type scanner;
};
// ----------------------------------------------------------------------------------------
template <typename T>
void serialize (
    const object_detector<T>& item,
    std::ostream& out
)
{
    // Layout written here is tagged as format version 2:
    //   version, scanner configuration, overlap tester, weight count, weights.
    int version = 2;
    serialize(version, out);

    // Write a scanner that received only item's configuration via
    // copy_configuration(), rather than item.scanner itself.
    T config_only_scanner;
    config_only_scanner.copy_configuration(item.scanner);
    serialize(config_only_scanner, out);

    serialize(item.boxes_overlap, out);

    // serialize all the weight vectors, preceded by how many there are
    serialize(item.w.size(), out);
    unsigned long idx = 0;
    while (idx < item.w.size())
    {
        serialize(item.w[idx].w, out);
        ++idx;
    }
}
// ----------------------------------------------------------------------------------------
template <typename T>
void deserialize (
    object_detector<T>& item,
    std::istream& in
)
{
    int version = 0;
    deserialize(version, in);

    switch (version)
    {
        case 1:
        {
            // Version 1 layout: scanner, exactly one weight vector, overlap tester.
            deserialize(item.scanner, in);
            item.w.resize(1);
            deserialize(item.w[0].w, in);
            item.w[0].init(item.scanner);
            deserialize(item.boxes_overlap, in);
            break;
        }

        case 2:
        {
            // Version 2 layout: scanner, overlap tester, weight count, weights.
            deserialize(item.scanner, in);
            deserialize(item.boxes_overlap, in);

            unsigned long num_detectors = 0;
            deserialize(num_detectors, in);
            item.w.resize(num_detectors);

            for (unsigned long k = 0; k < item.w.size(); ++k)
            {
                deserialize(item.w[k].w, in);
                // init() runs after the weights are in place, using the scanner
                // that was just read.
                item.w[k].init(item.scanner);
            }
            break;
        }

        default:
            throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
    }
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// object_detector member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
object_detector<image_scanner_type>::
object_detector (
)
{
    // Intentionally empty: all members are default constructed, which leaves the
    // vector of weight vectors empty.
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
object_detector<image_scanner_type>::
object_detector (
    const object_detector& item
) :
    boxes_overlap(item.boxes_overlap),
    w(item.w)
{
    // The scanner itself is not copied; only its configuration is transferred.
    scanner.copy_configuration(item.scanner);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
object_detector<image_scanner_type>::
object_detector (
    const image_scanner_type& scanner_,
    const test_box_overlap& overlap_tester,
    const feature_vector_type& w_
) :
    boxes_overlap(overlap_tester)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 &&
                w_.size() == scanner_.get_num_dimensions() + 1,
        "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
        << "\n\t w_.size(): " << w_.size()
        << "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
        << "\n\t this: " << this
        );

    // Only the configuration of scanner_ is taken over.
    scanner.copy_configuration(scanner_);

    // Store the single weight vector, then let it prepare itself against our
    // own scanner (not the caller's scanner_).
    w.resize(1);
    processed_weight_vector<image_scanner_type>& slot = w.front();
    slot.w = w_;
    slot.init(scanner);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
object_detector<image_scanner_type>::
object_detector (
    const image_scanner_type& scanner_,
    const test_box_overlap& overlap_tester,
    const std::vector<feature_vector_type>& w_
) :
    boxes_overlap(overlap_tester)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 && w_.size() > 0,
        "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
        << "\n\t w_.size(): " << w_.size()
        << "\n\t this: " << this
        );

#ifdef ENABLE_ASSERTS
    // Each individual weight vector must have the expected length.  The loop is
    // compiled only when ENABLE_ASSERTS is defined.
    for (unsigned long i = 0; i < w_.size(); ++i)
    {
        DLIB_ASSERT(w_[i].size() == scanner_.get_num_dimensions() + 1,
            "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
            << "\n\t w_["<<i<<"].size(): " << w_[i].size()
            << "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
            << "\n\t this: " << this
            );
    }
#endif

    // Only the configuration of scanner_ is taken over.
    scanner.copy_configuration(scanner_);

    // Store every weight vector and let each one prepare itself against our scanner.
    w.resize(w_.size());
    for (unsigned long k = 0; k < w.size(); ++k)
    {
        processed_weight_vector<image_scanner_type>& slot = w[k];
        slot.w = w_[k];
        slot.init(scanner);
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
object_detector<image_scanner_type>::
object_detector (
    const std::vector<object_detector>& detectors
)
{
    DLIB_ASSERT(detectors.size() != 0,
        "\t object_detector::object_detector(detectors)"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t this: " << this
        );

    // Flatten the weight vectors of all the given detectors into one list,
    // keeping the order in which the detectors were supplied.
    std::vector<feature_vector_type> weights;
    weights.reserve(detectors.size());
    for (unsigned long d = 0; d < detectors.size(); ++d)
    {
        const object_detector& src = detectors[d];
        const unsigned long count = src.num_detectors();
        for (unsigned long k = 0; k < count; ++k)
            weights.push_back(src.get_w(k));
    }

    // The scanner and overlap tester are taken from the first detector only.
    const object_detector& first = detectors[0];
    *this = object_detector(first.get_scanner(), first.get_overlap_tester(), weights);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
object_detector<image_scanner_type>& object_detector<image_scanner_type>::
operator= (
    const object_detector& item
)
{
    // Self assignment is a no-op.
    if (this != &item)
    {
        boxes_overlap = item.boxes_overlap;
        w = item.w;
        // Only the scanner's configuration is transferred.
        scanner.copy_configuration(item.scanner);
    }
    return *this;
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
void object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    std::vector<rect_detection>& final_dets,
    double adjust_threshold
)
{
    // The image is loaded once and then scanned with each weight vector in turn.
    scanner.load(img);

    std::vector<std::pair<double, rectangle> > raw_hits;
    std::vector<rect_detection> candidates;

    for (unsigned long widx = 0; widx < w.size(); ++widx)
    {
        // The threshold is read from the weight vector at index
        // get_num_dimensions().
        const double thresh = w[widx].w(scanner.get_num_dimensions());
        scanner.detect(w[widx].get_detect_argument(), raw_hits, thresh + adjust_threshold);

        for (unsigned long h = 0; h < raw_hits.size(); ++h)
        {
            rect_detection hit;
            hit.rect = raw_hits[h].second;
            hit.weight_index = widx;
            // Confidence is reported relative to the unadjusted threshold.
            hit.detection_confidence = raw_hits[h].first-thresh;
            candidates.push_back(hit);
        }
    }

    // Do non-max suppression
    final_dets.clear();

    // Sort best-first only when several weight vectors contributed.  With a single
    // weight vector the order produced by scanner.detect() is used as is
    // (presumably already best-first -- confirm against the scanner).
    if (w.size() > 1)
        std::sort(candidates.rbegin(), candidates.rend());

    // Keep a candidate only if it does not overlap a box that was already accepted.
    for (unsigned long c = 0; c < candidates.size(); ++c)
    {
        if (!overlaps_any_box(final_dets, candidates[c].rect))
            final_dets.push_back(candidates[c]);
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
void object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    std::vector<full_detection>& final_dets,
    double adjust_threshold
)
{
    // Run the rectangle based detector first.
    std::vector<rect_detection> boxes;
    (*this)(img,boxes,adjust_threshold);

    // convert all the rectangle detections into full_object_detections.
    final_dets.resize(boxes.size());
    for (unsigned long k = 0; k < boxes.size(); ++k)
    {
        const rect_detection& src = boxes[k];
        full_detection& dst = final_dets[k];

        dst.detection_confidence = src.detection_confidence;
        dst.weight_index = src.weight_index;
        // The full detection is resolved with the weight vector that produced the box.
        dst.rect = scanner.get_full_object_detection(src.rect, w[src.weight_index].w);
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
std::vector<rectangle> object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    double adjust_threshold
)
{
    std::vector<rect_detection> boxes;
    (*this)(img,boxes,adjust_threshold);

    // Strip everything except the bounding boxes, preserving their order.
    std::vector<rectangle> final_dets;
    final_dets.reserve(boxes.size());
    for (unsigned long k = 0; k < boxes.size(); ++k)
        final_dets.push_back(boxes[k].rect);

    return final_dets;
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
void object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    std::vector<std::pair<double, rectangle> >& final_dets,
    double adjust_threshold
)
{
    std::vector<rect_detection> boxes;
    (*this)(img,boxes,adjust_threshold);

    // Repack each detection as a (confidence, box) pair, preserving the order.
    final_dets.resize(boxes.size());
    for (unsigned long k = 0; k < boxes.size(); ++k)
    {
        final_dets[k].first = boxes[k].detection_confidence;
        final_dets[k].second = boxes[k].rect;
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
void object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    std::vector<std::pair<double, full_object_detection> >& final_dets,
    double adjust_threshold
)
{
    std::vector<rect_detection> boxes;
    (*this)(img,boxes,adjust_threshold);

    final_dets.clear();
    final_dets.reserve(boxes.size());

    // convert all the rectangle detections into full_object_detections.
    for (unsigned long k = 0; k < boxes.size(); ++k)
    {
        const rect_detection& src = boxes[k];
        // Resolve the box using the weight vector that generated it.
        final_dets.push_back(std::make_pair(src.detection_confidence,
            scanner.get_full_object_detection(src.rect, w[src.weight_index].w)));
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
void object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    std::vector<full_object_detection>& final_dets,
    double adjust_threshold
)
{
    std::vector<rect_detection> boxes;
    (*this)(img,boxes,adjust_threshold);

    final_dets.clear();
    final_dets.reserve(boxes.size());

    // convert all the rectangle detections into full_object_detections.
    for (unsigned long k = 0; k < boxes.size(); ++k)
    {
        const rect_detection& src = boxes[k];
        // Resolve the box using the weight vector that generated it; the
        // confidence score is not part of this overload's output.
        final_dets.push_back(scanner.get_full_object_detection(src.rect, w[src.weight_index].w));
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
const test_box_overlap& object_detector<image_scanner_type>::
get_overlap_tester (
) const
{
    // Read-only access to the overlap tester stored in this object.
    return this->boxes_overlap;
}
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type
    >
const image_scanner_type& object_detector<image_scanner_type>::
get_scanner (
) const
{
    // Read-only access to the image scanner stored in this object.
    return this->scanner;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OBJECT_DeTECTOR_Hh_

View File

@@ -0,0 +1,404 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
#include "../geometry.h"
#include <vector>
#include "box_overlap_testing_abstract.h"
#include "full_object_detection_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct rect_detection
{
/*!
    WHAT THIS OBJECT REPRESENTS
        One detection reported by object_detector::operator() (see that
        function's documentation in this file):
            - detection_confidence: the strength of the detection, i.e. the
              detection value output by the scanner minus the threshold value
              stored at the end of the weight vector that produced it.
            - weight_index: the index of the weight vector that generated this
              detection.
            - rect: the bounding box of the detection.
!*/
double detection_confidence;
unsigned long weight_index;
rectangle rect;
};
struct full_detection
{
/*!
    WHAT THIS OBJECT REPRESENTS
        The counterpart of rect_detection in which rect is a
        full_object_detection rather than a rectangle.  According to the
        object_detector::operator() documentation in this file, this means the
        output includes part locations.
            - detection_confidence: the strength of the detection.
            - weight_index: the index of the weight vector that generated this
              detection.
            - rect: the full_object_detection for this detection.
!*/
double detection_confidence;
unsigned long weight_index;
full_object_detection rect;
};
// ----------------------------------------------------------------------------------------
template <
    typename image_scanner_type_
    >
class object_detector
{
    /*!
        REQUIREMENTS ON image_scanner_type_
            image_scanner_type_ must be an implementation of
            dlib/image_processing/scan_image_pyramid_abstract.h or
            dlib/image_processing/scan_fhog_pyramid.h or
            dlib/image_processing/scan_image_custom.h or
            dlib/image_processing/scan_image_boxes_abstract.h

        WHAT THIS OBJECT REPRESENTS
            This object is a tool for detecting the positions of objects in an image.
            In particular, it is a simple container to aggregate an instance of an image
            scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
            scan_image_boxes), the weight vector needed by one of these image scanners,
            and finally an instance of test_box_overlap.  The test_box_overlap object
            is used to perform non-max suppression on the output of the image scanner
            object.

            Note further that this object can contain multiple weight vectors.  In this
            case, it will run the image scanner multiple times, once with each of the
            weight vectors.  Then it will aggregate the results from all runs, perform
            non-max suppression and then return the results.  Therefore, the object_detector
            can also be used as a container for a set of object detectors that all use
            the same image scanner but different weight vectors.  This is useful since
            the object detection procedure has two parts.  A loading step where the
            image is loaded into the scanner, then a detect step which uses the weight
            vector to locate objects in the image.  Since the loading step is independent
            of the weight vector it is most efficient to run multiple detectors by
            performing one load into a scanner followed by multiple detect steps.  This
            avoids unnecessarily loading the same image into the scanner multiple times.
    !*/

public:
    typedef image_scanner_type_ image_scanner_type;
    typedef typename image_scanner_type::feature_vector_type feature_vector_type;

    object_detector (
    );
    /*!
        ensures
            - This detector won't generate any detections when
              presented with an image.
            - #num_detectors() == 0
    !*/

    object_detector (
        const object_detector& item
    );
    /*!
        ensures
            - #*this is a copy of item
            - #get_scanner() == item.get_scanner()
              (note that only the "configuration" of item.get_scanner() is copied.
              I.e. the copy is done using copy_configuration())
    !*/

    object_detector (
        const image_scanner_type& scanner,
        const test_box_overlap& overlap_tester,
        const feature_vector_type& w
    );
    /*!
        requires
            - w.size() == scanner.get_num_dimensions() + 1
            - scanner.get_num_detection_templates() > 0
        ensures
            - When the operator() member function is called it will
              invoke scanner.detect(w,dets,w(w.size()-1)), suppress
              overlapping detections, and then report the results.
            - when #*this is used to detect objects, the set of
              output detections will never contain any overlaps
              with respect to overlap_tester.  That is, for all
              pairs of returned detections A and B, we will always
              have: overlap_tester(A,B) == false
            - #get_w() == w
            - #get_overlap_tester() == overlap_tester
            - #get_scanner() == scanner
              (note that only the "configuration" of scanner is copied.
              I.e. the copy is done using copy_configuration())
            - #num_detectors() == 1
    !*/

    object_detector (
        const image_scanner_type& scanner,
        const test_box_overlap& overlap_tester,
        const std::vector<feature_vector_type>& w
    );
    /*!
        requires
            - for all valid i:
                - w[i].size() == scanner.get_num_dimensions() + 1
            - scanner.get_num_detection_templates() > 0
            - w.size() > 0
        ensures
            - When the operator() member function is called it will invoke
              get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i.  Then it
              will take all the detections output by the calls to detect() and suppress
              overlapping detections, and finally report the results.
            - when #*this is used to detect objects, the set of output detections will
              never contain any overlaps with respect to overlap_tester.  That is, for
              all pairs of returned detections A and B, we will always have:
                overlap_tester(A,B) == false
            - for all valid i:
                - #get_w(i) == w[i]
            - #num_detectors() == w.size()
            - #get_overlap_tester() == overlap_tester
            - #get_scanner() == scanner
              (note that only the "configuration" of scanner is copied.
              I.e. the copy is done using copy_configuration())
    !*/

    explicit object_detector (
        const std::vector<object_detector>& detectors
    );
    /*!
        requires
            - detectors.size() != 0
            - All the detectors must use compatibly configured scanners.  That is, it
              must make sense for the weight vector from one detector to be used with
              the scanner from any other.
            - for all valid i:
                - detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
                  (i.e. all the detectors use scanners that use the same kind of feature vectors.)
        ensures
            - Very much like the above constructor, this constructor takes all the
              given detectors and packs them into #*this.  That is, invoking operator()
              on #*this will run all the detectors, perform non-max suppression, and
              then report the results.
            - When #*this is used to detect objects, the set of output detections will
              never contain any overlaps with respect to overlap_tester.  That is, for
              all pairs of returned detections A and B, we will always have:
                overlap_tester(A,B) == false
            - #num_detectors() == The sum of detectors[i].num_detectors() for all valid i.
            - #get_overlap_tester() == detectors[0].get_overlap_tester()
            - #get_scanner() == detectors[0].get_scanner()
              (note that only the "configuration" of scanner is copied.  I.e. the copy
              is done using copy_configuration())
    !*/

    unsigned long num_detectors (
    ) const;
    /*!
        ensures
            - returns the number of weight vectors in this object.  Since each weight
              vector logically represents an object detector, this returns the number
              of object detectors contained in this object.
    !*/

    const feature_vector_type& get_w (
        unsigned long idx = 0
    ) const;
    /*!
        requires
            - idx < num_detectors()
        ensures
            - returns the idx-th weight vector loaded into this object.  All the weight vectors
              have the same dimension and logically each represents a different detector.
    !*/

    const test_box_overlap& get_overlap_tester (
    ) const;
    /*!
        ensures
            - returns the overlap tester used by this object
    !*/

    const image_scanner_type& get_scanner (
    ) const;
    /*!
        ensures
            - returns the image scanner used by this object.
    !*/

    object_detector& operator= (
        const object_detector& item
    );
    /*!
        ensures
            - #*this is a copy of item
            - #get_scanner() == item.get_scanner()
              (note that only the "configuration" of item.get_scanner() is
              copied.  I.e. the copy is done using copy_configuration())
            - returns #*this
    !*/

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<rect_detection>& dets,
        double adjust_threshold = 0
    );
    /*!
        requires
            - img == an object which can be accepted by image_scanner_type::load()
        ensures
            - Performs object detection on the given image and stores the detected
              objects into #dets.  In particular, we will have that:
                - #dets is sorted such that the highest confidence detections come
                  first.  E.g. element 0 is the best detection, element 1 the next
                  best, and so on.
                - #dets.size() == the number of detected objects.
                - #dets[i].detection_confidence == The strength of the i-th detection.
                  Larger values indicate that the detector is more confident that
                  #dets[i] is a correct detection rather than being a false alarm.
                  Moreover, the detection_confidence is equal to the detection value
                  output by the scanner minus the threshold value stored at the end of
                  the weight vector in get_w(#dets[i].weight_index).
                - #dets[i].weight_index == the index for the weight vector that
                  generated this detection.
                - #dets[i].rect == the bounding box for the i-th detection.
            - #get_scanner() will have been loaded with img.  Therefore, you can call
              #get_scanner().get_feature_vector() to obtain the feature vectors or
              #get_scanner().get_full_object_detection() to get the
              full_object_detections for the resulting object detection boxes.
            - The detection threshold is adjusted by having adjust_threshold added to
              it.  Therefore, an adjust_threshold value > 0 makes detecting objects
              harder while a negative value makes it easier.  Moreover, the following
              will be true for all valid i:
                - #dets[i].detection_confidence >= adjust_threshold
              This means that, for example, you can obtain the maximum possible number
              of detections by setting adjust_threshold equal to negative infinity.
    !*/

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<full_detection>& dets,
        double adjust_threshold = 0
    );
    /*!
        requires
            - img == an object which can be accepted by image_scanner_type::load()
        ensures
            - This function is identical to the above operator() routine, except that
              it outputs full_object_detections instead of rectangles.  This means that
              the output includes part locations.  In particular, calling this function
              is the same as calling the above operator() routine and then using
              get_scanner().get_full_object_detection() to resolve all the rectangles
              into full_object_detections.  Therefore, this version of operator() is
              simply a convenience function for performing this set of operations.
    !*/

    template <
        typename image_type
        >
    std::vector<rectangle> operator() (
        const image_type& img,
        double adjust_threshold = 0
    );
    /*!
        requires
            - img == an object which can be accepted by image_scanner_type::load()
        ensures
            - This function is identical to the above operator() routine, except that
              it returns a std::vector<rectangle> which contains just the bounding
              boxes of all the detections.
    !*/

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<std::pair<double, rectangle> >& dets,
        double adjust_threshold = 0
    );
    /*!
        requires
            - img == an object which can be accepted by image_scanner_type::load()
        ensures
            - performs object detection on the given image and stores the
              detected objects into #dets.  In particular, we will have that:
                - #dets is sorted such that the highest confidence detections
                  come first.  E.g. element 0 is the best detection, element 1
                  the next best, and so on.
                - #dets.size() == the number of detected objects.
                - #dets[i].first gives the "detection confidence", of the i-th
                  detection.  This is the detection value output by the scanner minus
                  the threshold value stored at the end of the weight vector in get_w().
                - #dets[i].second == the bounding box for the i-th detection.
            - #get_scanner() will have been loaded with img.  Therefore, you can call
              #get_scanner().get_feature_vector() to obtain the feature vectors or
              #get_scanner().get_full_object_detection() to get the
              full_object_detections for the resulting object detection boxes.
            - The detection threshold is adjusted by having adjust_threshold added to
              it.  Therefore, an adjust_threshold value > 0 makes detecting objects
              harder while a negative value makes it easier.  Moreover, the following
              will be true for all valid i:
                - #dets[i].first >= adjust_threshold
              This means that, for example, you can obtain the maximum possible number
              of detections by setting adjust_threshold equal to negative infinity.
    !*/

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<std::pair<double, full_object_detection> >& dets,
        double adjust_threshold = 0
    );
    /*!
        requires
            - img == an object which can be accepted by image_scanner_type::load()
        ensures
            - This function is identical to the above operator() routine, except that
              it outputs full_object_detections instead of rectangles.  This means that
              the output includes part locations.  In particular, calling this function
              is the same as calling the above operator() routine and then using
              get_scanner().get_full_object_detection() to resolve all the rectangles
              into full_object_detections.  Therefore, this version of operator() is
              simply a convenience function for performing this set of operations.
    !*/

    template <
        typename image_type
        >
    void operator() (
        const image_type& img,
        std::vector<full_object_detection>& dets,
        double adjust_threshold = 0
    );
    /*!
        requires
            - img == an object which can be accepted by image_scanner_type::load()
        ensures
            - This function is identical to the above operator() routine, except that
              it doesn't include a double valued score.  That is, it just outputs the
              full_object_detections.
    !*/
};
// ----------------------------------------------------------------------------------------
template <typename T>
void serialize (
const object_detector<T>& item,
std::ostream& out
);
/*!
    provides serialization support.  Note that this function only saves the
    configuration part of item.get_scanner().  That is, we use the scanner's
    copy_configuration() function to get a copy of the scanner that doesn't contain any
    loaded image data and we then save just the configuration part of the scanner.
    This means that any serialized object_detectors won't remember any images they have
    processed but will otherwise contain all their state and be able to detect objects
    in new images.

    Besides the scanner configuration, the data written to out consists of the
    overlap tester and every weight vector held by item.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void deserialize (
object_detector<T>& item,
std::istream& in
);
/*!
    provides deserialization support.  The state of item (scanner configuration,
    overlap tester, and weight vectors) is replaced by what is read from in.
    throws
        - serialization_error
          Thrown when the stream carries a format version this function does not
          recognize.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_

View File

@@ -0,0 +1,316 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_Hh_
#define DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_Hh_
#include "remove_unobtainable_rectangles_abstract.h"
#include "scan_image_pyramid.h"
#include "scan_image_boxes.h"
#include "scan_image_custom.h"
#include "scan_fhog_pyramid.h"
#include "../svm/structural_object_detection_trainer.h"
#include "../geometry.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
inline bool matches_rect (
const std::vector<rectangle>& rects,
const rectangle& rect,
const double eps
)
{
for (unsigned long i = 0; i < rects.size(); ++i)
{
const double score = (rect.intersect(rects[i])).area()/(double)(rect+rects[i]).area();
if (score > eps)
return true;
}
return false;
}
inline rectangle get_best_matching_rect (
    const std::vector<rectangle>& rects,
    const rectangle& rect
)
/*!
    ensures
        - returns the element of rects whose intersection-area /
          (rect + element)-area ratio with rect is largest.  On ties the earliest
          such element wins.
        - if no element scores above the initial value of -1 (e.g. rects is
          empty), a default constructed rectangle is returned.
!*/
{
    rectangle winner;
    double winning_score = -1;

    for (unsigned long idx = 0; idx < rects.size(); ++idx)
    {
        const rectangle& candidate = rects[idx];
        const double shared_area = (rect.intersect(candidate)).area();
        const double combined_area = static_cast<double>((rect+candidate).area());
        const double score = shared_area/combined_area;

        // Strict comparison: a later candidate must beat the current best.
        if (score > winning_score)
        {
            winner = candidate;
            winning_score = score;
        }
    }
    return winner;
}
// ------------------------------------------------------------------------------------
template <
typename image_array_type,
typename image_scanner_type
>
std::vector<std::vector<rectangle> > pyramid_remove_unobtainable_rectangles (
const structural_object_detection_trainer<image_scanner_type>& trainer,
const image_array_type& images,
std::vector<std::vector<rectangle> >& object_locations
)
{
using namespace dlib::impl;
// make sure requires clause is not broken
DLIB_ASSERT(images.size() == object_locations.size(),
"\t std::vector<std::vector<rectangle>> remove_unobtainable_rectangles()"
<< "\n\t Invalid inputs were given to this function."
);
std::vector<std::vector<rectangle> > rejects(images.size());
// If the trainer is setup to automatically fit the overlap tester to the data then
// we should use the loosest possible overlap tester here. Otherwise we should use
// the tester the trainer will use.
test_box_overlap boxes_overlap(0.9999999,1);
if (!trainer.auto_set_overlap_tester())
boxes_overlap = trainer.get_overlap_tester();
for (unsigned long k = 0; k < images.size(); ++k)
{
std::vector<rectangle> objs = object_locations[k];
// First remove things that don't have any matches with the candidate object
// locations.
std::vector<rectangle> good_rects;
for (unsigned long j = 0; j < objs.size(); ++j)
{
const rectangle rect = trainer.get_scanner().get_best_matching_rect(objs[j]);
const double score = (objs[j].intersect(rect)).area()/(double)(objs[j] + rect).area();
if (score > trainer.get_match_eps())
good_rects.push_back(objs[j]);
else
rejects[k].push_back(objs[j]);
}
object_locations[k] = good_rects;
// Remap these rectangles to the ones that can come out of the scanner. That
// way when we compare them to each other in the following loop we will know if
// any distinct truth rectangles get mapped to overlapping boxes.
objs.resize(good_rects.size());
for (unsigned long i = 0; i < good_rects.size(); ++i)
objs[i] = trainer.get_scanner().get_best_matching_rect(good_rects[i]);
good_rects.clear();
// now check for truth rects that are too close together.
for (unsigned long i = 0; i < objs.size(); ++i)
{
// check if objs[i] hits another box
bool hit_box = false;
for (unsigned long j = i+1; j < objs.size(); ++j)
{
if (boxes_overlap(objs[i], objs[j]))
{
hit_box = true;
break;
}
}
if (hit_box)
rejects[k].push_back(object_locations[k][i]);
else
good_rects.push_back(object_locations[k][i]);
}
object_locations[k] = good_rects;
}
return rejects;
}
}
// ----------------------------------------------------------------------------------------
// Overload of remove_unobtainable_rectangles() selected when the trainer's scanner
// is a scan_image_pyramid<Pyramid_type, Feature_extractor_type>.
//
// It does no work of its own: the arguments are forwarded unchanged to
// impl::pyramid_remove_unobtainable_rectangles() and that helper's return value is
// returned as is.  object_locations is handed over by non-const reference, so the
// helper is free to modify it in place.
template <
typename image_array_type,
typename Pyramid_type,
typename Feature_extractor_type
>
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
const structural_object_detection_trainer<scan_image_pyramid<Pyramid_type, Feature_extractor_type> >& trainer,
const image_array_type& images,
std::vector<std::vector<rectangle> >& object_locations
)
{
return impl::pyramid_remove_unobtainable_rectangles(trainer, images, object_locations);
}
// ----------------------------------------------------------------------------------------
// Overload of remove_unobtainable_rectangles() selected when the trainer's scanner
// is a scan_fhog_pyramid.
//
// Both template arguments of scan_fhog_pyramid are deduced.  Deducing only
// Pyramid_type would restrict this overload to scanners that use the default
// feature extractor, leaving trainers built on a custom Feature_extractor_type
// without any matching overload.
//
// The arguments are forwarded unchanged to
// impl::pyramid_remove_unobtainable_rectangles() and that helper's return value is
// returned as is.  object_locations is handed over by non-const reference, so the
// helper is free to modify it in place.
template <
    typename image_array_type,
    typename Pyramid_type,
    typename Feature_extractor_type
    >
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
    const structural_object_detection_trainer<scan_fhog_pyramid<Pyramid_type, Feature_extractor_type> >& trainer,
    const image_array_type& images,
    std::vector<std::vector<rectangle> >& object_locations
)
{
    return impl::pyramid_remove_unobtainable_rectangles(trainer, images, object_locations);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename image_array_type,
typename scanner_type,
typename get_boxes_functor
>
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
get_boxes_functor& bg,
const structural_object_detection_trainer<scanner_type>& trainer,
const image_array_type& images,
std::vector<std::vector<rectangle> >& object_locations
)
{
using namespace dlib::impl;
// make sure requires clause is not broken
DLIB_ASSERT(images.size() == object_locations.size(),
"\t std::vector<std::vector<rectangle>> remove_unobtainable_rectangles()"
<< "\n\t Invalid inputs were given to this function."
);
std::vector<rectangle> rects;
std::vector<std::vector<rectangle> > rejects(images.size());
// If the trainer is setup to automatically fit the overlap tester to the data then
// we should use the loosest possible overlap tester here. Otherwise we should use
// the tester the trainer will use.
test_box_overlap boxes_overlap(0.9999999,1);
if (!trainer.auto_set_overlap_tester())
boxes_overlap = trainer.get_overlap_tester();
for (unsigned long k = 0; k < images.size(); ++k)
{
std::vector<rectangle> objs = object_locations[k];
// Don't even bother computing the candidate rectangles if there aren't any
// object locations for this image since there isn't anything to do anyway.
if (objs.size() == 0)
continue;
bg(images[k], rects);
// First remove things that don't have any matches with the candidate object
// locations.
std::vector<rectangle> good_rects;
for (unsigned long j = 0; j < objs.size(); ++j)
{
if (matches_rect(rects, objs[j], trainer.get_match_eps()))
good_rects.push_back(objs[j]);
else
rejects[k].push_back(objs[j]);
}
object_locations[k] = good_rects;
// Remap these rectangles to the ones that can come out of the scanner. That
// way when we compare them to each other in the following loop we will know if
// any distinct truth rectangles get mapped to overlapping boxes.
objs.resize(good_rects.size());
for (unsigned long i = 0; i < good_rects.size(); ++i)
objs[i] = get_best_matching_rect(rects, good_rects[i]);
good_rects.clear();
// now check for truth rects that are too close together.
for (unsigned long i = 0; i < objs.size(); ++i)
{
// check if objs[i] hits another box
bool hit_box = false;
for (unsigned long j = i+1; j < objs.size(); ++j)
{
if (boxes_overlap(objs[i], objs[j]))
{
hit_box = true;
break;
}
}
if (hit_box)
rejects[k].push_back(object_locations[k][i]);
else
good_rects.push_back(object_locations[k][i]);
}
object_locations[k] = good_rects;
}
return rejects;
}
// ----------------------------------------------------------------------------------------
// Adapter that turns an object with a two argument load() member function into a
// callable.  The adapter only stores a reference, so the wrapped object has to
// outlive it.  Invoking the adapter as f(in, out) performs obj.load(in, out).
template <typename T>
struct load_to_functor
{
    // The wrapped object (not owned).
    T& obj;

    load_to_functor(T& wrapped) : obj(wrapped) {}

    template <typename input_type, typename output_type>
    void operator()(const input_type& in, output_type& out)
    {
        obj.load(in, out);
    }
};
}
// ----------------------------------------------------------------------------------------
// Overload of remove_unobtainable_rectangles() selected when the trainer's scanner
// is a scan_image_boxes<feature_extractor, box_generator>.
//
// The candidate boxes for each image are produced by the scanner's box generator;
// the filtering itself is done by impl::remove_unobtainable_rectangles(), whose
// result is returned unchanged.
template <
typename image_array_type,
typename feature_extractor,
typename box_generator
>
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
const structural_object_detection_trainer<scan_image_boxes<feature_extractor, box_generator> >& trainer,
const image_array_type& images,
std::vector<std::vector<rectangle> >& object_locations
)
{
// Work on a local copy of the box generator: the helper takes its functor by
// non-const reference while trainer is only available as const here.
box_generator bg = trainer.get_scanner().get_box_generator();
return impl::remove_unobtainable_rectangles(bg, trainer, images, object_locations);
}
// ----------------------------------------------------------------------------------------
// Overload of remove_unobtainable_rectangles() selected when the trainer's scanner
// is a scan_image_custom<feature_extractor>.
//
// Here the candidate boxes for an image come from the feature extractor's load()
// member function.  The filtering itself is done by
// impl::remove_unobtainable_rectangles(), whose result is returned unchanged.
template <
typename image_array_type,
typename feature_extractor
>
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
const structural_object_detection_trainer<scan_image_custom<feature_extractor> >& trainer,
const image_array_type& images,
std::vector<std::vector<rectangle> >& object_locations
)
{
// Build a private feature extractor configured like the one inside the
// trainer's scanner, so the scanner itself is left untouched.
feature_extractor fe;
fe.copy_configuration(trainer.get_scanner().get_feature_extractor());
// Wrap it so that calling bg(image, rects) invokes fe.load(image, rects).
impl::load_to_functor<feature_extractor> bg(fe);
return impl::remove_unobtainable_rectangles(bg, trainer, images, object_locations);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_Hh_

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_ABSTRACT_Hh_
#ifdef DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_ABSTRACT_Hh_
#include "scan_image_pyramid_abstract.h"
#include "scan_image_boxes_abstract.h"
#include "scan_image_custom_abstract.h"
#include "scan_fhog_pyramid_abstract.h"
#include "../svm/structural_object_detection_trainer_abstract.h"
#include "../geometry.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename image_scanner_type,
typename image_array_type,
typename Pyramid_type
>
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
const structural_object_detection_trainer<image_scanner_type>& trainer,
const image_array_type& images,
std::vector<std::vector<rectangle> >& object_locations
);
/*!
requires
- image_scanner_type must be either scan_image_boxes, scan_image_pyramid,
scan_image_custom, or scan_fhog_pyramid.
- images.size() == object_locations.size()
ensures
- Recall that the image scanner objects can't produce all possible rectangles
as object detections since they only consider a limited subset of all possible
object positions. Moreover, the structural_object_detection_trainer requires
its input training data to not contain any object positions which are unobtainable
by its scanner object. Therefore, remove_unobtainable_rectangles() is a tool
to filter out these unobtainable rectangles from the training data before giving
it to a structural_object_detection_trainer.
- This function interprets object_locations[i] as the set of object positions for
images[i], for all valid i.
- In particular, this function removes unobtainable rectangles from object_locations
and also returns a vector V such that:
- V.size() == object_locations.size()
- for all valid i:
- V[i] == the set of rectangles removed from object_locations[i]
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_ABSTRACT_Hh_

View File

@@ -0,0 +1,64 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_RENDER_FACE_DeTECTIONS_H_
#define DLIB_RENDER_FACE_DeTECTIONS_H_
#include "full_object_detection.h"
#include "../gui_widgets.h"
#include "render_face_detections_abstract.h"
#include <vector>
namespace dlib
{
// Converts face detections annotated with 68 parts (iBUG 68-point landmark
// scheme) into overlay lines that trace the outline of the facial features.
//
//  dets   - the detections to draw; every element must have exactly 68 parts
//           (checked with DLIB_CASSERT).
//  color  - the color given to every generated line.
//
// Returns one flat vector holding the lines of all detections, in the order of
// dets.  Consecutive parts of a feature are joined by a line, and closed features
// (eyes, lips, lower nose) get one extra line that closes the outline.
inline std::vector<image_window::overlay_line> render_face_detections (
    const std::vector<full_object_detection>& dets,
    const rgb_pixel color = rgb_pixel(0,255,0)
)
{
    std::vector<image_window::overlay_line> lines;
    for (unsigned long i = 0; i < dets.size(); ++i)
    {
        DLIB_CASSERT(dets[i].num_parts() == 68,
            "\t std::vector<image_window::overlay_line> render_face_detections()"
            << "\n\t Invalid inputs were given to this function. "
            << "\n\t dets["<<i<<"].num_parts(): " << dets[i].num_parts()
        );

        const full_object_detection& d = dets[i];

        // Note: the part loops use their own counter p so that they do not
        // shadow the detection index i.

        // jaw line, parts 0-16
        for (unsigned long p = 1; p <= 16; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));

        // bridge of the nose, parts 27-30
        for (unsigned long p = 28; p <= 30; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));

        // first eyebrow, parts 17-21
        for (unsigned long p = 18; p <= 21; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));

        // second eyebrow, parts 22-26
        for (unsigned long p = 23; p <= 26; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));

        // lower part of the nose, parts 30-35, closed back to the nose tip
        for (unsigned long p = 31; p <= 35; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));
        lines.push_back(image_window::overlay_line(d.part(30), d.part(35), color));

        // first eye, parts 36-41 (closed outline)
        for (unsigned long p = 37; p <= 41; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));
        lines.push_back(image_window::overlay_line(d.part(36), d.part(41), color));

        // second eye, parts 42-47 (closed outline)
        for (unsigned long p = 43; p <= 47; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));
        lines.push_back(image_window::overlay_line(d.part(42), d.part(47), color));

        // outer lip contour, parts 48-59 (closed outline)
        for (unsigned long p = 49; p <= 59; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));
        lines.push_back(image_window::overlay_line(d.part(48), d.part(59), color));

        // inner lip contour, parts 60-67 (closed outline)
        for (unsigned long p = 61; p <= 67; ++p)
            lines.push_back(image_window::overlay_line(d.part(p), d.part(p-1), color));
        lines.push_back(image_window::overlay_line(d.part(60), d.part(67), color));
    }
    return lines;
}
}
#endif // DLIB_RENDER_FACE_DeTECTIONS_H_

View File

@@ -0,0 +1,33 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_RENDER_FACE_DeTECTIONS_ABSTRACT_H_
#ifdef DLIB_RENDER_FACE_DeTECTIONS_ABSTRACT_H_
#include "full_object_detection_abstract.h"
#include "../gui_widgets.h"
namespace dlib
{
inline std::vector<image_window::overlay_line> render_face_detections (
const std::vector<full_object_detection>& dets,
const rgb_pixel color = rgb_pixel(0,255,0)
);
/*!
requires
- for all valid i:
- dets[i].num_parts() == 68
ensures
- Interprets the given objects as face detections with parts annotated using
the iBUG face landmark scheme. We then return a set of overlay lines that
will draw the objects onto the screen in a way that properly draws the
outline of the face features defined by the part locations.
- returns a single vector holding the lines necessary to render every face
detection in dets.  The lines for all detections are appended to this one
vector, so it generally contains many more than dets.size() elements.
!*/
}
#endif // DLIB_RENDER_FACE_DeTECTIONS_ABSTRACT_H_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,784 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_Hh_
#ifdef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_Hh_
#include <vector>
#include "../image_transforms/fhog_abstract.h"
#include "object_detector_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename feature_extractor_type
>
matrix<unsigned char> draw_fhog (
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
const unsigned long weight_index = 0,
const long cell_draw_size = 15
);
/*!
requires
- cell_draw_size > 0
- weight_index < detector.num_detectors()
- detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
(i.e. the detector must have been populated with a HOG filter)
ensures
- Converts the HOG filters in the given detector (specifically, the filters in
detector.get_w(weight_index)) into an image suitable for display on the
screen. In particular, we draw all the HOG cells into a grayscale image in a
way that shows the magnitude and orientation of the gradient energy in each
cell. The resulting image is then returned.
!*/
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename feature_extractor_type
>
unsigned long num_separable_filters (
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
const unsigned long weight_index = 0
);
/*!
requires
- weight_index < detector.num_detectors()
- detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
(i.e. the detector must have been populated with a HOG filter)
ensures
- Returns the number of separable filters necessary to represent the HOG
filters in the given detector's weight_index'th filter. This is the filter
defined by detector.get_w(weight_index).
!*/
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename feature_extractor_type
>
object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> > threshold_filter_singular_values (
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
double thresh,
const unsigned long weight_index = 0
);
/*!
requires
- thresh >= 0
- weight_index < detector.num_detectors()
- detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
(i.e. the detector must have been populated with a HOG filter)
ensures
- Removes all components of the filters in the given detector that have
singular values that are smaller than the given threshold. Therefore, this
function allows you to control how many separable filters are in a detector.
In particular, as thresh gets larger the quantity
num_separable_filters(threshold_filter_singular_values(detector,thresh,weight_index),weight_index)
will generally get smaller and therefore give a faster running detector.
However, note that at some point a large enough thresh will drop too much
information from the filters and their accuracy will suffer.
- returns the updated detector
!*/
// ----------------------------------------------------------------------------------------
class default_fhog_feature_extractor
{
/*!
WHAT THIS OBJECT REPRESENTS
The scan_fhog_pyramid object defined below is primarily meant to be used
with the feature extraction technique implemented by extract_fhog_features().
This technique can generally be understood as taking an input image and
outputting a multi-planed output image of floating point numbers that
somehow describe the image contents. Since there are many ways to define
how this feature mapping is performed, the scan_fhog_pyramid allows you to
replace the extract_fhog_features() method with a customized method of your
choosing. To do this you implement a class with the same interface as
default_fhog_feature_extractor.
Therefore, the point of default_fhog_feature_extractor is twofold. First,
it provides the default FHOG feature extraction method used by scan_fhog_pyramid.
Second, it serves to document the interface you need to implement to define
your own custom HOG style feature extraction.
!*/
public:
rectangle image_to_feats (
const rectangle& rect,
int cell_size,
int filter_rows_padding,
int filter_cols_padding
) const { return image_to_fhog(rect, cell_size, filter_rows_padding, filter_cols_padding); }
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures
- Maps a rectangle from the coordinates in an input image to the corresponding
area in the output feature image.
!*/
rectangle feats_to_image (
const rectangle& rect,
int cell_size,
int filter_rows_padding,
int filter_cols_padding
) const { return fhog_to_image(rect, cell_size, filter_rows_padding, filter_cols_padding); }
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures
- Maps a rectangle from the coordinates of the hog feature image back to
the input image.
- Mapping from feature space to image space is an invertible
transformation. That is, for any rectangle R we have:
R == image_to_feats(feats_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
cell_size,filter_rows_padding,filter_cols_padding).
!*/
template <
typename image_type
>
void operator()(
const image_type& img,
dlib::array<array2d<float> >& hog,
int cell_size,
int filter_rows_padding,
int filter_cols_padding
) const { extract_fhog_features(img,hog,cell_size,filter_rows_padding,filter_cols_padding); }
/*!
requires
- image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename image_type::type> is defined)
ensures
- Extracts FHOG features by calling extract_fhog_features(). The results are
stored into #hog. Note that if you are implementing your own feature extractor you can
pretty much do whatever you want in terms of feature extraction so long as the following
conditions are met:
- #hog.size() == get_num_planes()
- Each image plane of #hog has the same dimensions.
- for all valid i, r, and c:
- #hog[i][r][c] == a feature value describing the image content centered at the
following pixel location in img:
feats_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding)
!*/
inline unsigned long get_num_planes (
) const { return 31; }
/*!
ensures
- returns the number of planes in the hog image output by the operator()
method.
!*/
};
inline void serialize (const default_fhog_feature_extractor&, std::ostream&) {}
inline void deserialize (default_fhog_feature_extractor&, std::istream&) {}
/*!
Provides serialization support. Note that there is no state in the default hog
feature extractor so these functions do nothing. But if you define a custom
feature extractor then make sure you remember to serialize any state in your
feature extractor.
!*/
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type = default_fhog_feature_extractor
>
class scan_fhog_pyramid : noncopyable
{
/*!
REQUIREMENTS ON Pyramid_type
- Must be one of the pyramid_down objects defined in
dlib/image_transforms/image_pyramid_abstract.h or an object with a
compatible interface
REQUIREMENTS ON Feature_extractor_type
- Must be a type with an interface compatible with the
default_fhog_feature_extractor.
INITIAL VALUE
- get_padding() == 1
- get_cell_size() == 8
- get_detection_window_width() == 64
- get_detection_window_height() == 64
- get_max_pyramid_levels() == 1000
- get_min_pyramid_layer_width() == 64
- get_min_pyramid_layer_height() == 64
- get_nuclear_norm_regularization_strength() == 0
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a fixed sized sliding window classifier
over an image pyramid. In particular, it slides a linear classifier over
a HOG pyramid as discussed in the paper:
Histograms of Oriented Gradients for Human Detection by Navneet Dalal
and Bill Triggs, CVPR 2005
However, we augment the method slightly to use the version of HOG features
from:
Object Detection with Discriminatively Trained Part Based Models by
P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
We do this since these HOG features have been shown to give superior performance.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be
protected by a mutex lock except for the case where you are copying the
configuration (via copy_configuration()) of a scan_fhog_pyramid object to
many other threads. In this case, it is safe to copy the configuration of
a shared object so long as no other operations are performed on it.
!*/
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Pyramid_type pyramid_type;
typedef Feature_extractor_type feature_extractor_type;
scan_fhog_pyramid (
);
/*!
ensures
- this object is properly initialized
!*/
explicit scan_fhog_pyramid (
const feature_extractor_type& fe
);
/*!
ensures
- this object is properly initialized
- #get_feature_extractor() == fe
!*/
template <
typename image_type
>
void load (
const image_type& img
);
/*!
requires
- image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename image_type::type> is defined)
ensures
- #is_loaded_with_image() == true
- This object is ready to run a classifier over img to detect object
locations. Call detect() to do this.
!*/
const feature_extractor_type& get_feature_extractor(
) const;
/*!
ensures
- returns a const reference to the feature extractor used by this object.
!*/
bool is_loaded_with_image (
) const;
/*!
ensures
- returns true if this object has been loaded with an image to process and
false otherwise.
!*/
void copy_configuration (
const scan_fhog_pyramid& item
);
/*!
ensures
- Copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two scan_fhog_pyramid
objects S1 and S2, the following sequence of instructions should always
result in both of them having the exact same state:
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
void set_detection_window_size (
unsigned long window_width,
unsigned long window_height
);
/*!
requires
- window_width > 0
- window_height > 0
ensures
- When detect() is called, this object scans a window that is of the given
width and height (in pixels) over each layer in an image pyramid. This
means that the rectangle detections which come out of detect() will have
a width to height ratio approximately equal to window_width/window_height
and will be approximately window_width*window_height pixels in area or
larger. Therefore, the smallest object that can be detected is roughly
window_width by window_height pixels in size.
- #get_detection_window_width() == window_width
- #get_detection_window_height() == window_height
- Since we use a HOG feature representation, the detection procedure works
as follows:
Step 1. Make an image pyramid.
Step 2. Convert each layer of the image pyramid into a multi-planed HOG "image".
(the number of bands is given by get_feature_extractor().get_num_planes())
Step 3. Scan a linear classifier over each HOG image in the pyramid.
Moreover, the HOG features quantize the input image into a grid of cells,
each cell being get_cell_size() by get_cell_size() pixels in size. So
when we scan the object detector over the pyramid we are scanning an
appropriately sized window over these smaller quantized HOG features. In
particular, the size of the window we scan over the HOG feature pyramid
is #get_fhog_window_width() by #get_fhog_window_height() HOG cells in
size.
- #is_loaded_with_image() == false
!*/
unsigned long get_detection_window_width (
) const;
/*!
ensures
- returns the width, in pixels, of the detection window that is scanned
over the image when detect() is called.
!*/
inline unsigned long get_detection_window_height (
) const;
/*!
ensures
- returns the height, in pixels, of the detection window that is scanned
over the image when detect() is called.
!*/
unsigned long get_fhog_window_width (
) const;
/*!
ensures
- Returns the width of the HOG scanning window in terms of HOG cell blocks.
Note that this is a function of get_detection_window_width(), get_cell_size(),
and get_padding() and is therefore not something you set directly.
- #get_fhog_window_width() is approximately equal to the number of HOG cells
that fit into get_detection_window_width() pixels plus 2*get_padding()
since we include additional padding around each window to add context.
!*/
unsigned long get_fhog_window_height (
) const;
/*!
ensures
- Returns the height of the HOG scanning window in terms of HOG cell blocks.
Note that this is a function of get_detection_window_height(), get_cell_size(),
and get_padding() and is therefore not something you set directly.
- #get_fhog_window_height() is approximately equal to the number of HOG cells
that fit into get_detection_window_height() pixels plus 2*get_padding()
since we include additional padding around each window to add context.
!*/
void set_padding (
unsigned long new_padding
);
/*!
ensures
- #get_padding() == new_padding
- #is_loaded_with_image() == false
!*/
unsigned long get_padding (
) const;
/*!
ensures
- The HOG windows scanned over the HOG pyramid can include additional HOG
cells outside the detection window. This can help add context and
improve detection accuracy. This function returns the number of extra
HOG cells added onto the border of the HOG windows which are scanned by
detect().
!*/
unsigned long get_cell_size (
) const;
/*!
ensures
- Returns the size of the HOG cells. Each HOG cell is square and contains
get_cell_size()*get_cell_size() pixels.
!*/
void set_cell_size (
unsigned long new_cell_size
);
/*!
requires
- new_cell_size > 0
ensures
- #get_cell_size() == new_cell_size
- #is_loaded_with_image() == false
!*/
inline long get_num_dimensions (
) const;
/*!
ensures
- returns get_fhog_window_width()*get_fhog_window_height()*get_feature_extractor().get_num_planes()
(i.e. The number of features is equal to the size of the HOG window times
the number of planes output by the feature extractor. )
!*/
inline unsigned long get_num_detection_templates (
) const { return 1; }
/*!
ensures
- returns 1. Note that this function is here only for compatibility with
the scan_image_pyramid object. Notionally, its return value indicates
that a scan_fhog_pyramid object is always ready to detect objects once
an image has been loaded.
!*/
inline unsigned long get_num_movable_components_per_detection_template (
) const { return 0; }
/*!
ensures
- returns 0. Note that this function is here only for compatibility with
the scan_image_pyramid object. Its return value means that this object
does not support using movable part models.
!*/
unsigned long get_max_pyramid_levels (
) const;
/*!
ensures
- returns the maximum number of image pyramid levels this object will use.
Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
will be used at all. That is, only the original image will be processed
and no lower scale versions will be created.
!*/
void set_max_pyramid_levels (
unsigned long max_levels
);
/*!
requires
- max_levels > 0
ensures
- #get_max_pyramid_levels() == max_levels
!*/
void set_min_pyramid_layer_size (
unsigned long width,
unsigned long height
);
/*!
requires
- width > 0
- height > 0
ensures
- #get_min_pyramid_layer_width() == width
- #get_min_pyramid_layer_height() == height
!*/
inline unsigned long get_min_pyramid_layer_width (
) const;
/*!
ensures
- returns the smallest allowable width of an image in the image pyramid.
All pyramids will always include the original input image, however, no
pyramid levels will be created which have a width smaller than the
value returned by this function.
!*/
inline unsigned long get_min_pyramid_layer_height (
) const;
/*!
ensures
- returns the smallest allowable height of an image in the image pyramid.
All pyramids will always include the original input image, however, no
pyramid levels will be created which have a height smaller than the
value returned by this function.
!*/
fhog_filterbank build_fhog_filterbank (
const feature_vector_type& weights
) const;
/*!
requires
- weights.size() >= get_num_dimensions()
ensures
- Creates and then returns a fhog_filterbank object FB such that:
- FB.get_num_dimensions() == get_num_dimensions()
- FB.get_filters() == the values in weights unpacked into get_feature_extractor().get_num_planes() filters.
- FB.num_separable_filters() == the number of separable filters necessary to
represent all the filters in FB.get_filters().
!*/
class fhog_filterbank
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents a HOG filter bank. That is, the classifier that is
slid over a HOG pyramid is a set of get_feature_extractor().get_num_planes()
linear filters, each get_fhog_window_width() rows by get_fhog_window_height()
columns in size. This object contains that set of filters.
!*/
public:
long get_num_dimensions(
) const;
/*!
ensures
- Returns the total number of values in the filters.
!*/
const std::vector<matrix<float> >& get_filters(
) const;
/*!
ensures
- returns the set of HOG filters in this object.
!*/
unsigned long num_separable_filters(
) const;
/*!
ensures
- returns the number of separable filters necessary to represent all
the filters in get_filters().
!*/
};
void detect (
const fhog_filterbank& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.get_num_dimensions() == get_num_dimensions()
- is_loaded_with_image() == true
ensures
- Scans the HOG filter defined by w over the HOG pyramid that was populated
by the last call to load() and stores all object detections into #dets.
- for all valid i:
- #dets[i].second == The object box which produced this detection. This rectangle gives
the location of the detection. Note that the rectangle will have been converted back into
the original image input space. That is, if this detection was made at a low level in the
image pyramid then the object box will have been automatically mapped up the pyramid layers
to the original image space. Or in other words, if you plot #dets[i].second on top of the
image given to load() it will show up in the right place.
- #dets[i].first == The score for this detection. This value is equal to dot(w, feature vector
for this sliding window location).
- #dets[i].first >= thresh
- #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only the first
get_num_dimensions() are used.
- Note that no form of non-max suppression is performed. If a window has a score >= thresh
then it is reported in #dets.
!*/
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
ensures
- performs: detect(build_fhog_filterbank(w), dets, thresh)
!*/
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
/*!
requires
- obj.num_parts() == 0
- is_loaded_with_image() == true
- psi.size() >= get_num_dimensions()
(i.e. psi must have preallocated its memory before this function is called)
ensures
- This function allows you to determine the feature vector used for an
object detection output from detect(). Note that this vector is
added to psi. Note also that you must use get_full_object_detection() to
convert a rectangle from detect() into the needed full_object_detection.
- The dimensionality of the vector added to psi is get_num_dimensions(). This
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
- Since scan_fhog_pyramid only searches a limited set of object locations,
not all possible rectangles can be output by detect(). So in the case
where obj.get_rect() could not arise from a call to detect(), this
function will map obj.get_rect() to the nearest possible rectangle and
then add the feature vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
/*!
ensures
- returns full_object_detection(rect)
(This function is here only for compatibility with the scan_image_pyramid
object)
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
ensures
- Since scan_fhog_pyramid only searches a limited set of object locations,
not all possible rectangles can be represented. Therefore, this function
allows you to supply a rectangle and obtain the nearest possible
candidate object location rectangle.
!*/
double get_nuclear_norm_regularization_strength (
) const;
/*!
ensures
- If the number of separable filters in a fhog_filterbank is small then the
filter bank can be scanned over an image much faster than a normal set of
filters. Therefore, this object provides the option to encourage
machine learning methods that learn a HOG filter bank (i.e.
structural_object_detection_trainer) to select filter banks that have
this beneficial property. In particular, the value returned by
get_nuclear_norm_regularization_strength() is a multiplier on a nuclear
norm regularizer which will encourage the selection of filters that use a
small number of separable components. Larger values tend to
give a smaller number of separable filters.
- if (get_nuclear_norm_regularization_strength() == 0) then
- This feature is disabled
- else
- A nuclear norm regularizer will be added when
structural_object_detection_trainer is used to learn a HOG filter
bank. Note that this can make the training process take
significantly longer (but can result in faster object detectors).
!*/
void set_nuclear_norm_regularization_strength (
double strength
);
/*!
requires
- strength >= 0
ensures
- #get_nuclear_norm_regularization_strength() == strength
!*/
};
// ----------------------------------------------------------------------------------------
template <typename T>
void serialize (
const scan_fhog_pyramid<T>& item,
std::ostream& out
);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void deserialize (
scan_fhog_pyramid<T>& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename pyramid_type,
typename image_type
>
void evaluate_detectors (
const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
const image_type& img,
std::vector<rect_detection>& dets,
const double adjust_threshold = 0
);
/*!
requires
- image_type == an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename image_type::type> is defined)
ensures
- This function runs each of the provided object_detector objects over img and
stores the resulting detections into #dets. Importantly, this function is
faster than running each detector individually because it computes the HOG
features only once and then reuses them for each detector. However, it is
important to note that this speedup is only possible if all the detectors use
the same cell_size parameter that determines how HOG features are computed.
If different cell_size values are used then this function will not be any
faster than running the detectors individually.
- This function applies non-max suppression individually to the output of each
detector. Therefore, the output is the same as if you ran each detector
individually and then concatenated the results.
- To be precise, this function performs object detection on the given image and
stores the detected objects into #dets. In particular, we will have that:
- #dets is sorted such that the highest confidence detections come first.
E.g. element 0 is the best detection, element 1 the next best, and so on.
- #dets.size() == the number of detected objects.
- #dets[i].detection_confidence == The strength of the i-th detection.
Larger values indicate that the detector is more confident that #dets[i]
is a correct detection rather than being a false alarm. Moreover, the
detection_confidence is equal to the detection value output by the
scanner minus the threshold value stored at the end of the weight vector.
- #dets[i].rect == the bounding box for the i-th detection.
- The detection #dets[i].rect was produced by detectors[#dets[i].weight_index].
- The detection threshold is adjusted by having adjust_threshold added to it.
Therefore, an adjust_threshold value > 0 makes detecting objects harder while
a negative value makes it easier. Moreover, the following will be true for
all valid i:
- #dets[i].detection_confidence >= adjust_threshold
This means that, for example, you can obtain the maximum possible number of
detections by setting adjust_threshold equal to negative infinity.
- This function is threadsafe in the sense that multiple threads can call
evaluate_detectors() with the same instances of detectors and img without
requiring a mutex lock.
!*/
// ----------------------------------------------------------------------------------------
template <
typename pyramid_type,
typename image_type
>
std::vector<rectangle> evaluate_detectors (
const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
const image_type& img,
const double adjust_threshold = 0
);
/*!
requires
- image_type == an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename image_type::type> is defined)
ensures
- This function just calls the above evaluate_detectors() routine and copies
the output dets into a vector<rectangle> object and returns it. Therefore,
this function is provided for convenience.
- This function is threadsafe in the sense that multiple threads can call
evaluate_detectors() with the same instances of detectors and img without
requiring a mutex lock.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_Hh_

View File

@@ -0,0 +1,368 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_iMAGE_Hh_
#define DLIB_SCAN_iMAGE_Hh_
#include <vector>
#include <utility>
#include "scan_image_abstract.h"
#include "../matrix.h"
#include "../algs.h"
#include "../rand.h"
#include "../array2d.h"
#include "../image_transforms/spatial_filtering.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
inline rectangle bounding_box_of_rects (
    const std::vector<std::pair<unsigned int, rectangle> >& rects,
    const point& position
)
/*!
    ensures
        - returns the smallest rectangle that encloses every element of rects once
          it has been shifted by position.  That is, the result contains
          translate_rect(rects[i].second,position) for all valid i.
        - if rects is empty then a default constructed rectangle is returned.
!*/
{
    typedef std::vector<std::pair<unsigned int, rectangle> >::const_iterator rect_iter;

    rectangle bounds;
    // operator+= grows bounds so that it also covers the rectangle being added.
    for (rect_iter it = rects.begin(); it != rects.end(); ++it)
        bounds += translate_rect(it->second, position);
    return bounds;
}
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array_type
    >
bool all_images_same_size (
    const image_array_type& images
)
/*!
    ensures
        - returns true if every element of images has the same number of rows and
          columns as images[0] (an empty array trivially satisfies this), and
          false otherwise.
!*/
{
    // Each image is measured against the first one.  With an empty array the loop
    // body never runs, so images[0] is never touched.
    for (unsigned long idx = 0; idx < images.size(); ++idx)
    {
        if (num_rows(images[idx]) != num_rows(images[0]))
            return false;
        if (num_columns(images[idx]) != num_columns(images[0]))
            return false;
    }
    return true;
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images (
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const point& position
)
{
DLIB_ASSERT(all_images_same_size(images),
"\t double sum_of_rects_in_images()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < rects.size(); ++i)
{
DLIB_ASSERT(rects[i].first < images.size(),
"\t double sum_of_rects_in_images()"
<< "\n\t rects["<<i<<"].first must refer to a valid image."
<< "\n\t rects["<<i<<"].first: " << rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
#endif
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
typedef typename promote<pixel_type>::type ptype;
ptype temp = 0;
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,position));
temp += sum(matrix_cast<ptype>(subm(mat(img), rect)));
}
return static_cast<double>(temp);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array_type
    >
double sum_of_rects_in_images_movable_parts (
    const image_array_type& images,
    const rectangle& window,
    const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
    const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
    const point& position
)
/*!
    Returns TOTAL_FIXED + TOTAL_MOVABLE where:
        - TOTAL_FIXED is the sum, over all fixed_rects, of the pixels inside the
          rectangle translated by position (clipped to the image).
        - TOTAL_MOVABLE is the sum, over all movable_rects, of max(0, best) where
          best is the largest value found inside translate_rect(window,position)
          of the image produced by sum_filter_assign() for that rectangle.  A
          movable rectangle whose window lies entirely outside the image adds
          nothing.
!*/
{
    // make sure requires clause is not broken
    DLIB_ASSERT(all_images_same_size(images) && center(window) == point(0,0),
        "\t double sum_of_rects_in_images_movable_parts()"
        << "\n\t Invalid arguments given to this function."
        << "\n\t all_images_same_size(images): " << all_images_same_size(images)
        << "\n\t center(window): " << center(window)
    );
#ifdef ENABLE_ASSERTS
    for (unsigned long i = 0; i < fixed_rects.size(); ++i)
    {
        DLIB_ASSERT(fixed_rects[i].first < images.size(),
            "\t double sum_of_rects_in_images_movable_parts()"
            << "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
            << "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
            << "\n\t images.size(): " << images.size()
        );
    }
    for (unsigned long i = 0; i < movable_rects.size(); ++i)
    {
        DLIB_ASSERT(movable_rects[i].first < images.size(),
            "\t double sum_of_rects_in_images_movable_parts()"
            << "\n\t movable_rects["<<i<<"].first must refer to a valid image."
            << "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
            << "\n\t images.size(): " << images.size()
        );
        DLIB_ASSERT(center(movable_rects[i].second) == point(0,0),
            "\t double sum_of_rects_in_images_movable_parts()"
            << "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
        );
    }
#endif

    typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
    typedef typename promote<pixel_type>::type ptype;

    ptype temp = 0;

    // compute TOTAL_FIXED part
    for (unsigned long i = 0; i < fixed_rects.size(); ++i)
    {
        const typename image_array_type::type& img = images[fixed_rects[i].first];
        const rectangle rect = get_rect(img).intersect(translate_rect(fixed_rects[i].second,position));
        temp += sum(matrix_cast<ptype>(subm(mat(img), rect)));
    }

    // images[0] is only valid when there is at least one image.
    if (images.size() > 0)
    {
        // compute TOTAL_MOVABLE part
        array2d<ptype> tempimg(images[0].nr(), images[0].nc());
        for (unsigned long i = 0; i < movable_rects.size(); ++i)
        {
            const typename image_array_type::type& img = images[movable_rects[i].first];

            sum_filter_assign(img, tempimg, movable_rects[i].second);

            const rectangle rect = get_rect(tempimg).intersect(translate_rect(window,position));
            if (rect.is_empty() == false)
            {
                // The template argument is given explicitly because std::max(a,b)
                // needs both arguments to have the same type.  A bare
                // std::max(0, x) only compiles when ptype happens to be int.
                temp += std::max<ptype>(0, max(matrix_cast<ptype>(subm(mat(tempimg), rect))));
            }
        }
    }

    return static_cast<double>(temp);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_type
    >
void find_points_above_thresh (
    std::vector<std::pair<double, point> >& dets,
    const image_type& img_,
    const double thresh,
    const unsigned long max_dets
)
/*!
    Replaces the contents of dets with (pixel value, location) pairs for pixels of
    img_ whose value is >= thresh.  At most max_dets pairs are kept; when more
    pixels pass the test, the kept ones are a random selection of them.
!*/
{
    const_image_view<image_type> img(img_);
    typedef typename image_traits<image_type>::pixel_type ptype;

    dets.clear();
    if (max_dets == 0)
        return;

    // Number of pixels seen so far that passed the threshold test.
    unsigned long num_passing = 0;
    dlib::rand rnd;

    for (long r = 0; r < img.nr(); ++r)
    {
        for (long c = 0; c < img.nc(); ++c)
        {
            const ptype val = img[r][c];
            if (!(val >= thresh))
                continue;

            ++num_passing;

            // While there is still room every passing pixel is simply recorded.
            if (dets.size() < max_dets)
            {
                dets.push_back(std::make_pair(val, point(c,r)));
                continue;
            }

            // dets is full.  Rather than stopping at the first max_dets hits, keep
            // scanning and let each later hit overwrite a randomly chosen stored
            // entry with probability dets.size()/num_passing.  This way the final
            // contents of dets are spread over the whole image instead of being
            // bunched up in the first rows that were visited.
            const unsigned long slot = rnd.get_random_32bit_number()%num_passing;
            if (slot < dets.size())
                dets[slot] = std::make_pair(val, point(c,r));
        }
    }
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
)
{
DLIB_ASSERT(images.size() > 0 && rects.size() > 0 && all_images_same_size(images),
"\t void scan_image()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t images.size(): " << images.size()
<< "\n\t rects.size(): " << rects.size()
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < rects.size(); ++i)
{
DLIB_ASSERT(rects[i].first < images.size(),
"\t void scan_image()"
<< "\n\t rects["<<i<<"].first must refer to a valid image."
<< "\n\t rects["<<i<<"].first: " << rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
#endif
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
typedef typename promote<pixel_type>::type ptype;
array2d<ptype> accum(images[0].nr(), images[0].nc());
assign_all_pixels(accum, 0);
for (unsigned long i = 0; i < rects.size(); ++i)
sum_filter(images[rects[i].first], accum, rects[i].second);
find_points_above_thresh(dets, accum, thresh, max_dets);
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image_movable_parts (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const rectangle& window,
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
const double thresh,
const unsigned long max_dets
)
{
DLIB_ASSERT(images.size() > 0 && all_images_same_size(images) &&
center(window) == point(0,0) && window.area() > 0,
"\t void scan_image_movable_parts()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
<< "\n\t center(window): " << center(window)
<< "\n\t window.area(): " << window.area()
<< "\n\t images.size(): " << images.size()
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
{
DLIB_ASSERT(fixed_rects[i].first < images.size(),
"\t void scan_image_movable_parts()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
<< "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
for (unsigned long i = 0; i < movable_rects.size(); ++i)
{
DLIB_ASSERT(movable_rects[i].first < images.size(),
"\t void scan_image_movable_parts()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t movable_rects["<<i<<"].first must refer to a valid image."
<< "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
<< "\n\t images.size(): " << images.size()
);
DLIB_ASSERT(center(movable_rects[i].second) == point(0,0) &&
movable_rects[i].second.area() > 0,
"\t void scan_image_movable_parts()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
<< "\n\t movable_rects["<<i<<"].second.area(): " << movable_rects[i].second.area()
);
}
#endif
if (movable_rects.size() == 0 && fixed_rects.size() == 0)
return;
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
typedef typename promote<pixel_type>::type ptype;
array2d<ptype> accum(images[0].nr(), images[0].nc());
assign_all_pixels(accum, 0);
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
sum_filter(images[fixed_rects[i].first], accum, fixed_rects[i].second);
array2d<ptype> temp(accum.nr(), accum.nc());
for (unsigned long i = 0; i < movable_rects.size(); ++i)
{
const rectangle rect = movable_rects[i].second;
sum_filter_assign(images[movable_rects[i].first], temp, rect);
max_filter(temp, accum, window.width(), window.height(), 0);
}
find_points_above_thresh(dets, accum, thresh, max_dets);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_iMAGE_Hh_

View File

@@ -0,0 +1,227 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
#ifdef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
#include <vector>
#include <utility>
#include "../algs.h"
#include "../image_processing/generic_image.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
bool all_images_same_size (
const image_array_type& images
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an image object that implements the interface
defined in dlib/image_processing/generic_image.h
ensures
- if (all elements of images have the same dimensions (i.e.
for all i and j: get_rect(images[i]) == get_rect(images[j]))) then
- returns true
- else
- returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images (
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const point& position
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an image object that implements the interface
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
contain a scalar pixel type (e.g. int rather than rgb_pixel)
- all_images_same_size(images) == true
- for all valid i: rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
ensures
- returns the sum of the pixels inside the given rectangles. To be precise,
let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position)
from the image images[rects[i].first]. Then this function returns the
sum of RECT_SUM[i] for all the valid values of i.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images_movable_parts (
const image_array_type& images,
const rectangle& window,
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
const point& position
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an image object that implements the interface
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
contain a scalar pixel type (e.g. int rather than rgb_pixel)
- all_images_same_size(images) == true
- center(window) == point(0,0)
- for all valid i:
- fixed_rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
- for all valid i:
- movable_rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
- center(movable_rects[i].second) == point(0,0)
ensures
- returns the sum of the pixels inside fixed_rects as well as the sum of the pixels
inside movable_rects when these latter rectangles are placed at their highest
scoring locations inside the given window. To be precise:
- let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x)
from the image images[r.first].
- let WIN_MAX(i) = The maximum value of RECT_SUM(movable_rects[i],X) when maximizing
over all the X such that translate_rect(window,position).contains(X) == true.
- let TOTAL_FIXED == sum over all elements R in fixed_rects of: RECT_SUM(R,position)
- let TOTAL_MOVABLE == sum over all valid i of: max(WIN_MAX(i), 0)
Then this function returns TOTAL_FIXED + TOTAL_MOVABLE.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_type
>
void find_points_above_thresh (
std::vector<std::pair<double, point> >& dets,
const image_type& img,
const double thresh,
const unsigned long max_dets
);
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h. Moreover, it must contain a
scalar pixel type (e.g. int rather than rgb_pixel)
ensures
- #dets == a list of points from img which had pixel values >= thresh.
- Specifically, we have:
- #dets.size() <= max_dets
(note that dets is cleared before new detections are added by find_points_above_thresh())
- for all valid i:
- #dets[i].first == img[#dets[i].second.y()][#dets[i].second.x()]
(i.e. the first field contains the value of the pixel at this detection location)
- #dets[i].first >= thresh
- if (there are more than max_dets locations that pass the above threshold test) then
- #dets == a random subsample of all the locations which passed the threshold
test.
- else
- #dets == all the points which passed the threshold test.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an image object that implements the interface
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
contain a scalar pixel type (e.g. int rather than rgb_pixel)
- images.size() > 0
- rects.size() > 0
- all_images_same_size(images) == true
- for all valid i: rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
ensures
- slides the set of rectangles over the image space and reports the locations
which give a sum greater than or equal to thresh.
- Specifically, we have:
- #dets.size() <= max_dets
(note that dets is cleared before new detections are added by scan_image())
- for all valid i:
- #dets[i].first == sum_of_rects_in_images(images,rects,#dets[i].second) >= thresh
- if (there are more than max_dets locations that pass the threshold test) then
- #dets == a random subsample of all the locations which passed the threshold
test.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image_movable_parts (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const rectangle& window,
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
const double thresh,
const unsigned long max_dets
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an image object that implements the interface
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
contain a scalar pixel type (e.g. int rather than rgb_pixel)
- images.size() > 0
- all_images_same_size(images) == true
- center(window) == point(0,0)
- window.area() > 0
- for all valid i:
- fixed_rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
- for all valid i:
- movable_rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
- center(movable_rects[i].second) == point(0,0)
- movable_rects[i].second.area() > 0
ensures
- Scans the given window over the images and reports the locations with a score
greater than or equal to thresh.
- Specifically, we have:
- #dets.size() <= max_dets
(note that dets is cleared before new detections are added by scan_image_movable_parts())
- for all valid i:
- #dets[i].first == sum_of_rects_in_images_movable_parts(images,
window,
fixed_rects,
movable_rects,
#dets[i].second) >= thresh
- if (there are more than max_dets locations that pass the above threshold test) then
- #dets == a random subsample of all the locations which passed the threshold
test.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_iMAGE_ABSTRACT_Hh_

View File

@@ -0,0 +1,631 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMAGE_bOXES_Hh_
#define DLIB_SCAN_IMAGE_bOXES_Hh_
#include "scan_image_boxes_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
#include "../image_processing/full_object_detection.h"
#include "../image_transforms.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class default_box_generator
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A stateless function object that produces candidate rectangles for an
            image by forwarding to find_candidate_object_locations().
    !*/
public:

    template <typename image_type>
    void operator() (const image_type& img, std::vector<rectangle>& rects) const
    {
        // Throw away whatever the caller left in rects before new candidates are
        // requested for img.
        rects.clear();
        find_candidate_object_locations(img, rects);
    }
};
// default_box_generator has no data members, so there is nothing to write.
inline void serialize (
    const default_box_generator&,
    std::ostream&
)
{
}
// default_box_generator has no data members, so there is nothing to read back.
inline void deserialize (
    default_box_generator&,
    std::istream&
)
{
}
// ----------------------------------------------------------------------------------------
// Scanner that scores a list of candidate rectangles (search_rects) produced by a
// Box_generator, using features supplied by a Feature_extractor_type object.
// Most member functions are only declared here and are defined out of class
// further down in this file.
template <
typename Feature_extractor_type,
typename Box_generator = default_box_generator
>
class scan_image_boxes : noncopyable
{
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Feature_extractor_type feature_extractor_type;
typedef Box_generator box_generator;
scan_image_boxes (
);
template <
typename image_type
>
void load (
const image_type& img
);
inline bool is_loaded_with_image (
) const;
inline void copy_configuration(
const feature_extractor_type& fe
);
inline void copy_configuration(
const box_generator& bg
);
// Read-only access to the box generator used to fill search_rects.
const box_generator& get_box_generator (
) const { return detect_boxes; }
// Read-only access to the feature extractor owned by this object.
const Feature_extractor_type& get_feature_extractor (
) const { return feats; }
inline void copy_configuration (
const scan_image_boxes& item
);
inline long get_num_dimensions (
) const;
unsigned long get_num_spatial_pyramid_levels (
) const;
void set_num_spatial_pyramid_levels (
unsigned long levels
);
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
!*/
// This scanner always reports exactly one detection template...
inline unsigned long get_num_detection_templates (
) const { return 1; }
// ...and that template has no movable components.
inline unsigned long get_num_movable_components_per_detection_template (
) const { return 0; }
template <typename T, typename U>
friend void serialize (
const scan_image_boxes<T,U>& item,
std::ostream& out
);
template <typename T, typename U>
friend void deserialize (
scan_image_boxes<T,U>& item,
std::istream& in
);
private:
// Strict weak ordering on (score, rectangle) pairs that looks only at the score
// (the .first field), lowest score first.
static bool compare_pair_rect (
const std::pair<double, rectangle>& a,
const std::pair<double, rectangle>& b
)
{
return a.first < b.first;
}
// Sanity check of the feature extractor's coordinate mappings.  Nine 5x6 probe
// rectangles centered at x,y in {-10,0,10} are pushed through
// image_to_feat_space() and feat_to_image_space(); if either returns a rectangle
// wider or taller than 10000000 a DLIB_CASSERT failure is raised.
void test_coordinate_transforms()
{
for (long x = -10; x <= 10; x += 10)
{
for (long y = -10; y <= 10; y += 10)
{
const rectangle rect = centered_rect(x,y,5,6);
rectangle a;
a = feats.image_to_feat_space(rect);
if (a.width() > 10000000 || a.height() > 10000000 )
{
DLIB_CASSERT(false, "The image_to_feat_space() routine is outputting rectangles of an implausibly "
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
}
a = feats.feat_to_image_space(rect);
if (a.width() > 10000000 || a.height() > 10000000 )
{
DLIB_CASSERT(false, "The feat_to_image_space() routine is outputting rectangles of an implausibly "
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
}
}
}
}
// Appends (rects is not cleared) a cells_x by cells_y grid of rectangles covering
// object_box.  Rectangles are pushed row by row: the outer loop walks y, the
// inner loop walks x.
static void add_grid_rects (
std::vector<rectangle>& rects,
const rectangle& object_box,
unsigned int cells_x,
unsigned int cells_y
)
{
// make sure requires clause is not broken
DLIB_ASSERT(cells_x > 0 && cells_y > 0,
"\t void add_grid_rects()"
<< "\n\t The number of cells along a dimension can't be zero. "
<< "\n\t cells_x: " << cells_x
<< "\n\t cells_y: " << cells_y
);
// linspace is asked for cells+1 values per axis; consecutive values are used
// below as the two opposite corners of each cell.
const matrix_range_exp<double>& x = linspace(object_box.left(), object_box.right(), cells_x+1);
const matrix_range_exp<double>& y = linspace(object_box.top(), object_box.bottom(), cells_y+1);
for (long j = 0; j+1 < y.size(); ++j)
{
for (long i = 0; i+1 < x.size(); ++i)
{
const dlib::vector<double,2> tl(x(i),y(j));
const dlib::vector<double,2> br(x(i+1),y(j+1));
rects.push_back(rectangle(tl,br));
}
}
}
void get_feature_extraction_regions (
const rectangle& rect,
std::vector<rectangle>& regions
) const
/*!
ensures
- #regions.size() is always the same number no matter what the input is. The
regions also have a consistent ordering.
- all the output rectangles are contained within rect.
!*/
{
regions.clear();
// Level l contributes a 2^(l-1) by 2^(l-1) grid over rect, so level 1 is rect
// itself, level 2 is a 2x2 split, level 3 a 4x4 split, and so on.
for (unsigned int l = 1; l <= num_spatial_pyramid_levels; ++l)
{
const int cells = (int)std::pow(2.0, l-1.0);
add_grid_rects(regions, rect, cells, cells);
}
}
// Computes (4^levels - 1)/3, i.e. 1 + 4 + 16 + ... + 4^(levels-1), which is the
// number of rectangles get_feature_extraction_regions() generates.
unsigned int get_num_components_per_detection_template(
) const
{
return (unsigned int)(std::pow(4.0,(double)num_spatial_pyramid_levels)-1)/3;
}
feature_extractor_type feats;
// Candidate rectangles produced by detect_boxes in load().
std::vector<rectangle> search_rects;
bool loaded_with_image;
unsigned int num_spatial_pyramid_levels;
box_generator detect_boxes;
// NOTE(review): both constants are fixed by the constructor (20 and 1200); the
// code that consumes them is not part of the section reviewed here.
const long box_sizedims;
const long box_maxsize;
};
// ----------------------------------------------------------------------------------------
template <typename T, typename U>
void serialize (const scan_image_boxes<T,U>& item, std::ostream& out)
/*!
    Writes item to out.  The layout is: format version (1), feature extractor,
    search rectangles, loaded flag, number of pyramid levels, box generator and
    finally the feature dimensionality, which deserialize() uses as a consistency
    check.
!*/
{
    int format_version = 1;
    serialize(format_version, out);

    serialize(item.feats, out);
    serialize(item.search_rects, out);
    serialize(item.loaded_with_image, out);
    serialize(item.num_spatial_pyramid_levels, out);
    serialize(item.detect_boxes, out);

    const long dims = item.get_num_dimensions();
    serialize(dims, out);
}
// ----------------------------------------------------------------------------------------
template <typename T, typename U>
void deserialize (scan_image_boxes<T,U>& item, std::istream& in)
/*!
    Reads back what serialize() wrote.

    throws
        - serialization_error if the stored format version is not 1 or if the stored
          feature dimensionality differs from item.get_num_dimensions() once the
          rest of the state has been loaded.
!*/
{
    int stored_version = 0;
    deserialize(stored_version, in);
    if (stored_version != 1)
        throw serialization_error("Unsupported version found when deserializing a scan_image_boxes object.");

    deserialize(item.feats, in);
    deserialize(item.search_rects, in);
    deserialize(item.loaded_with_image, in);
    deserialize(item.num_spatial_pyramid_levels, in);
    deserialize(item.detect_boxes, in);

    // Guard against a common mistake made while developing a feature extractor:
    // the extractor's dimensionality gets changed and afterwards data saved by the
    // older code is loaded into the newer code.  The dimensionality recorded in
    // the stream has to agree with what the freshly loaded object reports.
    long stored_dims;
    deserialize(stored_dims, in);
    if (item.get_num_dimensions() != stored_dims)
        throw serialization_error("Number of dimensions in serialized scan_image_boxes doesn't match the expected number.");
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_image_boxes member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Default constructor: no image loaded yet, 3 spatial pyramid levels, and the two
// box constants fixed at 20 and 1200.
template <typename Feature_extractor_type, typename Box_generator>
scan_image_boxes<Feature_extractor_type,Box_generator>::scan_image_boxes ()
    : loaded_with_image(false),
      num_spatial_pyramid_levels(3),
      box_sizedims(20),
      box_maxsize(1200)
{
}
// ----------------------------------------------------------------------------------------
// Hands img to the feature extractor, regenerates search_rects from it with the
// box generator, and records that an image is now loaded.
template <typename Feature_extractor_type, typename Box_generator>
template <typename image_type>
void scan_image_boxes<Feature_extractor_type,Box_generator>::load (
    const image_type& img
)
{
    feats.load(img);
    detect_boxes(img, search_rects);

    // Only reached when neither call above threw.
    loaded_with_image = true;
}
// ----------------------------------------------------------------------------------------
// Returns true once load() has been called on this object (or the flag was restored
// as true by deserialize()), and false otherwise.
template <typename Feature_extractor_type, typename Box_generator>
bool scan_image_boxes<Feature_extractor_type,Box_generator>::is_loaded_with_image () const
{
    return loaded_with_image;
}
// ----------------------------------------------------------------------------------------
// Copies the configuration of the feature extractor fe into the feature extractor
// owned by this object.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::copy_configuration (
    const feature_extractor_type& fe
)
{
    // NOTE(review): test_coordinate_transforms() is defined outside this part of the
    // file; presumably it sanity checks the feature extractor's coordinate mappings.
    test_coordinate_transforms();

    feats.copy_configuration(fe);
}
// ----------------------------------------------------------------------------------------
// Replaces the box generator used by load() to produce candidate rectangles with a
// copy of bg.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::copy_configuration (
    const box_generator& bg
)
{
    detect_boxes = bg;
}
// ----------------------------------------------------------------------------------------
// Copies the configurable state of item into *this: the feature extractor
// configuration, the box generator, and the number of spatial pyramid levels.
// State produced by load() (search_rects, loaded_with_image) is not touched.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::copy_configuration (
    const scan_image_boxes& item
)
{
    feats.copy_configuration(item.feats);
    detect_boxes               = item.detect_boxes;
    num_spatial_pyramid_levels = item.num_spatial_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Returns the number of levels currently configured for the spatial pyramid.
template <typename Feature_extractor_type, typename Box_generator>
unsigned long scan_image_boxes<Feature_extractor_type,Box_generator>::get_num_spatial_pyramid_levels () const
{
    return num_spatial_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Sets the number of levels in the spatial pyramid.
//
// requires
//     - levels > 0 (checked by DLIB_ASSERT)
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::set_num_spatial_pyramid_levels (
    unsigned long levels
)
{
    // A pyramid needs at least one level.
    DLIB_ASSERT(levels > 0,
        "\t void scan_image_boxes::set_num_spatial_pyramid_levels()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t levels: " << levels
        << "\n\t this: " << this
        );

    num_spatial_pyramid_levels = levels;
}
// ----------------------------------------------------------------------------------------
// Returns the length of the feature vector used by detect() and get_feature_vector().
// The layout is: box_sizedims entries for the box width, box_sizedims entries for the
// box height, then one block of feats.get_num_dimensions() entries for each feature
// extraction region of the detection template.
template <typename Feature_extractor_type, typename Box_generator>
long scan_image_boxes<Feature_extractor_type,Box_generator>::get_num_dimensions () const
{
    return box_sizedims*2 + feats.get_num_dimensions()*get_num_components_per_detection_template();
}
// ----------------------------------------------------------------------------------------
// Scores every rectangle in search_rects against the weight vector w and stores the
// ones with score >= thresh into dets as (score, rectangle) pairs, sorted so that the
// highest scoring detection comes first.
//
// requires
//     - is_loaded_with_image() == true
//     - w.size() >= get_num_dimensions()
//
// The layout of w matches get_num_dimensions(): the first 2*box_sizedims entries
// weight the box width and height features and the rest hold one block of
// feats.get_num_dimensions() weights per feature extraction region.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::detect (
    const feature_vector_type& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
        w.size() >= get_num_dimensions(),
        "\t void scan_image_boxes::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.size(): " << w.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
        );

    dets.clear();

    // One integral image per feature extraction region.  Each one stores, for every
    // feature location, the response of that region's weights to the local descriptor.
    array<integral_image_generic<double> > saliency_images(get_num_components_per_detection_template());
    array2d<double> response_img(feats.nr(), feats.nc());

    for (unsigned long region_id = 0; region_id < saliency_images.size(); ++region_id)
    {
        // Start of the weights belonging to this region inside w.
        const unsigned long offset = 2*box_sizedims + feats.get_num_dimensions()*region_id;

        for (long row = 0; row < feats.nr(); ++row)
        {
            for (long col = 0; col < feats.nc(); ++col)
            {
                const typename feature_extractor_type::descriptor_type& descriptor = feats(row,col);

                // Sparse dot product between the descriptor and this region's weights.
                double response = 0;
                for (unsigned long k = 0; k < descriptor.size(); ++k)
                    response += w(descriptor[k].first + offset)*descriptor[k].second;

                response_img[row][col] = response;
            }
        }

        // Turn the raw responses into an integral image so that the sum over any
        // rectangle can be read off with get_sum_of_area().
        saliency_images[region_id].load(response_img);
    }

    // Now evaluate each candidate rectangle.
    std::vector<rectangle> regions;
    const rectangle bounds = get_rect(feats);
    for (unsigned long i = 0; i < search_rects.size(); ++i)
    {
        const rectangle& candidate = search_rects[i];

        // Map the candidate into feature space and clip it to the valid area.
        const rectangle rect = feats.image_to_feat_space(candidate).intersect(bounds);
        if (rect.is_empty())
            continue;

        get_feature_extraction_regions(rect, regions);

        double score = 0;
        for (unsigned long k = 0; k < regions.size(); ++k)
            score += saliency_images[k].get_sum_of_area(regions[k]);

        // Add the contribution of the box size features (width first, then height).
        const double width = candidate.width();
        const double height = candidate.height();
        score += dot(linpiece(width, linspace(0, box_maxsize, box_sizedims+1)), rowm(w, range(0,box_sizedims-1)));
        score += dot(linpiece(height, linspace(0, box_maxsize, box_sizedims+1)), rowm(w, range(box_sizedims,2*box_sizedims-1)));

        if (score >= thresh)
            dets.push_back(std::make_pair(score, candidate));
    }

    // Sorting the reversed range in ascending order leaves dets in descending order.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
// Returns the rectangle in search_rects which best matches rect, where the match
// quality is the area of the intersection divided by the area of (rect + candidate).
// If search_rects is empty a default constructed rectangle is returned.
//
// requires
//     - is_loaded_with_image() == true
template <typename Feature_extractor_type, typename Box_generator>
const rectangle scan_image_boxes<Feature_extractor_type,Box_generator>::get_best_matching_rect (
    const rectangle& rect
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image(),
        "\t const rectangle scan_image_boxes::get_best_matching_rect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t this: " << this
        );

    rectangle best_rect;
    double best_score = -1;  // below any real score, so the first candidate always wins

    for (unsigned long i = 0; i < search_rects.size(); ++i)
    {
        const rectangle& candidate = search_rects[i];

        const double shared_area = rect.intersect(candidate).area();
        const double total_area = static_cast<double>((rect+candidate).area());
        const double score = shared_area/total_area;

        // Strict comparison: on ties the earliest candidate is kept.
        if (score > best_score)
        {
            best_score = score;
            best_rect = candidate;
        }
    }

    return best_rect;
}
// ----------------------------------------------------------------------------------------
// Wraps rect in a full_object_detection.  The weight vector argument is accepted but
// not used.
template <typename Feature_extractor_type, typename Box_generator>
full_object_detection scan_image_boxes<Feature_extractor_type,Box_generator>::get_full_object_detection (
    const rectangle& rect,
    const feature_vector_type& /*w*/
) const
{
    return full_object_detection(rect);
}
// ----------------------------------------------------------------------------------------
// Adds the feature vector for the object obj into psi.  obj.get_rect() is first
// mapped to the closest rectangle in search_rects (see get_best_matching_rect()) and
// the features of that rectangle are the ones accumulated.  If the mapped rectangle
// falls completely outside the feature image then psi is left untouched.
//
// requires
//     - is_loaded_with_image() == true
//     - psi.size() >= get_num_dimensions()
//     - obj.num_parts() == 0
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::get_feature_vector (
    const full_object_detection& obj,
    feature_vector_type& psi
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
        psi.size() >= get_num_dimensions() &&
        obj.num_parts() == 0,
        "\t void scan_image_boxes::get_feature_vector()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t psi.size(): " << psi.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t obj.num_parts(): " << obj.num_parts()
        << "\n\t this: " << this
        );

    const rectangle best_rect = get_best_matching_rect(obj.get_rect());
    const rectangle mapped_rect = feats.image_to_feat_space(best_rect).intersect(get_rect(feats));
    if (mapped_rect.is_empty())
        return;

    std::vector<rectangle> regions;
    get_feature_extraction_regions(mapped_rect, regions);

    // Accumulate the descriptors found inside each feature extraction region into
    // that region's own block of psi.
    for (unsigned long region_id = 0; region_id < regions.size(); ++region_id)
    {
        const rectangle& region = regions[region_id];
        const unsigned long offset = box_sizedims*2 + feats.get_num_dimensions()*region_id;

        for (long row = region.top(); row <= region.bottom(); ++row)
        {
            for (long col = region.left(); col <= region.right(); ++col)
            {
                const typename feature_extractor_type::descriptor_type& descriptor = feats(row,col);
                for (unsigned long k = 0; k < descriptor.size(); ++k)
                    psi(descriptor[k].first + offset) += descriptor[k].second;
            }
        }
    }

    // Finally add the box size features: width goes into the first box_sizedims
    // entries of psi and height into the next box_sizedims entries.
    const double width = best_rect.width();
    const double height = best_rect.height();
    set_rowm(psi, range(0,box_sizedims-1)) += linpiece(width, linspace(0, box_maxsize, box_sizedims+1));
    set_rowm(psi, range(box_sizedims,box_sizedims*2-1)) += linpiece(height, linspace(0, box_maxsize, box_sizedims+1));
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMAGE_bOXES_Hh_

View File

@@ -0,0 +1,394 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_Hh_
#ifdef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_Hh_
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include "full_object_detection_abstract.h"
#include "../image_transforms/segment_image_abstract.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class default_box_generator
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A stateless function object which, given an image, produces the list of
            candidate object locations to be examined.  The scan_image_boxes object
            defined below uses it as its default Box_generator.
    !*/

public:

    template <typename image_type>
    void operator() (const image_type& img, std::vector<rectangle>& rects) const
    /*!
        ensures
            - Any previous contents of rects are discarded.
            - #rects == the rectangles reported by find_candidate_object_locations()
              for img.  That is, the locations inside img which might contain
              objects of interest and should therefore be searched.
    !*/
    {
        rects.clear();
        find_candidate_object_locations(img, rects);
    }
};
inline void serialize (
    const default_box_generator&,
    std::ostream&
)
/*!
    ensures
        - provides serialization support.  This function body is empty, so nothing
          is written to the stream.
!*/
{
}
inline void deserialize (
    default_box_generator&,
    std::istream&
)
/*!
    ensures
        - provides deserialization support.  This function body is empty, so nothing
          is read from the stream.
!*/
{
}
// ----------------------------------------------------------------------------------------
template <
typename Feature_extractor_type,
typename Box_generator = default_box_generator
>
class scan_image_boxes : noncopyable
{
/*!
REQUIREMENTS ON Feature_extractor_type
- must be an object with an interface compatible with the hashed_feature_image
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h or
with the nearest_neighbor_feature_image object defined in
dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h
REQUIREMENTS ON Box_generator
- must be an object with an interface compatible with the
default_box_generator object defined at the top of this file.
INITIAL VALUE
- get_num_spatial_pyramid_levels() == 3
- is_loaded_with_image() == false
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a classifier over an image with the goal
of localizing each object present. The localization is in the form of the
bounding box around each object of interest.
Unlike the scan_image_pyramid object which scans a fixed sized window over
an image pyramid, the scan_image_boxes tool allows you to define your own
list of "candidate object locations" which should be evaluated. This is
simply a list of rectangle objects which might contain objects of interest.
The scan_image_boxes object will then evaluate the classifier at each of
these locations and return the subset of rectangles which appear to have
objects in them. The candidate object location generation is provided by
the Box_generator that is passed in as a template argument.
This object can also be understood as a general tool for implementing the
spatial pyramid models described in the paper:
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
and Jean Ponce
The classifiers used by this object have three parts:
1. The underlying feature extraction provided by Feature_extractor_type
objects, which associate a vector with each location in an image.
2. A rule for extracting a feature vector from a candidate object
location. In this object we use the spatial pyramid matching method.
This means we cut an object's detection window into a set of "feature
extraction regions" and extract a bag-of-words vector from each
before finally concatenating them to form the final feature vector
representing the entire object window. The set of feature extraction
regions can be configured by the user by calling
set_num_spatial_pyramid_levels(). To be a little more precise, the
feature vector for a candidate object window is defined as follows:
- Let N denote the number of feature extraction zones.
- Let M denote the dimensionality of the vectors output by
Feature_extractor_type objects.
- Let F(i) == the M dimensional vector which is the sum of all
vectors given by our Feature_extractor_type object inside the
i-th feature extraction zone. So this is notionally a
bag-of-words vector from the i-th zone.
- Then the feature vector for an object window is an M*N
dimensional vector [F(1) F(2) F(3) ... F(N)] (i.e. it is a
concatenation of the N vectors). This feature vector can be
thought of as a collection of N bags-of-words, each bag coming
from a spatial location determined by one of the feature
extraction zones.
- Note that the implementation in scan_image_boxes.h also places
a block of features encoding the width and height of the
candidate box in front of the M*N bag-of-words entries, so
get_num_dimensions() is larger than M*N.
3. A weight vector and a threshold value. The dot product between the
weight vector and the feature vector for a candidate object location
gives the score of the location. If this score is greater than or
equal to the threshold value then the candidate object location is
output as a detection.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be
protected by a mutex lock except for the case where you are copying the
configuration (via copy_configuration()) of a scan_image_boxes object to
many other threads. In this case, it is safe to copy the configuration of
a shared object so long as no other operations are performed on it.
!*/
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Feature_extractor_type feature_extractor_type;
typedef Box_generator box_generator;
scan_image_boxes (
);
/*!
ensures
- this object is properly initialized
!*/
template <
typename image_type
>
void load (
const image_type& img
);
/*!
requires
- image_type must be a type with the following properties:
- image_type objects can be loaded into Feature_extractor_type
objects via Feature_extractor_type::load().
- image_type objects can be passed to the first argument of
Box_generator::operator()
ensures
- #is_loaded_with_image() == true
- This object is ready to run a classifier over img to detect object
locations. Call detect() to do this.
!*/
bool is_loaded_with_image (
) const;
/*!
ensures
- returns true if this object has been loaded with an image to process and
false otherwise.
!*/
const feature_extractor_type& get_feature_extractor (
) const;
/*!
ensures
- returns a const reference to the feature_extractor_type object used
internally for local feature extraction.
!*/
void copy_configuration(
const feature_extractor_type& fe
);
/*!
ensures
- This function performs the equivalent of
get_feature_extractor().copy_configuration(fe) (i.e. this function allows
you to configure the parameters of the underlying feature extractor used
by a scan_image_boxes object)
!*/
void copy_configuration(
const box_generator& bg
);
/*!
ensures
- #get_box_generator() == bg
(i.e. this function allows you to configure the parameters of the
underlying box generator used by a scan_image_boxes object)
!*/
const box_generator& get_box_generator (
) const;
/*!
ensures
- returns the box_generator used by this object to generate candidate
object locations.
!*/
void copy_configuration (
const scan_image_boxes& item
);
/*!
ensures
- Copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two scan_image_boxes
objects S1 and S2, the following sequence of instructions should always
result in both of them having the exact same state:
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
long get_num_dimensions (
) const;
/*!
ensures
- returns the number of dimensions in the feature vector for a candidate
object location. This value is the dimensionality of the underlying
feature vectors produced by Feature_extractor_type times the number of
feature extraction regions used, plus the dimensions the implementation
uses to encode the width and height of the candidate box. Note that the
number of feature extraction regions used is a function of
get_num_spatial_pyramid_levels().
!*/
unsigned long get_num_spatial_pyramid_levels (
) const;
/*!
ensures
- returns the number of layers in the spatial pyramid. For example, if
this function returns 1 then it means we use a simple bag-of-words
representation over the whole object window. If it returns 2 then it
means the feature representation is the concatenation of 5 bag-of-words
vectors, one from the entire object window and 4 others from 4 different
parts of the object window. If it returns 3 then there are 1+4+16
bag-of-words vectors concatenated together in the feature representation,
and so on.
!*/
void set_num_spatial_pyramid_levels (
unsigned long levels
);
/*!
requires
- levels > 0
ensures
- #get_num_spatial_pyramid_levels() == levels
!*/
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
ensures
- Scans over all the candidate object locations as discussed in the WHAT
THIS OBJECT REPRESENTS section and stores all detections into #dets.
- for all valid i:
- #dets[i].second == The candidate object location which produced this
detection. This rectangle gives the location of the detection.
- #dets[i].first == The score for this detection. This value is equal
to dot(w, feature vector for this candidate object location).
- #dets[i].first >= thresh
- #dets will be sorted in descending order.
(i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only
the first get_num_dimensions() are used.
- Note that no form of non-max suppression is performed. If a location
has a score >= thresh then it is reported in #dets.
!*/
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
/*!
requires
- obj.num_parts() == 0
- is_loaded_with_image() == true
- psi.size() >= get_num_dimensions()
(i.e. psi must have preallocated its memory before this function is called)
ensures
- This function allows you to determine the feature vector used for a
candidate object location output from detect(). Note that this vector is
added to psi. Note also that you must use get_full_object_detection() to
convert a rectangle from detect() into the needed full_object_detection.
- The dimensionality of the vector added to psi is get_num_dimensions(). This
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
- Since scan_image_boxes only searches a limited set of object locations,
not all possible rectangles can be output by detect(). So in the case
where obj.get_rect() could not arise from a call to detect(), this
function will map obj.get_rect() to the nearest possible rectangle and
then add the feature vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
/*!
ensures
- returns full_object_detection(rect)
(This function is here only for compatibility with the scan_image_pyramid
object)
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
ensures
- Since scan_image_boxes only searches a limited set of object locations,
not all possible rectangles can be represented. Therefore, this function
allows you to supply a rectangle and obtain the nearest possible
candidate object location rectangle.
!*/
unsigned long get_num_detection_templates (
) const { return 1; }
/*!
ensures
- returns 1. Note that this function is here only for compatibility with
the scan_image_pyramid object. Notionally, its return value indicates
that a scan_image_boxes object is always ready to detect objects once
an image has been loaded.
!*/
unsigned long get_num_movable_components_per_detection_template (
) const { return 0; }
/*!
ensures
- returns 0. Note that this function is here only for compatibility with
the scan_image_pyramid object. Its return value means that this object
does not support using movable part models.
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename Feature_extractor_type,
typename Box_generator
>
void serialize (
const scan_image_boxes<Feature_extractor_type,Box_generator>& item,
std::ostream& out
);
/*!
ensures
- provides serialization support: writes the state of item to out so that it
can later be restored with deserialize().
!*/
template <
typename Feature_extractor_type,
typename Box_generator
>
void deserialize (
scan_image_boxes<Feature_extractor_type,Box_generator>& item,
std::istream& in
);
/*!
ensures
- provides deserialization support: reads into item the state of a
scan_image_boxes object previously written by serialize().
throws
- serialization_error
This exception is thrown if the data in the stream has an unsupported
format version or if the number of dimensions stored in the stream does
not match item.get_num_dimensions() after loading.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMAGE_bOXES_ABSTRACT_Hh_

View File

@@ -0,0 +1,401 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMAGE_CuSTOM_Hh_
#define DLIB_SCAN_IMAGE_CuSTOM_Hh_
#include "scan_image_custom_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include <vector>
#include "../image_processing/full_object_detection.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename Feature_extractor_type>
class scan_image_custom : noncopyable
{
    // An image scanner whose candidate rectangles and per-rectangle feature vectors
    // both come from a user supplied Feature_extractor_type.  load() asks the feature
    // extractor for the candidate rectangles and detect() scores each of them against
    // a weight vector.

public:

    typedef matrix<double,0,1> feature_vector_type;
    typedef Feature_extractor_type feature_extractor_type;

    scan_image_custom ();

    template <typename image_type>
    void load (const image_type& img);

    inline bool is_loaded_with_image () const;

    inline void copy_configuration (const feature_extractor_type& fe);

    const Feature_extractor_type& get_feature_extractor () const
    {
        return feats;
    }

    inline void copy_configuration (const scan_image_custom& item);

    inline long get_num_dimensions () const;

    void detect (
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const;

    void get_feature_vector (
        const full_object_detection& obj,
        feature_vector_type& psi
    ) const;

    full_object_detection get_full_object_detection (
        const rectangle& rect,
        const feature_vector_type& w
    ) const;

    const rectangle get_best_matching_rect (const rectangle& rect) const;

    // These two always report the same fixed values for this scanner.
    inline unsigned long get_num_detection_templates () const { return 1; }
    inline unsigned long get_num_movable_components_per_detection_template () const { return 0; }

    template <typename T>
    friend void serialize (
        const scan_image_custom<T>& item,
        std::ostream& out
    );

    template <typename T>
    friend void deserialize (
        scan_image_custom<T>& item,
        std::istream& in
    );

private:

    // Orders detections by score only.  detect() applies it to reverse iterators to
    // get a descending sort.
    static bool compare_pair_rect (
        const std::pair<double, rectangle>& a,
        const std::pair<double, rectangle>& b
    )
    {
        return b.first > a.first;
    }

    // Compile time test for whether a feature extractor provides
    //     double compute_object_score(const matrix<double,0,1>& w, const rectangle& obj) const
    DLIB_MAKE_HAS_MEMBER_FUNCTION_TEST(
        has_compute_object_score,
        double,
        compute_object_score,
        ( const matrix<double,0,1>& w, const rectangle& obj) const
    );

    // Version used when the feature extractor has compute_object_score(): each
    // rectangle's score is obtained directly from the extractor.
    template <typename fe_type>
    typename enable_if<has_compute_object_score<fe_type> >::type compute_all_rect_scores (
        const fe_type& fe,
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const
    {
        for (unsigned long idx = 0; idx < search_rects.size(); ++idx)
        {
            const rectangle& candidate = search_rects[idx];
            const double score = fe.compute_object_score(w, candidate);
            if (score >= thresh)
                dets.push_back(std::make_pair(score, candidate));
        }
    }

    // Fallback used when the feature extractor has no compute_object_score().  The
    // feature vectors are accumulated into one running vector, so the score of a
    // rectangle is the change in dot(accumulated, w) caused by adding its features.
    template <typename fe_type>
    typename disable_if<has_compute_object_score<fe_type> >::type compute_all_rect_scores (
        const fe_type& fe,
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const
    {
        matrix<double,0,1> accumulated(w.size());
        accumulated = 0;
        double last_dot = 0;

        for (unsigned long idx = 0; idx < search_rects.size(); ++idx)
        {
            // Periodically start over from zero so rounding error can't build up.
            // Working with a running sum is done only to avoid zeroing the whole
            // vector on every iteration.
            if (idx%500 == 499)
            {
                accumulated = 0;
                last_dot = 0;
            }

            const rectangle& candidate = search_rects[idx];
            fe.get_feature_vector(candidate, accumulated);

            const double this_dot = dot(accumulated, w);
            const double score = this_dot - last_dot;
            last_dot = this_dot;

            if (score >= thresh)
                dets.push_back(std::make_pair(score, candidate));
        }
    }

    feature_extractor_type feats;
    std::vector<rectangle> search_rects;
    bool loaded_with_image;
};
// ----------------------------------------------------------------------------------------
// Writes a scan_image_custom object to out.  The format is: version number (1),
// feature extractor, search rectangles, loaded-with-image flag, and then the number
// of dimensions, which deserialize() uses as a consistency check.
template <typename T>
void serialize (const scan_image_custom<T>& item, std::ostream& out)
{
    const int version = 1;
    serialize(version, out);

    serialize(item.feats, out);
    serialize(item.search_rects, out);
    serialize(item.loaded_with_image, out);
    serialize(item.get_num_dimensions(), out);
}
// ----------------------------------------------------------------------------------------
// Restores a scan_image_custom object from the stream in.
//
// The fields are read directly into item in this order: format version, feature
// extractor, search rectangles, loaded-with-image flag, and finally the stored
// number of dimensions.
//
// Throws serialization_error if the stored version is not 1 or if the stored number
// of dimensions differs from item.get_num_dimensions().  Since the members are
// overwritten before the dimension check runs, item may be left partially modified
// when an exception is thrown.
template <typename T>
void deserialize (
    scan_image_custom<T>& item,
    std::istream& in
)
{
    int version = 0;
    deserialize(version, in);
    if (version != 1)
        throw serialization_error("Unsupported version found when deserializing a scan_image_custom object.");

    deserialize(item.feats, in);
    deserialize(item.search_rects, in);
    deserialize(item.loaded_with_image, in);

    // When developing some feature extractor, it's easy to accidentally change its
    // number of dimensions and then try to deserialize data from an older version of
    // your extractor into the current code.  This check is here to catch that kind of
    // user error.
    long dims = 0;  // never leave this indeterminate, even on an error path
    deserialize(dims, in);
    if (item.get_num_dimensions() != dims)
        throw serialization_error("Number of dimensions in serialized scan_image_custom doesn't match the expected number.");
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_image_custom member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Default constructor.  The scanner starts out with no image loaded.
template <typename Feature_extractor_type>
scan_image_custom<Feature_extractor_type>::scan_image_custom ()
    : loaded_with_image(false)
{
}
// ----------------------------------------------------------------------------------------
// Loads img into the feature extractor, which also fills search_rects with the
// candidate rectangles to be scored, and then flags this object as loaded.
template <typename Feature_extractor_type>
template <typename image_type>
void scan_image_custom<Feature_extractor_type>::load (const image_type& img)
{
    feats.load(img, search_rects);

    // Only set once the feature extractor has finished loading.
    loaded_with_image = true;
}
// ----------------------------------------------------------------------------------------
// Returns true once load() has been called on this object (or the flag was restored
// as true by deserialize()), and false otherwise.
template <typename Feature_extractor_type>
bool scan_image_custom<Feature_extractor_type>::is_loaded_with_image () const
{
    return loaded_with_image;
}
// ----------------------------------------------------------------------------------------
// Copies the configuration of the feature extractor fe into the feature extractor
// owned by this object.
template <typename Feature_extractor_type>
void scan_image_custom<Feature_extractor_type>::copy_configuration (
    const feature_extractor_type& fe
)
{
    feats.copy_configuration(fe);
}
// ----------------------------------------------------------------------------------------
// Copies the configuration of item into *this.  The only configurable state is the
// feature extractor; state produced by load() (search_rects, loaded_with_image) is
// not touched.
template <typename Feature_extractor_type>
void scan_image_custom<Feature_extractor_type>::copy_configuration (
    const scan_image_custom& item
)
{
    feats.copy_configuration(item.feats);
}
// ----------------------------------------------------------------------------------------
// Returns the length of the feature vectors used by this scanner, which is whatever
// the feature extractor reports.
template <typename Feature_extractor_type>
long scan_image_custom<Feature_extractor_type>::get_num_dimensions () const
{
    return feats.get_num_dimensions();
}
// ----------------------------------------------------------------------------------------
// Scores every rectangle in search_rects against the weight vector w and stores the
// ones with score >= thresh into dets as (score, rectangle) pairs, sorted so that the
// highest scoring detection comes first.
//
// requires
//     - is_loaded_with_image() == true
//     - w.size() >= get_num_dimensions()
template <typename Feature_extractor_type>
void scan_image_custom<Feature_extractor_type>::detect (
    const feature_vector_type& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
        w.size() >= get_num_dimensions(),
        "\t void scan_image_custom::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.size(): " << w.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
        );

    dets.clear();

    // Picks the overload matching the feature extractor's capabilities.
    compute_all_rect_scores(feats, w, dets, thresh);

    // Sorting the reversed range in ascending order leaves dets in descending order.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
// Returns the rectangle in search_rects which best matches rect, where the match
// quality is the area of the intersection divided by the area of (rect + candidate).
// If search_rects is empty a default constructed rectangle is returned.
//
// requires
//     - is_loaded_with_image() == true
template <typename Feature_extractor_type>
const rectangle scan_image_custom<Feature_extractor_type>::get_best_matching_rect (
    const rectangle& rect
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image(),
        "\t const rectangle scan_image_custom::get_best_matching_rect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t this: " << this
        );

    rectangle best_rect;
    double best_score = -1;  // below any real score, so the first candidate always wins

    for (unsigned long i = 0; i < search_rects.size(); ++i)
    {
        const rectangle& candidate = search_rects[i];

        const double shared_area = rect.intersect(candidate).area();
        const double total_area = static_cast<double>((rect+candidate).area());
        const double score = shared_area/total_area;

        // Strict comparison: on ties the earliest candidate is kept.
        if (score > best_score)
        {
            best_score = score;
            best_rect = candidate;
        }
    }

    return best_rect;
}
// ----------------------------------------------------------------------------------------
// Wraps rect in a full_object_detection.  The weight vector argument is accepted but
// not used.
template <typename Feature_extractor_type>
full_object_detection scan_image_custom<Feature_extractor_type>::get_full_object_detection (
    const rectangle& rect,
    const feature_vector_type& /*w*/
) const
{
    return full_object_detection(rect);
}
// ----------------------------------------------------------------------------------------
// Passes psi to the feature extractor's get_feature_vector() for the object obj.
// obj.get_rect() is first mapped to the closest rectangle in search_rects (see
// get_best_matching_rect()) and that rectangle is the one handed to the feature
// extractor.
//
// requires
//     - is_loaded_with_image() == true
//     - psi.size() >= get_num_dimensions()
//     - obj.num_parts() == 0
template <typename Feature_extractor_type>
void scan_image_custom<Feature_extractor_type>::get_feature_vector (
    const full_object_detection& obj,
    feature_vector_type& psi
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
        psi.size() >= get_num_dimensions() &&
        obj.num_parts() == 0,
        "\t void scan_image_custom::get_feature_vector()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t psi.size(): " << psi.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t obj.num_parts(): " << obj.num_parts()
        << "\n\t this: " << this
        );

    const rectangle matched_rect = get_best_matching_rect(obj.get_rect());
    feats.get_feature_vector(matched_rect, psi);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMAGE_CuSTOM_Hh_

View File

@@ -0,0 +1,390 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_
#ifdef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_
#include <vector>
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing/full_object_detection_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class example_feature_extractor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object defines the interface a feature extractor must implement if it
is to be used with the scan_image_custom object defined at the bottom of
this file.
In this case, the purpose of a feature extractor is to associate a
complete feature vector with each rectangle in an image. In particular,
each rectangle is scored by taking the dot product between this feature
vector and a weight vector. If this score is greater than a threshold then
the rectangle is output as a detection.
!*/
public:
template <
typename image_type
>
void load (
const image_type& image,
std::vector<rectangle>& candidate_objects
);
/*!
ensures
- Loads the given image into this feature extractor. This means that
subsequent calls to get_feature_vector() will return the feature vector
corresponding to locations in the image given to load().
- #candidate_objects == a set of bounding boxes in the given image that
might contain objects of interest. These are the locations that will be
checked for the presents of objects when this feature extractor is used
with the scan_image_custom object.
!*/
void copy_configuration (
const feature_extractor& item
);
/*!
ensures
- Copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two
feature extractor objects S1 and S2, the following sequence of
instructions should always result in both of them having the exact same
state:
S2.copy_configuration(S1);
S1.load(img, temp);
S2.load(img, temp);
!*/
unsigned long get_num_dimensions (
) const;
/*!
ensures
- returns the dimensionality of the feature vectors output by this object.
!*/
void get_feature_vector (
const rectangle& obj,
matrix<double,0,1>& psi
) const;
/*!
requires
- psi.size() >= get_num_dimensions()
(i.e. psi must have preallocated its memory before this function is called)
ensures
- This function computes the feature vector associated with the given rectangle
in obj. This rectangle is interpreted as a bounding box within the last image
given to this->load() and a feature vector describing that bounding box is
output into psi.
- The feature vector is added into psi. That is, it does not overwrite the
previous contents of psi, but instead, it adds the vector to psi.
- The dimensionality of the vector added to psi is get_num_dimensions(). This
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
- #psi.size() == psi.size()
(i.e. this function does not change the size of the psi vector)
!*/
double compute_object_score (
const matrix<double,0,1>& w,
const rectangle& obj
) const;
/*!
requires
- w.size() >= get_num_dimensions()
ensures
- This function returns the dot product between the feature vector for
object box obj and the given w vector. That is, this function computes
the same number as the following code snippet:
matrix<double,0,1> psi(w.size());
psi = 0;
get_feature_vector(obj, psi);
return dot(psi, w);
The point of the compute_object_score() routine is to compute this dot
product in a much more efficient way than directly calling
get_feature_vector() and dot(). Therefore, compute_object_score() is an
optional function. If you can't think of a faster way to compute these
scores then do not implement compute_object_score() and the
scan_image_custom object will simply compute these scores for you.
However, it is often the case that there is something clever you can do
to make this computation faster. If that is the case, then you can
provide an implementation of this function with your feature extractor
and then scan_image_custom will use it instead of using the default
calculation method shown in the above code snippet.
!*/
};
// ----------------------------------------------------------------------------------------
void serialize(
const feature_extractor& item,
std::ostream& out
);
/*!
provides serialization support
!*/
void deserialize(
feature_extractor& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename Feature_extractor_type
>
class scan_image_custom : noncopyable
{
/*!
REQUIREMENTS ON Feature_extractor_type
- must be an object with an interface compatible with the
example_feature_extractor defined at the top of this file.
INITIAL VALUE
- is_loaded_with_image() == false
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a classifier over an image with the goal
of localizing each object present. The localization is in the form of the
bounding box around each object of interest.
Unlike the scan_image_pyramid and scan_image_boxes objects, this image
scanner delegates all the work of constructing the object feature vector to
its Feature_extractor_type template argument. That is, scan_image_custom
simply asks the supplied feature extractor what boxes in the image we
should investigate and then asks the feature extractor for the complete
feature vector for each box. That is, scan_image_custom does not apply any
kind of pyramiding or other higher level processing to the features coming
out of the feature extractor. That means that when you use
scan_image_custom it is completely up to you to define the feature vector
used with each image box.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be
protected by a mutex lock except for the case where you are copying the
configuration (via copy_configuration()) of a scan_image_custom object to
many other threads. In this case, it is safe to copy the configuration of
a shared object so long as no other operations are performed on it.
!*/
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Feature_extractor_type feature_extractor_type;
scan_image_custom (
);
/*!
ensures
- this object is properly initialized
!*/
template <
typename image_type
>
void load (
const image_type& img
);
/*!
requires
- image_type must be a type with the following properties:
- image_type objects can be loaded into Feature_extractor_type
objects via Feature_extractor_type::load().
ensures
- #is_loaded_with_image() == true
- Calls get_feature_extractor().load() on the given image. That is, we
will have loaded the image into the feature extractor in this
scan_image_custom object. We will also have stored the candidate
object locations generated by the feature extractor and will scan
over them when this->detect() is called.
- This object is ready to run a classifier over img to detect object
locations. Call detect() to do this.
!*/
bool is_loaded_with_image (
) const;
/*!
ensures
- returns true if this object has been loaded with an image to process and
false otherwise.
!*/
const feature_extractor_type& get_feature_extractor (
) const;
/*!
ensures
- returns a const reference to the feature_extractor_type object used
internally for local feature extraction.
!*/
void copy_configuration(
const feature_extractor_type& fe
);
/*!
ensures
- This function performs the equivalent of
get_feature_extractor().copy_configuration(fe) (i.e. this function allows
you to configure the parameters of the underlying feature extractor used
by a scan_image_custom object)
!*/
void copy_configuration (
const scan_image_custom& item
);
/*!
ensures
- Copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two
scan_image_custom objects S1 and S2, the following sequence of
instructions should always result in both of them having the exact same
state:
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
long get_num_dimensions (
) const;
/*!
ensures
- returns the number of dimensions in the feature vector for a candidate
object location. That is, this function returns get_feature_extractor().get_num_dimensions().
!*/
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
ensures
- Scans over all the candidate object locations produced by the feature
extractor during image loading and stores all detections into #dets.
- for all valid i:
- #dets[i].second == The candidate object location which produced this
detection. This rectangle gives the location of the detection.
- #dets[i].first == The score for this detection. This value is equal
to dot(w, feature vector for this candidate object location).
- #dets[i].first >= thresh
- #dets will be sorted in descending order.
(i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only
the first get_num_dimensions() are used.
- Note that no form of non-max suppression is performed. If a location
has a score >= thresh then it is reported in #dets.
!*/
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
/*!
requires
- obj.num_parts() == 0
- is_loaded_with_image() == true
- psi.size() >= get_num_dimensions()
(i.e. psi must have preallocated its memory before this function is called)
ensures
- This function allows you to determine the feature vector used for a
candidate object location output from detect(). Note that this vector is
added to psi. Note also that you must use get_full_object_detection() to
convert a rectangle from detect() into the needed full_object_detection.
- The dimensionality of the vector added to psi is get_num_dimensions(). This
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
- Since scan_image_custom only searches a limited set of object locations,
not all possible rectangles can be output by detect(). So in the case
where obj.get_rect() could not arise from a call to detect(), this
function will map obj.get_rect() to the nearest possible rectangle and
then add the feature vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
/*!
ensures
- returns full_object_detection(rect)
(This function is here only for compatibility with the scan_image_pyramid
object)
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
ensures
- Since scan_image_custom only searches a limited set of object locations,
not all possible rectangles can be represented. Therefore, this function
allows you to supply a rectangle and obtain the nearest possible
candidate object location rectangle.
!*/
unsigned long get_num_detection_templates (
) const { return 1; }
/*!
ensures
- returns 1. Note that this function is here only for compatibility with
the scan_image_pyramid object. Notionally, its return value indicates
that a scan_image_custom object is always ready to detect objects once an
image has been loaded.
!*/
unsigned long get_num_movable_components_per_detection_template (
) const { return 0; }
/*!
ensures
- returns 0. Note that this function is here only for compatibility with
the scan_image_pyramid object. Its return value means that this object
does not support using movable part models.
!*/
};
// ----------------------------------------------------------------------------------------
template <typename T>
void serialize (
const scan_image_custom<T>& item,
std::ostream& out
);
/*!
provides serialization support for scan_image_custom objects
!*/
template <typename T>
void deserialize (
scan_image_custom<T>& item,
std::istream& in
);
/*!
provides deserialization support for scan_image_custom objects
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,495 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_Hh_
#ifdef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_Hh_
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
#include "full_object_detection_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
class scan_image_pyramid : noncopyable
{
/*!
REQUIREMENTS ON Pyramid_type
- must be one of the pyramid_down objects defined in
dlib/image_transforms/image_pyramid_abstract.h or an object with
a compatible interface
REQUIREMENTS ON Feature_extractor_type
- must be an object with an interface compatible with the hashed_feature_image
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h or
with the nearest_neighbor_feature_image object defined in
dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h
INITIAL VALUE
- get_num_detection_templates() == 0
- is_loaded_with_image() == false
- get_max_detections_per_template() == 10000
- get_max_pyramid_levels() == 1000
- get_min_pyramid_layer_width() == 20
- get_min_pyramid_layer_height() == 20
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a sliding window classifier over
an image pyramid. This object can also be understood as a general
tool for implementing the spatial pyramid models described in the paper:
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
and Jean Ponce
It also includes the ability to represent movable part models.
The sliding window classifiers used by this object have three parts:
1. The underlying feature extraction provided by Feature_extractor_type
objects, which associate a vector with each location in an image.
2. A detection template. This is a rectangle which defines the shape of a
sliding window (i.e. the object_box), as well as a set of rectangular feature
extraction regions inside it. This set of regions defines the spatial
structure of the overall feature extraction within a sliding window. In
particular, each location of a sliding window has a feature vector
associated with it. This feature vector is defined as follows:
- Let N denote the number of feature extraction zones.
- Let M denote the dimensionality of the vectors output by Feature_extractor_type
objects.
- Let F(i) == the M dimensional vector which is the sum of all vectors
given by our Feature_extractor_type object inside the i-th feature extraction
zone.
- Then the feature vector for a sliding window is an M*N dimensional vector
[F(1) F(2) F(3) ... F(N)] (i.e. it is a concatenation of the N vectors).
This feature vector can be thought of as a collection of N "bags of features",
each bag coming from a spatial location determined by one of the rectangular
feature extraction zones.
3. A weight vector and a threshold value. The dot product between the weight
vector and the feature vector for a sliding window location gives the score
of the window. If this score is greater than the threshold value then the
window location is output as a detection.
Finally, the sliding window classifiers described above are applied to every level of
an image pyramid. Moreover, some of the feature extraction zones are allowed to move
freely within the object box. This means that when we are sliding the classifier over
an image, some feature extraction zones are stationary (i.e. always in the same place
relative to the object box) while others are allowed to move anywhere within the object
box. In particular, the movable regions are placed at the locations that maximize the
score of the classifier. Note further that each of the movable feature extraction
zones must pass a threshold test for it to be included. That is, if the score that a
movable zone would contribute to the overall score for a sliding window location is not
positive then that zone is not included in the feature vector (i.e. its part of the
feature vector is set to zero. This way the length of the feature vector stays
constant). This movable region construction allows us to represent objects with parts
that move around relative to the object box. For example, a human has hands but they
aren't always in the same place relative to a person's bounding box.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be protected
by a mutex lock except for the case where you are copying the configuration
(via copy_configuration()) of a scan_image_pyramid object to many other threads.
In this case, it is safe to copy the configuration of a shared object so long
as no other operations are performed on it.
!*/
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Pyramid_type pyramid_type;
typedef Feature_extractor_type feature_extractor_type;
scan_image_pyramid (
);
/*!
ensures
- this object is properly initialized
!*/
template <
typename image_type
>
void load (
const image_type& img
);
/*!
requires
- image_type must be a type with the following properties:
- image_type is default constructible.
- image_type is swappable by the global swap() function.
- image_type logically represents some kind of image and therefore its
number of rows and columns can be queried via num_rows(img) and
num_columns(img) respectively.
- image_type objects can be loaded into Feature_extractor_type
objects via Feature_extractor_type::load().
- image_type objects can be used with Pyramid_type. That is,
if pyr is an object of type Pyramid_type while img1 and img2
are objects of image_type, then pyr(img1,img2) should be
a valid expression which downsamples img1 into img2.
ensures
- #is_loaded_with_image() == true
- This object is ready to run sliding window classifiers over img. Call
detect() to do this.
!*/
bool is_loaded_with_image (
) const;
/*!
ensures
- returns true if this object has been loaded with an image to process
and false otherwise.
!*/
const feature_extractor_type& get_feature_extractor (
) const;
/*!
ensures
- returns a const reference to the feature_extractor_type object used
internally for local feature extraction.
!*/
void copy_configuration(
const feature_extractor_type& fe
);
/*!
ensures
- This function performs the equivalent of
get_feature_extractor().copy_configuration(fe) (i.e. this function allows
you to configure the parameters of the underlying feature extractor used
by a scan_image_pyramid object)
!*/
void copy_configuration (
const scan_image_pyramid& item
);
/*!
ensures
- copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two scan_image_pyramid
objects S1 and S2, the following sequence of instructions should always
result in both of them having the exact same state.
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
void add_detection_template (
const rectangle& object_box,
const std::vector<rectangle>& stationary_feature_extraction_regions,
const std::vector<rectangle>& movable_feature_extraction_regions
);
/*!
requires
- center(object_box) == point(0,0)
- for all valid i:
- center(movable_feature_extraction_regions[i]) == point(0,0)
- if (get_num_detection_templates() > 0) then
- get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size()
- get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size()
(i.e. if you already have detection templates in this object, then
any new detection template must declare a consistent number of
feature extraction regions)
ensures
- Adds another detection template to this object. In particular, object_box
defines the size and shape of a sliding window while stationary_feature_extraction_regions
and movable_feature_extraction_regions defines the locations for feature extraction as
discussed in the WHAT THIS OBJECT REPRESENTS section above. Note also that the locations of
the stationary feature extraction regions are relative to the object_box.
- #get_num_detection_templates() == get_num_detection_templates() + 1
- The order of rectangles in stationary_feature_extraction_regions and
movable_feature_extraction_regions matters. Recall that each rectangle
gets its own set of features. So given two different templates, their
i-th rectangles will both share the same part of the weight vector (i.e. the w
supplied to detect()). So there should be some reasonable correspondence
between the rectangle ordering in different detection templates. For
example, different detection templates should place corresponding feature
extraction regions in roughly the same part of the object_box.
- #get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size()
- #get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size()
!*/
void add_detection_template (
const rectangle& object_box,
const std::vector<rectangle>& stationary_feature_extraction_regions
);
/*!
ensures
- calls add_detection_template(object_box, stationary_feature_extraction_regions, empty_list)
where empty_list is a vector of size 0. I.e. this function is just a convenience
routine for adding detection templates with no movable regions.
!*/
unsigned long get_num_detection_templates (
) const;
/*!
ensures
- returns the number of detection templates in this object
!*/
unsigned long get_num_stationary_components_per_detection_template (
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- A detection template is a rectangle which defines the shape of a sliding
window (the object_box), as well as a set of rectangles which define
feature extraction zones. This function returns the number of stationary
feature extraction zones in the detection templates used by this object.
!*/
unsigned long get_num_movable_components_per_detection_template (
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- A detection template is a rectangle which defines the shape of a sliding
window (the object_box), as well as a set of rectangles which define
feature extraction zones. This function returns the number of movable
feature extraction zones in the detection templates used by this object.
!*/
unsigned long get_num_components_per_detection_template (
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- returns the total number of feature extraction zones in the detection
templates used by this object. That is, returns the following:
- get_num_movable_components_per_detection_template() +
get_num_stationary_components_per_detection_template()
!*/
long get_num_dimensions (
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- returns the number of dimensions in the feature vector for a sliding window
location. This value is the dimensionality of the underlying feature vectors
produced by Feature_extractor_type times (get_num_stationary_components_per_detection_template() +
get_num_movable_components_per_detection_template()).
!*/
unsigned long get_max_pyramid_levels (
) const;
/*!
ensures
- returns the maximum number of image pyramid levels this object will use.
Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
will be used at all. That is, only the original image will be processed
and no lower scale versions will be created.
!*/
void set_max_pyramid_levels (
unsigned long max_levels
);
/*!
requires
- max_levels > 0
ensures
- #get_max_pyramid_levels() == max_levels
!*/
void set_min_pyramid_layer_size (
unsigned long width,
unsigned long height
);
/*!
requires
- width > 0
- height > 0
ensures
- #get_min_pyramid_layer_width() == width
- #get_min_pyramid_layer_height() == height
!*/
inline unsigned long get_min_pyramid_layer_width (
) const;
/*!
ensures
- returns the smallest allowable width of an image in the image pyramid.
All pyramids will always include the original input image, however, no
pyramid levels will be created which have a width smaller than the
value returned by this function.
!*/
inline unsigned long get_min_pyramid_layer_height (
) const;
/*!
ensures
- returns the smallest allowable height of an image in the image pyramid.
All pyramids will always include the original input image, however, no
pyramid levels will be created which have a height smaller than the
value returned by this function.
!*/
unsigned long get_max_detections_per_template (
) const;
/*!
ensures
- For each image pyramid layer and detection template, this object scans a sliding
window classifier over an image and produces a number of detections. This
function returns a number which defines a hard upper limit on the number of
detections allowed by a single scan. This means that the total number of
possible detections produced by detect() is get_max_detections_per_template()*
get_num_detection_templates()*(number of image pyramid layers). Additionally,
if the maximum number of detections is reached during a scan then this object
will return a random subsample of all detections which are above the detection
threshold.
!*/
void set_max_detections_per_template (
unsigned long max_dets
);
/*!
requires
- max_dets > 0
ensures
- #get_max_detections_per_template() == max_dets
!*/
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
ensures
- Scans all the detection templates over all pyramid layers as discussed in the
WHAT THIS OBJECT REPRESENTS section and stores all detections into #dets.
- for all valid i:
- #dets[i].second == The object box which produced this detection. This rectangle gives
the location of the detection. Note that the rectangle will have been converted back into
the original image input space. That is, if this detection was made at a low level in the
image pyramid then the object box will have been automatically mapped up the pyramid layers
to the original image space. Or in other words, if you plot #dets[i].second on top of the
image given to load() it will show up in the right place.
- #dets[i].first == The score for this detection. This value is equal to dot(w, feature vector
for this sliding window location).
- #dets[i].first >= thresh
- #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only the first
get_num_dimensions() are used.
- Note that no form of non-max suppression is performed. If a window has a score >= thresh
then it is reported in #dets (assuming the limit imposed by get_max_detections_per_template() hasn't
been reached).
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles
can be represented. Therefore, this function allows you to supply a rectangle and obtain the
nearest possible sliding window rectangle.
!*/
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
/*!
requires
- all_parts_in_rect(obj) == true
- obj.num_parts() == get_num_movable_components_per_detection_template()
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
- psi.size() >= get_num_dimensions()
(i.e. psi must have preallocated its memory before this function is called)
ensures
- This function allows you to determine the feature vector used for a
sliding window location. Note that this vector is added to psi. Note
also that you must use get_full_object_detection() to convert a rect from
detect() into the needed full_object_detection.
- The dimensionality of the vector added to psi is get_num_dimensions(). This
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
- Since scan_image_pyramid is a sliding window classifier system, not all
possible rectangles can be output by detect(). So in the case where
obj.get_rect() could not arise from a call to detect(), this function
will map obj.get_rect() to the nearest possible object box and then add
the feature vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
ensures
- This function allows you to determine the full_object_detection
corresponding to a sliding window location. Note that the detect()
routine doesn't return the locations of the movable parts in a detected
object. Therefore, if you are using any movable parts in your model you
must use get_full_object_detection() to find out where the movable parts
were detected. To do this, you supply the w and detected rectangle.
Then the corresponding fully populated full_object_detection will be
returned.
- returns a full_object_detection, OBJ, such that:
- OBJ.get_rect() == rect
- OBJ.num_parts() == get_num_movable_components_per_detection_template()
- OBJ.part(i) == the location of the i-th movable part inside this detection,
or OBJECT_PART_NOT_PRESENT if the part was not found.
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void serialize (
const scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
std::ostream& out
);
/*!
provides serialization support for scan_image_pyramid objects
!*/
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void deserialize (
scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
std::istream& in
);
/*!
provides deserialization support for scan_image_pyramid objects
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_Hh_

View File

@@ -0,0 +1,180 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMaGE_PYRAMID_TOOLS_Hh_
#define DLIB_SCAN_IMaGE_PYRAMID_TOOLS_Hh_
#include "scan_image_pyramid_tools_abstract.h"
#include "../statistics.h"
#include <list>
#include "../geometry.h"
#include <iostream>
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
    // Ordering predicate for (key, rectangle) pairs: compares only the
    // unsigned long key stored in .first and ignores the rectangle.
    inline bool compare_first (
        const std::pair<unsigned long,rectangle>& a,
        const std::pair<unsigned long,rectangle>& b
    )
    {
        const unsigned long lhs_key = a.first;
        const unsigned long rhs_key = b.first;
        return lhs_key < rhs_key;
    }
}
template <typename image_scanner_type>
std::vector<rectangle> determine_object_boxes (
const image_scanner_type& scanner,
const std::vector<rectangle>& rects,
double min_match_score
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < min_match_score && min_match_score <= 1,
"\t std::vector<rectangle> determine_object_boxes()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t min_match_score: " << min_match_score
);
typename image_scanner_type::pyramid_type pyr;
typedef std::list<std::pair<unsigned long, rectangle> > list_type;
unsigned long max_area = 0;
// Copy rects into sorted_rects and sort them in order of increasing area. But
// only include the rectangles that aren't already obtainable by the scanner.
list_type sorted_rects;
for (unsigned long i = 0; i < rects.size(); ++i)
{
if (scanner.get_num_detection_templates() > 0)
{
rectangle temp = scanner.get_best_matching_rect(rects[i]);
const double match_score = (rects[i].intersect(temp).area())/(double)(rects[i] + temp).area();
// skip this rectangle if it's already matched well enough.
if (match_score > min_match_score)
continue;
}
max_area = std::max(rects[i].area(), max_area);
sorted_rects.push_back(std::make_pair(rects[i].area(), rects[i]));
}
sorted_rects.sort(dlib::impl::compare_first);
// Make sure this area value is comfortably larger than all the
// rectangles' areas.
max_area = 3*max_area + 100;
std::vector<rectangle> object_boxes;
while (sorted_rects.size() != 0)
{
rectangle cur = sorted_rects.front().second;
sorted_rects.pop_front();
object_boxes.push_back(centered_rect(point(0,0), cur.width(), cur.height()));
// Scale cur up the image pyramid and remove any rectangles which match.
// But also stop when cur gets large enough to not match anything.
for (unsigned long itr = 0;
itr < scanner.get_max_pyramid_levels() && cur.area() < max_area;
++itr)
{
list_type::iterator i = sorted_rects.begin();
while (i != sorted_rects.end())
{
const rectangle temp = move_rect(i->second, cur.tl_corner());
const double match_score = (cur.intersect(temp).area())/(double)(cur + temp).area();
if (match_score > min_match_score)
{
i = sorted_rects.erase(i);
}
else
{
++i;
}
}
cur = pyr.rect_up(cur);
}
}
return object_boxes;
}
// ----------------------------------------------------------------------------------------
template <typename image_scanner_type>
std::vector<rectangle> determine_object_boxes (
const image_scanner_type& scanner,
const std::vector<std::vector<rectangle> >& rects,
double min_match_score
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < min_match_score && min_match_score <= 1,
"\t std::vector<rectangle> determine_object_boxes()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t min_match_score: " << min_match_score
);
std::vector<rectangle> temp;
for (unsigned long i = 0; i < rects.size(); ++i)
{
for (unsigned long j = 0; j < rects[i].size(); ++j)
{
temp.push_back(rects[i][j]);
}
}
return determine_object_boxes(scanner, temp, min_match_score);
}
// ----------------------------------------------------------------------------------------
template <typename image_scanner_type>
void setup_grid_detection_templates (
image_scanner_type& scanner,
const std::vector<std::vector<rectangle> >& rects,
unsigned int cells_x,
unsigned int cells_y,
double min_match_score = 0.75
)
{
const std::vector<rectangle>& object_boxes = determine_object_boxes(scanner, rects, min_match_score);
for (unsigned long i = 0; i < object_boxes.size(); ++i)
{
scanner.add_detection_template(object_boxes[i], create_grid_detection_template(object_boxes[i], cells_x, cells_y));
}
}
// ----------------------------------------------------------------------------------------
template <typename image_scanner_type>
void setup_grid_detection_templates_verbose (
image_scanner_type& scanner,
const std::vector<std::vector<rectangle> >& rects,
unsigned int cells_x,
unsigned int cells_y,
double min_match_score = 0.75
)
{
const std::vector<rectangle>& object_boxes = determine_object_boxes(scanner, rects, min_match_score);
std::cout << "number of detection templates: "<< object_boxes.size() << std::endl;
for (unsigned long i = 0; i < object_boxes.size(); ++i)
{
std::cout << " object box " << i << ": width: " << object_boxes[i].width()
<< " height: "<< object_boxes[i].height() << std::endl;
scanner.add_detection_template(object_boxes[i], create_grid_detection_template(object_boxes[i], cells_x, cells_y));
}
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_TOOLS_Hh_

View File

@@ -0,0 +1,118 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMaGE_PYRAMID_TOOLS_ABSTRACT_Hh_
#ifdef DLIB_SCAN_IMaGE_PYRAMID_TOOLS_ABSTRACT_Hh_
#include "scan_image_pyramid_abstract.h"
#include <vector>
#include "../geometry.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename image_scanner_type
>
std::vector<rectangle> determine_object_boxes (
const image_scanner_type& scanner,
const std::vector<rectangle>& rects,
double min_match_score
);
/*!
requires
- 0 < min_match_score <= 1
- image_scanner_type == an implementation of the scan_image_pyramid
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
ensures
- returns a set of object boxes which, when used as detection templates with
the given scanner, can attain at least min_match_score alignment with every
element of rects. Note that the alignment between two rectangles A and B is
defined as:
(A.intersect(B).area())/(double)(A+B).area()
- Only elements of rects which are not already well matched by the scanner are
considered. That is, if the scanner already has some detection templates in
it then the contents of rects will be checked against those detection
templates and elements with a match better than min_match_score are ignored.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_scanner_type
>
std::vector<rectangle> determine_object_boxes (
const image_scanner_type& scanner,
const std::vector<std::vector<rectangle> >& rects,
double min_match_score
);
/*!
requires
- 0 < min_match_score <= 1
- image_scanner_type == an implementation of the scan_image_pyramid
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
ensures
- copies all rectangles in rects into a std::vector<rectangle> object, call it
R. Then this function returns determine_object_boxes(scanner,R,min_match_score).
That is, it just calls the version of determine_object_boxes() defined above
and returns the results.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_scanner_type
>
void setup_grid_detection_templates (
image_scanner_type& scanner,
const std::vector<std::vector<rectangle> >& rects,
unsigned int cells_x,
unsigned int cells_y,
double min_match_score = 0.75
);
/*!
requires
- cells_x > 0
- cells_y > 0
- 0 < min_match_score <= 1
- image_scanner_type == an implementation of the scan_image_pyramid
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
ensures
- uses determine_object_boxes(scanner,rects,min_match_score) to obtain a set of
object boxes and then adds them to the given scanner object as detection templates.
Also uses create_grid_detection_template(object_box, cells_x, cells_y) to create
each feature extraction region. Therefore, the detection templates will extract
features from a regular grid inside each object box.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_scanner_type
>
void setup_grid_detection_templates_verbose (
image_scanner_type& scanner,
const std::vector<std::vector<rectangle> >& rects,
unsigned int cells_x,
unsigned int cells_y,
double min_match_score = 0.75
);
/*!
requires
- cells_x > 0
- cells_y > 0
- 0 < min_match_score <= 1
- image_scanner_type == an implementation of the scan_image_pyramid
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
ensures
- this function is identical to setup_grid_detection_templates() except
that it also outputs the selected detection templates to standard out.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_TOOLS_ABSTRACT_Hh_

View File

@@ -0,0 +1,219 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SETUP_HAShED_FEATURES_Hh_
#define DLIB_SETUP_HAShED_FEATURES_Hh_
#include "setup_hashed_features_abstract.h"
#include "scan_image_pyramid.h"
#include "scan_image_boxes.h"
#include "../lsh.h"
#include "../statistics.h"
#include "../image_keypoint.h"
#include "../geometry.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class image_hash_construction_failure : public error
{
public:
image_hash_construction_failure(
const std::string& a
): error(a) {}
};
// ----------------------------------------------------------------------------------------
template <typename image_scanner>
void use_uniform_feature_weights (
    image_scanner& scanner
)
{
    // Work on a fresh extractor that mirrors the scanner's current
    // configuration, switch it to uniform feature weighting, and then push the
    // modified configuration back into the scanner.
    typedef typename image_scanner::feature_extractor_type extractor_type;

    extractor_type updated;
    updated.copy_configuration(scanner.get_feature_extractor());
    updated.use_uniform_feature_weights();
    scanner.copy_configuration(updated);
}
// ----------------------------------------------------------------------------------------
template <typename image_scanner>
void use_relative_feature_weights (
    image_scanner& scanner
)
{
    // Work on a fresh extractor that mirrors the scanner's current
    // configuration, switch it to relative feature weighting, and then push the
    // modified configuration back into the scanner.
    typedef typename image_scanner::feature_extractor_type extractor_type;

    extractor_type updated;
    updated.copy_configuration(scanner.get_feature_extractor());
    updated.use_relative_feature_weights();
    scanner.copy_configuration(updated);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// stuff for scan_image_pyramid
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
    typename image_array,
    typename pyramid,
    typename feature_extractor,
    template <typename fe, typename hash> class feature_image
    >
void setup_hashed_features (
    scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
    const image_array& images,
    const feature_extractor& fe,
    int bits,
    unsigned long num_samples = 200000
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(0 < bits && bits <= 32 &&
                num_samples > 1 &&
                images.size() > 0,
        "\t void setup_hashed_features()"
        << "\n\t Invalid inputs were given to this function. "
        << "\n\t bits: " << bits
        << "\n\t num_samples: " << num_samples
        << "\n\t images.size(): " << images.size()
        );

    typedef typename feature_extractor::descriptor_type descriptor_type;
    typedef feature_image<feature_extractor, projection_hash> hashing_extractor_type;

    // Draw a random sample of feature descriptors from the images using the
    // scanner's pyramid type.
    pyramid pyr;
    const random_subset_selector<descriptor_type>& samples =
        randomly_sample_image_features(images, pyr, fe, num_samples);

    // At least two samples are required to build the hash.
    if (samples.size() < 2)
        throw dlib::image_hash_construction_failure("Images too small, not able to gather enough samples to make hash");

    projection_hash phash = create_random_projection_hash(samples, bits);

    // Start from the scanner's current extractor configuration, install the
    // new hash, apply fe's configuration, and hand the result back to the
    // scanner.  The order of these calls is deliberate and must be kept.
    hashing_extractor_type configured;
    configured.copy_configuration(scanner.get_feature_extractor());
    configured.set_hash(phash);
    configured.copy_configuration(fe);
    scanner.copy_configuration(configured);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array,
    typename pyramid,
    typename feature_extractor,
    template <typename fe, typename hash> class feature_image
    >
void setup_hashed_features (
    scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
    const image_array& images,
    int bits,
    unsigned long num_samples = 200000
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(0 < bits && bits <= 32 &&
                num_samples > 1 &&
                images.size() > 0,
        "\t void setup_hashed_features()"
        << "\n\t Invalid inputs were given to this function. "
        << "\n\t bits: " << bits
        << "\n\t num_samples: " << num_samples
        << "\n\t images.size(): " << images.size()
        );

    // Convenience overload: forwards to the version above that takes an
    // explicit feature extractor, supplying a default-constructed one.
    feature_extractor default_extractor;
    setup_hashed_features(scanner, images, default_extractor, bits, num_samples);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// stuff for scan_image_boxes
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
    typename image_array,
    typename feature_extractor,
    template <typename fe, typename hash> class feature_image,
    typename box_generator
    >
void setup_hashed_features (
    scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator >& scanner,
    const image_array& images,
    const feature_extractor& fe,
    int bits,
    unsigned long num_samples = 200000
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(0 < bits && bits <= 32 &&
                num_samples > 1 &&
                images.size() > 0,
        "\t void setup_hashed_features()"
        << "\n\t Invalid inputs were given to this function. "
        << "\n\t bits: " << bits
        << "\n\t num_samples: " << num_samples
        << "\n\t images.size(): " << images.size()
        );

    typedef typename feature_extractor::descriptor_type descriptor_type;
    typedef feature_image<feature_extractor, projection_hash> hashing_extractor_type;

    // Draw a random sample of feature descriptors from the images.  This
    // overload samples with pyramid_disable rather than a scanner-supplied
    // pyramid type.
    pyramid_disable pyr;
    const random_subset_selector<descriptor_type>& samples =
        randomly_sample_image_features(images, pyr, fe, num_samples);

    // At least two samples are required to build the hash.
    if (samples.size() < 2)
        throw dlib::image_hash_construction_failure("Images too small, not able to gather enough samples to make hash");

    projection_hash phash = create_random_projection_hash(samples, bits);

    // Start from the scanner's current extractor configuration, install the
    // new hash, apply fe's configuration, and hand the result back to the
    // scanner.  The order of these calls is deliberate and must be kept.
    hashing_extractor_type configured;
    configured.copy_configuration(scanner.get_feature_extractor());
    configured.set_hash(phash);
    configured.copy_configuration(fe);
    scanner.copy_configuration(configured);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array,
    typename feature_extractor,
    template <typename fe, typename hash> class feature_image,
    typename box_generator
    >
void setup_hashed_features (
    scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator>& scanner,
    const image_array& images,
    int bits,
    unsigned long num_samples = 200000
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(0 < bits && bits <= 32 &&
                num_samples > 1 &&
                images.size() > 0,
        "\t void setup_hashed_features()"
        << "\n\t Invalid inputs were given to this function. "
        << "\n\t bits: " << bits
        << "\n\t num_samples: " << num_samples
        << "\n\t images.size(): " << images.size()
        );

    // Convenience overload: forwards to the scan_image_boxes version above
    // that takes an explicit feature extractor, supplying a
    // default-constructed one.
    feature_extractor default_extractor;
    setup_hashed_features(scanner, images, default_extractor, bits, num_samples);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SETUP_HAShED_FEATURES_Hh_

View File

@@ -0,0 +1,210 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SETUP_HAShED_FEATURES_ABSTRACT_Hh_
#ifdef DLIB_SETUP_HAShED_FEATURES_ABSTRACT_Hh_
#include "scan_image_pyramid_abstract.h"
#include "scan_image_boxes_abstract.h"
#include "../lsh/projection_hash_abstract.h"
#include "../image_keypoint/hashed_feature_image_abstract.h"
#include "../image_keypoint/binned_vector_feature_image_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class image_hash_construction_failure : public error
{
/*!
WHAT THIS OBJECT REPRESENTS
This is the exception object used by the routines in this file.
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename image_scanner
>
void use_uniform_feature_weights (
image_scanner& scanner
);
/*!
requires
- image_scanner should be either scan_image_pyramid or scan_image_boxes and
should use the hashed_feature_image as its local feature extractor.
ensures
- #scanner.get_feature_extractor().uses_uniform_feature_weights() == true
(i.e. Make the scanner's feature extractor use the uniform feature weighting
scheme)
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_scanner
>
void use_relative_feature_weights (
image_scanner& scanner
);
/*!
requires
- image_scanner should be either scan_image_pyramid or scan_image_boxes and
should use the hashed_feature_image as its local feature extractor.
ensures
- #scanner.get_feature_extractor().uses_uniform_feature_weights() == false
(i.e. Make the scanner's feature extractor use the relative feature weighting
scheme)
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array,
typename pyramid,
typename feature_extractor,
template <typename fe, typename hash> class feature_image
>
void setup_hashed_features (
scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
const image_array& images,
const feature_extractor& fe,
int bits,
unsigned long num_samples = 200000
);
/*!
requires
- 0 < bits <= 32
- num_samples > 1
- images.size() > 0
- it must be valid to pass images[0] into scanner.load().
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
- feature_image == must be either hashed_feature_image, binned_vector_feature_image,
or a type with a compatible interface.
ensures
- Creates a projection_hash suitable for hashing the feature vectors produced by
fe and then configures scanner to use this hash function.
- The hash function will map vectors into integers in the range [0, pow(2,bits))
- The hash function will be setup so that it hashes a random sample of num_samples
vectors from fe such that each bin ends up with roughly the same number of
elements in it.
throws
- image_hash_construction_failure
This exception is thrown if there is a problem creating the projection_hash.
This should only happen if the images are so small they contain fewer than 2
feature vectors.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array,
typename pyramid,
typename feature_extractor,
template <typename fe, typename hash> class feature_image
>
void setup_hashed_features (
scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
const image_array& images,
int bits,
unsigned long num_samples = 200000
);
/*!
requires
- 0 < bits <= 32
- num_samples > 1
- images.size() > 0
- it must be valid to pass images[0] into scanner.load().
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
- feature_image == must be either hashed_feature_image, binned_vector_feature_image,
or a type with a compatible interface.
ensures
- performs: setup_hashed_features(scanner, images, feature_extractor(), bits, num_samples)
throws
- image_hash_construction_failure
This exception is thrown if there is a problem creating the projection_hash.
This should only happen if the images are so small they contain fewer than 2
feature vectors.
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename image_array,
typename feature_extractor,
template <typename fe, typename hash> class feature_image,
typename box_generator
>
void setup_hashed_features (
scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator>& scanner,
const image_array& images,
const feature_extractor& fe,
int bits,
unsigned long num_samples = 200000
);
/*!
requires
- 0 < bits <= 32
- num_samples > 1
- images.size() > 0
- it must be valid to pass images[0] into scanner.load().
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
- feature_image == must be either hashed_feature_image, binned_vector_feature_image,
or a type with a compatible interface.
ensures
- Creates a projection_hash suitable for hashing the feature vectors produced by
fe and then configures scanner to use this hash function.
- The hash function will map vectors into integers in the range [0, pow(2,bits))
- The hash function will be setup so that it hashes a random sample of num_samples
vectors from fe such that each bin ends up with roughly the same number of
elements in it.
throws
- image_hash_construction_failure
This exception is thrown if there is a problem creating the projection_hash.
This should only happen if the images are so small they contain fewer than 2
feature vectors.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array,
typename feature_extractor,
template <typename fe, typename hash> class feature_image,
typename box_generator
>
void setup_hashed_features (
scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator>& scanner,
const image_array& images,
int bits,
unsigned long num_samples = 200000
);
/*!
requires
- 0 < bits <= 32
- num_samples > 1
- images.size() > 0
- it must be valid to pass images[0] into scanner.load().
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
- feature_image == must be either hashed_feature_image, binned_vector_feature_image,
or a type with a compatible interface.
ensures
- performs: setup_hashed_features(scanner, images, feature_extractor(), bits, num_samples)
throws
- image_hash_construction_failure
This exception is thrown if there is a problem creating the projection_hash.
This should only happen if the images are so small they contain fewer than 2
feature vectors.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SETUP_HAShED_FEATURES_ABSTRACT_Hh_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,442 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
#include "full_object_detection_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../pixel.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class shape_predictor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool that takes in an image region containing some object
and outputs a set of point locations that define the pose of the object.
The classic example of this is human face pose prediction, where you take
an image of a human face as input and are expected to identify the
locations of important facial landmarks such as the corners of the mouth
and eyes, tip of the nose, and so forth.
To create useful instantiations of this object you need to use the
shape_predictor_trainer object defined below to train a shape_predictor
using a set of training images, each annotated with shapes you want to
predict.
!*/
public:
shape_predictor (
);
/*!
ensures
- #num_parts() == 0
!*/
unsigned long num_parts (
) const;
/*!
ensures
- returns the number of parts in the shapes predicted by this object.
!*/
template <typename image_type>
full_object_detection operator()(
const image_type& img,
const rectangle& rect
) const;
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
ensures
- Runs the shape prediction algorithm on the part of the image contained in
the given bounding rectangle. So it will try and fit the shape model to
the contents of the given rectangle in the image. For example, if there
is a human face inside the rectangle and you use a face landmarking shape
model then this function will return the locations of the face landmarks
as the parts. So the return value is a full_object_detection DET such
that:
- DET.get_rect() == rect
- DET.num_parts() == num_parts()
- for all valid i:
- DET.part(i) == the location in img for the i-th part of the shape
predicted by this object.
!*/
};
void serialize (const shape_predictor& item, std::ostream& out);
void deserialize (shape_predictor& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
class shape_predictor_trainer
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool for training shape_predictors based on annotated training
images. Its implementation uses the algorithm described in:
One Millisecond Face Alignment with an Ensemble of Regression Trees
by Vahid Kazemi and Josephine Sullivan, CVPR 2014
!*/
public:
shape_predictor_trainer (
);
/*!
ensures
- #get_cascade_depth() == 10
- #get_tree_depth() == 4
- #get_num_trees_per_cascade_level() == 500
- #get_nu() == 0.1
- #get_oversampling_amount() == 20
- #get_feature_pool_size() == 400
- #get_lambda() == 0.1
- #get_num_test_splits() == 20
- #get_feature_pool_region_padding() == 0
- #get_random_seed() == ""
- This object will not be verbose
!*/
unsigned long get_cascade_depth (
) const;
/*!
ensures
- returns the number of cascades created when you train a model. This
means that the total number of trees in the learned model is equal to
get_cascade_depth()*get_num_trees_per_cascade_level().
!*/
void set_cascade_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_cascade_depth() == depth
!*/
unsigned long get_tree_depth (
) const;
/*!
ensures
- returns the depth of the trees used in the cascade. In particular, there
are pow(2,get_tree_depth()) leaves in each tree.
!*/
void set_tree_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_tree_depth() == depth
!*/
unsigned long get_num_trees_per_cascade_level (
) const;
/*!
ensures
- returns the number of trees created for each cascade. This means that
the total number of trees in the learned model is equal to
get_cascade_depth()*get_num_trees_per_cascade_level().
!*/
void set_num_trees_per_cascade_level (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_trees_per_cascade_level() == num
!*/
double get_nu (
) const;
/*!
ensures
- returns the regularization parameter. Larger values of this parameter
will cause the algorithm to fit the training data better but may also
cause overfitting.
!*/
void set_nu (
double nu
);
/*!
requires
- nu > 0
ensures
- #get_nu() == nu
!*/
std::string get_random_seed (
) const;
/*!
ensures
- returns the random seed used by the internal random number generator.
Since this algorithm is a random forest style algorithm it relies on a
random number generator for generating the trees. So each setting of the
random seed will produce slightly different outputs.
!*/
void set_random_seed (
const std::string& seed
);
/*!
ensures
- #get_random_seed() == seed
!*/
unsigned long get_oversampling_amount (
) const;
/*!
ensures
- You give annotated images to this object as training examples. You
can effectively increase the amount of training data by adding in each
training example multiple times but with a randomly selected deformation
applied to it. That is what this parameter controls. That is, if you
supply N training samples to train() then the algorithm runs internally
with N*get_oversampling_amount() training samples. So the bigger this
parameter the better (excepting that larger values make training take
longer). In terms of the Kazemi paper, this parameter is the number of
randomly selected initial starting points sampled for each training
example.
!*/
void set_oversampling_amount (
unsigned long amount
);
/*!
requires
- amount > 0
ensures
- #get_oversampling_amount() == amount
!*/
unsigned long get_feature_pool_size (
) const;
/*!
ensures
- At each level of the cascade we randomly sample get_feature_pool_size()
pixels from the image. These pixels are used to generate features for
the random trees. So in general larger settings of this parameter give
better accuracy but make the algorithm run slower.
!*/
void set_feature_pool_size (
unsigned long size
);
/*!
requires
- size > 1
ensures
- #get_feature_pool_size() == size
!*/
double get_feature_pool_region_padding (
) const;
/*!
ensures
- When we randomly sample the pixels for the feature pool we do so in a box
fit around the provided training landmarks. By default, this box is the
tightest box that contains the landmarks (i.e. this is what happens when
get_feature_pool_region_padding()==0). However, you can expand or shrink
the size of the pixel sampling region by setting a different value of
get_feature_pool_region_padding().
To explain this precisely, for a padding of 0 we say that the pixels are
sampled from a box of size 1x1. The padding value is added to each side
of the box. So a padding of 0.5 would cause the algorithm to sample
pixels from a box that was 2x2, effectively multiplying the area pixels
are sampled from by 4. Similarly, setting the padding to -0.2 would
cause it to sample from a box 0.8x0.8 in size.
!*/
void set_feature_pool_region_padding (
double padding
);
/*!
ensures
- #get_feature_pool_region_padding() == padding
!*/
double get_lambda (
) const;
/*!
ensures
- To decide how to split nodes in the regression trees the algorithm looks
at pairs of pixels in the image. These pixel pairs are sampled randomly
but with a preference for selecting pixels that are near each other.
get_lambda() controls this "nearness" preference. In particular, smaller
values of get_lambda() will make the algorithm prefer to select pixels
close together and larger values of get_lambda() will make it care less
about picking nearby pixel pairs.
Note that this is the inverse of how it is defined in the Kazemi paper.
For this object, you should think of lambda as "the fraction of the
bounding box we will traverse to find a neighboring pixel". Nominally,
this is normalized between 0 and 1. So reasonable settings of lambda are
values in the range 0 < lambda < 1.
!*/
void set_lambda (
double lambda
);
/*!
requires
- lambda > 0
ensures
- #get_lambda() == lambda
!*/
unsigned long get_num_test_splits (
) const;
/*!
ensures
- When generating the random trees we randomly sample get_num_test_splits()
possible split features at each node and pick the one that gives the best
split. Larger values of this parameter will usually give more accurate
outputs but take longer to train.
!*/
void set_num_test_splits (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_test_splits() == num
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- This object will not print anything to standard out
!*/
template <typename image_array>
shape_predictor train (
const image_array& images,
const std::vector<std::vector<full_object_detection> >& objects
) const;
/*!
requires
- image_array is a dlib::array of image objects where each image object
implements the interface defined in dlib/image_processing/generic_image.h
- images.size() == objects.size()
- images.size() > 0
- for some i: objects[i].size() != 0
(i.e. there has to be at least one full_object_detection in the training set)
- for all valid i,j,k,l:
- objects[i][j].num_parts() == objects[k][l].num_parts()
(i.e. all objects must agree on the number of parts)
- objects[i][j].num_parts() > 0
ensures
- This object will try to learn to predict the locations of an object's parts
based on the object bounding box (i.e. full_object_detection::get_rect())
and the image pixels in that box. That is, we will try to learn a
shape_predictor, SP, such that:
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
This learned SP object is then returned.
!*/
};
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename image_array
>
double test_shape_predictor (
const shape_predictor& sp,
const image_array& images,
const std::vector<std::vector<full_object_detection> >& objects,
const std::vector<std::vector<double> >& scales
);
/*!
requires
- image_array is a dlib::array of image objects where each image object
implements the interface defined in dlib/image_processing/generic_image.h
- images.size() == objects.size()
- for all valid i and j:
- objects[i][j].num_parts() == sp.num_parts()
- if (scales.size() != 0) then
- There must be a scale value for each full_object_detection in objects.
That is, it must be the case that:
- scales.size() == objects.size()
- for all valid i:
- scales[i].size() == objects[i].size()
ensures
- Tests the given shape_predictor by running it on each of the given objects and
checking how well it recovers the part positions. In particular, for all
valid i and j we perform:
sp(images[i], objects[i][j].get_rect())
and compare the result with the truth part positions in objects[i][j]. We
then return the average distance (measured in pixels) between a predicted
part location and its true position.
- if (scales.size() != 0) then
- Each time we compute the distance between a predicted part location and
its true location in objects[i][j] we divide the distance by
scales[i][j]. Therefore, if you want the reported error to be the
average pixel distance then give an empty scales vector, but if you want
the returned value to be something else like the average distance
normalized by some feature of each object (e.g. the interocular distance)
then you can supply those normalizing values via scales.
!*/
template <
typename image_array
>
double test_shape_predictor (
const shape_predictor& sp,
const image_array& images,
const std::vector<std::vector<full_object_detection> >& objects
);
/*!
requires
- image_array is a dlib::array of image objects where each image object
implements the interface defined in dlib/image_processing/generic_image.h
- images.size() == objects.size()
- for all valid i and j:
- objects[i][j].num_parts() == sp.num_parts()
ensures
- returns test_shape_predictor(sp, images, objects, no_scales) where no_scales
is an empty vector. So this is just a convenience function for calling the
above test_shape_predictor() routine without a scales argument.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_