mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2026-05-17 20:58:01 +00:00
Master commit of OpenFace.
This commit is contained in:
168
lib/3rdParty/dlib/include/dlib/image_processing/box_overlap_testing.h
vendored
Normal file
168
lib/3rdParty/dlib/include/dlib/image_processing/box_overlap_testing.h
vendored
Normal file
@@ -0,0 +1,168 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_BOX_OVERlAP_TESTING_Hh_
|
||||
#define DLIB_BOX_OVERlAP_TESTING_Hh_
|
||||
|
||||
#include "box_overlap_testing_abstract.h"
|
||||
#include "../geometry.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class test_box_overlap
|
||||
{
|
||||
public:
|
||||
test_box_overlap (
|
||||
) : match_thresh(0.5), overlap_thresh(1.0)
|
||||
{}
|
||||
|
||||
explicit test_box_overlap (
|
||||
double match_thresh_,
|
||||
double overlap_thresh_ = 1.0
|
||||
) : match_thresh(match_thresh_), overlap_thresh(overlap_thresh_)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 <= match_thresh && match_thresh <= 1 &&
|
||||
0 <= overlap_thresh && overlap_thresh <= 1,
|
||||
"\t test_box_overlap::test_box_overlap(match_thresh, overlap_thresh)"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t match_thresh: " << match_thresh
|
||||
<< "\n\t overlap_thresh: " << overlap_thresh
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
bool operator() (
|
||||
const dlib::rectangle& a,
|
||||
const dlib::rectangle& b
|
||||
) const
|
||||
{
|
||||
const double inner = a.intersect(b).area();
|
||||
if (inner == 0)
|
||||
return false;
|
||||
|
||||
const double outer = (a+b).area();
|
||||
if (inner/outer > match_thresh ||
|
||||
inner/a.area() > overlap_thresh ||
|
||||
inner/b.area() > overlap_thresh)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
double get_overlap_thresh (
|
||||
) const
|
||||
{
|
||||
return overlap_thresh;
|
||||
}
|
||||
|
||||
double get_match_thresh (
|
||||
) const
|
||||
{
|
||||
return match_thresh;
|
||||
}
|
||||
|
||||
public:
|
||||
double match_thresh;
|
||||
double overlap_thresh;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline void serialize (
|
||||
const test_box_overlap& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
serialize(item.get_match_thresh(), out);
|
||||
serialize(item.get_overlap_thresh(), out);
|
||||
}
|
||||
|
||||
// Reads a test_box_overlap from in.  The match threshold is read first and the
// overlap threshold second, then item is replaced by a tester built from them
// (so the constructor's range check applies to the loaded values).
inline void deserialize (
    test_box_overlap& item,
    std::istream& in
)
{
    double loaded_match;
    deserialize(loaded_match, in);

    double loaded_overlap;
    deserialize(loaded_overlap, in);

    const test_box_overlap loaded(loaded_match, loaded_overlap);
    item = loaded;
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Scans every pair of rectangles inside each rects[i] and records the largest
// match score (intersection area / area of (a+b)) and the largest overlap
// (intersection area / area of either rectangle).  A test_box_overlap built
// from those maxima, slightly relaxed, is returned.
inline test_box_overlap find_tight_overlap_tester (
    const std::vector<std::vector<rectangle> >& rects
)
{
    double largest_match = 0;
    double largest_overlap = 0;

    for (unsigned long s = 0; s < rects.size(); ++s)
    {
        const std::vector<rectangle>& boxes = rects[s];
        for (unsigned long first = 0; first < boxes.size(); ++first)
        {
            for (unsigned long second = first+1; second < boxes.size(); ++second)
            {
                const rectangle& a = boxes[first];
                const rectangle& b = boxes[second];

                // The intersection area is shared by all three ratios.
                const double shared = (double)a.intersect(b).area();
                const double match_score = shared/(double)(a+b).area();
                const double overlap_a = shared/(double)a.area();
                const double overlap_b = shared/(double)b.area();

                largest_match = (match_score > largest_match) ? match_score : largest_match;
                largest_overlap = (overlap_a > largest_overlap) ? overlap_a : largest_overlap;
                largest_overlap = (overlap_b > largest_overlap) ? overlap_b : largest_overlap;
            }
        }
    }

    // Nudge both thresholds up by a tiny factor (capped at 1).  The tester uses
    // strict > comparisons, and on some systems doubles change slightly as they
    // are moved around, which could make the very boxes that produced the
    // maxima register as overlapping even though they only *equal* the
    // thresholds.
    const double relaxed_match = std::min(1.0000001*largest_match, 1.0);
    const double relaxed_overlap = std::min(1.0000001*largest_overlap, 1.0);
    return test_box_overlap(relaxed_match, relaxed_overlap);
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline bool overlaps_any_box (
|
||||
const test_box_overlap& tester,
|
||||
const std::vector<rectangle>& rects,
|
||||
const rectangle& rect
|
||||
)
|
||||
{
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
if (tester(rects[i],rect))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline bool overlaps_any_box (
|
||||
const std::vector<rectangle>& rects,
|
||||
const rectangle& rect
|
||||
)
|
||||
{
|
||||
return overlaps_any_box(test_box_overlap(),rects,rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_BOX_OVERlAP_TESTING_Hh_
|
||||
|
||||
150
lib/3rdParty/dlib/include/dlib/image_processing/box_overlap_testing_abstract.h
vendored
Normal file
150
lib/3rdParty/dlib/include/dlib/image_processing/box_overlap_testing_abstract.h
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_BOX_OVERlAP_TESTING_ABSTRACT_Hh_
|
||||
#ifdef DLIB_BOX_OVERlAP_TESTING_ABSTRACT_Hh_
|
||||
|
||||
#include "../geometry.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class test_box_overlap
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a simple function object for determining if two rectangles
|
||||
overlap.
|
||||
|
||||
THREAD SAFETY
|
||||
Concurrent access to an instance of this object is safe provided that
|
||||
only const member functions are invoked. Otherwise, access must be
|
||||
protected by a mutex lock.
|
||||
!*/
|
||||
|
||||
public:
|
||||
test_box_overlap (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_match_thresh() == 0.5
|
||||
- #get_overlap_thresh() == 1.0
|
||||
!*/
|
||||
|
||||
explicit test_box_overlap (
|
||||
double match_thresh,
|
||||
double overlap_thresh = 1.0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 <= match_thresh <= 1
|
||||
- 0 <= overlap_thresh <= 1
|
||||
ensures
|
||||
- #get_match_thresh() == match_thresh
|
||||
- #get_overlap_thresh() == overlap_thresh
|
||||
!*/
|
||||
|
||||
bool operator() (
|
||||
const dlib::rectangle& a,
|
||||
const dlib::rectangle& b
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns true if a and b overlap "enough". This is defined precisely below.
|
||||
- if (a.intersect(b).area()/(a+b).area() > get_match_thresh() ||
|
||||
a.intersect(b).area()/a.area() > get_overlap_thresh() ||
|
||||
a.intersect(b).area()/b.area() > get_overlap_thresh() ) then
|
||||
- returns true
|
||||
- else
|
||||
- returns false
|
||||
!*/
|
||||
|
||||
double get_match_thresh (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the threshold used to determine if two rectangles match.
|
||||
Note that the match score varies from 0 to 1 and only becomes 1
|
||||
when two rectangles are identical.
|
||||
|
||||
!*/
|
||||
|
||||
double get_overlap_thresh (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the threshold used to determine if two rectangles overlap. This
|
||||
value is the percent of a rectangle's area covered by another rectangle.
|
||||
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void serialize (
|
||||
const test_box_overlap& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
void deserialize (test_box_overlap& item, std::istream& in);
/*!
    Provides deserialization support: loads item from the stream in.
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
test_box_overlap find_tight_overlap_tester (const std::vector<std::vector<rectangle> >& rects);
/*!
    ensures
        - Finds the most restrictive test_box_overlap object that is still
          consistent with the given set of sets of rectangles.
        - More precisely, returns a test_box_overlap object TBO whose
          TBO.get_match_thresh() and TBO.get_overlap_thresh() are as small as
          possible subject to:
            - for all valid i:
                - for all distinct rectangles A and B in rects[i]:
                    - TBO(A,B) == false
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
bool overlaps_any_box (
|
||||
const test_box_overlap& tester,
|
||||
const std::vector<rectangle>& rects,
|
||||
const rectangle& rect
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns true if rect overlaps any box in rects and false otherwise. Overlap
|
||||
is determined based on the given tester object.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
bool overlaps_any_box (
|
||||
const std::vector<rectangle>& rects,
|
||||
const rectangle& rect
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns overlaps_any_box(test_box_overlap(), rects, rect)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_BOX_OVERlAP_TESTING_ABSTRACT_Hh_
|
||||
|
||||
|
||||
113
lib/3rdParty/dlib/include/dlib/image_processing/detection_template_tools.h
vendored
Normal file
113
lib/3rdParty/dlib/include/dlib/image_processing/detection_template_tools.h
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_DETECTION_TEMPlATE_TOOLS_Hh_
|
||||
#define DLIB_DETECTION_TEMPlATE_TOOLS_Hh_
|
||||
|
||||
#include "detection_template_tools_abstract.h"
|
||||
#include "../geometry.h"
|
||||
#include "../matrix.h"
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Returns a rectangle centered on (0,0) whose area and width/height ratio
// approximate the requested values to within integer rounding.
//
// requires (checked with DLIB_ASSERT)
//     - width_to_height_ratio > 0
//     - area > 0
// ensures
//     - height is sqrt(area/width_to_height_ratio) rounded to the nearest
//       integer, but never less than 1
//     - width is area/height rounded to the nearest integer
//     - the result is centered_rect(0,0,width,height)
inline rectangle compute_box_dimensions (
    const double width_to_height_ratio,
    const double area
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(width_to_height_ratio > 0 && area > 0,
        "\t rectangle compute_box_dimensions()"
        << "\n\t Invalid arguments were given to this function. "
        << "\n\t width_to_height_ratio: " << width_to_height_ratio
        << "\n\t area: " << area
        );

    /*
        We want integers such that, as closely as possible,
            width*height == area
            width/height == width_to_height_ratio
        which gives height == sqrt(area/width_to_height_ratio).
    */
    const int rounded_height = (int)std::floor(std::sqrt(area/width_to_height_ratio) + 0.5);

    // A small area combined with a large ratio rounds the height down to 0.
    // Dividing by that would produce an infinite width, and converting an
    // infinite double to int is undefined behavior, so use at least one row.
    const int height = (rounded_height < 1) ? 1 : rounded_height;
    const int width = (int)std::floor(area/height + 0.5);

    return centered_rect(0,0,width,height);
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline std::vector<rectangle> create_single_box_detection_template (
|
||||
const rectangle& object_box
|
||||
)
|
||||
{
|
||||
std::vector<rectangle> temp;
|
||||
temp.push_back(object_box);
|
||||
return temp;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline std::vector<rectangle> create_overlapped_2x2_detection_template (
|
||||
const rectangle& object_box
|
||||
)
|
||||
{
|
||||
std::vector<rectangle> result;
|
||||
|
||||
const point c = center(object_box);
|
||||
|
||||
result.push_back(rectangle() + c + object_box.tl_corner() + object_box.tr_corner());
|
||||
result.push_back(rectangle() + c + object_box.bl_corner() + object_box.br_corner());
|
||||
result.push_back(rectangle() + c + object_box.tl_corner() + object_box.bl_corner());
|
||||
result.push_back(rectangle() + c + object_box.tr_corner() + object_box.br_corner());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline std::vector<rectangle> create_grid_detection_template (
|
||||
const rectangle& object_box,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(cells_x > 0 && cells_y > 0,
|
||||
"\t std::vector<rectangle> create_grid_detection_template()"
|
||||
<< "\n\t The number of cells along a dimension can't be zero. "
|
||||
<< "\n\t cells_x: " << cells_x
|
||||
<< "\n\t cells_y: " << cells_y
|
||||
);
|
||||
|
||||
std::vector<rectangle> result;
|
||||
|
||||
const matrix<double,1> x = linspace(object_box.left(), object_box.right(), cells_x+1);
|
||||
const matrix<double,1> y = linspace(object_box.top(), object_box.bottom(), cells_y+1);
|
||||
|
||||
for (long j = 0; j+1 < y.size(); ++j)
|
||||
{
|
||||
for (long i = 0; i+1 < x.size(); ++i)
|
||||
{
|
||||
const dlib::vector<double,2> tl(x(i),y(j));
|
||||
const dlib::vector<double,2> br(x(i+1),y(j+1));
|
||||
result.push_back(rectangle(tl,br));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif // DLIB_DETECTION_TEMPlATE_TOOLS_Hh_
|
||||
|
||||
|
||||
95
lib/3rdParty/dlib/include/dlib/image_processing/detection_template_tools_abstract.h
vendored
Normal file
95
lib/3rdParty/dlib/include/dlib/image_processing/detection_template_tools_abstract.h
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_DETECTION_TEMPlATE_TOOLS_ABSTRACT_Hh_
|
||||
#ifdef DLIB_DETECTION_TEMPlATE_TOOLS_ABSTRACT_Hh_
|
||||
|
||||
#include "../geometry.h"
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
rectangle compute_box_dimensions (const double width_to_height_ratio, const double area);
/*!
    requires
        - area > 0
        - width_to_height_ratio > 0
    ensures
        - returns a rectangle R with the requested area and aspect ratio, that is:
            - R.area() == area (to within integer precision)
            - R.width()/R.height() == width_to_height_ratio (to within integer
              precision)
            - center(R) == point(0,0)
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::vector<rectangle> create_single_box_detection_template (
|
||||
const rectangle& object_box
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns a vector that contains only object_box.
|
||||
- In particular, returns a vector V such that:
|
||||
- V.size() == 1
|
||||
- V[0] == object_box
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::vector<rectangle> create_overlapped_2x2_detection_template (
|
||||
const rectangle& object_box
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Divides object_box up into four overlapping regions, the
|
||||
top half, bottom half, left half, and right half. These
|
||||
four rectangles are returned inside a std::vector.
|
||||
- In particular, returns a vector V such that:
|
||||
- V.size() == 4
|
||||
- V[0] == top half of object_box
|
||||
- V[1] == bottom half of object_box
|
||||
- V[2] == left half of object_box
|
||||
- V[3] == right half of object_box
|
||||
- for all valid i: object_box.contains(V[i]) == true
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::vector<rectangle> create_grid_detection_template (
|
||||
const rectangle& object_box,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- cells_x > 0
|
||||
- cells_y > 0
|
||||
ensures
|
||||
- Divides object_box up into a grid and returns a vector
|
||||
containing all the rectangles corresponding to elements
|
||||
of the grid. Moreover, the grid will be cells_x elements
|
||||
wide and cells_y elements tall.
|
||||
- In particular, returns a vector V such that:
|
||||
- V.size() == cells_x*cells_y
|
||||
- for all valid i:
|
||||
- object_box.contains(V[i]) == true
|
||||
- V[i] == The rectangle corresponding to the ith grid
|
||||
element.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif // DLIB_DETECTION_TEMPlATE_TOOLS_ABSTRACT_Hh_
|
||||
|
||||
|
||||
|
||||
2376
lib/3rdParty/dlib/include/dlib/image_processing/frontal_face_detector.h
vendored
Normal file
2376
lib/3rdParty/dlib/include/dlib/image_processing/frontal_face_detector.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
25
lib/3rdParty/dlib/include/dlib/image_processing/frontal_face_detector_abstract.h
vendored
Normal file
25
lib/3rdParty/dlib/include/dlib/image_processing/frontal_face_detector_abstract.h
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_FRONTAL_FACE_DETECTOr_ABSTRACT_Hh_
|
||||
#ifdef DLIB_FRONTAL_FACE_DETECTOr_ABSTRACT_Hh_
|
||||
|
||||
#include "object_detector_abstract.h"
|
||||
#include "scan_fhog_pyramid_abstract.h"
|
||||
#include "../image_transforms/image_pyramid_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;
|
||||
|
||||
frontal_face_detector get_frontal_face_detector(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns an object_detector that is configured to find human faces that are
|
||||
looking more or less towards the camera.
|
||||
!*/
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_FRONTAL_FACE_DETECTOr_ABSTRACT_Hh_
|
||||
|
||||
103
lib/3rdParty/dlib/include/dlib/image_processing/full_object_detection.h
vendored
Normal file
103
lib/3rdParty/dlib/include/dlib/image_processing/full_object_detection.h
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_FULL_OBJECT_DeTECTION_Hh_
|
||||
#define DLIB_FULL_OBJECT_DeTECTION_Hh_
|
||||
|
||||
#include "../geometry.h"
|
||||
#include "full_object_detection_abstract.h"
|
||||
#include <vector>
|
||||
#include "../serialize.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Sentinel point marking an object part as "not present".  all_parts_in_rect()
// skips the containment check for parts equal to this value.
static const point OBJECT_PART_NOT_PRESENT(0x7FFFFFFF, 0x7FFFFFFF);
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class full_object_detection
|
||||
{
|
||||
public:
|
||||
full_object_detection(
|
||||
const rectangle& rect_,
|
||||
const std::vector<point>& parts_
|
||||
) : rect(rect_), parts(parts_) {}
|
||||
|
||||
full_object_detection(){}
|
||||
|
||||
explicit full_object_detection(
|
||||
const rectangle& rect_
|
||||
) : rect(rect_) {}
|
||||
|
||||
const rectangle& get_rect() const { return rect; }
|
||||
unsigned long num_parts() const { return parts.size(); }
|
||||
|
||||
const point& part(
|
||||
unsigned long idx
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(idx < num_parts(),
|
||||
"\t point full_object_detection::part()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t idx: " << idx
|
||||
<< "\n\t num_parts(): " << num_parts()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
return parts[idx];
|
||||
}
|
||||
|
||||
friend void serialize (
|
||||
const full_object_detection& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
int version = 1;
|
||||
serialize(version, out);
|
||||
serialize(item.rect, out);
|
||||
serialize(item.parts, out);
|
||||
}
|
||||
|
||||
friend void deserialize (
|
||||
full_object_detection& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version != 1)
|
||||
throw serialization_error("Unexpected version encountered while deserializing dlib::full_object_detection.");
|
||||
|
||||
deserialize(item.rect, in);
|
||||
deserialize(item.parts, in);
|
||||
}
|
||||
|
||||
private:
|
||||
rectangle rect;
|
||||
std::vector<point> parts;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline bool all_parts_in_rect (
|
||||
const full_object_detection& obj
|
||||
)
|
||||
{
|
||||
for (unsigned long i = 0; i < obj.num_parts(); ++i)
|
||||
{
|
||||
if (obj.get_rect().contains(obj.part(i)) == false &&
|
||||
obj.part(i) != OBJECT_PART_NOT_PRESENT)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_FULL_OBJECT_DeTECTION_Hh_
|
||||
|
||||
125
lib/3rdParty/dlib/include/dlib/image_processing/full_object_detection_abstract.h
vendored
Normal file
125
lib/3rdParty/dlib/include/dlib/image_processing/full_object_detection_abstract.h
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_Hh_
|
||||
#ifdef DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_Hh_
|
||||
|
||||
#include <vector>
|
||||
#include "../geometry.h"
|
||||
#include "../serialize.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Special point value used to indicate that an object part is "not present".
static const point OBJECT_PART_NOT_PRESENT(0x7FFFFFFF, 0x7FFFFFFF);
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class full_object_detection
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object represents the location of an object in an image along with the
|
||||
positions of each of its constituent parts.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
full_object_detection(
|
||||
const rectangle& rect,
|
||||
const std::vector<point>& parts
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_rect() == rect
|
||||
- #num_parts() == parts.size()
|
||||
- for all valid i:
|
||||
- part(i) == parts[i]
|
||||
!*/
|
||||
|
||||
full_object_detection(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_rect().is_empty() == true
|
||||
- #num_parts() == 0
|
||||
!*/
|
||||
|
||||
explicit full_object_detection(
|
||||
const rectangle& rect
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_rect() == rect
|
||||
- #num_parts() == 0
|
||||
!*/
|
||||
|
||||
const rectangle& get_rect(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the rectangle that indicates where this object is. In general,
|
||||
this should be the bounding box for the object.
|
||||
!*/
|
||||
|
||||
unsigned long num_parts(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of parts in this object.
|
||||
!*/
|
||||
|
||||
const point& part(
|
||||
unsigned long idx
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- idx < num_parts()
|
||||
ensures
|
||||
- returns the location of the center of the idx-th part of this object.
|
||||
Note that it is valid for a part to be "not present". This is indicated
|
||||
when the return value of part() is equal to OBJECT_PART_NOT_PRESENT.
|
||||
This is useful for modeling object parts that are not always observed.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void serialize (
|
||||
const full_object_detection& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
void deserialize (full_object_detection& item, std::istream& in);
/*!
    Provides deserialization support: loads item from the stream in.
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
bool all_parts_in_rect (
|
||||
const full_object_detection& obj
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns true if all the parts in obj are contained within obj.get_rect().
|
||||
That is, returns true if and only if, for all valid i, the following is
|
||||
always true:
|
||||
obj.get_rect().contains(obj.part(i)) == true || obj.part(i) == OBJECT_PART_NOT_PRESENT
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_Hh_
|
||||
|
||||
|
||||
431
lib/3rdParty/dlib/include/dlib/image_processing/generic_image.h
vendored
Normal file
431
lib/3rdParty/dlib/include/dlib/image_processing/generic_image.h
vendored
Normal file
@@ -0,0 +1,431 @@
|
||||
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_GeNERIC_IMAGE_Hh_
|
||||
#define DLIB_GeNERIC_IMAGE_Hh_
|
||||
|
||||
#include "../assert.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
/*!
|
||||
In dlib, an "image" is any object that implements the generic image interface. In
|
||||
particular, this simply means that an image type (let's refer to it as image_type
|
||||
from here on) has the following seven global functions defined for it:
|
||||
- long num_rows (const image_type& img)
|
||||
- long num_columns (const image_type& img)
|
||||
- void set_image_size( image_type& img, long rows, long cols)
|
||||
- void* image_data ( image_type& img)
|
||||
- const void* image_data (const image_type& img)
|
||||
- long width_step (const image_type& img)
|
||||
- void swap ( image_type& a, image_type& b)
|
||||
And also provides a specialization of the image_traits template that looks like:
|
||||
namespace dlib
|
||||
{
|
||||
template <>
|
||||
struct image_traits<image_type>
|
||||
{
|
||||
typedef the_type_of_pixel_used_in_image_type pixel_type;
|
||||
};
|
||||
}
|
||||
|
||||
Additionally, an image object must be default constructible. This means that
|
||||
expressions of the form:
|
||||
image_type img;
|
||||
must be legal.
|
||||
|
||||
Finally, the type of pixel in image_type must have a pixel_traits specialization.
|
||||
That is, pixel_traits<typename image_traits<image_type>::pixel_type> must be one of
|
||||
the specializations of pixel_traits.
|
||||
|
||||
|
||||
To be very precise, the seven functions defined above are defined thusly:
|
||||
|
||||
long num_rows(
|
||||
const image_type& img
|
||||
);
|
||||
/!*
|
||||
ensures
|
||||
- returns the number of rows in the given image
|
||||
*!/
|
||||
|
||||
long num_columns(
|
||||
const image_type& img
|
||||
);
|
||||
/!*
|
||||
ensures
|
||||
- returns the number of columns in the given image
|
||||
*!/
|
||||
|
||||
void set_image_size(
|
||||
image_type& img,
|
||||
long rows,
|
||||
long cols
|
||||
);
|
||||
/!*
|
||||
requires
|
||||
- rows >= 0 && cols >= 0
|
||||
ensures
|
||||
- num_rows(#img) == rows
|
||||
- num_columns(#img) == cols
|
||||
*!/
|
||||
|
||||
void* image_data(
|
||||
image_type& img
|
||||
);
|
||||
/!*
|
||||
ensures
|
||||
- returns a non-const pointer to the pixel at row and column position 0,0
|
||||
in the given image. Or if the image has zero rows or columns in it
|
||||
then this function returns NULL.
|
||||
- The image lays pixels down in row major order. However, there might
|
||||
be padding at the end of each row. The amount of padding is given by
|
||||
width_step(img).
|
||||
*!/
|
||||
|
||||
const void* image_data(
|
||||
const image_type& img
|
||||
);
|
||||
/!*
|
||||
ensures
|
||||
- returns a const pointer to the pixel at row and column position 0,0 in
|
||||
the given image. Or if the image has zero rows or columns in it then
|
||||
this function returns NULL.
|
||||
- The image lays pixels down in row major order. However, there might
|
||||
be padding at the end of each row. The amount of padding is given by
|
||||
width_step(img).
|
||||
*!/
|
||||
|
||||
long width_step(
|
||||
const image_type& img
|
||||
);
|
||||
/!*
|
||||
ensures
|
||||
- returns the size of one row of the image, in bytes. More precisely,
|
||||
return a number N such that: (char*)image_data(img) + N*R == a
|
||||
pointer to the first pixel in the R-th row of the image. This means
|
||||
that the image must lay its pixels down in row major order.
|
||||
*!/
|
||||
|
||||
void swap(
|
||||
image_type& a,
|
||||
image_type& b
|
||||
);
|
||||
/!*
|
||||
ensures
|
||||
- swaps the state of a and b
|
||||
*!/
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
    typename image_type
    >
struct image_traits;
/*!
    WHAT THIS OBJECT REPRESENTS
        A traits class for generic image objects.  Only the primary template is
        declared here; the pixel type stored in an image is obtained through a
        specialization with an expression of the form:
            image_traits<image_type>::pixel_type
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// UTILITIES TO MAKE ACCESSING IMAGE PIXELS SIMPLER
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
class image_view
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON image_type
|
||||
image_type must be an image object as defined at the top of this file.
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object takes an image object and wraps it with an interface that makes
|
||||
it look like a dlib::array2d. That is, it makes it look similar to a
|
||||
regular 2-dimensional C style array, making code which operates on the
|
||||
pixels simple to read.
|
||||
|
||||
Note that an image_view instance is valid until the image given to its
|
||||
constructor is modified through an interface other than the image_view
|
||||
instance. This is because, for example, someone might cause the underlying
|
||||
image object to reallocate its memory, thus invalidating the pointer to its
|
||||
pixel data stored in the image_view.
|
||||
|
||||
As an side, the reason why this object stores a pointer to the image
|
||||
object's data and uses that pointer instead of calling image_data() each
|
||||
time a pixel is accessed is to allow for image objects to implement
|
||||
complex, and possibly slow, image_data() functions. For example, an image
|
||||
object might perform some kind of synchronization between a GPU and the
|
||||
host memory during a call to image_data(). Therefore, we call image_data()
|
||||
only in image_view's constructor to avoid the performance penalty of
|
||||
calling it for each pixel access.
|
||||
!*/
|
||||
|
||||
public:
|
||||
typedef typename image_traits<image_type>::pixel_type pixel_type;
|
||||
|
||||
image_view(
|
||||
image_type& img
|
||||
) :
|
||||
_data((char*)image_data(img)),
|
||||
_width_step(width_step(img)),
|
||||
_nr(num_rows(img)),
|
||||
_nc(num_columns(img)),
|
||||
_img(&img)
|
||||
{}
|
||||
|
||||
long nr() const { return _nr; }
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of rows in this image.
|
||||
!*/
|
||||
|
||||
long nc() const { return _nc; }
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of columns in this image.
|
||||
!*/
|
||||
|
||||
unsigned long size() const { return static_cast<unsigned long>(nr()*nc()); }
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of pixels in this image.
|
||||
!*/
|
||||
|
||||
#ifndef ENABLE_ASSERTS
|
||||
pixel_type* operator[] (long row) { return (pixel_type*)(_data+_width_step*row); }
|
||||
/*!
|
||||
requires
|
||||
- 0 <= row < nr()
|
||||
ensures
|
||||
- returns a pointer to the first pixel in the row-th row. Therefore, the
|
||||
pixel at row and column position r,c can be accessed via (*this)[r][c].
|
||||
!*/
|
||||
|
||||
const pixel_type* operator[] (long row) const { return (const pixel_type*)(_data+_width_step*row); }
|
||||
/*!
|
||||
requires
|
||||
- 0 <= row < nr()
|
||||
ensures
|
||||
- returns a const pointer to the first pixel in the row-th row. Therefore,
|
||||
the pixel at row and column position r,c can be accessed via
|
||||
(*this)[r][c].
|
||||
!*/
|
||||
#else
|
||||
// If asserts are enabled then we need to return a proxy class so we can make sure
|
||||
// the column accesses don't go out of bounds.
|
||||
struct pix_row
|
||||
{
|
||||
pix_row(pixel_type* data_, long nc_) : data(data_),_nc(nc_) {}
|
||||
const pixel_type& operator[] (long col) const
|
||||
{
|
||||
DLIB_ASSERT(0 <= col && col < _nc,
|
||||
"\t The given column index is out of range."
|
||||
<< "\n\t col: " << col
|
||||
<< "\n\t _nc: " << _nc);
|
||||
return data[col];
|
||||
}
|
||||
pixel_type& operator[] (long col)
|
||||
{
|
||||
DLIB_ASSERT(0 <= col && col < _nc,
|
||||
"\t The given column index is out of range."
|
||||
<< "\n\t col: " << col
|
||||
<< "\n\t _nc: " << _nc);
|
||||
return data[col];
|
||||
}
|
||||
private:
|
||||
pixel_type* const data;
|
||||
const long _nc;
|
||||
};
|
||||
pix_row operator[] (long row)
|
||||
{
|
||||
DLIB_ASSERT(0 <= row && row < _nr,
|
||||
"\t The given row index is out of range."
|
||||
<< "\n\t row: " << row
|
||||
<< "\n\t _nr: " << _nr);
|
||||
return pix_row((pixel_type*)(_data+_width_step*row), _nc);
|
||||
}
|
||||
const pix_row operator[] (long row) const
|
||||
{
|
||||
DLIB_ASSERT(0 <= row && row < _nr,
|
||||
"\t The given row index is out of range."
|
||||
<< "\n\t row: " << row
|
||||
<< "\n\t _nr: " << _nr);
|
||||
return pix_row((pixel_type*)(_data+_width_step*row), _nc);
|
||||
}
|
||||
#endif
|
||||
|
||||
void set_size(long rows, long cols)
|
||||
/*!
|
||||
requires
|
||||
- rows >= 0 && cols >= 0
|
||||
ensures
|
||||
- Tells the underlying image to resize itself to have the given number of
|
||||
rows and columns.
|
||||
- #nr() == rows
|
||||
- #nc() == cols
|
||||
!*/
|
||||
{
|
||||
DLIB_ASSERT((cols >= 0 && rows >= 0),
|
||||
"\t image_view::set_size(long rows, long cols)"
|
||||
<< "\n\t The images can't have negative rows or columns."
|
||||
<< "\n\t cols: " << cols
|
||||
<< "\n\t rows: " << rows
|
||||
);
|
||||
set_image_size(*_img, rows, cols); *this = *_img;
|
||||
}
|
||||
|
||||
void clear() { set_size(0,0); }
|
||||
/*!
|
||||
ensures
|
||||
- sets the image to have 0 pixels in it.
|
||||
!*/
|
||||
|
||||
private:
|
||||
|
||||
char* _data;
|
||||
long _width_step;
|
||||
long _nr;
|
||||
long _nc;
|
||||
image_type* _img;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_type>
|
||||
class const_image_view
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON image_type
|
||||
image_type must be an image object as defined at the top of this file.
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is just like the image_view except that it provides a "const"
|
||||
view into an image. That is, it has the same interface as image_view
|
||||
except that you can't modify the image through a const_image_view.
|
||||
!*/
|
||||
|
||||
public:
|
||||
typedef typename image_traits<image_type>::pixel_type pixel_type;
|
||||
|
||||
const_image_view(
|
||||
const image_type& img
|
||||
) :
|
||||
_data((char*)image_data(img)),
|
||||
_width_step(width_step(img)),
|
||||
_nr(num_rows(img)),
|
||||
_nc(num_columns(img))
|
||||
{}
|
||||
|
||||
long nr() const { return _nr; }
|
||||
long nc() const { return _nc; }
|
||||
unsigned long size() const { return static_cast<unsigned long>(nr()*nc()); }
|
||||
#ifndef ENABLE_ASSERTS
|
||||
const pixel_type* operator[] (long row) const { return (const pixel_type*)(_data+_width_step*row); }
|
||||
#else
|
||||
// If asserts are enabled then we need to return a proxy class so we can make sure
|
||||
// the column accesses don't go out of bounds.
|
||||
struct pix_row
|
||||
{
|
||||
pix_row(pixel_type* data_, long nc_) : data(data_),_nc(nc_) {}
|
||||
const pixel_type& operator[] (long col) const
|
||||
{
|
||||
DLIB_ASSERT(0 <= col && col < _nc,
|
||||
"\t The given column index is out of range."
|
||||
<< "\n\t col: " << col
|
||||
<< "\n\t _nc: " << _nc);
|
||||
return data[col];
|
||||
}
|
||||
private:
|
||||
pixel_type* const data;
|
||||
const long _nc;
|
||||
};
|
||||
const pix_row operator[] (long row) const
|
||||
{
|
||||
DLIB_ASSERT(0 <= row && row < _nr,
|
||||
"\t The given row index is out of range."
|
||||
<< "\n\t row: " << row
|
||||
<< "\n\t _nr: " << _nr);
|
||||
return pix_row((pixel_type*)(_data+_width_step*row), _nc);
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
const char* _data;
|
||||
long _width_step;
|
||||
long _nr;
|
||||
long _nc;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_type>
|
||||
image_view<image_type> make_image_view ( image_type& img)
|
||||
{ return image_view<image_type>(img); }
|
||||
/*!
|
||||
requires
|
||||
- image_type == an image object that implements the interface defined at the
|
||||
top of this file.
|
||||
ensures
|
||||
- constructs an image_view from an image object
|
||||
!*/
|
||||
|
||||
template <typename image_type>
|
||||
const_image_view<image_type> make_image_view (const image_type& img)
|
||||
{ return const_image_view<image_type>(img); }
|
||||
/*!
|
||||
requires
|
||||
- image_type == an image object that implements the interface defined at the
|
||||
top of this file.
|
||||
ensures
|
||||
- constructs a const_image_view from an image object
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_type>
inline unsigned long image_size(
    const image_type& img
) { return static_cast<unsigned long>(num_columns(img)*num_rows(img)); }
/*!
    requires
        - image_type == an image object that implements the interface defined at the
          top of this file.
    ensures
        - returns the number of pixels in the given image.
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_type>
inline long num_rows(
    const image_type& img
) { return img.nr(); }
/*!
    ensures
        - By default, try to use the member function .nr() to determine the number
          of rows in an image.  However, as stated at the top of this file, image
          objects should provide their own overload of num_rows() if needed.
!*/
|
||||
|
||||
template <typename image_type>
inline long num_columns(
    const image_type& img
) { return img.nc(); }
/*!
    ensures
        - By default, try to use the member function .nc() to determine the number
          of columns in an image.  However, as stated at the top of this file, image
          objects should provide their own overload of num_columns() if needed.
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_GeNERIC_IMAGE_Hh_
|
||||
|
||||
628
lib/3rdParty/dlib/include/dlib/image_processing/object_detector.h
vendored
Normal file
628
lib/3rdParty/dlib/include/dlib/image_processing/object_detector.h
vendored
Normal file
@@ -0,0 +1,628 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_OBJECT_DeTECTOR_Hh_
|
||||
#define DLIB_OBJECT_DeTECTOR_Hh_
|
||||
|
||||
#include "object_detector_abstract.h"
|
||||
#include "../geometry.h"
|
||||
#include <vector>
|
||||
#include "box_overlap_testing.h"
|
||||
#include "full_object_detection.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct rect_detection
|
||||
{
|
||||
double detection_confidence;
|
||||
unsigned long weight_index;
|
||||
rectangle rect;
|
||||
|
||||
bool operator<(const rect_detection& item) const { return detection_confidence < item.detection_confidence; }
|
||||
};
|
||||
|
||||
struct full_detection
|
||||
{
|
||||
double detection_confidence;
|
||||
unsigned long weight_index;
|
||||
full_object_detection rect;
|
||||
|
||||
bool operator<(const full_detection& item) const { return detection_confidence < item.detection_confidence; }
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_scanner_type>
struct processed_weight_vector
{
    processed_weight_vector(){}

    typedef typename image_scanner_type::feature_vector_type feature_vector_type;

    void init (
        const image_scanner_type&
    )
    /*!
        requires
            - w has already been assigned its value.  Note that the point of this
              function is to allow an image scanner to overload the
              processed_weight_vector template and provide some different kind of
              object as the output of get_detect_argument().  For example, the
              scan_fhog_pyramid object uses an overload that causes
              get_detect_argument() to return the special fhog_filterbank object
              instead of a feature_vector_type.  This avoids needing to construct the
              fhog_filterbank during each call to detect and therefore speeds up
              detection.
        ensures
            - this default version does nothing.
    !*/
    {}

    // return the first argument to image_scanner_type::detect()
    const feature_vector_type& get_detect_argument() const { return w; }

    // The raw weight vector.  In this default version it is also what
    // get_detect_argument() returns.
    feature_vector_type w;
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type_
|
||||
>
|
||||
class object_detector
|
||||
{
|
||||
public:
|
||||
typedef image_scanner_type_ image_scanner_type;
|
||||
typedef typename image_scanner_type::feature_vector_type feature_vector_type;
|
||||
|
||||
object_detector (
|
||||
);
|
||||
|
||||
object_detector (
|
||||
const object_detector& item
|
||||
);
|
||||
|
||||
object_detector (
|
||||
const image_scanner_type& scanner_,
|
||||
const test_box_overlap& overlap_tester_,
|
||||
const feature_vector_type& w_
|
||||
);
|
||||
|
||||
object_detector (
|
||||
const image_scanner_type& scanner_,
|
||||
const test_box_overlap& overlap_tester_,
|
||||
const std::vector<feature_vector_type>& w_
|
||||
);
|
||||
|
||||
explicit object_detector (
|
||||
const std::vector<object_detector>& detectors
|
||||
);
|
||||
|
||||
unsigned long num_detectors (
|
||||
) const { return w.size(); }
|
||||
|
||||
const feature_vector_type& get_w (
|
||||
unsigned long idx = 0
|
||||
) const { return w[idx].w; }
|
||||
|
||||
const processed_weight_vector<image_scanner_type>& get_processed_w (
|
||||
unsigned long idx = 0
|
||||
) const { return w[idx]; }
|
||||
|
||||
const test_box_overlap& get_overlap_tester (
|
||||
) const;
|
||||
|
||||
const image_scanner_type& get_scanner (
|
||||
) const;
|
||||
|
||||
object_detector& operator= (
|
||||
const object_detector& item
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
std::vector<rectangle> operator() (
|
||||
const image_type& img,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<std::pair<double, rectangle> >& final_dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<std::pair<double, full_object_detection> >& final_dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<full_object_detection>& final_dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
|
||||
// These typedefs are here for backwards compatibility with previous versions of
|
||||
// dlib.
|
||||
typedef ::dlib::rect_detection rect_detection;
|
||||
typedef ::dlib::full_detection full_detection;
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<rect_detection>& final_dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<full_detection>& final_dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
|
||||
template <typename T>
|
||||
friend void serialize (
|
||||
const object_detector<T>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
|
||||
template <typename T>
|
||||
friend void deserialize (
|
||||
object_detector<T>& item,
|
||||
std::istream& in
|
||||
);
|
||||
|
||||
public:
|
||||
|
||||
bool overlaps_any_box (
|
||||
const std::vector<rect_detection>& rects,
|
||||
const dlib::rectangle& rect
|
||||
) const
|
||||
{
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
if (boxes_overlap(rects[i].rect, rect))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
test_box_overlap boxes_overlap;
|
||||
std::vector<processed_weight_vector<image_scanner_type> > w;
|
||||
image_scanner_type scanner;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void serialize (
|
||||
const object_detector<T>& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
int version = 2;
|
||||
serialize(version, out);
|
||||
|
||||
T scanner;
|
||||
scanner.copy_configuration(item.scanner);
|
||||
serialize(scanner, out);
|
||||
serialize(item.boxes_overlap, out);
|
||||
// serialize all the weight vectors
|
||||
serialize(item.w.size(), out);
|
||||
for (unsigned long i = 0; i < item.w.size(); ++i)
|
||||
serialize(item.w[i].w, out);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void deserialize (
|
||||
object_detector<T>& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version == 1)
|
||||
{
|
||||
deserialize(item.scanner, in);
|
||||
item.w.resize(1);
|
||||
deserialize(item.w[0].w, in);
|
||||
item.w[0].init(item.scanner);
|
||||
deserialize(item.boxes_overlap, in);
|
||||
}
|
||||
else if (version == 2)
|
||||
{
|
||||
deserialize(item.scanner, in);
|
||||
deserialize(item.boxes_overlap, in);
|
||||
unsigned long num_detectors = 0;
|
||||
deserialize(num_detectors, in);
|
||||
item.w.resize(num_detectors);
|
||||
for (unsigned long i = 0; i < item.w.size(); ++i)
|
||||
{
|
||||
deserialize(item.w[i].w, in);
|
||||
item.w[i].init(item.scanner);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// object_detector member functions
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
object_detector<image_scanner_type>::
|
||||
object_detector (
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
object_detector<image_scanner_type>::
|
||||
object_detector (
|
||||
const object_detector& item
|
||||
)
|
||||
{
|
||||
boxes_overlap = item.boxes_overlap;
|
||||
w = item.w;
|
||||
scanner.copy_configuration(item.scanner);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
object_detector<image_scanner_type>::
|
||||
object_detector (
|
||||
const image_scanner_type& scanner_,
|
||||
const test_box_overlap& overlap_tester,
|
||||
const feature_vector_type& w_
|
||||
) :
|
||||
boxes_overlap(overlap_tester)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 &&
|
||||
w_.size() == scanner_.get_num_dimensions() + 1,
|
||||
"\t object_detector::object_detector(scanner_,overlap_tester,w_)"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
|
||||
<< "\n\t w_.size(): " << w_.size()
|
||||
<< "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
scanner.copy_configuration(scanner_);
|
||||
w.resize(1);
|
||||
w[0].w = w_;
|
||||
w[0].init(scanner);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
object_detector<image_scanner_type>::
|
||||
object_detector (
|
||||
const image_scanner_type& scanner_,
|
||||
const test_box_overlap& overlap_tester,
|
||||
const std::vector<feature_vector_type>& w_
|
||||
) :
|
||||
boxes_overlap(overlap_tester)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 && w_.size() > 0,
|
||||
"\t object_detector::object_detector(scanner_,overlap_tester,w_)"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
|
||||
<< "\n\t w_.size(): " << w_.size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < w_.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(w_[i].size() == scanner_.get_num_dimensions() + 1,
|
||||
"\t object_detector::object_detector(scanner_,overlap_tester,w_)"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
|
||||
<< "\n\t w_["<<i<<"].size(): " << w_[i].size()
|
||||
<< "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
scanner.copy_configuration(scanner_);
|
||||
w.resize(w_.size());
|
||||
for (unsigned long i = 0; i < w.size(); ++i)
|
||||
{
|
||||
w[i].w = w_[i];
|
||||
w[i].init(scanner);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
object_detector<image_scanner_type>::
|
||||
object_detector (
|
||||
const std::vector<object_detector>& detectors
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(detectors.size() != 0,
|
||||
"\t object_detector::object_detector(detectors)"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
std::vector<feature_vector_type> weights;
|
||||
weights.reserve(detectors.size());
|
||||
for (unsigned long i = 0; i < detectors.size(); ++i)
|
||||
{
|
||||
for (unsigned long j = 0; j < detectors[i].num_detectors(); ++j)
|
||||
weights.push_back(detectors[i].get_w(j));
|
||||
}
|
||||
|
||||
*this = object_detector(detectors[0].get_scanner(), detectors[0].get_overlap_tester(), weights);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
object_detector<image_scanner_type>& object_detector<image_scanner_type>::
|
||||
operator= (
|
||||
const object_detector& item
|
||||
)
|
||||
{
|
||||
if (this == &item)
|
||||
return *this;
|
||||
|
||||
boxes_overlap = item.boxes_overlap;
|
||||
w = item.w;
|
||||
scanner.copy_configuration(item.scanner);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void object_detector<image_scanner_type>::
|
||||
operator() (
|
||||
const image_type& img,
|
||||
std::vector<rect_detection>& final_dets,
|
||||
double adjust_threshold
|
||||
)
|
||||
{
|
||||
scanner.load(img);
|
||||
std::vector<std::pair<double, rectangle> > dets;
|
||||
std::vector<rect_detection> dets_accum;
|
||||
for (unsigned long i = 0; i < w.size(); ++i)
|
||||
{
|
||||
const double thresh = w[i].w(scanner.get_num_dimensions());
|
||||
scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
|
||||
for (unsigned long j = 0; j < dets.size(); ++j)
|
||||
{
|
||||
rect_detection temp;
|
||||
temp.detection_confidence = dets[j].first-thresh;
|
||||
temp.weight_index = i;
|
||||
temp.rect = dets[j].second;
|
||||
dets_accum.push_back(temp);
|
||||
}
|
||||
}
|
||||
|
||||
// Do non-max suppression
|
||||
final_dets.clear();
|
||||
if (w.size() > 1)
|
||||
std::sort(dets_accum.rbegin(), dets_accum.rend());
|
||||
for (unsigned long i = 0; i < dets_accum.size(); ++i)
|
||||
{
|
||||
if (overlaps_any_box(final_dets, dets_accum[i].rect))
|
||||
continue;
|
||||
|
||||
final_dets.push_back(dets_accum[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void object_detector<image_scanner_type>::
|
||||
operator() (
|
||||
const image_type& img,
|
||||
std::vector<full_detection>& final_dets,
|
||||
double adjust_threshold
|
||||
)
|
||||
{
|
||||
std::vector<rect_detection> dets;
|
||||
(*this)(img,dets,adjust_threshold);
|
||||
|
||||
final_dets.resize(dets.size());
|
||||
|
||||
// convert all the rectangle detections into full_object_detections.
|
||||
for (unsigned long i = 0; i < dets.size(); ++i)
|
||||
{
|
||||
final_dets[i].detection_confidence = dets[i].detection_confidence;
|
||||
final_dets[i].weight_index = dets[i].weight_index;
|
||||
final_dets[i].rect = scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
std::vector<rectangle> object_detector<image_scanner_type>::
|
||||
operator() (
|
||||
const image_type& img,
|
||||
double adjust_threshold
|
||||
)
|
||||
{
|
||||
std::vector<rect_detection> dets;
|
||||
(*this)(img,dets,adjust_threshold);
|
||||
|
||||
std::vector<rectangle> final_dets(dets.size());
|
||||
for (unsigned long i = 0; i < dets.size(); ++i)
|
||||
final_dets[i] = dets[i].rect;
|
||||
|
||||
return final_dets;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void object_detector<image_scanner_type>::
|
||||
operator() (
|
||||
const image_type& img,
|
||||
std::vector<std::pair<double, rectangle> >& final_dets,
|
||||
double adjust_threshold
|
||||
)
|
||||
{
|
||||
std::vector<rect_detection> dets;
|
||||
(*this)(img,dets,adjust_threshold);
|
||||
|
||||
final_dets.resize(dets.size());
|
||||
for (unsigned long i = 0; i < dets.size(); ++i)
|
||||
final_dets[i] = std::make_pair(dets[i].detection_confidence,dets[i].rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void object_detector<image_scanner_type>::
|
||||
operator() (
|
||||
const image_type& img,
|
||||
std::vector<std::pair<double, full_object_detection> >& final_dets,
|
||||
double adjust_threshold
|
||||
)
|
||||
{
|
||||
std::vector<rect_detection> dets;
|
||||
(*this)(img,dets,adjust_threshold);
|
||||
|
||||
final_dets.clear();
|
||||
final_dets.reserve(dets.size());
|
||||
|
||||
// convert all the rectangle detections into full_object_detections.
|
||||
for (unsigned long i = 0; i < dets.size(); ++i)
|
||||
{
|
||||
final_dets.push_back(std::make_pair(dets[i].detection_confidence,
|
||||
scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w)));
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void object_detector<image_scanner_type>::
|
||||
operator() (
|
||||
const image_type& img,
|
||||
std::vector<full_object_detection>& final_dets,
|
||||
double adjust_threshold
|
||||
)
|
||||
{
|
||||
std::vector<rect_detection> dets;
|
||||
(*this)(img,dets,adjust_threshold);
|
||||
|
||||
final_dets.clear();
|
||||
final_dets.reserve(dets.size());
|
||||
|
||||
// convert all the rectangle detections into full_object_detections.
|
||||
for (unsigned long i = 0; i < dets.size(); ++i)
|
||||
{
|
||||
final_dets.push_back(scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w));
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
const test_box_overlap& object_detector<image_scanner_type>::
|
||||
get_overlap_tester (
|
||||
) const
|
||||
{
|
||||
return boxes_overlap;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
const image_scanner_type& object_detector<image_scanner_type>::
|
||||
get_scanner (
|
||||
) const
|
||||
{
|
||||
return scanner;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_OBJECT_DeTECTOR_Hh_
|
||||
|
||||
|
||||
404
lib/3rdParty/dlib/include/dlib/image_processing/object_detector_abstract.h
vendored
Normal file
404
lib/3rdParty/dlib/include/dlib/image_processing/object_detector_abstract.h
vendored
Normal file
@@ -0,0 +1,404 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
|
||||
#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
|
||||
|
||||
#include "../geometry.h"
|
||||
#include <vector>
|
||||
#include "box_overlap_testing_abstract.h"
|
||||
#include "full_object_detection_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct rect_detection
|
||||
{
|
||||
double detection_confidence;
|
||||
unsigned long weight_index;
|
||||
rectangle rect;
|
||||
};
|
||||
|
||||
struct full_detection
|
||||
{
|
||||
double detection_confidence;
|
||||
unsigned long weight_index;
|
||||
full_object_detection rect;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type_
|
||||
>
|
||||
class object_detector
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON image_scanner_type_
|
||||
image_scanner_type_ must be an implementation of
|
||||
dlib/image_processing/scan_image_pyramid_abstract.h or
|
||||
dlib/image_processing/scan_fhog_pyramid.h or
|
||||
dlib/image_processing/scan_image_custom.h or
|
||||
dlib/image_processing/scan_image_boxes_abstract.h
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for detecting the positions of objects in an image.
|
||||
In particular, it is a simple container to aggregate an instance of an image
|
||||
scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
|
||||
scan_image_boxes), the weight vector needed by one of these image scanners,
|
||||
and finally an instance of test_box_overlap. The test_box_overlap object
|
||||
is used to perform non-max suppression on the output of the image scanner
|
||||
object.
|
||||
|
||||
Note further that this object can contain multiple weight vectors. In this
|
||||
case, it will run the image scanner multiple times, once with each of the
|
||||
weight vectors. Then it will aggregate the results from all runs, perform
|
||||
non-max suppression and then return the results. Therefore, the object_detector
|
||||
can also be used as a container for a set of object detectors that all use
|
||||
the same image scanner but different weight vectors. This is useful since
|
||||
the object detection procedure has two parts. A loading step where the
|
||||
image is loaded into the scanner, then a detect step which uses the weight
|
||||
vector to locate objects in the image. Since the loading step is independent
|
||||
of the weight vector it is most efficient to run multiple detectors by
|
||||
performing one load into a scanner followed by multiple detect steps. This
|
||||
avoids unnecessarily loading the same image into the scanner multiple times.
|
||||
!*/
|
||||
public:
|
||||
typedef image_scanner_type_ image_scanner_type;
|
||||
typedef typename image_scanner_type::feature_vector_type feature_vector_type;
|
||||
|
||||
object_detector (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This detector won't generate any detections when
|
||||
presented with an image.
|
||||
- #num_detectors() == 0
|
||||
!*/
|
||||
|
||||
object_detector (
|
||||
const object_detector& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #*this is a copy of item
|
||||
- #get_scanner() == item.get_scanner()
|
||||
(note that only the "configuration" of item.get_scanner() is copied.
|
||||
I.e. the copy is done using copy_configuration())
|
||||
!*/
|
||||
|
||||
object_detector (
|
||||
const image_scanner_type& scanner,
|
||||
const test_box_overlap& overlap_tester,
|
||||
const feature_vector_type& w
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- w.size() == scanner.get_num_dimensions() + 1
|
||||
- scanner.get_num_detection_templates() > 0
|
||||
ensures
|
||||
- When the operator() member function is called it will
|
||||
invoke scanner.detect(w,dets,w(w.size()-1)), suppress
|
||||
overlapping detections, and then report the results.
|
||||
- when #*this is used to detect objects, the set of
|
||||
output detections will never contain any overlaps
|
||||
with respect to overlap_tester. That is, for all
|
||||
pairs of returned detections A and B, we will always
|
||||
have: overlap_tester(A,B) == false
|
||||
- #get_w() == w
|
||||
- #get_overlap_tester() == overlap_tester
|
||||
- #get_scanner() == scanner
|
||||
(note that only the "configuration" of scanner is copied.
|
||||
I.e. the copy is done using copy_configuration())
|
||||
- #num_detectors() == 1
|
||||
!*/
|
||||
|
||||
object_detector (
|
||||
const image_scanner_type& scanner,
|
||||
const test_box_overlap& overlap_tester,
|
||||
const std::vector<feature_vector_type>& w
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- for all valid i:
|
||||
- w[i].size() == scanner.get_num_dimensions() + 1
|
||||
- scanner.get_num_detection_templates() > 0
|
||||
- w.size() > 0
|
||||
ensures
|
||||
- When the operator() member function is called it will invoke
|
||||
get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i. Then it
|
||||
will take all the detections output by the calls to detect() and suppress
|
||||
overlapping detections, and finally report the results.
|
||||
- when #*this is used to detect objects, the set of output detections will
|
||||
never contain any overlaps with respect to overlap_tester. That is, for
|
||||
all pairs of returned detections A and B, we will always have:
|
||||
overlap_tester(A,B) == false
|
||||
- for all valid i:
|
||||
- #get_w(i) == w[i]
|
||||
- #num_detectors() == w.size()
|
||||
- #get_overlap_tester() == overlap_tester
|
||||
- #get_scanner() == scanner
|
||||
(note that only the "configuration" of scanner is copied.
|
||||
I.e. the copy is done using copy_configuration())
|
||||
!*/
|
||||
|
||||
explicit object_detector (
|
||||
const std::vector<object_detector>& detectors
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- detectors.size() != 0
|
||||
- All the detectors must use compatibly configured scanners. That is, it
|
||||
must make sense for the weight vector from one detector to be used with
|
||||
the scanner from any other.
|
||||
- for all valid i:
|
||||
- detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
|
||||
(i.e. all the detectors use scanners that use the same kind of feature vectors.)
|
||||
ensures
|
||||
- Very much like the above constructor, this constructor takes all the
|
||||
given detectors and packs them into #*this. That is, invoking operator()
|
||||
on #*this will run all the detectors, perform non-max suppression, and
|
||||
then report the results.
|
||||
- When #*this is used to detect objects, the set of output detections will
|
||||
never contain any overlaps with respect to overlap_tester. That is, for
|
||||
all pairs of returned detections A and B, we will always have:
|
||||
overlap_tester(A,B) == false
|
||||
- #num_detectors() == The sum of detectors[i].num_detectors() for all valid i.
|
||||
- #get_overlap_tester() == detectors[0].get_overlap_tester()
|
||||
- #get_scanner() == detectors[0].get_scanner()
|
||||
(note that only the "configuration" of scanner is copied. I.e. the copy
|
||||
is done using copy_configuration())
|
||||
!*/
|
||||
|
||||
unsigned long num_detectors (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of weight vectors in this object. Since each weight
|
||||
vector logically represents an object detector, this returns the number
|
||||
of object detectors contained in this object.
|
||||
!*/
|
||||
|
||||
const feature_vector_type& get_w (
|
||||
unsigned long idx = 0
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- idx < num_detectors
|
||||
ensures
|
||||
- returns the idx-th weight vector loaded into this object. All the weight vectors
|
||||
have the same dimension and logically each represents a different detector.
|
||||
!*/
|
||||
|
||||
const test_box_overlap& get_overlap_tester (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the overlap tester used by this object
|
||||
!*/
|
||||
|
||||
const image_scanner_type& get_scanner (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the image scanner used by this object.
|
||||
!*/
|
||||
|
||||
object_detector& operator= (
|
||||
const object_detector& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #*this is a copy of item
|
||||
- #get_scanner() == item.get_scanner()
|
||||
(note that only the "configuration" of item.get_scanner() is
|
||||
copied. I.e. the copy is done using copy_configuration())
|
||||
- returns #*this
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<rect_detection>& dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- img == an object which can be accepted by image_scanner_type::load()
|
||||
ensures
|
||||
- Performs object detection on the given image and stores the detected
|
||||
objects into #dets. In particular, we will have that:
|
||||
- #dets is sorted such that the highest confidence detections come
|
||||
first. E.g. element 0 is the best detection, element 1 the next
|
||||
best, and so on.
|
||||
- #dets.size() == the number of detected objects.
|
||||
- #dets[i].detection_confidence == The strength of the i-th detection.
|
||||
Larger values indicate that the detector is more confident that
|
||||
#dets[i] is a correct detection rather than being a false alarm.
|
||||
Moreover, the detection_confidence is equal to the detection value
|
||||
output by the scanner minus the threshold value stored at the end of
|
||||
the weight vector in get_w(#dets[i].weight_index).
|
||||
- #dets[i].weight_index == the index for the weight vector that
|
||||
generated this detection.
|
||||
- #dets[i].rect == the bounding box for the i-th detection.
|
||||
- #get_scanner() will have been loaded with img. Therefore, you can call
|
||||
#get_scanner().get_feature_vector() to obtain the feature vectors or
|
||||
#get_scanner().get_full_object_detection() to get the
|
||||
full_object_detections for the resulting object detection boxes.
|
||||
- The detection threshold is adjusted by having adjust_threshold added to
|
||||
it. Therefore, an adjust_threshold value > 0 makes detecting objects
|
||||
harder while a negative value makes it easier. Moreover, the following
|
||||
will be true for all valid i:
|
||||
- #dets[i].detection_confidence >= adjust_threshold
|
||||
This means that, for example, you can obtain the maximum possible number
|
||||
of detections by setting adjust_threshold equal to negative infinity.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<full_detection>& dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- img == an object which can be accepted by image_scanner_type::load()
|
||||
ensures
|
||||
- This function is identical to the above operator() routine, except that
|
||||
it outputs full_object_detections instead of rectangles. This means that
|
||||
the output includes part locations. In particular, calling this function
|
||||
is the same as calling the above operator() routine and then using
|
||||
get_scanner().get_full_object_detection() to resolve all the rectangles
|
||||
into full_object_detections. Therefore, this version of operator() is
|
||||
simply a convenience function for performing this set of operations.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
std::vector<rectangle> operator() (
|
||||
const image_type& img,
|
||||
const adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- img == an object which can be accepted by image_scanner_type::load()
|
||||
ensures
|
||||
- This function is identical to the above operator() routine, except that
|
||||
it returns a std::vector<rectangle> which contains just the bounding
|
||||
boxes of all the detections.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- img == an object which can be accepted by image_scanner_type::load()
|
||||
ensures
|
||||
- performs object detection on the given image and stores the
|
||||
detected objects into #dets. In particular, we will have that:
|
||||
- #dets is sorted such that the highest confidence detections
|
||||
come first. E.g. element 0 is the best detection, element 1
|
||||
the next best, and so on.
|
||||
- #dets.size() == the number of detected objects.
|
||||
- #dets[i].first gives the "detection confidence", of the i-th
|
||||
detection. This is the detection value output by the scanner minus
|
||||
the threshold value stored at the end of the weight vector in get_w().
|
||||
- #dets[i].second == the bounding box for the i-th detection.
|
||||
- #get_scanner() will have been loaded with img. Therefore, you can call
|
||||
#get_scanner().get_feature_vector() to obtain the feature vectors or
|
||||
#get_scanner().get_full_object_detection() to get the
|
||||
full_object_detections for the resulting object detection boxes.
|
||||
- The detection threshold is adjusted by having adjust_threshold added to
|
||||
it. Therefore, an adjust_threshold value > 0 makes detecting objects
|
||||
harder while a negative value makes it easier. Moreover, the following
|
||||
will be true for all valid i:
|
||||
- #dets[i].first >= adjust_threshold
|
||||
This means that, for example, you can obtain the maximum possible number
|
||||
of detections by setting adjust_threshold equal to negative infinity.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<std::pair<double, full_object_detection> >& dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- img == an object which can be accepted by image_scanner_type::load()
|
||||
ensures
|
||||
- This function is identical to the above operator() routine, except that
|
||||
it outputs full_object_detections instead of rectangles. This means that
|
||||
the output includes part locations. In particular, calling this function
|
||||
is the same as calling the above operator() routine and then using
|
||||
get_scanner().get_full_object_detection() to resolve all the rectangles
|
||||
into full_object_detections. Therefore, this version of operator() is
|
||||
simply a convenience function for performing this set of operations.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<full_object_detection>& dets,
|
||||
double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- img == an object which can be accepted by image_scanner_type::load()
|
||||
ensures
|
||||
- This function is identical to the above operator() routine, except that
|
||||
it doesn't include a double valued score. That is, it just outputs the
|
||||
full_object_detections.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void serialize (
|
||||
const object_detector<T>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support. Note that this function only saves the
|
||||
configuration part of item.get_scanner(). That is, we use the scanner's
|
||||
copy_configuration() function to get a copy of the scanner that doesn't contain any
|
||||
loaded image data and we then save just the configuration part of the scanner.
|
||||
This means that any serialized object_detectors won't remember any images they have
|
||||
processed but will otherwise contain all their state and be able to detect objects
|
||||
in new images.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void deserialize (
|
||||
object_detector<T>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
|
||||
|
||||
316
lib/3rdParty/dlib/include/dlib/image_processing/remove_unobtainable_rectangles.h
vendored
Normal file
316
lib/3rdParty/dlib/include/dlib/image_processing/remove_unobtainable_rectangles.h
vendored
Normal file
@@ -0,0 +1,316 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_Hh_
|
||||
#define DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_Hh_
|
||||
|
||||
#include "remove_unobtainable_rectangles_abstract.h"
|
||||
#include "scan_image_pyramid.h"
|
||||
#include "scan_image_boxes.h"
|
||||
#include "scan_image_custom.h"
|
||||
#include "scan_fhog_pyramid.h"
|
||||
#include "../svm/structural_object_detection_trainer.h"
|
||||
#include "../geometry.h"
|
||||
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
inline bool matches_rect (
|
||||
const std::vector<rectangle>& rects,
|
||||
const rectangle& rect,
|
||||
const double eps
|
||||
)
|
||||
{
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
const double score = (rect.intersect(rects[i])).area()/(double)(rect+rects[i]).area();
|
||||
if (score > eps)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the element of rects with the highest match score against rect,
// where the score of a candidate is
//     area(rect.intersect(candidate)) / area(rect + candidate).
// On ties the earliest candidate wins (a later one must be strictly better).
// If rects is empty, or no score compares greater than -1, a
// default-constructed rectangle is returned.
inline rectangle get_best_matching_rect (
    const std::vector<rectangle>& rects,
    const rectangle& rect
)
{
    typedef std::vector<rectangle>::const_iterator rect_iterator;

    rectangle winner;
    double winning_score = -1;

    for (rect_iterator candidate = rects.begin(); candidate != rects.end(); ++candidate)
    {
        const double shared_area   = rect.intersect(*candidate).area();
        const double combined_area = (rect + *candidate).area();
        const double score = shared_area/combined_area;

        // Only a strictly better score replaces the current winner.
        if (!(score > winning_score))
            continue;

        winning_score = score;
        winner = *candidate;
    }

    return winner;
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename image_scanner_type
|
||||
>
|
||||
std::vector<std::vector<rectangle> > pyramid_remove_unobtainable_rectangles (
|
||||
const structural_object_detection_trainer<image_scanner_type>& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
)
|
||||
{
|
||||
using namespace dlib::impl;
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(images.size() == object_locations.size(),
|
||||
"\t std::vector<std::vector<rectangle>> remove_unobtainable_rectangles()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
);
|
||||
|
||||
|
||||
std::vector<std::vector<rectangle> > rejects(images.size());
|
||||
|
||||
// If the trainer is setup to automatically fit the overlap tester to the data then
|
||||
// we should use the loosest possible overlap tester here. Otherwise we should use
|
||||
// the tester the trainer will use.
|
||||
test_box_overlap boxes_overlap(0.9999999,1);
|
||||
if (!trainer.auto_set_overlap_tester())
|
||||
boxes_overlap = trainer.get_overlap_tester();
|
||||
|
||||
for (unsigned long k = 0; k < images.size(); ++k)
|
||||
{
|
||||
std::vector<rectangle> objs = object_locations[k];
|
||||
|
||||
// First remove things that don't have any matches with the candidate object
|
||||
// locations.
|
||||
std::vector<rectangle> good_rects;
|
||||
for (unsigned long j = 0; j < objs.size(); ++j)
|
||||
{
|
||||
const rectangle rect = trainer.get_scanner().get_best_matching_rect(objs[j]);
|
||||
const double score = (objs[j].intersect(rect)).area()/(double)(objs[j] + rect).area();
|
||||
if (score > trainer.get_match_eps())
|
||||
good_rects.push_back(objs[j]);
|
||||
else
|
||||
rejects[k].push_back(objs[j]);
|
||||
}
|
||||
object_locations[k] = good_rects;
|
||||
|
||||
|
||||
// Remap these rectangles to the ones that can come out of the scanner. That
|
||||
// way when we compare them to each other in the following loop we will know if
|
||||
// any distinct truth rectangles get mapped to overlapping boxes.
|
||||
objs.resize(good_rects.size());
|
||||
for (unsigned long i = 0; i < good_rects.size(); ++i)
|
||||
objs[i] = trainer.get_scanner().get_best_matching_rect(good_rects[i]);
|
||||
|
||||
good_rects.clear();
|
||||
// now check for truth rects that are too close together.
|
||||
for (unsigned long i = 0; i < objs.size(); ++i)
|
||||
{
|
||||
// check if objs[i] hits another box
|
||||
bool hit_box = false;
|
||||
for (unsigned long j = i+1; j < objs.size(); ++j)
|
||||
{
|
||||
if (boxes_overlap(objs[i], objs[j]))
|
||||
{
|
||||
hit_box = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hit_box)
|
||||
rejects[k].push_back(object_locations[k][i]);
|
||||
else
|
||||
good_rects.push_back(object_locations[k][i]);
|
||||
}
|
||||
object_locations[k] = good_rects;
|
||||
}
|
||||
|
||||
return rejects;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
|
||||
const structural_object_detection_trainer<scan_image_pyramid<Pyramid_type, Feature_extractor_type> >& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
)
|
||||
{
|
||||
return impl::pyramid_remove_unobtainable_rectangles(trainer, images, object_locations);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename Pyramid_type
|
||||
>
|
||||
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
|
||||
const structural_object_detection_trainer<scan_fhog_pyramid<Pyramid_type> >& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
)
|
||||
{
|
||||
return impl::pyramid_remove_unobtainable_rectangles(trainer, images, object_locations);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename scanner_type,
|
||||
typename get_boxes_functor
|
||||
>
|
||||
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
|
||||
get_boxes_functor& bg,
|
||||
const structural_object_detection_trainer<scanner_type>& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
)
|
||||
{
|
||||
using namespace dlib::impl;
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(images.size() == object_locations.size(),
|
||||
"\t std::vector<std::vector<rectangle>> remove_unobtainable_rectangles()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
);
|
||||
|
||||
std::vector<rectangle> rects;
|
||||
|
||||
std::vector<std::vector<rectangle> > rejects(images.size());
|
||||
|
||||
// If the trainer is setup to automatically fit the overlap tester to the data then
|
||||
// we should use the loosest possible overlap tester here. Otherwise we should use
|
||||
// the tester the trainer will use.
|
||||
test_box_overlap boxes_overlap(0.9999999,1);
|
||||
if (!trainer.auto_set_overlap_tester())
|
||||
boxes_overlap = trainer.get_overlap_tester();
|
||||
|
||||
for (unsigned long k = 0; k < images.size(); ++k)
|
||||
{
|
||||
std::vector<rectangle> objs = object_locations[k];
|
||||
// Don't even bother computing the candidate rectangles if there aren't any
|
||||
// object locations for this image since there isn't anything to do anyway.
|
||||
if (objs.size() == 0)
|
||||
continue;
|
||||
|
||||
bg(images[k], rects);
|
||||
|
||||
|
||||
// First remove things that don't have any matches with the candidate object
|
||||
// locations.
|
||||
std::vector<rectangle> good_rects;
|
||||
for (unsigned long j = 0; j < objs.size(); ++j)
|
||||
{
|
||||
if (matches_rect(rects, objs[j], trainer.get_match_eps()))
|
||||
good_rects.push_back(objs[j]);
|
||||
else
|
||||
rejects[k].push_back(objs[j]);
|
||||
}
|
||||
object_locations[k] = good_rects;
|
||||
|
||||
|
||||
// Remap these rectangles to the ones that can come out of the scanner. That
|
||||
// way when we compare them to each other in the following loop we will know if
|
||||
// any distinct truth rectangles get mapped to overlapping boxes.
|
||||
objs.resize(good_rects.size());
|
||||
for (unsigned long i = 0; i < good_rects.size(); ++i)
|
||||
objs[i] = get_best_matching_rect(rects, good_rects[i]);
|
||||
|
||||
good_rects.clear();
|
||||
// now check for truth rects that are too close together.
|
||||
for (unsigned long i = 0; i < objs.size(); ++i)
|
||||
{
|
||||
// check if objs[i] hits another box
|
||||
bool hit_box = false;
|
||||
for (unsigned long j = i+1; j < objs.size(); ++j)
|
||||
{
|
||||
if (boxes_overlap(objs[i], objs[j]))
|
||||
{
|
||||
hit_box = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hit_box)
|
||||
rejects[k].push_back(object_locations[k][i]);
|
||||
else
|
||||
good_rects.push_back(object_locations[k][i]);
|
||||
}
|
||||
object_locations[k] = good_rects;
|
||||
}
|
||||
|
||||
return rejects;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Adapter that turns an object with a two-argument load() member function
// into something callable with function-call syntax: functor(u, v) simply
// performs obj.load(u, v).  The wrapped object is held by reference and is
// not owned, so it must outlive the functor.
template <typename T>
struct load_to_functor
{
    // The wrapped object that receives the load() calls.
    T& obj;

    load_to_functor(T& obj_) : obj(obj_) {}

    // Forwards both arguments, unchanged, to obj.load().
    template <typename input_type, typename output_type>
    void operator()(const input_type& input, output_type& output)
    {
        obj.load(input, output);
    }
};
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename feature_extractor,
|
||||
typename box_generator
|
||||
>
|
||||
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
|
||||
const structural_object_detection_trainer<scan_image_boxes<feature_extractor, box_generator> >& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
)
|
||||
{
|
||||
box_generator bg = trainer.get_scanner().get_box_generator();
|
||||
return impl::remove_unobtainable_rectangles(bg, trainer, images, object_locations);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename feature_extractor
|
||||
>
|
||||
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
|
||||
const structural_object_detection_trainer<scan_image_custom<feature_extractor> >& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
)
|
||||
{
|
||||
feature_extractor fe;
|
||||
fe.copy_configuration(trainer.get_scanner().get_feature_extractor());
|
||||
impl::load_to_functor<feature_extractor> bg(fe);
|
||||
return impl::remove_unobtainable_rectangles(bg, trainer, images, object_locations);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_Hh_
|
||||
|
||||
57
lib/3rdParty/dlib/include/dlib/image_processing/remove_unobtainable_rectangles_abstract.h
vendored
Normal file
57
lib/3rdParty/dlib/include/dlib/image_processing/remove_unobtainable_rectangles_abstract.h
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_ABSTRACT_Hh_
|
||||
#ifdef DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_ABSTRACT_Hh_
|
||||
|
||||
#include "scan_image_pyramid_abstract.h"
|
||||
#include "scan_image_boxes_abstract.h"
|
||||
#include "scan_image_custom_abstract.h"
|
||||
#include "scan_fhog_pyramid_abstract.h"
|
||||
#include "../svm/structural_object_detection_trainer_abstract.h"
|
||||
#include "../geometry.h"
|
||||
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type,
|
||||
typename image_array_type,
|
||||
typename Pyramid_type
|
||||
>
|
||||
std::vector<std::vector<rectangle> > remove_unobtainable_rectangles (
|
||||
const structural_object_detection_trainer<image_scanner_type>& trainer,
|
||||
const image_array_type& images,
|
||||
std::vector<std::vector<rectangle> >& object_locations
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_scanner_type must be either scan_image_boxes, scan_image_pyramid,
|
||||
scan_image_custom, or scan_fhog_pyramid.
|
||||
- images.size() == object_locations.size()
|
||||
ensures
|
||||
- Recall that the image scanner objects can't produce all possible rectangles
|
||||
as object detections since they only consider a limited subset of all possible
|
||||
object positions. Moreover, the structural_object_detection_trainer requires
|
||||
its input training data to not contain any object positions which are unobtainable
|
||||
by its scanner object. Therefore, remove_unobtainable_rectangles() is a tool
|
||||
to filter out these unobtainable rectangles from the training data before giving
|
||||
it to a structural_object_detection_trainer.
|
||||
- This function interprets object_locations[i] as the set of object positions for
|
||||
image[i], for all valid i.
|
||||
- In particular, this function removes unobtainable rectangles from object_locations
|
||||
and also returns a vector V such that:
|
||||
- V.size() == object_locations.size()
|
||||
- for all valid i:
|
||||
- V[i] == the set of rectangles removed from object_locations[i]
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_REMOVE_UnOBTAINABLE_RECTANGLES_ABSTRACT_Hh_
|
||||
|
||||
|
||||
64
lib/3rdParty/dlib/include/dlib/image_processing/render_face_detections.h
vendored
Normal file
64
lib/3rdParty/dlib/include/dlib/image_processing/render_face_detections.h
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_RENDER_FACE_DeTECTIONS_H_
|
||||
#define DLIB_RENDER_FACE_DeTECTIONS_H_
|
||||
|
||||
#include "full_object_detection.h"
|
||||
#include "../gui_widgets.h"
|
||||
#include "render_face_detections_abstract.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
inline std::vector<image_window::overlay_line> render_face_detections (
|
||||
const std::vector<full_object_detection>& dets,
|
||||
const rgb_pixel color = rgb_pixel(0,255,0)
|
||||
)
|
||||
{
|
||||
std::vector<image_window::overlay_line> lines;
|
||||
for (unsigned long i = 0; i < dets.size(); ++i)
|
||||
{
|
||||
DLIB_CASSERT(dets[i].num_parts() == 68,
|
||||
"\t std::vector<image_window::overlay_line> render_face_detections()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t dets["<<i<<"].num_parts(): " << dets[i].num_parts()
|
||||
);
|
||||
|
||||
const full_object_detection& d = dets[i];
|
||||
for (unsigned long i = 1; i <= 16; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
|
||||
for (unsigned long i = 28; i <= 30; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
|
||||
for (unsigned long i = 18; i <= 21; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
for (unsigned long i = 23; i <= 26; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
for (unsigned long i = 31; i <= 35; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
lines.push_back(image_window::overlay_line(d.part(30), d.part(35), color));
|
||||
|
||||
for (unsigned long i = 37; i <= 41; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
lines.push_back(image_window::overlay_line(d.part(36), d.part(41), color));
|
||||
|
||||
for (unsigned long i = 43; i <= 47; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
lines.push_back(image_window::overlay_line(d.part(42), d.part(47), color));
|
||||
|
||||
for (unsigned long i = 49; i <= 59; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
lines.push_back(image_window::overlay_line(d.part(48), d.part(59), color));
|
||||
|
||||
for (unsigned long i = 61; i <= 67; ++i)
|
||||
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
|
||||
lines.push_back(image_window::overlay_line(d.part(60), d.part(67), color));
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RENDER_FACE_DeTECTIONS_H_
|
||||
|
||||
33
lib/3rdParty/dlib/include/dlib/image_processing/render_face_detections_abstract.h
vendored
Normal file
33
lib/3rdParty/dlib/include/dlib/image_processing/render_face_detections_abstract.h
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_RENDER_FACE_DeTECTIONS_ABSTRACT_H_
|
||||
#ifdef DLIB_RENDER_FACE_DeTECTIONS_ABSTRACT_H_
|
||||
|
||||
#include "full_object_detection_abstract.h"
|
||||
#include "../gui_widgets.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
inline std::vector<image_window::overlay_line> render_face_detections (
|
||||
const std::vector<full_object_detection>& dets,
|
||||
const rgb_pixel color = rgb_pixel(0,255,0)
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- for all valid i:
|
||||
- dets[i].num_parts() == 68
|
||||
ensures
|
||||
- Interprets the given objects as face detections with parts annotated using
|
||||
the iBUG face landmark scheme. We then return a set of overlay lines that
|
||||
will draw the objects onto the screen in a way that properly draws the
|
||||
outline of the face features defined by the part locations.
|
||||
- returns a single flat vector containing, for every element of dets, the
|
||||
overlay lines necessary to render that face detection.
|
||||
!*/
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RENDER_FACE_DeTECTIONS_ABSTRACT_H_
|
||||
|
||||
|
||||
1568
lib/3rdParty/dlib/include/dlib/image_processing/scan_fhog_pyramid.h
vendored
Normal file
1568
lib/3rdParty/dlib/include/dlib/image_processing/scan_fhog_pyramid.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
784
lib/3rdParty/dlib/include/dlib/image_processing/scan_fhog_pyramid_abstract.h
vendored
Normal file
784
lib/3rdParty/dlib/include/dlib/image_processing/scan_fhog_pyramid_abstract.h
vendored
Normal file
@@ -0,0 +1,784 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_Hh_
|
||||
|
||||
#include <vector>
|
||||
#include "../image_transforms/fhog_abstract.h"
|
||||
#include "object_detector_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename feature_extractor_type
|
||||
>
|
||||
matrix<unsigned char> draw_fhog (
|
||||
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
|
||||
const unsigned long weight_index = 0,
|
||||
const long cell_draw_size = 15
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- cell_draw_size > 0
|
||||
- weight_index < detector.num_detectors()
|
||||
- detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
|
||||
(i.e. the detector must have been populated with a HOG filter)
|
||||
ensures
|
||||
- Converts the HOG filters in the given detector (specifically, the filters in
|
||||
detector.get_w(weight_index)) into an image suitable for display on the
|
||||
screen. In particular, we draw all the HOG cells into a grayscale image in a
|
||||
way that shows the magnitude and orientation of the gradient energy in each
|
||||
cell. The resulting image is then returned.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename feature_extractor_type
|
||||
>
|
||||
unsigned long num_separable_filters (
|
||||
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
|
||||
const unsigned long weight_index = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- weight_index < detector.num_detectors()
|
||||
- detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
|
||||
(i.e. the detector must have been populated with a HOG filter)
|
||||
ensures
|
||||
- Returns the number of separable filters necessary to represent the HOG
|
||||
filters in the given detector's weight_index'th filter. This is the filter
|
||||
defined by detector.get_w(weight_index).
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename feature_extractor_type
|
||||
>
|
||||
object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> > threshold_filter_singular_values (
|
||||
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
|
||||
double thresh,
|
||||
const unsigned long weight_index = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- thresh >= 0
|
||||
- weight_index < detector.num_detectors()
|
||||
- detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
|
||||
(i.e. the detector must have been populated with a HOG filter)
|
||||
ensures
|
||||
- Removes all components of the filters in the given detector that have
|
||||
singular values that are smaller than the given threshold. Therefore, this
|
||||
function allows you to control how many separable filters are in a detector.
|
||||
In particular, as thresh gets larger the quantity
|
||||
num_separable_filters(threshold_filter_singular_values(detector,thresh,weight_index),weight_index)
|
||||
will generally get smaller and therefore give a faster running detector.
|
||||
However, note that at some point a large enough thresh will drop too much
|
||||
information from the filters and their accuracy will suffer.
|
||||
- returns the updated detector
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class default_fhog_feature_extractor
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
The scan_fhog_pyramid object defined below is primarily meant to be used
|
||||
with the feature extraction technique implemented by extract_fhog_features().
|
||||
This technique can generally be understood as taking an input image and
|
||||
outputting a multi-planed output image of floating point numbers that
|
||||
somehow describe the image contents. Since there are many ways to define
|
||||
how this feature mapping is performed, the scan_fhog_pyramid allows you to
|
||||
replace the extract_fhog_features() method with a customized method of your
|
||||
choosing. To do this you implement a class with the same interface as
|
||||
default_fhog_feature_extractor.
|
||||
|
||||
Therefore, the point of default_fhog_feature_extractor is twofold. First,
|
||||
it provides the default FHOG feature extraction method used by scan_fhog_pyramid.
|
||||
Second, it serves to document the interface you need to implement to define
|
||||
your own custom HOG style feature extraction.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
rectangle image_to_feats (
|
||||
const rectangle& rect,
|
||||
int cell_size,
|
||||
int filter_rows_padding,
|
||||
int filter_cols_padding
|
||||
) const { return image_to_fhog(rect, cell_size, filter_rows_padding, filter_cols_padding); }
|
||||
/*!
|
||||
requires
|
||||
- cell_size > 0
|
||||
- filter_rows_padding > 0
|
||||
- filter_cols_padding > 0
|
||||
ensures
|
||||
- Maps a rectangle from the coordinates in an input image to the corresponding
|
||||
area in the output feature image.
|
||||
!*/
|
||||
|
||||
rectangle feats_to_image (
|
||||
const rectangle& rect,
|
||||
int cell_size,
|
||||
int filter_rows_padding,
|
||||
int filter_cols_padding
|
||||
) const { return fhog_to_image(rect, cell_size, filter_rows_padding, filter_cols_padding); }
|
||||
/*!
|
||||
requires
|
||||
- cell_size > 0
|
||||
- filter_rows_padding > 0
|
||||
- filter_cols_padding > 0
|
||||
ensures
|
||||
- Maps a rectangle from the coordinates of the hog feature image back to
|
||||
the input image.
|
||||
- Mapping from feature space to image space is an invertible
|
||||
transformation. That is, for any rectangle R we have:
|
||||
R == image_to_feats(feats_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
|
||||
cell_size,filter_rows_padding,filter_cols_padding).
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void operator()(
|
||||
const image_type& img,
|
||||
dlib::array<array2d<float> >& hog,
|
||||
int cell_size,
|
||||
int filter_rows_padding,
|
||||
int filter_cols_padding
|
||||
) const { extract_fhog_features(img,hog,cell_size,filter_rows_padding,filter_cols_padding); }
|
||||
/*!
|
||||
requires
|
||||
- image_type == is an implementation of array2d/array2d_kernel_abstract.h
|
||||
- img contains some kind of pixel type.
|
||||
(i.e. pixel_traits<typename image_type::type> is defined)
|
||||
ensures
|
||||
- Extracts FHOG features by calling extract_fhog_features(). The results are
|
||||
stored into #hog. Note that if you are implementing your own feature extractor you can
|
||||
pretty much do whatever you want in terms of feature extraction so long as the following
|
||||
conditions are met:
|
||||
- #hog.size() == get_num_planes()
|
||||
- Each image plane in #hog has the same dimensions.
|
||||
- for all valid i, r, and c:
|
||||
- #hog[i][r][c] == a feature value describing the image content centered at the
|
||||
following pixel location in img:
|
||||
feats_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding)
|
||||
!*/
|
||||
|
||||
inline unsigned long get_num_planes (
|
||||
) const { return 31; }
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of planes in the hog image output by the operator()
|
||||
method.
|
||||
!*/
|
||||
};
|
||||
|
||||
inline void serialize (const default_fhog_feature_extractor&, std::ostream&) {}
|
||||
inline void deserialize (default_fhog_feature_extractor&, std::istream&) {}
|
||||
/*!
|
||||
Provides serialization support. Note that there is no state in the default hog
|
||||
feature extractor so these functions do nothing. But if you define a custom
|
||||
feature extractor then make sure you remember to serialize any state in your
|
||||
feature extractor.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type = default_fhog_feature_extractor
|
||||
>
|
||||
class scan_fhog_pyramid : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON Pyramid_type
|
||||
- Must be one of the pyramid_down objects defined in
|
||||
dlib/image_transforms/image_pyramid_abstract.h or an object with a
|
||||
compatible interface
|
||||
|
||||
REQUIREMENTS ON Feature_extractor_type
|
||||
- Must be a type with an interface compatible with the
|
||||
default_fhog_feature_extractor.
|
||||
|
||||
INITIAL VALUE
|
||||
- get_padding() == 1
|
||||
- get_cell_size() == 8
|
||||
- get_detection_window_width() == 64
|
||||
- get_detection_window_height() == 64
|
||||
- get_max_pyramid_levels() == 1000
|
||||
- get_min_pyramid_layer_width() == 64
|
||||
- get_min_pyramid_layer_height() == 64
|
||||
- get_nuclear_norm_regularization_strength() == 0
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for running a fixed sized sliding window classifier
|
||||
over an image pyramid. In particular, it slides a linear classifier over
|
||||
a HOG pyramid as discussed in the paper:
|
||||
Histograms of Oriented Gradients for Human Detection by Navneet Dalal
|
||||
and Bill Triggs, CVPR 2005
|
||||
However, we augment the method slightly to use the version of HOG features
|
||||
from:
|
||||
Object Detection with Discriminatively Trained Part Based Models by
|
||||
P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
|
||||
IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
|
||||
We do this because these HOG features have been shown to give superior performance.
|
||||
|
||||
THREAD SAFETY
|
||||
Concurrent access to an instance of this object is not safe and should be
|
||||
protected by a mutex lock except for the case where you are copying the
|
||||
configuration (via copy_configuration()) of a scan_fhog_pyramid object to
|
||||
many other threads. In this case, it is safe to copy the configuration of
|
||||
a shared object so long as no other operations are performed on it.
|
||||
!*/
|
||||
|
||||
public:
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
typedef Pyramid_type pyramid_type;
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
|
||||
scan_fhog_pyramid (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
explicit scan_fhog_pyramid (
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
- #get_feature_extractor() == fe
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type == is an implementation of array2d/array2d_kernel_abstract.h
|
||||
- img contains some kind of pixel type.
|
||||
(i.e. pixel_traits<typename image_type::type> is defined)
|
||||
ensures
|
||||
- #is_loaded_with_image() == true
|
||||
- This object is ready to run a classifier over img to detect object
|
||||
locations. Call detect() to do this.
|
||||
!*/
|
||||
|
||||
const feature_extractor_type& get_feature_extractor(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a const reference to the feature extractor used by this object.
|
||||
!*/
|
||||
|
||||
bool is_loaded_with_image (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns true if this object has been loaded with an image to process and
|
||||
false otherwise.
|
||||
!*/
|
||||
|
||||
void copy_configuration (
|
||||
const scan_fhog_pyramid& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Copies all the state information of item into *this, except for state
|
||||
information populated by load(). More precisely, given two scan_fhog_pyramid
|
||||
objects S1 and S2, the following sequence of instructions should always
|
||||
result in both of them having the exact same state:
|
||||
S2.copy_configuration(S1);
|
||||
S1.load(img);
|
||||
S2.load(img);
|
||||
!*/
|
||||
|
||||
void set_detection_window_size (
|
||||
unsigned long window_width,
|
||||
unsigned long window_height
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- window_width > 0
|
||||
- window_height > 0
|
||||
ensures
|
||||
- When detect() is called, this object scans a window that is of the given
|
||||
width and height (in pixels) over each layer in an image pyramid. This
|
||||
means that the rectangle detections which come out of detect() will have
|
||||
a width to height ratio approximately equal to window_width/window_height
|
||||
and will be approximately window_width*window_height pixels in area or
|
||||
larger. Therefore, the smallest object that can be detected is roughly
|
||||
window_width by window_height pixels in size.
|
||||
- #get_detection_window_width() == window_width
|
||||
- #get_detection_window_height() == window_height
|
||||
- Since we use a HOG feature representation, the detection procedure works
|
||||
as follows:
|
||||
Step 1. Make an image pyramid.
|
||||
Step 2. Convert each layer of the image pyramid into a multi-planed HOG "image".
|
||||
(the number of bands is given by get_feature_extractor().get_num_planes())
|
||||
Step 3. Scan a linear classifier over each HOG image in the pyramid.
|
||||
Moreover, the HOG features quantize the input image into a grid of cells,
|
||||
each cell being get_cell_size() by get_cell_size() pixels in size. So
|
||||
when we scan the object detector over the pyramid we are scanning an
|
||||
appropriately sized window over these smaller quantized HOG features. In
|
||||
particular, the size of the window we scan over the HOG feature pyramid
|
||||
is #get_fhog_window_width() by #get_fhog_window_height() HOG cells in
|
||||
size.
|
||||
- #is_loaded_with_image() == false
|
||||
!*/
|
||||
|
||||
unsigned long get_detection_window_width (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the width, in pixels, of the detection window that is scanned
|
||||
over the image when detect() is called.
|
||||
!*/
|
||||
|
||||
inline unsigned long get_detection_window_height (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the height, in pixels, of the detection window that is scanned
|
||||
over the image when detect() is called.
|
||||
!*/
|
||||
|
||||
unsigned long get_fhog_window_width (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- Returns the width of the HOG scanning window in terms of HOG cell blocks.
|
||||
Note that this is a function of get_detection_window_width(), get_cell_size(),
|
||||
and get_padding() and is therefore not something you set directly.
|
||||
- #get_fhog_window_width() is approximately equal to the number of HOG cells
|
||||
that fit into get_detection_window_width() pixels plus 2*get_padding()
|
||||
since we include additional padding around each window to add context.
|
||||
!*/
|
||||
|
||||
unsigned long get_fhog_window_height (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- Returns the height of the HOG scanning window in terms of HOG cell blocks.
|
||||
Note that this is a function of get_detection_window_height(), get_cell_size(),
|
||||
and get_padding() and is therefore not something you set directly.
|
||||
- #get_fhog_window_height() is approximately equal to the number of HOG cells
|
||||
that fit into get_detection_window_height() pixels plus 2*get_padding()
|
||||
since we include additional padding around each window to add context.
|
||||
!*/
|
||||
|
||||
void set_padding (
|
||||
unsigned long new_padding
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_padding() == new_padding
|
||||
- #is_loaded_with_image() == false
|
||||
!*/
|
||||
|
||||
unsigned long get_padding (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- The HOG windows scanned over the HOG pyramid can include additional HOG
|
||||
cells outside the detection window. This can help add context and
|
||||
improve detection accuracy. This function returns the number of extra
|
||||
HOG cells added onto the border of the HOG windows which are scanned by
|
||||
detect().
|
||||
!*/
|
||||
|
||||
unsigned long get_cell_size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- Returns the size of the HOG cells. Each HOG cell is square and contains
|
||||
get_cell_size()*get_cell_size() pixels.
|
||||
!*/
|
||||
|
||||
void set_cell_size (
|
||||
unsigned long new_cell_size
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- new_cell_size > 0
|
||||
ensures
|
||||
- #get_cell_size() == new_cell_size
|
||||
- #is_loaded_with_image() == false
|
||||
!*/
|
||||
|
||||
inline long get_num_dimensions (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns get_fhog_window_width()*get_fhog_window_height()*get_feature_extractor().get_num_planes()
|
||||
(i.e. The number of features is equal to the size of the HOG window times
|
||||
the number of planes output by the feature extractor. )
|
||||
!*/
|
||||
|
||||
inline unsigned long get_num_detection_templates (
|
||||
) const { return 1; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 1. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Notionally, its return value indicates
|
||||
that a scan_fhog_pyramid object is always ready to detect objects once
|
||||
an image has been loaded.
|
||||
!*/
|
||||
|
||||
inline unsigned long get_num_movable_components_per_detection_template (
|
||||
) const { return 0; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 0. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Its return value means that this object
|
||||
does not support using movable part models.
|
||||
!*/
|
||||
|
||||
unsigned long get_max_pyramid_levels (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the maximum number of image pyramid levels this object will use.
|
||||
Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
|
||||
will be used at all. That is, only the original image will be processed
|
||||
and no lower scale versions will be created.
|
||||
!*/
|
||||
|
||||
void set_max_pyramid_levels (
|
||||
unsigned long max_levels
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- max_levels > 0
|
||||
ensures
|
||||
- #get_max_pyramid_levels() == max_levels
|
||||
!*/
|
||||
|
||||
void set_min_pyramid_layer_size (
|
||||
unsigned long width,
|
||||
unsigned long height
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- width > 0
|
||||
- height > 0
|
||||
ensures
|
||||
- #get_min_pyramid_layer_width() == width
|
||||
- #get_min_pyramid_layer_height() == height
|
||||
!*/
|
||||
|
||||
inline unsigned long get_min_pyramid_layer_width (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the smallest allowable width of an image in the image pyramid.
|
||||
All pyramids will always include the original input image, however, no
|
||||
pyramid levels will be created which have a width smaller than the
|
||||
value returned by this function.
|
||||
!*/
|
||||
|
||||
inline unsigned long get_min_pyramid_layer_height (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the smallest allowable height of an image in the image pyramid.
|
||||
All pyramids will always include the original input image, however, no
|
||||
pyramid levels will be created which have a height smaller than the
|
||||
value returned by this function.
|
||||
!*/
|
||||
|
||||
fhog_filterbank build_fhog_filterbank (
|
||||
const feature_vector_type& weights
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- weights.size() >= get_num_dimensions()
|
||||
ensures
|
||||
- Creates and then returns a fhog_filterbank object FB such that:
|
||||
- FB.get_num_dimensions() == get_num_dimensions()
|
||||
- FB.get_filters() == the values in weights unpacked into get_feature_extractor().get_num_planes() filters.
|
||||
- FB.num_separable_filters() == the number of separable filters necessary to
|
||||
represent all the filters in FB.get_filters().
|
||||
!*/
|
||||
|
||||
class fhog_filterbank
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object represents a HOG filter bank. That is, the classifier that is
|
||||
slid over a HOG pyramid is a set of get_feature_extractor().get_num_planes()
|
||||
linear filters, each get_fhog_window_height() rows by get_fhog_window_width()
|
||||
columns in size. This object contains that set of filters.
|
||||
!*/
|
||||
|
||||
public:
|
||||
long get_num_dimensions(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- Returns the total number of values in the filters.
|
||||
!*/
|
||||
|
||||
const std::vector<matrix<float> >& get_filters(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the set of HOG filters in this object.
|
||||
!*/
|
||||
|
||||
unsigned long num_separable_filters(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of separable filters necessary to represent all
|
||||
the filters in get_filters().
|
||||
!*/
|
||||
};
|
||||
|
||||
void detect (
|
||||
const fhog_filterbank& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.get_num_dimensions() == get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Scans the HOG filter defined by w over the HOG pyramid that was populated
|
||||
by the last call to load() and stores all object detections into #dets.
|
||||
- for all valid i:
|
||||
- #dets[i].second == The object box which produced this detection. This rectangle gives
|
||||
the location of the detection. Note that the rectangle will have been converted back into
|
||||
the original image input space. That is, if this detection was made at a low level in the
|
||||
image pyramid then the object box will have been automatically mapped up the pyramid layers
|
||||
to the original image space. Or in other words, if you plot #dets[i].second on top of the
|
||||
image given to load() it will show up in the right place.
|
||||
- #dets[i].first == The score for this detection. This value is equal to dot(w, feature vector
|
||||
for this sliding window location).
|
||||
- #dets[i].first >= thresh
|
||||
- #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
|
||||
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only the first
|
||||
get_num_dimensions() are used.
|
||||
- Note that no form of non-max suppression is performed. If a window has a score >= thresh
|
||||
then it is reported in #dets.
|
||||
!*/
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- performs: detect(build_fhog_filterbank(w), dets, thresh)
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- obj.num_parts() == 0
|
||||
- is_loaded_with_image() == true
|
||||
- psi.size() >= get_num_dimensions()
|
||||
(i.e. psi must have preallocated its memory before this function is called)
|
||||
ensures
|
||||
- This function allows you to determine the feature vector used for an
|
||||
object detection output from detect(). Note that this vector is
|
||||
added to psi. Note also that you must use get_full_object_detection() to
|
||||
convert a rectangle from detect() into the needed full_object_detection.
|
||||
- The dimensionality of the vector added to psi is get_num_dimensions(). This
|
||||
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
|
||||
- Since scan_fhog_pyramid only searches a limited set of object locations,
|
||||
not all possible rectangles can be output by detect(). So in the case
|
||||
where obj.get_rect() could not arise from a call to detect(), this
|
||||
function will map obj.get_rect() to the nearest possible rectangle and
|
||||
then add the feature vector for the mapped rectangle into #psi.
|
||||
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
|
||||
gets mapped to for feature extraction.
|
||||
!*/
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns full_object_detection(rect)
|
||||
(This function is here only for compatibility with the scan_image_pyramid
|
||||
object)
|
||||
!*/
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- Since scan_fhog_pyramid only searches a limited set of object locations,
|
||||
not all possible rectangles can be represented. Therefore, this function
|
||||
allows you to supply a rectangle and obtain the nearest possible
|
||||
candidate object location rectangle.
|
||||
!*/
|
||||
|
||||
double get_nuclear_norm_regularization_strength (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- If the number of separable filters in a fhog_filterbank is small then the
|
||||
filter bank can be scanned over an image much faster than a normal set of
|
||||
filters. Therefore, this object provides the option to encourage
|
||||
machine learning methods that learn a HOG filter bank (i.e.
|
||||
structural_object_detection_trainer) to select filter banks that have
|
||||
this beneficial property. In particular, the value returned by
|
||||
get_nuclear_norm_regularization_strength() is a multiplier on a nuclear
|
||||
norm regularizer which will encourage the selection of filters that use a
|
||||
small number of separable components. Larger values tend to
|
||||
give a smaller number of separable filters.
|
||||
- if (get_nuclear_norm_regularization_strength() == 0) then
|
||||
- This feature is disabled
|
||||
- else
|
||||
- A nuclear norm regularizer will be added when
|
||||
structural_object_detection_trainer is used to learn a HOG filter
|
||||
bank. Note that this can make the training process take
|
||||
significantly longer (but can result in faster object detectors).
|
||||
!*/
|
||||
|
||||
void set_nuclear_norm_regularization_strength (
|
||||
double strength
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- strength >= 0
|
||||
ensures
|
||||
- #get_nuclear_norm_regularization_strength() == strength
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void serialize (
|
||||
const scan_fhog_pyramid<T>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void deserialize (
|
||||
scan_fhog_pyramid<T>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename pyramid_type,
|
||||
typename image_type
|
||||
>
|
||||
void evaluate_detectors (
|
||||
const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
|
||||
const image_type& img,
|
||||
std::vector<rect_detection>& dets,
|
||||
const double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type == an implementation of array2d/array2d_kernel_abstract.h
|
||||
- img contains some kind of pixel type.
|
||||
(i.e. pixel_traits<typename image_type::type> is defined)
|
||||
ensures
|
||||
- This function runs each of the provided object_detector objects over img and
|
||||
stores the resulting detections into #dets. Importantly, this function is
|
||||
faster than running each detector individually because it computes the HOG
|
||||
features only once and then reuses them for each detector. However, it is
|
||||
important to note that this speedup is only possible if all the detectors use
|
||||
the same cell_size parameter that determines how HOG features are computed.
|
||||
If different cell_size values are used then this function will not be any
|
||||
faster than running the detectors individually.
|
||||
- This function applies non-max suppression individually to the output of each
|
||||
detector. Therefore, the output is the same as if you ran each detector
|
||||
individually and then concatenated the results.
|
||||
- To be precise, this function performs object detection on the given image and
|
||||
stores the detected objects into #dets. In particular, we will have that:
|
||||
- #dets is sorted such that the highest confidence detections come first.
|
||||
E.g. element 0 is the best detection, element 1 the next best, and so on.
|
||||
- #dets.size() == the number of detected objects.
|
||||
- #dets[i].detection_confidence == The strength of the i-th detection.
|
||||
Larger values indicate that the detector is more confident that #dets[i]
|
||||
is a correct detection rather than being a false alarm. Moreover, the
|
||||
detection_confidence is equal to the detection value output by the
|
||||
scanner minus the threshold value stored at the end of the weight vector.
|
||||
- #dets[i].rect == the bounding box for the i-th detection.
|
||||
- The detection #dets[i].rect was produced by detectors[#dets[i].weight_index].
|
||||
- The detection threshold is adjusted by having adjust_threshold added to it.
|
||||
Therefore, an adjust_threshold value > 0 makes detecting objects harder while
|
||||
a negative value makes it easier. Moreover, the following will be true for
|
||||
all valid i:
|
||||
- #dets[i].detection_confidence >= adjust_threshold
|
||||
This means that, for example, you can obtain the maximum possible number of
|
||||
detections by setting adjust_threshold equal to negative infinity.
|
||||
- This function is threadsafe in the sense that multiple threads can call
|
||||
evaluate_detectors() with the same instances of detectors and img without
|
||||
requiring a mutex lock.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename pyramid_type,
|
||||
typename image_type
|
||||
>
|
||||
std::vector<rectangle> evaluate_detectors (
|
||||
const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
|
||||
const image_type& img,
|
||||
const double adjust_threshold = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type == an implementation of array2d/array2d_kernel_abstract.h
|
||||
- img contains some kind of pixel type.
|
||||
(i.e. pixel_traits<typename image_type::type> is defined)
|
||||
ensures
|
||||
- This function just calls the above evaluate_detectors() routine and copies
|
||||
the output dets into a vector<rectangle> object and returns it. Therefore,
|
||||
this function is provided for convenience.
|
||||
- This function is threadsafe in the sense that multiple threads can call
|
||||
evaluate_detectors() with the same instances of detectors and img without
|
||||
requiring a mutex lock.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_Hh_
|
||||
|
||||
|
||||
368
lib/3rdParty/dlib/include/dlib/image_processing/scan_image.h
vendored
Normal file
368
lib/3rdParty/dlib/include/dlib/image_processing/scan_image.h
vendored
Normal file
@@ -0,0 +1,368 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SCAN_iMAGE_Hh_
|
||||
#define DLIB_SCAN_iMAGE_Hh_
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "scan_image_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "../algs.h"
|
||||
#include "../rand.h"
|
||||
#include "../array2d.h"
|
||||
#include "../image_transforms/spatial_filtering.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
|
||||
inline rectangle bounding_box_of_rects (
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const point& position
|
||||
)
|
||||
/*!
|
||||
ensures
|
||||
- returns the smallest rectangle that contains all the
|
||||
rectangles in rects. That is, returns the rectangle that
|
||||
contains translate_rect(rects[i].second,position) for all valid i.
|
||||
!*/
|
||||
{
|
||||
rectangle rect;
|
||||
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
rect += translate_rect(rects[i].second,position);
|
||||
}
|
||||
|
||||
return rect;
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
    typename image_array_type
    >
bool all_images_same_size (
    const image_array_type& images
)
/*!
    ensures
        - returns true if images is empty or if every element of images has the
          same num_rows() and num_columns() as images[0], and false otherwise.
!*/
{
    // An empty array trivially has "all the same size".
    if (images.size() == 0)
        return true;

    for (unsigned long idx = 0; idx < images.size(); ++idx)
    {
        // Every element is measured against the first one.  Rows are checked
        // before columns so the column query is skipped when rows already differ.
        if (num_rows(images[0]) != num_rows(images[idx]))
            return false;
        if (num_columns(images[0]) != num_columns(images[idx]))
            return false;
    }

    return true;
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
double sum_of_rects_in_images (
|
||||
const image_array_type& images,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const point& position
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(all_images_same_size(images),
|
||||
"\t double sum_of_rects_in_images()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(rects[i].first < images.size(),
|
||||
"\t double sum_of_rects_in_images()"
|
||||
<< "\n\t rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t rects["<<i<<"].first: " << rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
|
||||
typedef typename promote<pixel_type>::type ptype;
|
||||
|
||||
ptype temp = 0;
|
||||
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
const typename image_array_type::type& img = images[rects[i].first];
|
||||
const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,position));
|
||||
temp += sum(matrix_cast<ptype>(subm(mat(img), rect)));
|
||||
}
|
||||
|
||||
return static_cast<double>(temp);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
double sum_of_rects_in_images_movable_parts (
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const point& position
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(all_images_same_size(images) && center(window) == point(0,0),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
|
||||
<< "\n\t center(window): " << center(window)
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(fixed_rects[i].first < images.size(),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
}
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(movable_rects[i].first < images.size(),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t movable_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
DLIB_ASSERT(center(movable_rects[i].second) == point(0,0),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
|
||||
);
|
||||
}
|
||||
#endif
|
||||
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
|
||||
typedef typename promote<pixel_type>::type ptype;
|
||||
|
||||
ptype temp = 0;
|
||||
|
||||
// compute TOTAL_FIXED part
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
{
|
||||
const typename image_array_type::type& img = images[fixed_rects[i].first];
|
||||
const rectangle rect = get_rect(img).intersect(translate_rect(fixed_rects[i].second,position));
|
||||
temp += sum(matrix_cast<ptype>(subm(mat(img), rect)));
|
||||
}
|
||||
|
||||
if (images.size() > 0)
|
||||
{
|
||||
// compute TOTAL_MOVABLE part
|
||||
array2d<ptype> tempimg(images[0].nr(), images[0].nc());
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
const typename image_array_type::type& img = images[movable_rects[i].first];
|
||||
|
||||
sum_filter_assign(img, tempimg, movable_rects[i].second);
|
||||
|
||||
const rectangle rect = get_rect(tempimg).intersect(translate_rect(window,position));
|
||||
if (rect.is_empty() == false)
|
||||
temp += std::max(0,max(matrix_cast<ptype>(subm(mat(tempimg), rect))));
|
||||
}
|
||||
}
|
||||
|
||||
return static_cast<double>(temp);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void find_points_above_thresh (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_type& img_,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
)
|
||||
{
|
||||
const_image_view<image_type> img(img_);
|
||||
typedef typename image_traits<image_type>::pixel_type ptype;
|
||||
|
||||
dets.clear();
|
||||
if (max_dets == 0)
|
||||
return;
|
||||
|
||||
unsigned long count = 0;
|
||||
dlib::rand rnd;
|
||||
for (long r = 0; r < img.nr(); ++r)
|
||||
{
|
||||
for (long c = 0; c < img.nc(); ++c)
|
||||
{
|
||||
const ptype val = img[r][c];
|
||||
if (val >= thresh)
|
||||
{
|
||||
++count;
|
||||
|
||||
if (dets.size() < max_dets)
|
||||
{
|
||||
dets.push_back(std::make_pair(val, point(c,r)));
|
||||
}
|
||||
else
|
||||
{
|
||||
// The idea here is to cause us to randomly sample possible detection
|
||||
// locations throughout the image rather than just stopping the detection
|
||||
// procedure once we hit the max_dets limit. So this method will result
|
||||
// in a random subsample of all the detections >= thresh being in dets
|
||||
// at the end of scan_image().
|
||||
const unsigned long random_index = rnd.get_random_32bit_number()%count;
|
||||
if (random_index < dets.size())
|
||||
{
|
||||
dets[random_index] = std::make_pair(val, point(c,r));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
void scan_image (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_array_type& images,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(images.size() > 0 && rects.size() > 0 && all_images_same_size(images),
|
||||
"\t void scan_image()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
<< "\n\t rects.size(): " << rects.size()
|
||||
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(rects[i].first < images.size(),
|
||||
"\t void scan_image()"
|
||||
<< "\n\t rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t rects["<<i<<"].first: " << rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
|
||||
typedef typename promote<pixel_type>::type ptype;
|
||||
|
||||
array2d<ptype> accum(images[0].nr(), images[0].nc());
|
||||
assign_all_pixels(accum, 0);
|
||||
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
sum_filter(images[rects[i].first], accum, rects[i].second);
|
||||
|
||||
find_points_above_thresh(dets, accum, thresh, max_dets);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
void scan_image_movable_parts (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(images.size() > 0 && all_images_same_size(images) &&
|
||||
center(window) == point(0,0) && window.area() > 0,
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
|
||||
<< "\n\t center(window): " << center(window)
|
||||
<< "\n\t window.area(): " << window.area()
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(fixed_rects[i].first < images.size(),
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
}
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(movable_rects[i].first < images.size(),
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t movable_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
DLIB_ASSERT(center(movable_rects[i].second) == point(0,0) &&
|
||||
movable_rects[i].second.area() > 0,
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
|
||||
<< "\n\t movable_rects["<<i<<"].second.area(): " << movable_rects[i].second.area()
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (movable_rects.size() == 0 && fixed_rects.size() == 0)
|
||||
return;
|
||||
|
||||
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
|
||||
typedef typename promote<pixel_type>::type ptype;
|
||||
|
||||
array2d<ptype> accum(images[0].nr(), images[0].nc());
|
||||
assign_all_pixels(accum, 0);
|
||||
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
sum_filter(images[fixed_rects[i].first], accum, fixed_rects[i].second);
|
||||
|
||||
array2d<ptype> temp(accum.nr(), accum.nc());
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
const rectangle rect = movable_rects[i].second;
|
||||
sum_filter_assign(images[movable_rects[i].first], temp, rect);
|
||||
max_filter(temp, accum, window.width(), window.height(), 0);
|
||||
}
|
||||
|
||||
find_points_above_thresh(dets, accum, thresh, max_dets);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_iMAGE_Hh_
|
||||
|
||||
|
||||
227
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_abstract.h
vendored
Normal file
227
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_abstract.h
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "../algs.h"
|
||||
#include "../image_processing/generic_image.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
bool all_images_same_size (
|
||||
const image_array_type& images
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an image object that implements the interface
|
||||
defined in dlib/image_processing/generic_image.h
|
||||
ensures
|
||||
- if (all elements of images have the same dimensions (i.e.
|
||||
for all i and j: get_rect(images[i]) == get_rect(images[j]))) then
|
||||
- returns true
|
||||
- else
|
||||
- returns false
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
double sum_of_rects_in_images (
|
||||
const image_array_type& images,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const point& position
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an image object that implements the interface
|
||||
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
|
||||
contain a scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
- all_images_same_size(images) == true
|
||||
- for all valid i: rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
ensures
|
||||
- returns the sum of the pixels inside the given rectangles. To be precise,
|
||||
let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position)
|
||||
from the image images[rects[i].first]. Then this function returns the
|
||||
sum of RECT_SUM[i] for all the valid values of i.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
double sum_of_rects_in_images_movable_parts (
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const point& position
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an image object that implements the interface
|
||||
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
|
||||
contain a scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
- all_images_same_size(images) == true
|
||||
- center(window) == point(0,0)
|
||||
- for all valid i:
|
||||
- fixed_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- for all valid i:
|
||||
- movable_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- center(movable_rects[i].second) == point(0,0)
|
||||
ensures
|
||||
- returns the sum of the pixels inside fixed_rects as well as the sum of the pixels
|
||||
inside movable_rects when these latter rectangles are placed at their highest
|
||||
scoring locations inside the given window. To be precise:
|
||||
- let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x)
|
||||
from the image images[r.first].
|
||||
- let WIN_MAX(i) = The maximum value of RECT_SUM(movable_rects[i],X) when maximizing
|
||||
over all the X such that translate_rect(window,position).contains(X) == true.
|
||||
|
||||
- let TOTAL_FIXED == sum over all elements R in fixed_rects of: RECT_SUM(R,position)
|
||||
- let TOTAL_MOVABLE == sum over all valid i of: max(WIN_MAX(i), 0)
|
||||
|
||||
Then this function returns TOTAL_FIXED + TOTAL_MOVABLE.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void find_points_above_thresh (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_type& img,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type == an image object that implements the interface defined in
|
||||
dlib/image_processing/generic_image.h. Moreover, it must contain a
|
||||
scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
ensures
|
||||
- #dets == a list of points from img which had pixel values >= thresh.
|
||||
- Specifically, we have:
|
||||
- #dets.size() <= max_dets
|
||||
(note that dets is cleared before new detections are added by find_points_above_thresh())
|
||||
- for all valid i:
|
||||
- #dets[i].first == img[#dets[i].second.y()][#dets[i].second.x()]
|
||||
(i.e. the first field contains the value of the pixel at this detection location)
|
||||
- #dets[i].first >= thresh
|
||||
- if (there are more than max_dets locations that pass the above threshold test) then
|
||||
- #dets == a random subsample of all the locations which passed the threshold
|
||||
test.
|
||||
- else
|
||||
- #dets == all the points which passed the threshold test.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
void scan_image (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_array_type& images,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an image object that implements the interface
|
||||
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
|
||||
contain a scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
- images.size() > 0
|
||||
- rects.size() > 0
|
||||
- all_images_same_size(images) == true
|
||||
- for all valid i: rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
ensures
|
||||
- slides the set of rectangles over the image space and reports the locations
|
||||
which give a sum >= thresh.
|
||||
- Specifically, we have:
|
||||
- #dets.size() <= max_dets
|
||||
(note that dets is cleared before new detections are added by scan_image())
|
||||
- for all valid i:
|
||||
- #dets[i].first == sum_of_rects_in_images(images,rects,#dets[i].second) >= thresh
|
||||
- if (there are more than max_dets locations that pass the threshold test) then
|
||||
- #dets == a random subsample of all the locations which passed the threshold
|
||||
test.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
void scan_image_movable_parts (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an image object that implements the interface
|
||||
defined in dlib/image_processing/generic_image.h. Moreover, these objects must
|
||||
contain a scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
- images.size() > 0
|
||||
- all_images_same_size(images) == true
|
||||
- center(window) == point(0,0)
|
||||
- window.area() > 0
|
||||
- for all valid i:
|
||||
- fixed_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- for all valid i:
|
||||
- movable_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- center(movable_rects[i].second) == point(0,0)
|
||||
- movable_rects[i].second.area() > 0
|
||||
ensures
|
||||
- Scans the given window over the images and reports the locations with a score no smaller
|
||||
than thresh.
|
||||
- Specifically, we have:
|
||||
- #dets.size() <= max_dets
|
||||
(note that dets is cleared before new detections are added by scan_image_movable_parts())
|
||||
- for all valid i:
|
||||
- #dets[i].first == sum_of_rects_in_images_movable_parts(images,
|
||||
window,
|
||||
fixed_rects,
|
||||
movable_rects,
|
||||
#dets[i].second) >= thresh
|
||||
- if (there are more than max_dets locations that pass the above threshold test) then
|
||||
- #dets == a random subsample of all the locations which passed the threshold
|
||||
test.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_iMAGE_ABSTRACT_Hh_
|
||||
|
||||
|
||||
|
||||
631
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_boxes.h
vendored
Normal file
631
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_boxes.h
vendored
Normal file
@@ -0,0 +1,631 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SCAN_IMAGE_bOXES_Hh_
|
||||
#define DLIB_SCAN_IMAGE_bOXES_Hh_
|
||||
|
||||
#include "scan_image_boxes_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include "../image_processing.h"
|
||||
#include "../array2d.h"
|
||||
#include <vector>
|
||||
#include "../image_processing/full_object_detection.h"
|
||||
#include "../image_transforms.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class default_box_generator
|
||||
{
|
||||
public:
|
||||
template <typename image_type>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<rectangle>& rects
|
||||
) const
|
||||
{
|
||||
rects.clear();
|
||||
find_candidate_object_locations(img, rects);
|
||||
}
|
||||
};
|
||||
|
||||
inline void serialize(const default_box_generator&, std::ostream& ) {}
|
||||
inline void deserialize(default_box_generator&, std::istream& ) {}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator = default_box_generator
|
||||
>
|
||||
class scan_image_boxes : noncopyable
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
typedef Box_generator box_generator;
|
||||
|
||||
scan_image_boxes (
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
|
||||
inline bool is_loaded_with_image (
|
||||
) const;
|
||||
|
||||
inline void copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
|
||||
inline void copy_configuration(
|
||||
const box_generator& bg
|
||||
);
|
||||
|
||||
const box_generator& get_box_generator (
|
||||
) const { return detect_boxes; }
|
||||
|
||||
const Feature_extractor_type& get_feature_extractor (
|
||||
) const { return feats; }
|
||||
|
||||
inline void copy_configuration (
|
||||
const scan_image_boxes& item
|
||||
);
|
||||
|
||||
inline long get_num_dimensions (
|
||||
) const;
|
||||
|
||||
unsigned long get_num_spatial_pyramid_levels (
|
||||
) const;
|
||||
|
||||
void set_num_spatial_pyramid_levels (
|
||||
unsigned long levels
|
||||
);
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- is_loaded_with_image() == true
|
||||
!*/
|
||||
|
||||
inline unsigned long get_num_detection_templates (
|
||||
) const { return 1; }
|
||||
|
||||
inline unsigned long get_num_movable_components_per_detection_template (
|
||||
) const { return 0; }
|
||||
|
||||
template <typename T, typename U>
|
||||
friend void serialize (
|
||||
const scan_image_boxes<T,U>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
|
||||
template <typename T, typename U>
|
||||
friend void deserialize (
|
||||
scan_image_boxes<T,U>& item,
|
||||
std::istream& in
|
||||
);
|
||||
|
||||
private:
|
||||
static bool compare_pair_rect (
|
||||
const std::pair<double, rectangle>& a,
|
||||
const std::pair<double, rectangle>& b
|
||||
)
|
||||
{
|
||||
return a.first < b.first;
|
||||
}
|
||||
|
||||
void test_coordinate_transforms()
|
||||
{
|
||||
for (long x = -10; x <= 10; x += 10)
|
||||
{
|
||||
for (long y = -10; y <= 10; y += 10)
|
||||
{
|
||||
const rectangle rect = centered_rect(x,y,5,6);
|
||||
rectangle a;
|
||||
|
||||
a = feats.image_to_feat_space(rect);
|
||||
if (a.width() > 10000000 || a.height() > 10000000 )
|
||||
{
|
||||
DLIB_CASSERT(false, "The image_to_feat_space() routine is outputting rectangles of an implausibly "
|
||||
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
|
||||
}
|
||||
a = feats.feat_to_image_space(rect);
|
||||
if (a.width() > 10000000 || a.height() > 10000000 )
|
||||
{
|
||||
DLIB_CASSERT(false, "The feat_to_image_space() routine is outputting rectangles of an implausibly "
|
||||
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void add_grid_rects (
|
||||
std::vector<rectangle>& rects,
|
||||
const rectangle& object_box,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(cells_x > 0 && cells_y > 0,
|
||||
"\t void add_grid_rects()"
|
||||
<< "\n\t The number of cells along a dimension can't be zero. "
|
||||
<< "\n\t cells_x: " << cells_x
|
||||
<< "\n\t cells_y: " << cells_y
|
||||
);
|
||||
|
||||
const matrix_range_exp<double>& x = linspace(object_box.left(), object_box.right(), cells_x+1);
|
||||
const matrix_range_exp<double>& y = linspace(object_box.top(), object_box.bottom(), cells_y+1);
|
||||
|
||||
for (long j = 0; j+1 < y.size(); ++j)
|
||||
{
|
||||
for (long i = 0; i+1 < x.size(); ++i)
|
||||
{
|
||||
const dlib::vector<double,2> tl(x(i),y(j));
|
||||
const dlib::vector<double,2> br(x(i+1),y(j+1));
|
||||
rects.push_back(rectangle(tl,br));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void get_feature_extraction_regions (
|
||||
const rectangle& rect,
|
||||
std::vector<rectangle>& regions
|
||||
) const
|
||||
/*!
|
||||
ensures
|
||||
- #regions.size() is always the same number no matter what the input is. The
|
||||
regions also have a consistent ordering.
|
||||
- all the output rectangles are contained within rect.
|
||||
!*/
|
||||
{
|
||||
regions.clear();
|
||||
|
||||
for (unsigned int l = 1; l <= num_spatial_pyramid_levels; ++l)
|
||||
{
|
||||
const int cells = (int)std::pow(2.0, l-1.0);
|
||||
add_grid_rects(regions, rect, cells, cells);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int get_num_components_per_detection_template(
|
||||
) const
|
||||
{
|
||||
return (unsigned int)(std::pow(4.0,(double)num_spatial_pyramid_levels)-1)/3;
|
||||
}
|
||||
|
||||
feature_extractor_type feats;
|
||||
std::vector<rectangle> search_rects;
|
||||
bool loaded_with_image;
|
||||
unsigned int num_spatial_pyramid_levels;
|
||||
box_generator detect_boxes;
|
||||
|
||||
const long box_sizedims;
|
||||
const long box_maxsize;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T, typename U>
|
||||
void serialize (
|
||||
const scan_image_boxes<T,U>& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
int version = 1;
|
||||
serialize(version, out);
|
||||
serialize(item.feats, out);
|
||||
serialize(item.search_rects, out);
|
||||
serialize(item.loaded_with_image, out);
|
||||
serialize(item.num_spatial_pyramid_levels, out);
|
||||
serialize(item.detect_boxes, out);
|
||||
serialize(item.get_num_dimensions(), out);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T, typename U>
|
||||
void deserialize (
|
||||
scan_image_boxes<T,U>& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version != 1)
|
||||
throw serialization_error("Unsupported version found when deserializing a scan_image_boxes object.");
|
||||
|
||||
deserialize(item.feats, in);
|
||||
deserialize(item.search_rects, in);
|
||||
deserialize(item.loaded_with_image, in);
|
||||
deserialize(item.num_spatial_pyramid_levels, in);
|
||||
deserialize(item.detect_boxes, in);
|
||||
|
||||
// When developing some feature extractor, it's easy to accidentally change its
|
||||
// number of dimensions and then try to deserialize data from an older version of
|
||||
// your extractor into the current code. This check is here to catch that kind of
|
||||
// user error.
|
||||
long dims;
|
||||
deserialize(dims, in);
|
||||
if (item.get_num_dimensions() != dims)
|
||||
throw serialization_error("Number of dimensions in serialized scan_image_boxes doesn't match the expected number.");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// scan_image_boxes member functions
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
scan_image_boxes (
|
||||
) :
|
||||
loaded_with_image(false),
|
||||
num_spatial_pyramid_levels(3),
|
||||
box_sizedims(20),
|
||||
box_maxsize(1200)
|
||||
{
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
load (
|
||||
const image_type& img
|
||||
)
|
||||
{
|
||||
feats.load(img);
|
||||
detect_boxes(img, search_rects);
|
||||
loaded_with_image = true;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
bool scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
is_loaded_with_image (
|
||||
) const
|
||||
{
|
||||
return loaded_with_image;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
)
|
||||
{
|
||||
test_coordinate_transforms();
|
||||
feats.copy_configuration(fe);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
copy_configuration(
|
||||
const box_generator& bg
|
||||
)
|
||||
{
|
||||
detect_boxes = bg;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
copy_configuration (
|
||||
const scan_image_boxes& item
|
||||
)
|
||||
{
|
||||
feats.copy_configuration(item.feats);
|
||||
detect_boxes = item.detect_boxes;
|
||||
num_spatial_pyramid_levels = item.num_spatial_pyramid_levels;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
unsigned long scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
get_num_spatial_pyramid_levels (
|
||||
) const
|
||||
{
|
||||
return num_spatial_pyramid_levels;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
set_num_spatial_pyramid_levels (
|
||||
unsigned long levels
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(levels > 0,
|
||||
"\t void scan_image_boxes::set_num_spatial_pyramid_levels()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t levels: " << levels
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
|
||||
num_spatial_pyramid_levels = levels;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
long scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
get_num_dimensions (
|
||||
) const
|
||||
{
|
||||
return feats.get_num_dimensions()*get_num_components_per_detection_template() + box_sizedims*2;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_loaded_with_image() &&
|
||||
w.size() >= get_num_dimensions(),
|
||||
"\t void scan_image_boxes::detect()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t w.size(): " << w.size()
|
||||
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
dets.clear();
|
||||
|
||||
array<integral_image_generic<double> > saliency_images(get_num_components_per_detection_template());
|
||||
|
||||
array2d<double> temp_img(feats.nr(), feats.nc());
|
||||
|
||||
// build saliency images
|
||||
for (unsigned long i = 0; i < saliency_images.size(); ++i)
|
||||
{
|
||||
const unsigned long offset = 2*box_sizedims + feats.get_num_dimensions()*i;
|
||||
|
||||
// make the basic saliency image for the i-th feature extraction region
|
||||
for (long r = 0; r < feats.nr(); ++r)
|
||||
{
|
||||
for (long c = 0; c < feats.nc(); ++c)
|
||||
{
|
||||
const typename feature_extractor_type::descriptor_type& descriptor = feats(r,c);
|
||||
|
||||
double sum = 0;
|
||||
for (unsigned long k = 0; k < descriptor.size(); ++k)
|
||||
{
|
||||
sum += w(descriptor[k].first + offset)*descriptor[k].second;
|
||||
}
|
||||
temp_img[r][c] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
// now convert base saliency image into final integral image
|
||||
saliency_images[i].load(temp_img);
|
||||
}
|
||||
|
||||
|
||||
// now search the saliency images
|
||||
std::vector<rectangle> regions;
|
||||
const rectangle bounds = get_rect(feats);
|
||||
for (unsigned long i = 0; i < search_rects.size(); ++i)
|
||||
{
|
||||
const rectangle rect = feats.image_to_feat_space(search_rects[i]).intersect(bounds);
|
||||
if (rect.is_empty())
|
||||
continue;
|
||||
get_feature_extraction_regions(rect, regions);
|
||||
double score = 0;
|
||||
for (unsigned long k = 0; k < regions.size(); ++k)
|
||||
{
|
||||
score += saliency_images[k].get_sum_of_area(regions[k]);
|
||||
}
|
||||
const double width = search_rects[i].width();
|
||||
const double height = search_rects[i].height();
|
||||
|
||||
score += dot(linpiece(width, linspace(0, box_maxsize, box_sizedims+1)), rowm(w, range(0,box_sizedims-1)));
|
||||
score += dot(linpiece(height, linspace(0, box_maxsize, box_sizedims+1)), rowm(w, range(box_sizedims,2*box_sizedims-1)));
|
||||
|
||||
if (score >= thresh)
|
||||
{
|
||||
dets.push_back(std::make_pair(score, search_rects[i]));
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
const rectangle scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_loaded_with_image(),
|
||||
"\t const rectangle scan_image_boxes::get_best_matching_rect()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
|
||||
double best_score = -1;
|
||||
rectangle best_rect;
|
||||
for (unsigned long i = 0; i < search_rects.size(); ++i)
|
||||
{
|
||||
const double score = (rect.intersect(search_rects[i])).area()/(double)(rect+search_rects[i]).area();
|
||||
if (score > best_score)
|
||||
{
|
||||
best_score = score;
|
||||
best_rect = search_rects[i];
|
||||
}
|
||||
}
|
||||
return best_rect;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
full_object_detection scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& /*w*/
|
||||
) const
|
||||
{
|
||||
return full_object_detection(rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void scan_image_boxes<Feature_extractor_type,Box_generator>::
|
||||
get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_loaded_with_image() &&
|
||||
psi.size() >= get_num_dimensions() &&
|
||||
obj.num_parts() == 0,
|
||||
"\t void scan_image_boxes::get_feature_vector()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t psi.size(): " << psi.size()
|
||||
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
|
||||
<< "\n\t obj.num_parts(): " << obj.num_parts()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
|
||||
|
||||
const rectangle best_rect = get_best_matching_rect(obj.get_rect());
|
||||
const rectangle mapped_rect = feats.image_to_feat_space(best_rect).intersect(get_rect(feats));
|
||||
if (mapped_rect.is_empty())
|
||||
return;
|
||||
|
||||
std::vector<rectangle> regions;
|
||||
get_feature_extraction_regions(mapped_rect, regions);
|
||||
|
||||
// pull features out of all the boxes in regions.
|
||||
for (unsigned long j = 0; j < regions.size(); ++j)
|
||||
{
|
||||
const rectangle rect = regions[j];
|
||||
|
||||
const unsigned long template_region_id = j;
|
||||
const unsigned long offset = box_sizedims*2 + feats.get_num_dimensions()*template_region_id;
|
||||
for (long r = rect.top(); r <= rect.bottom(); ++r)
|
||||
{
|
||||
for (long c = rect.left(); c <= rect.right(); ++c)
|
||||
{
|
||||
const typename feature_extractor_type::descriptor_type& descriptor = feats(r,c);
|
||||
for (unsigned long k = 0; k < descriptor.size(); ++k)
|
||||
{
|
||||
psi(descriptor[k].first + offset) += descriptor[k].second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const double width = best_rect.width();
|
||||
const double height = best_rect.height();
|
||||
set_rowm(psi, range(0,box_sizedims-1)) += linpiece(width, linspace(0, box_maxsize, box_sizedims+1));
|
||||
set_rowm(psi, range(box_sizedims,box_sizedims*2-1)) += linpiece(height, linspace(0, box_maxsize, box_sizedims+1));
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMAGE_bOXES_Hh_
|
||||
|
||||
|
||||
|
||||
394
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_boxes_abstract.h
vendored
Normal file
394
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_boxes_abstract.h
vendored
Normal file
@@ -0,0 +1,394 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_Hh_
|
||||
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include "../image_processing.h"
|
||||
#include "../array2d.h"
|
||||
#include "full_object_detection_abstract.h"
|
||||
#include "../image_transforms/segment_image_abstract.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class default_box_generator
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object that takes in an image and outputs a set of
|
||||
candidate object locations. It is also the default box generator used by
|
||||
the scan_image_boxes object defined below.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
template <typename image_type>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<rectangle>& rects
|
||||
) const
|
||||
/*!
|
||||
ensures
|
||||
- #rects == the set of candidate object locations which should be searched
|
||||
inside img. That is, these are the rectangles which might contain
|
||||
objects of interest within the given image.
|
||||
!*/
|
||||
{
|
||||
rects.clear();
|
||||
find_candidate_object_locations(img, rects);
|
||||
}
|
||||
};
|
||||
|
||||
inline void serialize (const default_box_generator&, std::ostream& ) {}
|
||||
inline void deserialize( default_box_generator&, std::istream& ) {}
|
||||
/*!
|
||||
ensures
|
||||
- provides serialization support.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator = default_box_generator
|
||||
>
|
||||
class scan_image_boxes : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON Feature_extractor_type
|
||||
- must be an object with an interface compatible with the hashed_feature_image
|
||||
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h or
|
||||
with the nearest_neighbor_feature_image object defined in
|
||||
dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h
|
||||
|
||||
REQUIREMENTS ON Box_generator
|
||||
- must be an object with an interface compatible with the
|
||||
default_box_generator object defined at the top of this file.
|
||||
|
||||
INITIAL VALUE
|
||||
- get_num_spatial_pyramid_levels() == 3
|
||||
- is_loaded_with_image() == false
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for running a classifier over an image with the goal
|
||||
of localizing each object present. The localization is in the form of the
|
||||
bounding box around each object of interest.
|
||||
|
||||
Unlike the scan_image_pyramid object which scans a fixed sized window over
|
||||
an image pyramid, the scan_image_boxes tool allows you to define your own
|
||||
list of "candidate object locations" which should be evaluated. This is
|
||||
simply a list of rectangle objects which might contain objects of interest.
|
||||
The scan_image_boxes object will then evaluate the classifier at each of
|
||||
these locations and return the subset of rectangles which appear to have
|
||||
objects in them. The candidate object location generation is provided by
|
||||
the Box_generator that is passed in as a template argument.
|
||||
|
||||
This object can also be understood as a general tool for implementing the
|
||||
spatial pyramid models described in the paper:
|
||||
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
|
||||
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
|
||||
and Jean Ponce
|
||||
|
||||
|
||||
The classifiers used by this object have three parts:
|
||||
1. The underlying feature extraction provided by Feature_extractor_type
|
||||
objects, which associate a vector with each location in an image.
|
||||
|
||||
2. A rule for extracting a feature vector from a candidate object
|
||||
location. In this object we use the spatial pyramid matching method.
|
||||
This means we cut an object's detection window into a set of "feature
|
||||
extraction regions" and extract a bag-of-words vector from each
|
||||
before finally concatenating them to form the final feature vector
|
||||
representing the entire object window. The set of feature extraction
|
||||
regions can be configured by the user by calling
|
||||
set_num_spatial_pyramid_levels(). To be a little more precise, the
|
||||
feature vector for a candidate object window is defined as follows:
|
||||
- Let N denote the number of feature extraction zones.
|
||||
- Let M denote the dimensionality of the vectors output by
|
||||
Feature_extractor_type objects.
|
||||
- Let F(i) == the M dimensional vector which is the sum of all
|
||||
vectors given by our Feature_extractor_type object inside the
|
||||
i-th feature extraction zone. So this is notionally a
|
||||
bag-of-words vector from the i-th zone.
|
||||
- Then the feature vector for an object window is an M*N
|
||||
dimensional vector [F(1) F(2) F(3) ... F(N)] (i.e. it is a
|
||||
concatenation of the N vectors). This feature vector can be
|
||||
thought of as a collection of N bags-of-words, each bag coming
|
||||
from a spatial location determined by one of the feature
|
||||
extraction zones.
|
||||
|
||||
3. A weight vector and a threshold value. The dot product between the
|
||||
weight vector and the feature vector for a candidate object location
|
||||
gives the score of the location. If this score is greater than the
|
||||
threshold value then the candidate object location is output as a
|
||||
detection.
|
||||
|
||||
THREAD SAFETY
|
||||
Concurrent access to an instance of this object is not safe and should be
|
||||
protected by a mutex lock except for the case where you are copying the
|
||||
configuration (via copy_configuration()) of a scan_image_boxes object to
|
||||
many other threads. In this case, it is safe to copy the configuration of
|
||||
a shared object so long as no other operations are performed on it.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
typedef Box_generator box_generator;
|
||||
|
||||
scan_image_boxes (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type must be a type with the following properties:
|
||||
- image_type objects can be loaded into Feature_extractor_type
|
||||
objects via Feature_extractor_type::load().
|
||||
- image_type objects can be passed to the first argument of
|
||||
Box_generator::operator()
|
||||
ensures
|
||||
- #is_loaded_with_image() == true
|
||||
- This object is ready to run a classifier over img to detect object
|
||||
locations. Call detect() to do this.
|
||||
!*/
|
||||
|
||||
bool is_loaded_with_image (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns true if this object has been loaded with an image to process and
|
||||
false otherwise.
|
||||
!*/
|
||||
|
||||
const feature_extractor_type& get_feature_extractor (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a const reference to the feature_extractor_type object used
|
||||
internally for local feature extraction.
|
||||
!*/
|
||||
|
||||
void copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This function performs the equivalent of
|
||||
get_feature_extractor().copy_configuration(fe) (i.e. this function allows
|
||||
you to configure the parameters of the underlying feature extractor used
|
||||
by a scan_image_boxes object)
|
||||
!*/
|
||||
|
||||
void copy_configuration(
|
||||
const box_generator& bg
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_box_generator() == bg
|
||||
(i.e. this function allows you to configure the parameters of the
|
||||
underlying box generator used by a scan_image_boxes object)
|
||||
!*/
|
||||
|
||||
const box_generator& get_box_generator (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the box_generator used by this object to generate candidate
|
||||
object locations.
|
||||
!*/
|
||||
|
||||
void copy_configuration (
|
||||
const scan_image_boxes& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Copies all the state information of item into *this, except for state
|
||||
information populated by load(). More precisely, given two scan_image_boxes
|
||||
objects S1 and S2, the following sequence of instructions should always
|
||||
result in both of them having the exact same state:
|
||||
S2.copy_configuration(S1);
|
||||
S1.load(img);
|
||||
S2.load(img);
|
||||
!*/
|
||||
|
||||
long get_num_dimensions (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of dimensions in the feature vector for a candidate
  object location.  This value is the dimensionality of the underlying
  feature vectors produced by Feature_extractor_type times the number of
  feature extraction regions used, plus a fixed number of additional
  dimensions which the implementation uses to encode the width and height
  of the candidate object location.  Note that the number of feature
  extraction regions used is a function of
  get_num_spatial_pyramid_levels().
|
||||
!*/
|
||||
|
||||
unsigned long get_num_spatial_pyramid_levels (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of layers in the spatial pyramid. For example, if
|
||||
this function returns 1 then it means we use a simple bag-of-words
|
||||
representation over the whole object window. If it returns 2 then it
|
||||
means the feature representation is the concatenation of 5 bag-of-words
|
||||
vectors, one from the entire object window and 4 others from 4 different
|
||||
parts of the object window. If it returns 3 then there are 1+4+16
|
||||
bag-of-words vectors concatenated together in the feature representation,
|
||||
and so on.
|
||||
!*/
|
||||
|
||||
void set_num_spatial_pyramid_levels (
|
||||
unsigned long levels
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- levels > 0
|
||||
ensures
|
||||
- #get_num_spatial_pyramid_levels() == levels
|
||||
!*/
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Scans over all the candidate object locations as discussed in the WHAT
|
||||
THIS OBJECT REPRESENTS section and stores all detections into #dets.
|
||||
- for all valid i:
|
||||
- #dets[i].second == The candidate object location which produced this
|
||||
detection. This rectangle gives the location of the detection.
|
||||
- #dets[i].first == The score for this detection. This value is equal
|
||||
to dot(w, feature vector for this candidate object location).
|
||||
- #dets[i].first >= thresh
|
||||
- #dets will be sorted in descending order.
|
||||
(i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
|
||||
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only
|
||||
the first get_num_dimensions() are used.
|
||||
- Note that no form of non-max suppression is performed. If a location
|
||||
has a score >= thresh then it is reported in #dets.
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- obj.num_parts() == 0
|
||||
- is_loaded_with_image() == true
|
||||
- psi.size() >= get_num_dimensions()
|
||||
(i.e. psi must have preallocated its memory before this function is called)
|
||||
ensures
|
||||
- This function allows you to determine the feature vector used for a
|
||||
candidate object location output from detect(). Note that this vector is
|
||||
added to psi. Note also that you must use get_full_object_detection() to
|
||||
convert a rectangle from detect() into the needed full_object_detection.
|
||||
- The dimensionality of the vector added to psi is get_num_dimensions(). This
|
||||
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
|
||||
- Since scan_image_boxes only searches a limited set of object locations,
|
||||
not all possible rectangles can be output by detect(). So in the case
|
||||
where obj.get_rect() could not arise from a call to detect(), this
|
||||
function will map obj.get_rect() to the nearest possible rectangle and
|
||||
then add the feature vector for the mapped rectangle into #psi.
|
||||
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
|
||||
gets mapped to for feature extraction.
|
||||
!*/
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns full_object_detection(rect)
|
||||
(This function is here only for compatibility with the scan_image_pyramid
|
||||
object)
|
||||
!*/
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Since scan_image_boxes only searches a limited set of object locations,
|
||||
not all possible rectangles can be represented. Therefore, this function
|
||||
allows you to supply a rectangle and obtain the nearest possible
|
||||
candidate object location rectangle.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_detection_templates (
|
||||
) const { return 1; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 1. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Notionally, its return value indicates
|
||||
that a scan_image_boxes object is always ready to detect objects once
|
||||
an image has been loaded.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_movable_components_per_detection_template (
|
||||
) const { return 0; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 0. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Its return value means that this object
|
||||
does not support using movable part models.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void serialize (
|
||||
const scan_image_boxes<Feature_extractor_type,Box_generator>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void deserialize (
|
||||
scan_image_boxes<Feature_extractor_type,Box_generator>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMAGE_bOXES_ABSTRACT_Hh_
|
||||
|
||||
401
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_custom.h
vendored
Normal file
401
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_custom.h
vendored
Normal file
@@ -0,0 +1,401 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SCAN_IMAGE_CuSTOM_Hh_
|
||||
#define DLIB_SCAN_IMAGE_CuSTOM_Hh_
|
||||
|
||||
#include "scan_image_custom_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include <vector>
|
||||
#include "../image_processing/full_object_detection.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
class scan_image_custom : noncopyable
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
|
||||
scan_image_custom (
|
||||
);
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
|
||||
inline bool is_loaded_with_image (
|
||||
) const;
|
||||
|
||||
inline void copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
|
||||
const Feature_extractor_type& get_feature_extractor (
|
||||
) const { return feats; }
|
||||
|
||||
inline void copy_configuration (
|
||||
const scan_image_custom& item
|
||||
);
|
||||
|
||||
inline long get_num_dimensions (
|
||||
) const;
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
|
||||
inline unsigned long get_num_detection_templates (
|
||||
) const { return 1; }
|
||||
|
||||
inline unsigned long get_num_movable_components_per_detection_template (
|
||||
) const { return 0; }
|
||||
|
||||
template <typename T>
|
||||
friend void serialize (
|
||||
const scan_image_custom<T>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
|
||||
template <typename T>
|
||||
friend void deserialize (
|
||||
scan_image_custom<T>& item,
|
||||
std::istream& in
|
||||
);
|
||||
|
||||
private:
|
||||
static bool compare_pair_rect (
|
||||
const std::pair<double, rectangle>& a,
|
||||
const std::pair<double, rectangle>& b
|
||||
)
|
||||
{
|
||||
return a.first < b.first;
|
||||
}
|
||||
|
||||
|
||||
DLIB_MAKE_HAS_MEMBER_FUNCTION_TEST(
|
||||
has_compute_object_score,
|
||||
double,
|
||||
compute_object_score,
|
||||
( const matrix<double,0,1>& w, const rectangle& obj) const
|
||||
);
|
||||
|
||||
template <typename fe_type>
|
||||
typename enable_if<has_compute_object_score<fe_type> >::type compute_all_rect_scores (
|
||||
const fe_type& feats,
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const
|
||||
{
|
||||
for (unsigned long i = 0; i < search_rects.size(); ++i)
|
||||
{
|
||||
const double score = feats.compute_object_score(w, search_rects[i]);
|
||||
if (score >= thresh)
|
||||
{
|
||||
dets.push_back(std::make_pair(score, search_rects[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename fe_type>
|
||||
typename disable_if<has_compute_object_score<fe_type> >::type compute_all_rect_scores (
|
||||
const fe_type& feats,
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const
|
||||
{
|
||||
matrix<double,0,1> psi(w.size());
|
||||
psi = 0;
|
||||
double prev_dot = 0;
|
||||
for (unsigned long i = 0; i < search_rects.size(); ++i)
|
||||
{
|
||||
// Reset these back to zero every so often to avoid the accumulation of
|
||||
// rounding error. Note that the only reason we do this loop in this
|
||||
// complex way is to avoid needing to zero the psi vector every iteration.
|
||||
if ((i%500) == 499)
|
||||
{
|
||||
psi = 0;
|
||||
prev_dot = 0;
|
||||
}
|
||||
|
||||
feats.get_feature_vector(search_rects[i], psi);
|
||||
const double cur_dot = dot(psi, w);
|
||||
const double score = cur_dot - prev_dot;
|
||||
if (score >= thresh)
|
||||
{
|
||||
dets.push_back(std::make_pair(score, search_rects[i]));
|
||||
}
|
||||
prev_dot = cur_dot;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
feature_extractor_type feats;
|
||||
std::vector<rectangle> search_rects;
|
||||
bool loaded_with_image;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void serialize (
|
||||
const scan_image_custom<T>& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
int version = 1;
|
||||
serialize(version, out);
|
||||
serialize(item.feats, out);
|
||||
serialize(item.search_rects, out);
|
||||
serialize(item.loaded_with_image, out);
|
||||
serialize(item.get_num_dimensions(), out);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void deserialize (
|
||||
scan_image_custom<T>& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version != 1)
|
||||
throw serialization_error("Unsupported version found when deserializing a scan_image_custom object.");
|
||||
|
||||
deserialize(item.feats, in);
|
||||
deserialize(item.search_rects, in);
|
||||
deserialize(item.loaded_with_image, in);
|
||||
|
||||
// When developing some feature extractor, it's easy to accidentally change its
|
||||
// number of dimensions and then try to deserialize data from an older version of
|
||||
// your extractor into the current code. This check is here to catch that kind of
|
||||
// user error.
|
||||
long dims;
|
||||
deserialize(dims, in);
|
||||
if (item.get_num_dimensions() != dims)
|
||||
throw serialization_error("Number of dimensions in serialized scan_image_custom doesn't match the expected number.");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// scan_image_custom member functions
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
scan_image_custom<Feature_extractor_type>::
|
||||
scan_image_custom (
|
||||
) :
|
||||
loaded_with_image(false)
|
||||
{
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void scan_image_custom<Feature_extractor_type>::
|
||||
load (
|
||||
const image_type& img
|
||||
)
|
||||
{
|
||||
feats.load(img, search_rects);
|
||||
loaded_with_image = true;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
bool scan_image_custom<Feature_extractor_type>::
|
||||
is_loaded_with_image (
|
||||
) const
|
||||
{
|
||||
return loaded_with_image;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void scan_image_custom<Feature_extractor_type>::
|
||||
copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
)
|
||||
{
|
||||
feats.copy_configuration(fe);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void scan_image_custom<Feature_extractor_type>::
|
||||
copy_configuration (
|
||||
const scan_image_custom& item
|
||||
)
|
||||
{
|
||||
feats.copy_configuration(item.feats);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
long scan_image_custom<Feature_extractor_type>::
|
||||
get_num_dimensions (
|
||||
) const
|
||||
{
|
||||
return feats.get_num_dimensions();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void scan_image_custom<Feature_extractor_type>::
|
||||
detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_loaded_with_image() &&
|
||||
w.size() >= get_num_dimensions(),
|
||||
"\t void scan_image_custom::detect()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t w.size(): " << w.size()
|
||||
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
dets.clear();
|
||||
compute_all_rect_scores(feats, w,dets,thresh);
|
||||
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
const rectangle scan_image_custom<Feature_extractor_type>::
|
||||
get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_loaded_with_image(),
|
||||
"\t const rectangle scan_image_custom::get_best_matching_rect()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
|
||||
double best_score = -1;
|
||||
rectangle best_rect;
|
||||
for (unsigned long i = 0; i < search_rects.size(); ++i)
|
||||
{
|
||||
const double score = (rect.intersect(search_rects[i])).area()/(double)(rect+search_rects[i]).area();
|
||||
if (score > best_score)
|
||||
{
|
||||
best_score = score;
|
||||
best_rect = search_rects[i];
|
||||
}
|
||||
}
|
||||
return best_rect;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
full_object_detection scan_image_custom<Feature_extractor_type>::
|
||||
get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& /*w*/
|
||||
) const
|
||||
{
|
||||
return full_object_detection(rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void scan_image_custom<Feature_extractor_type>::
|
||||
get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_loaded_with_image() &&
|
||||
psi.size() >= get_num_dimensions() &&
|
||||
obj.num_parts() == 0,
|
||||
"\t void scan_image_custom::get_feature_vector()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t psi.size(): " << psi.size()
|
||||
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
|
||||
<< "\n\t obj.num_parts(): " << obj.num_parts()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
|
||||
feats.get_feature_vector(get_best_matching_rect(obj.get_rect()), psi);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMAGE_CuSTOM_Hh_
|
||||
|
||||
390
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_custom_abstract.h
vendored
Normal file
390
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_custom_abstract.h
vendored
Normal file
@@ -0,0 +1,390 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_
|
||||
|
||||
#include <vector>
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include "../image_processing/full_object_detection_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class example_feature_extractor
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object defines the interface a feature extractor must implement if it
|
||||
is to be used with the scan_image_custom object defined at the bottom of
|
||||
this file.
|
||||
|
||||
In this case, the purpose of a feature extractor is to associate a
|
||||
complete feature vector with each rectangle in an image. In particular,
|
||||
each rectangle is scored by taking the dot product between this feature
|
||||
vector and a weight vector. If this score is greater than a threshold then
|
||||
the rectangle is output as a detection.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& image,
|
||||
std::vector<rectangle>& candidate_objects
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Loads the given image into this feature extractor. This means that
|
||||
subsequent calls to get_feature_vector() will return the feature vector
|
||||
corresponding to locations in the image given to load().
|
||||
- #candidate_objects == a set of bounding boxes in the given image that
|
||||
might contain objects of interest. These are the locations that will be
|
||||
checked for the presence of objects when this feature extractor is used
|
||||
with the scan_image_custom object.
|
||||
|
||||
!*/
|
||||
|
||||
void copy_configuration (
|
||||
const example_feature_extractor& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Copies all the state information of item into *this, except for state
|
||||
information populated by load(). More precisely, given two
|
||||
feature extractor objects S1 and S2, the following sequence of
|
||||
instructions should always result in both of them having the exact same
|
||||
state:
|
||||
S2.copy_configuration(S1);
|
||||
S1.load(img, temp);
|
||||
S2.load(img, temp);
|
||||
!*/
|
||||
|
||||
unsigned long get_num_dimensions (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the dimensionality of the feature vectors output by this object.
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const rectangle& obj,
|
||||
matrix<double,0,1>& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- psi.size() >= get_num_dimensions()
|
||||
(i.e. psi must have preallocated its memory before this function is called)
|
||||
ensures
|
||||
- This function computes the feature vector associated with the given rectangle
|
||||
in obj. This rectangle is interpreted as a bounding box within the last image
|
||||
given to this->load() and a feature vector describing that bounding box is
|
||||
output into psi.
|
||||
- The feature vector is added into psi. That is, it does not overwrite the
|
||||
previous contents of psi, but instead, it adds the vector to psi.
|
||||
- The dimensionality of the vector added to psi is get_num_dimensions(). This
|
||||
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
|
||||
- #psi.size() == psi.size()
|
||||
(i.e. this function does not change the size of the psi vector)
|
||||
!*/
|
||||
|
||||
double compute_object_score (
|
||||
const matrix<double,0,1>& w,
|
||||
const rectangle& obj
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
ensures
|
||||
- This function returns the dot product between the feature vector for
|
||||
object box obj and the given w vector. That is, this function computes
|
||||
the same number as the following code snippet:
|
||||
matrix<double,0,1> psi(w.size());
|
||||
psi = 0;
|
||||
get_feature_vector(obj, psi);
|
||||
return dot(psi, w);
|
||||
The point of the compute_object_score() routine is to compute this dot
|
||||
product in a much more efficient way than directly calling
|
||||
get_feature_vector() and dot(). Therefore, compute_object_score() is an
|
||||
optional function. If you can't think of a faster way to compute these
|
||||
scores then do not implement compute_object_score() and the
|
||||
scan_image_custom object will simply compute these scores for you.
|
||||
However, it is often the case that there is something clever you can do
|
||||
to make this computation faster. If that is the case, then you can
|
||||
provide an implementation of this function with your feature extractor
|
||||
and then scan_image_custom will use it instead of using the default
|
||||
calculation method shown in the above code snippet.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void serialize(
|
||||
const example_feature_extractor& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
void deserialize(
|
||||
example_feature_extractor& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
class scan_image_custom : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON Feature_extractor_type
|
||||
- must be an object with an interface compatible with the
|
||||
example_feature_extractor defined at the top of this file.
|
||||
|
||||
INITIAL VALUE
|
||||
- is_loaded_with_image() == false
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for running a classifier over an image with the goal
|
||||
of localizing each object present. The localization is in the form of the
|
||||
bounding box around each object of interest.
|
||||
|
||||
Unlike the scan_image_pyramid and scan_image_boxes objects, this image
|
||||
scanner delegates all the work of constructing the object feature vector to
|
||||
its Feature_extractor_type template argument. That is, scan_image_custom
|
||||
simply asks the supplied feature extractor what boxes in the image we
|
||||
should investigate and then asks the feature extractor for the complete
|
||||
feature vector for each box. That is, scan_image_custom does not apply any
|
||||
kind of pyramiding or other higher level processing to the features coming
|
||||
out of the feature extractor. That means that when you use
|
||||
scan_image_custom it is completely up to you to define the feature vector
|
||||
used with each image box.
|
||||
|
||||
THREAD SAFETY
|
||||
Concurrent access to an instance of this object is not safe and should be
|
||||
protected by a mutex lock except for the case where you are copying the
|
||||
configuration (via copy_configuration()) of a scan_image_custom object to
|
||||
many other threads. In this case, it is safe to copy the configuration of
|
||||
a shared object so long as no other operations are performed on it.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
|
||||
scan_image_custom (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type must be a type with the following properties:
|
||||
- image_type objects can be loaded into Feature_extractor_type
|
||||
objects via Feature_extractor_type::load().
|
||||
ensures
|
||||
- #is_loaded_with_image() == true
|
||||
- Calls get_feature_extractor().load() on the given image. That is, we
|
||||
will have loaded the image into the feature extractor in this
|
||||
scan_image_custom object. We will also have stored the candidate
|
||||
object locations generated by the feature extractor and will scan
|
||||
over them when this->detect() is called.
|
||||
- This object is ready to run a classifier over img to detect object
|
||||
locations. Call detect() to do this.
|
||||
!*/
|
||||
|
||||
bool is_loaded_with_image (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns true if this object has been loaded with an image to process and
|
||||
false otherwise.
|
||||
!*/
|
||||
|
||||
const feature_extractor_type& get_feature_extractor (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a const reference to the feature_extractor_type object used
|
||||
internally for local feature extraction.
|
||||
!*/
|
||||
|
||||
void copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This function performs the equivalent of
|
||||
get_feature_extractor().copy_configuration(fe) (i.e. this function allows
|
||||
you to configure the parameters of the underlying feature extractor used
|
||||
by a scan_image_custom object)
|
||||
!*/
|
||||
|
||||
void copy_configuration (
|
||||
const scan_image_custom& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Copies all the state information of item into *this, except for state
|
||||
information populated by load(). More precisely, given two
|
||||
scan_image_custom objects S1 and S2, the following sequence of
|
||||
instructions should always result in both of them having the exact same
|
||||
state:
|
||||
S2.copy_configuration(S1);
|
||||
S1.load(img);
|
||||
S2.load(img);
|
||||
!*/
|
||||
|
||||
long get_num_dimensions (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of dimensions in the feature vector for a candidate
|
||||
object location. That is, this function returns get_feature_extractor().get_num_dimensions().
|
||||
!*/
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Scans over all the candidate object locations produced by the feature
|
||||
extractor during image loading and stores all detections into #dets.
|
||||
- for all valid i:
|
||||
- #dets[i].second == The candidate object location which produced this
|
||||
detection. This rectangle gives the location of the detection.
|
||||
- #dets[i].first == The score for this detection. This value is equal
|
||||
to dot(w, feature vector for this candidate object location).
|
||||
- #dets[i].first >= thresh
|
||||
- #dets will be sorted in descending order.
|
||||
(i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
|
||||
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only
|
||||
the first get_num_dimensions() are used.
|
||||
- Note that no form of non-max suppression is performed. If a location
|
||||
has a score >= thresh then it is reported in #dets.
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- obj.num_parts() == 0
|
||||
- is_loaded_with_image() == true
|
||||
- psi.size() >= get_num_dimensions()
|
||||
(i.e. psi must have preallocated its memory before this function is called)
|
||||
ensures
|
||||
- This function allows you to determine the feature vector used for a
|
||||
candidate object location output from detect(). Note that this vector is
|
||||
added to psi. Note also that you must use get_full_object_detection() to
|
||||
convert a rectangle from detect() into the needed full_object_detection.
|
||||
- The dimensionality of the vector added to psi is get_num_dimensions(). This
|
||||
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
|
||||
- Since scan_image_custom only searches a limited set of object locations,
|
||||
not all possible rectangles can be output by detect(). So in the case
|
||||
where obj.get_rect() could not arise from a call to detect(), this
|
||||
function will map obj.get_rect() to the nearest possible rectangle and
|
||||
then add the feature vector for the mapped rectangle into #psi.
|
||||
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
|
||||
gets mapped to for feature extraction.
|
||||
!*/
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns full_object_detection(rect)
|
||||
(This function is here only for compatibility with the scan_image_pyramid
|
||||
object)
|
||||
!*/
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Since scan_image_custom only searches a limited set of object locations,
|
||||
not all possible rectangles can be represented. Therefore, this function
|
||||
allows you to supply a rectangle and obtain the nearest possible
|
||||
candidate object location rectangle.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_detection_templates (
|
||||
) const { return 1; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 1. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Notionally, its return value indicates
|
||||
that a scan_image_custom object is always ready to detect objects once an
|
||||
image has been loaded.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_movable_components_per_detection_template (
|
||||
) const { return 0; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 0. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Its return value means that this object
|
||||
does not support using movable part models.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
void serialize (
|
||||
const scan_image_custom<T>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
template <typename T>
|
||||
void deserialize (
|
||||
scan_image_custom<T>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_
|
||||
|
||||
1101
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid.h
vendored
Normal file
1101
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
495
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid_abstract.h
vendored
Normal file
495
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid_abstract.h
vendored
Normal file
@@ -0,0 +1,495 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_Hh_
|
||||
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include "../image_processing.h"
|
||||
#include "../array2d.h"
|
||||
#include <vector>
|
||||
#include "full_object_detection_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
class scan_image_pyramid : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON Pyramid_type
|
||||
- must be one of the pyramid_down objects defined in
|
||||
dlib/image_transforms/image_pyramid_abstract.h or an object with
|
||||
a compatible interface
|
||||
|
||||
REQUIREMENTS ON Feature_extractor_type
|
||||
- must be an object with an interface compatible with the hashed_feature_image
|
||||
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h or
|
||||
with the nearest_neighbor_feature_image object defined in
|
||||
dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h
|
||||
|
||||
INITIAL VALUE
|
||||
- get_num_detection_templates() == 0
|
||||
- is_loaded_with_image() == false
|
||||
- get_max_detections_per_template() == 10000
|
||||
- get_max_pyramid_levels() == 1000
|
||||
- get_min_pyramid_layer_width() == 20
|
||||
- get_min_pyramid_layer_height() == 20
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for running a sliding window classifier over
|
||||
an image pyramid. This object can also be understood as a general
|
||||
tool for implementing the spatial pyramid models described in the paper:
|
||||
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
|
||||
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
|
||||
and Jean Ponce
|
||||
It also includes the ability to represent movable part models.
|
||||
|
||||
|
||||
|
||||
|
||||
The sliding window classifiers used by this object have three parts:
|
||||
1. The underlying feature extraction provided by Feature_extractor_type
|
||||
objects, which associate a vector with each location in an image.
|
||||
|
||||
2. A detection template. This is a rectangle which defines the shape of a
|
||||
sliding window (i.e. the object_box), as well as a set of rectangular feature
|
||||
extraction regions inside it. This set of regions defines the spatial
|
||||
structure of the overall feature extraction within a sliding window. In
|
||||
particular, each location of a sliding window has a feature vector
|
||||
associated with it. This feature vector is defined as follows:
|
||||
- Let N denote the number of feature extraction zones.
|
||||
- Let M denote the dimensionality of the vectors output by Feature_extractor_type
|
||||
objects.
|
||||
- Let F(i) == the M dimensional vector which is the sum of all vectors
|
||||
given by our Feature_extractor_type object inside the i-th feature extraction
|
||||
zone.
|
||||
- Then the feature vector for a sliding window is an M*N dimensional vector
|
||||
[F(1) F(2) F(3) ... F(N)] (i.e. it is a concatenation of the N vectors).
|
||||
This feature vector can be thought of as a collection of N "bags of features",
|
||||
each bag coming from a spatial location determined by one of the rectangular
|
||||
feature extraction zones.
|
||||
|
||||
3. A weight vector and a threshold value. The dot product between the weight
|
||||
vector and the feature vector for a sliding window location gives the score
|
||||
of the window. If this score is greater than the threshold value then the
|
||||
window location is output as a detection.
|
||||
|
||||
Finally, the sliding window classifiers described above are applied to every level of
|
||||
an image pyramid. Moreover, some of the feature extraction zones are allowed to move
|
||||
freely within the object box. This means that when we are sliding the classifier over
|
||||
an image, some feature extraction zones are stationary (i.e. always in the same place
|
||||
relative to the object box) while others are allowed to move anywhere within the object
|
||||
box. In particular, the movable regions are placed at the locations that maximize the
|
||||
score of the classifier. Note further that each of the movable feature extraction
|
||||
zones must pass a threshold test for it to be included. That is, if the score that a
|
||||
movable zone would contribute to the overall score for a sliding window location is not
|
||||
positive then that zone is not included in the feature vector (i.e. its part of the
|
||||
feature vector is set to zero. This way the length of the feature vector stays
|
||||
constant). This movable region construction allows us to represent objects with parts
|
||||
that move around relative to the object box. For example, a human has hands but they
|
||||
aren't always in the same place relative to a person's bounding box.
|
||||
|
||||
THREAD SAFETY
|
||||
Concurrent access to an instance of this object is not safe and should be protected
|
||||
by a mutex lock except for the case where you are copying the configuration
|
||||
(via copy_configuration()) of a scan_image_pyramid object to many other threads.
|
||||
In this case, it is safe to copy the configuration of a shared object so long
|
||||
as no other operations are performed on it.
|
||||
!*/
|
||||
public:
|
||||
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
|
||||
typedef Pyramid_type pyramid_type;
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
|
||||
scan_image_pyramid (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type must be a type with the following properties:
|
||||
- image_type is default constructible.
|
||||
- image_type is swappable by the global swap() function.
|
||||
- image_type logically represents some kind of image and therefore its
|
||||
number of rows and columns can be queried via num_rows(img) and
|
||||
num_columns(img) respectively.
|
||||
- image_type objects can be loaded into Feature_extractor_type
|
||||
objects via Feature_extractor_type::load().
|
||||
- image_type objects can be used with Pyramid_type. That is,
|
||||
if pyr is an object of type Pyramid_type while img1 and img2
|
||||
are objects of image_type, then pyr(img1,img2) should be
|
||||
a valid expression which downsamples img1 into img2.
|
||||
ensures
|
||||
- #is_loaded_with_image() == true
|
||||
- This object is ready to run sliding window classifiers over img. Call
|
||||
detect() to do this.
|
||||
!*/
|
||||
|
||||
bool is_loaded_with_image (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns true if this object has been loaded with an image to process
|
||||
and false otherwise.
|
||||
!*/
|
||||
|
||||
const feature_extractor_type& get_feature_extractor (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a const reference to the feature_extractor_type object used
|
||||
internally for local feature extraction.
|
||||
!*/
|
||||
|
||||
void copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This function performs the equivalent of
|
||||
get_feature_extractor().copy_configuration(fe) (i.e. this function allows
|
||||
you to configure the parameters of the underlying feature extractor used
|
||||
by a scan_image_pyramid object)
|
||||
!*/
|
||||
|
||||
void copy_configuration (
|
||||
const scan_image_pyramid& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- copies all the state information of item into *this, except for state
|
||||
information populated by load(). More precisely, given two scan_image_pyramid
|
||||
objects S1 and S2, the following sequence of instructions should always
|
||||
result in both of them having the exact same state.
|
||||
S2.copy_configuration(S1);
|
||||
S1.load(img);
|
||||
S2.load(img);
|
||||
!*/
|
||||
|
||||
void add_detection_template (
|
||||
const rectangle& object_box,
|
||||
const std::vector<rectangle>& stationary_feature_extraction_regions,
|
||||
const std::vector<rectangle>& movable_feature_extraction_regions
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- center(object_box) == point(0,0)
|
||||
- for all valid i:
|
||||
- center(movable_feature_extraction_regions[i]) == point(0,0)
|
||||
- if (get_num_detection_templates() > 0) then
|
||||
- get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size()
|
||||
- get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size()
|
||||
(i.e. if you already have detection templates in this object, then
|
||||
any new detection template must declare a consistent number of
|
||||
feature extraction regions)
|
||||
ensures
|
||||
- Adds another detection template to this object. In particular, object_box
|
||||
defines the size and shape of a sliding window while stationary_feature_extraction_regions
|
||||
and movable_feature_extraction_regions define the locations for feature extraction as
|
||||
discussed in the WHAT THIS OBJECT REPRESENTS section above. Note also that the locations of
|
||||
the stationary feature extraction regions are relative to the object_box.
|
||||
- #get_num_detection_templates() == get_num_detection_templates() + 1
|
||||
- The order of rectangles in stationary_feature_extraction_regions and
|
||||
movable_feature_extraction_regions matters. Recall that each rectangle
|
||||
gets its own set of features. So given two different templates, their
|
||||
i-th rectangles will both share the same part of the weight vector (i.e. the w
|
||||
supplied to detect()). So there should be some reasonable correspondence
|
||||
between the rectangle ordering in different detection templates. For
|
||||
example, different detection templates should place corresponding feature
|
||||
extraction regions in roughly the same part of the object_box.
|
||||
- #get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size()
|
||||
- #get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size()
|
||||
!*/
|
||||
|
||||
void add_detection_template (
|
||||
const rectangle& object_box,
|
||||
const std::vector<rectangle>& stationary_feature_extraction_regions
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- calls add_detection_template(object_box, stationary_feature_extraction_regions, empty_list)
|
||||
where empty_list is a vector of size 0. I.e. this function is just a convenience
|
||||
routine for adding detection templates with no movable regions.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_detection_templates (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of detection templates in this object
|
||||
!*/
|
||||
|
||||
unsigned long get_num_stationary_components_per_detection_template (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- A detection template is a rectangle which defines the shape of a sliding
|
||||
window (the object_box), as well as a set of rectangles which define
|
||||
feature extraction zones. This function returns the number of stationary
|
||||
feature extraction zones in the detection templates used by this object.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_movable_components_per_detection_template (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- A detection template is a rectangle which defines the shape of a sliding
|
||||
window (the object_box), as well as a set of rectangles which define
|
||||
feature extraction zones. This function returns the number of movable
|
||||
feature extraction zones in the detection templates used by this object.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_components_per_detection_template (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- returns the total number of feature extraction zones in the detection
|
||||
templates used by this object. That is, returns the following:
|
||||
- get_num_movable_components_per_detection_template() +
|
||||
get_num_stationary_components_per_detection_template()
|
||||
!*/
|
||||
|
||||
long get_num_dimensions (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- returns the number of dimensions in the feature vector for a sliding window
|
||||
location. This value is the dimensionality of the underlying feature vectors
|
||||
produced by Feature_extractor_type times (get_num_stationary_components_per_detection_template() +
|
||||
get_num_movable_components_per_detection_template()).
|
||||
!*/
|
||||
|
||||
unsigned long get_max_pyramid_levels (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the maximum number of image pyramid levels this object will use.
|
||||
Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
|
||||
will be used at all. That is, only the original image will be processed
|
||||
and no lower scale versions will be created.
|
||||
!*/
|
||||
|
||||
void set_max_pyramid_levels (
|
||||
unsigned long max_levels
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- max_levels > 0
|
||||
ensures
|
||||
- #get_max_pyramid_levels() == max_levels
|
||||
!*/
|
||||
|
||||
void set_min_pyramid_layer_size (
|
||||
unsigned long width,
|
||||
unsigned long height
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- width > 0
|
||||
- height > 0
|
||||
ensures
|
||||
- #get_min_pyramid_layer_width() == width
|
||||
- #get_min_pyramid_layer_height() == height
|
||||
!*/
|
||||
|
||||
inline unsigned long get_min_pyramid_layer_width (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the smallest allowable width of an image in the image pyramid.
|
||||
All pyramids will always include the original input image, however, no
|
||||
pyramid levels will be created which have a width smaller than the
|
||||
value returned by this function.
|
||||
!*/
|
||||
|
||||
inline unsigned long get_min_pyramid_layer_height (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the smallest allowable height of an image in the image pyramid.
|
||||
All pyramids will always include the original input image, however, no
|
||||
pyramid levels will be created which have a height smaller than the
|
||||
value returned by this function.
|
||||
!*/
|
||||
|
||||
unsigned long get_max_detections_per_template (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- For each image pyramid layer and detection template, this object scans a sliding
|
||||
window classifier over an image and produces a number of detections. This
|
||||
function returns a number which defines a hard upper limit on the number of
|
||||
detections allowed by a single scan. This means that the total number of
|
||||
possible detections produced by detect() is get_max_detections_per_template()*
|
||||
get_num_detection_templates()*(number of image pyramid layers). Additionally,
|
||||
if the maximum number of detections is reached during a scan then this object
|
||||
will return a random subsample of all detections which are above the detection
|
||||
threshold.
|
||||
!*/
|
||||
|
||||
void set_max_detections_per_template (
|
||||
unsigned long max_dets
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- max_dets > 0
|
||||
ensures
|
||||
- #get_max_detections_per_template() == max_dets
|
||||
!*/
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- Scans all the detection templates over all pyramid layers as discussed in the
|
||||
WHAT THIS OBJECT REPRESENTS section and stores all detections into #dets.
|
||||
- for all valid i:
|
||||
- #dets[i].second == The object box which produced this detection. This rectangle gives
|
||||
the location of the detection. Note that the rectangle will have been converted back into
|
||||
the original image input space. That is, if this detection was made at a low level in the
|
||||
image pyramid then the object box will have been automatically mapped up the pyramid layers
|
||||
to the original image space. Or in other words, if you plot #dets[i].second on top of the
|
||||
image given to load() it will show up in the right place.
|
||||
- #dets[i].first == The score for this detection. This value is equal to dot(w, feature vector
|
||||
for this sliding window location).
|
||||
- #dets[i].first >= thresh
|
||||
- #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
|
||||
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only the first
|
||||
get_num_dimensions() are used.
|
||||
- Note that no form of non-max suppression is performed. If a window has a score >= thresh
|
||||
then it is reported in #dets (assuming the limit imposed by get_max_detections_per_template() hasn't
|
||||
been reached).
|
||||
!*/
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles
|
||||
can be represented. Therefore, this function allows you to supply a rectangle and obtain the
|
||||
nearest possible sliding window rectangle.
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- all_parts_in_rect(obj) == true
|
||||
- obj.num_parts() == get_num_movable_components_per_detection_template()
|
||||
- is_loaded_with_image() == true
|
||||
- get_num_detection_templates() > 0
|
||||
- psi.size() >= get_num_dimensions()
|
||||
(i.e. psi must have preallocated its memory before this function is called)
|
||||
ensures
|
||||
- This function allows you to determine the feature vector used for a
|
||||
sliding window location. Note that this vector is added to psi. Note
|
||||
also that you must use get_full_object_detection() to convert a rect from
|
||||
detect() into the needed full_object_detection.
|
||||
- The dimensionality of the vector added to psi is get_num_dimensions(). This
|
||||
means that elements of psi after psi(get_num_dimensions()-1) are not modified.
|
||||
- Since scan_image_pyramid is a sliding window classifier system, not all
|
||||
possible rectangles can be output by detect(). So in the case where
|
||||
obj.get_rect() could not arise from a call to detect(), this function
|
||||
will map obj.get_rect() to the nearest possible object box and then add
|
||||
the feature vector for the mapped rectangle into #psi.
|
||||
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
|
||||
gets mapped to for feature extraction.
|
||||
!*/
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- This function allows you to determine the full_object_detection
|
||||
corresponding to a sliding window location. Note that the detect()
|
||||
routine doesn't return the locations of the movable parts in a detected
|
||||
object. Therefore, if you are using any movable parts in your model you
|
||||
must use get_full_object_detection() to find out where the movable parts
|
||||
were detected. To do this, you supply the w and detected rectangle.
|
||||
Then the corresponding fully populated full_object_detection will be
|
||||
returned.
|
||||
- returns a full_object_detection, OBJ, such that:
|
||||
- OBJ.get_rect() == rect
|
||||
- OBJ.num_parts() == get_num_movable_components_per_detection_template()
|
||||
- OBJ.part(i) == the location of the i-th movable part inside this detection,
|
||||
or OBJECT_PART_NOT_PRESENT if the part was not found.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void serialize (
|
||||
const scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void deserialize (
|
||||
scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_Hh_
|
||||
|
||||
|
||||
180
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid_tools.h
vendored
Normal file
180
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid_tools.h
vendored
Normal file
@@ -0,0 +1,180 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SCAN_IMaGE_PYRAMID_TOOLS_Hh_
|
||||
#define DLIB_SCAN_IMaGE_PYRAMID_TOOLS_Hh_
|
||||
|
||||
#include "scan_image_pyramid_tools_abstract.h"
|
||||
#include "../statistics.h"
|
||||
#include <list>
|
||||
#include "../geometry.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
inline bool compare_first (
|
||||
const std::pair<unsigned long,rectangle>& a,
|
||||
const std::pair<unsigned long,rectangle>& b
|
||||
)
|
||||
{
|
||||
return a.first < b.first;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename image_scanner_type>
|
||||
std::vector<rectangle> determine_object_boxes (
|
||||
const image_scanner_type& scanner,
|
||||
const std::vector<rectangle>& rects,
|
||||
double min_match_score
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < min_match_score && min_match_score <= 1,
|
||||
"\t std::vector<rectangle> determine_object_boxes()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t min_match_score: " << min_match_score
|
||||
);
|
||||
|
||||
typename image_scanner_type::pyramid_type pyr;
|
||||
|
||||
typedef std::list<std::pair<unsigned long, rectangle> > list_type;
|
||||
|
||||
unsigned long max_area = 0;
|
||||
|
||||
// Copy rects into sorted_rects and sort them in order of increasing area. But
|
||||
// only include the rectangles that aren't already obtainable by the scanner.
|
||||
list_type sorted_rects;
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
if (scanner.get_num_detection_templates() > 0)
|
||||
{
|
||||
rectangle temp = scanner.get_best_matching_rect(rects[i]);
|
||||
const double match_score = (rects[i].intersect(temp).area())/(double)(rects[i] + temp).area();
|
||||
// skip this rectangle if it's already matched well enough.
|
||||
if (match_score > min_match_score)
|
||||
continue;
|
||||
}
|
||||
max_area = std::max(rects[i].area(), max_area);
|
||||
sorted_rects.push_back(std::make_pair(rects[i].area(), rects[i]));
|
||||
}
|
||||
sorted_rects.sort(dlib::impl::compare_first);
|
||||
|
||||
// Make sure this area value is comfortably larger than all the
|
||||
// rectangles' areas.
|
||||
max_area = 3*max_area + 100;
|
||||
|
||||
std::vector<rectangle> object_boxes;
|
||||
|
||||
while (sorted_rects.size() != 0)
|
||||
{
|
||||
rectangle cur = sorted_rects.front().second;
|
||||
sorted_rects.pop_front();
|
||||
object_boxes.push_back(centered_rect(point(0,0), cur.width(), cur.height()));
|
||||
|
||||
// Scale cur up the image pyramid and remove any rectangles which match.
|
||||
// But also stop when cur gets large enough to not match anything.
|
||||
for (unsigned long itr = 0;
|
||||
itr < scanner.get_max_pyramid_levels() && cur.area() < max_area;
|
||||
++itr)
|
||||
{
|
||||
list_type::iterator i = sorted_rects.begin();
|
||||
while (i != sorted_rects.end())
|
||||
{
|
||||
const rectangle temp = move_rect(i->second, cur.tl_corner());
|
||||
const double match_score = (cur.intersect(temp).area())/(double)(cur + temp).area();
|
||||
if (match_score > min_match_score)
|
||||
{
|
||||
i = sorted_rects.erase(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
cur = pyr.rect_up(cur);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return object_boxes;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_scanner_type>
|
||||
std::vector<rectangle> determine_object_boxes (
|
||||
const image_scanner_type& scanner,
|
||||
const std::vector<std::vector<rectangle> >& rects,
|
||||
double min_match_score
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < min_match_score && min_match_score <= 1,
|
||||
"\t std::vector<rectangle> determine_object_boxes()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t min_match_score: " << min_match_score
|
||||
);
|
||||
|
||||
std::vector<rectangle> temp;
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
for (unsigned long j = 0; j < rects[i].size(); ++j)
|
||||
{
|
||||
temp.push_back(rects[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
return determine_object_boxes(scanner, temp, min_match_score);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_scanner_type>
|
||||
void setup_grid_detection_templates (
|
||||
image_scanner_type& scanner,
|
||||
const std::vector<std::vector<rectangle> >& rects,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y,
|
||||
double min_match_score = 0.75
|
||||
)
|
||||
{
|
||||
const std::vector<rectangle>& object_boxes = determine_object_boxes(scanner, rects, min_match_score);
|
||||
for (unsigned long i = 0; i < object_boxes.size(); ++i)
|
||||
{
|
||||
scanner.add_detection_template(object_boxes[i], create_grid_detection_template(object_boxes[i], cells_x, cells_y));
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename image_scanner_type>
|
||||
void setup_grid_detection_templates_verbose (
|
||||
image_scanner_type& scanner,
|
||||
const std::vector<std::vector<rectangle> >& rects,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y,
|
||||
double min_match_score = 0.75
|
||||
)
|
||||
{
|
||||
const std::vector<rectangle>& object_boxes = determine_object_boxes(scanner, rects, min_match_score);
|
||||
std::cout << "number of detection templates: "<< object_boxes.size() << std::endl;
|
||||
for (unsigned long i = 0; i < object_boxes.size(); ++i)
|
||||
{
|
||||
std::cout << " object box " << i << ": width: " << object_boxes[i].width()
|
||||
<< " height: "<< object_boxes[i].height() << std::endl;
|
||||
scanner.add_detection_template(object_boxes[i], create_grid_detection_template(object_boxes[i], cells_x, cells_y));
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMaGE_PYRAMID_TOOLS_Hh_
|
||||
|
||||
118
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid_tools_abstract.h
vendored
Normal file
118
lib/3rdParty/dlib/include/dlib/image_processing/scan_image_pyramid_tools_abstract.h
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_IMaGE_PYRAMID_TOOLS_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SCAN_IMaGE_PYRAMID_TOOLS_ABSTRACT_Hh_
|
||||
|
||||
#include "scan_image_pyramid_abstract.h"
|
||||
#include <vector>
|
||||
#include "../geometry.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
std::vector<rectangle> determine_object_boxes (
|
||||
const image_scanner_type& scanner,
|
||||
const std::vector<rectangle>& rects,
|
||||
double min_match_score
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < min_match_score <= 1
|
||||
- image_scanner_type == an implementation of the scan_image_pyramid
|
||||
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
|
||||
ensures
|
||||
- returns a set of object boxes which, when used as detection templates with
|
||||
the given scanner, can attain at least min_match_score alignment with every
|
||||
element of rects. Note that the alignment between two rectangles A and B is
|
||||
defined as:
|
||||
(A.intersect(B).area())/(double)(A+B).area()
|
||||
- Only elements of rects which are not already well matched by the scanner are
|
||||
considered. That is, if the scanner already has some detection templates in
|
||||
it then the contents of rects will be checked against those detection
|
||||
templates and elements with a match better than min_match_score are ignored.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
std::vector<rectangle> determine_object_boxes (
|
||||
const image_scanner_type& scanner,
|
||||
const std::vector<std::vector<rectangle> >& rects,
|
||||
double min_match_score
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < min_match_score <= 1
|
||||
- image_scanner_type == an implementation of the scan_image_pyramid
|
||||
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
|
||||
ensures
|
||||
- copies all rectangles in rects into a std::vector<rectangle> object, call it
|
||||
R. Then this function returns determine_object_boxes(scanner,R,min_match_score).
|
||||
That is, it just calls the version of determine_object_boxes() defined above
|
||||
and returns the results.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
void setup_grid_detection_templates (
|
||||
image_scanner_type& scanner,
|
||||
const std::vector<std::vector<rectangle> >& rects,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y,
|
||||
double min_match_score = 0.75
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- cells_x > 0
|
||||
- cells_y > 0
|
||||
- 0 < min_match_score <= 1
|
||||
- image_scanner_type == an implementation of the scan_image_pyramid
|
||||
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
|
||||
ensures
|
||||
- uses determine_object_boxes(scanner,rects,min_match_score) to obtain a set of
|
||||
object boxes and then adds them to the given scanner object as detection templates.
|
||||
Also uses create_grid_detection_template(object_box, cells_x, cells_y) to create
|
||||
each feature extraction region. Therefore, the detection templates will extract
|
||||
features from a regular grid inside each object box.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner_type
|
||||
>
|
||||
void setup_grid_detection_templates_verbose (
|
||||
image_scanner_type& scanner,
|
||||
const std::vector<std::vector<rectangle> >& rects,
|
||||
unsigned int cells_x,
|
||||
unsigned int cells_y,
|
||||
double min_match_score = 0.75
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- cells_x > 0
|
||||
- cells_y > 0
|
||||
- 0 < min_match_score <= 1
|
||||
- image_scanner_type == an implementation of the scan_image_pyramid
|
||||
object defined in dlib/image_processing/scan_image_pyramid_abstract.h
|
||||
ensures
|
||||
- this function is identical to setup_grid_detection_templates() except
|
||||
that it also outputs the selected detection templates to standard out.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMaGE_PYRAMID_TOOLS_ABSTRACT_Hh_
|
||||
|
||||
219
lib/3rdParty/dlib/include/dlib/image_processing/setup_hashed_features.h
vendored
Normal file
219
lib/3rdParty/dlib/include/dlib/image_processing/setup_hashed_features.h
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SETUP_HAShED_FEATURES_Hh_
|
||||
#define DLIB_SETUP_HAShED_FEATURES_Hh_
|
||||
|
||||
#include "setup_hashed_features_abstract.h"
|
||||
#include "scan_image_pyramid.h"
|
||||
#include "scan_image_boxes.h"
|
||||
#include "../lsh.h"
|
||||
#include "../statistics.h"
|
||||
#include "../image_keypoint.h"
|
||||
#include "../geometry.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class image_hash_construction_failure : public error
|
||||
{
|
||||
public:
|
||||
image_hash_construction_failure(
|
||||
const std::string& a
|
||||
): error(a) {}
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Switches the scanner's feature extractor to uniform feature weights.
//
// The scanner's extractor is not modified in place.  Instead a scratch
// extractor receives a copy of the current configuration, has
// use_uniform_feature_weights() called on it, and is then pushed back into the
// scanner through scanner.copy_configuration().
template <
    typename image_scanner
    >
void use_uniform_feature_weights (
    image_scanner& scanner
)
{
    typedef typename image_scanner::feature_extractor_type extractor_type;

    extractor_type scratch;
    scratch.copy_configuration(scanner.get_feature_extractor());
    scratch.use_uniform_feature_weights();

    scanner.copy_configuration(scratch);
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Switches the scanner's feature extractor to relative feature weights.
//
// The scanner's extractor is not modified in place.  Instead a scratch
// extractor receives a copy of the current configuration, has
// use_relative_feature_weights() called on it, and is then pushed back into the
// scanner through scanner.copy_configuration().
template <
    typename image_scanner
    >
void use_relative_feature_weights (
    image_scanner& scanner
)
{
    typedef typename image_scanner::feature_extractor_type extractor_type;

    extractor_type scratch;
    scratch.copy_configuration(scanner.get_feature_extractor());
    scratch.use_relative_feature_weights();

    scanner.copy_configuration(scratch);
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// stuff for scan_image_pyramid
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename pyramid,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
|
||||
const image_array& images,
|
||||
const feature_extractor& fe,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < bits && bits <= 32 &&
|
||||
num_samples > 1 &&
|
||||
images.size() > 0,
|
||||
"\t void setup_hashed_features()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t bits: " << bits
|
||||
<< "\n\t num_samples: " << num_samples
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
|
||||
pyramid pyr;
|
||||
|
||||
const random_subset_selector<typename feature_extractor::descriptor_type>& samps =
|
||||
randomly_sample_image_features(images, pyr, fe, num_samples);
|
||||
|
||||
if (samps.size() <= 1)
|
||||
throw dlib::image_hash_construction_failure("Images too small, not able to gather enough samples to make hash");
|
||||
|
||||
projection_hash phash = create_random_projection_hash(samps, bits);
|
||||
|
||||
feature_image<feature_extractor, projection_hash> hfe;
|
||||
hfe.copy_configuration(scanner.get_feature_extractor());
|
||||
hfe.set_hash(phash);
|
||||
hfe.copy_configuration(fe);
|
||||
scanner.copy_configuration(hfe);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename pyramid,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
|
||||
const image_array& images,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < bits && bits <= 32 &&
|
||||
num_samples > 1 &&
|
||||
images.size() > 0,
|
||||
"\t void setup_hashed_features()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t bits: " << bits
|
||||
<< "\n\t num_samples: " << num_samples
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
|
||||
feature_extractor fe;
|
||||
setup_hashed_features(scanner, images, fe, bits, num_samples);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// stuff for scan_image_boxes
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image,
|
||||
typename box_generator
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator >& scanner,
|
||||
const image_array& images,
|
||||
const feature_extractor& fe,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < bits && bits <= 32 &&
|
||||
num_samples > 1 &&
|
||||
images.size() > 0,
|
||||
"\t void setup_hashed_features()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t bits: " << bits
|
||||
<< "\n\t num_samples: " << num_samples
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
|
||||
pyramid_disable pyr;
|
||||
|
||||
const random_subset_selector<typename feature_extractor::descriptor_type>& samps =
|
||||
randomly_sample_image_features(images, pyr, fe, num_samples);
|
||||
|
||||
if (samps.size() <= 1)
|
||||
throw dlib::image_hash_construction_failure("Images too small, not able to gather enough samples to make hash");
|
||||
|
||||
projection_hash phash = create_random_projection_hash(samps, bits);
|
||||
|
||||
feature_image<feature_extractor, projection_hash> hfe;
|
||||
hfe.copy_configuration(scanner.get_feature_extractor());
|
||||
hfe.set_hash(phash);
|
||||
hfe.copy_configuration(fe);
|
||||
scanner.copy_configuration(hfe);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image,
|
||||
typename box_generator
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator>& scanner,
|
||||
const image_array& images,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < bits && bits <= 32 &&
|
||||
num_samples > 1 &&
|
||||
images.size() > 0,
|
||||
"\t void setup_hashed_features()"
|
||||
<< "\n\t Invalid inputs were given to this function. "
|
||||
<< "\n\t bits: " << bits
|
||||
<< "\n\t num_samples: " << num_samples
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
|
||||
feature_extractor fe;
|
||||
setup_hashed_features(scanner, images, fe, bits, num_samples);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SETUP_HAShED_FEATURES_Hh_
|
||||
|
||||
|
||||
210
lib/3rdParty/dlib/include/dlib/image_processing/setup_hashed_features_abstract.h
vendored
Normal file
210
lib/3rdParty/dlib/include/dlib/image_processing/setup_hashed_features_abstract.h
vendored
Normal file
@@ -0,0 +1,210 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SETUP_HAShED_FEATURES_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SETUP_HAShED_FEATURES_ABSTRACT_Hh_
|
||||
|
||||
#include "scan_image_pyramid_abstract.h"
|
||||
#include "scan_image_boxes_abstract.h"
|
||||
#include "../lsh/projection_hash_abstract.h"
|
||||
#include "../image_keypoint/hashed_feature_image_abstract.h"
|
||||
#include "../image_keypoint/binned_vector_feature_image_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class image_hash_construction_failure : public error
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is the exception object used by the routines in this file.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner
|
||||
>
|
||||
void use_uniform_feature_weights (
|
||||
image_scanner& scanner
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_scanner should be either scan_image_pyramid or scan_image_boxes and
|
||||
should use the hashed_feature_image as its local feature extractor.
|
||||
ensures
|
||||
- #scanner.get_feature_extractor().uses_uniform_feature_weights() == true
|
||||
(i.e. Make the scanner's feature extractor use the uniform feature weighting
|
||||
scheme)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_scanner
|
||||
>
|
||||
void use_relative_feature_weights (
|
||||
image_scanner& scanner
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_scanner should be either scan_image_pyramid or scan_image_boxes and
|
||||
should use the hashed_feature_image as its local feature extractor.
|
||||
ensures
|
||||
- #scanner.get_feature_extractor().uses_uniform_feature_weights() == false
|
||||
(i.e. Make the scanner's feature extractor use the relative feature weighting
|
||||
scheme)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename pyramid,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
|
||||
const image_array& images,
|
||||
const feature_extractor& fe,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < bits <= 32
|
||||
- num_samples > 1
|
||||
- images.size() > 0
|
||||
- it must be valid to pass images[0] into scanner.load().
|
||||
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
|
||||
- feature_image must be either hashed_feature_image, binned_vector_feature_image,
|
||||
or a type with a compatible interface.
|
||||
ensures
|
||||
- Creates a projection_hash suitable for hashing the feature vectors produced by
|
||||
fe and then configures scanner to use this hash function.
|
||||
- The hash function will map vectors into integers in the range [0, pow(2,bits))
|
||||
- The hash function will be setup so that it hashes a random sample of num_samples
|
||||
vectors from fe such that each bin ends up with roughly the same number of
|
||||
elements in it.
|
||||
throws
|
||||
- image_hash_construction_failure
|
||||
This exception is thrown if there is a problem creating the projection_hash.
|
||||
This should only happen if the images are so small they contain fewer than 2
|
||||
feature vectors.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename pyramid,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_pyramid<pyramid, feature_image<feature_extractor, projection_hash> >& scanner,
|
||||
const image_array& images,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < bits <= 32
|
||||
- num_samples > 1
|
||||
- images.size() > 0
|
||||
- it must be valid to pass images[0] into scanner.load().
|
||||
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
|
||||
- feature_image must be either hashed_feature_image, binned_vector_feature_image,
|
||||
or a type with a compatible interface.
|
||||
ensures
|
||||
- performs: setup_hashed_features(scanner, images, feature_extractor(), bits, num_samples)
|
||||
throws
|
||||
- image_hash_construction_failure
|
||||
This exception is thrown if there is a problem creating the projection_hash.
|
||||
This should only happen if the images are so small they contain fewer than 2
|
||||
feature vectors.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image,
|
||||
typename box_generator
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator>& scanner,
|
||||
const image_array& images,
|
||||
const feature_extractor& fe,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < bits <= 32
|
||||
- num_samples > 1
|
||||
- images.size() > 0
|
||||
- it must be valid to pass images[0] into scanner.load().
|
||||
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
|
||||
- feature_image must be either hashed_feature_image, binned_vector_feature_image,
|
||||
or a type with a compatible interface.
|
||||
ensures
|
||||
- Creates a projection_hash suitable for hashing the feature vectors produced by
|
||||
fe and then configures scanner to use this hash function.
|
||||
- The hash function will map vectors into integers in the range [0, pow(2,bits))
|
||||
- The hash function will be setup so that it hashes a random sample of num_samples
|
||||
vectors from fe such that each bin ends up with roughly the same number of
|
||||
elements in it.
|
||||
throws
|
||||
- image_hash_construction_failure
|
||||
This exception is thrown if there is a problem creating the projection_hash.
|
||||
This should only happen if the images are so small they contain fewer than 2
|
||||
feature vectors.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array,
|
||||
typename feature_extractor,
|
||||
template <typename fe, typename hash> class feature_image,
|
||||
typename box_generator
|
||||
>
|
||||
void setup_hashed_features (
|
||||
scan_image_boxes<feature_image<feature_extractor, projection_hash>,box_generator>& scanner,
|
||||
const image_array& images,
|
||||
int bits,
|
||||
unsigned long num_samples = 200000
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < bits <= 32
|
||||
- num_samples > 1
|
||||
- images.size() > 0
|
||||
- it must be valid to pass images[0] into scanner.load().
|
||||
(also, image_array must be an implementation of dlib/array/array_kernel_abstract.h)
|
||||
- feature_image must be either hashed_feature_image, binned_vector_feature_image,
|
||||
or a type with a compatible interface.
|
||||
ensures
|
||||
- performs: setup_hashed_features(scanner, images, feature_extractor(), bits, num_samples)
|
||||
throws
|
||||
- image_hash_construction_failure
|
||||
This exception is thrown if there is a problem creating the projection_hash.
|
||||
This should only happen if the images are so small they contain fewer than 2
|
||||
feature vectors.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SETUP_HAShED_FEATURES_ABSTRACT_Hh_
|
||||
|
||||
|
||||
1067
lib/3rdParty/dlib/include/dlib/image_processing/shape_predictor.h
vendored
Normal file
1067
lib/3rdParty/dlib/include/dlib/image_processing/shape_predictor.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
442
lib/3rdParty/dlib/include/dlib/image_processing/shape_predictor_abstract.h
vendored
Normal file
442
lib/3rdParty/dlib/include/dlib/image_processing/shape_predictor_abstract.h
vendored
Normal file
@@ -0,0 +1,442 @@
|
||||
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
|
||||
#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
|
||||
|
||||
#include "full_object_detection_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include "../pixel.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class shape_predictor
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool that takes in an image region containing some object
|
||||
and outputs a set of point locations that define the pose of the object.
|
||||
The classic example of this is human face pose prediction, where you take
|
||||
an image of a human face as input and are expected to identify the
|
||||
locations of important facial landmarks such as the corners of the mouth
|
||||
and eyes, tip of the nose, and so forth.
|
||||
|
||||
To create useful instantiations of this object you need to use the
|
||||
shape_predictor_trainer object defined below to train a shape_predictor
|
||||
using a set of training images, each annotated with shapes you want to
|
||||
predict.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
shape_predictor (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #num_parts() == 0
|
||||
!*/
|
||||
|
||||
unsigned long num_parts (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of parts in the shapes predicted by this object.
|
||||
!*/
|
||||
|
||||
template <typename image_type>
|
||||
full_object_detection operator()(
|
||||
const image_type& img,
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- image_type == an image object that implements the interface defined in
|
||||
dlib/image_processing/generic_image.h
|
||||
ensures
|
||||
- Runs the shape prediction algorithm on the part of the image contained in
|
||||
the given bounding rectangle. So it will try and fit the shape model to
|
||||
the contents of the given rectangle in the image. For example, if there
|
||||
is a human face inside the rectangle and you use a face landmarking shape
|
||||
model then this function will return the locations of the face landmarks
|
||||
as the parts. So the return value is a full_object_detection DET such
|
||||
that:
|
||||
- DET.get_rect() == rect
|
||||
- DET.num_parts() == num_parts()
|
||||
- for all valid i:
|
||||
- DET.part(i) == the location in img for the i-th part of the shape
|
||||
predicted by this object.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
void serialize (const shape_predictor& item, std::ostream& out);
|
||||
void deserialize (shape_predictor& item, std::istream& in);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class shape_predictor_trainer
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for training shape_predictors based on annotated training
|
||||
images. Its implementation uses the algorithm described in:
|
||||
One Millisecond Face Alignment with an Ensemble of Regression Trees
|
||||
by Vahid Kazemi and Josephine Sullivan, CVPR 2014
|
||||
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
shape_predictor_trainer (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_cascade_depth() == 10
|
||||
- #get_tree_depth() == 4
|
||||
- #get_num_trees_per_cascade_level() == 500
|
||||
- #get_nu() == 0.1
|
||||
- #get_oversampling_amount() == 20
|
||||
- #get_feature_pool_size() == 400
|
||||
- #get_lambda() == 0.1
|
||||
- #get_num_test_splits() == 20
|
||||
- #get_feature_pool_region_padding() == 0
|
||||
- #get_random_seed() == ""
|
||||
- This object will not be verbose
|
||||
!*/
|
||||
|
||||
unsigned long get_cascade_depth (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of cascades created when you train a model. This
|
||||
means that the total number of trees in the learned model is equal to
|
||||
get_cascade_depth()*get_num_trees_per_cascade_level().
|
||||
!*/
|
||||
|
||||
void set_cascade_depth (
|
||||
unsigned long depth
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- depth > 0
|
||||
ensures
|
||||
- #get_cascade_depth() == depth
|
||||
!*/
|
||||
|
||||
unsigned long get_tree_depth (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the depth of the trees used in the cascade. In particular, there
|
||||
are pow(2,get_tree_depth()) leaves in each tree.
|
||||
!*/
|
||||
|
||||
void set_tree_depth (
|
||||
unsigned long depth
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- depth > 0
|
||||
ensures
|
||||
- #get_tree_depth() == depth
|
||||
!*/
|
||||
|
||||
unsigned long get_num_trees_per_cascade_level (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of trees created for each cascade. This means that
|
||||
the total number of trees in the learned model is equal to
|
||||
get_cascade_depth()*get_num_trees_per_cascade_level().
|
||||
!*/
|
||||
|
||||
void set_num_trees_per_cascade_level (
|
||||
unsigned long num
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num > 0
|
||||
ensures
|
||||
- #get_num_trees_per_cascade_level() == num
|
||||
!*/
|
||||
|
||||
double get_nu (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the regularization parameter. Larger values of this parameter
|
||||
will cause the algorithm to fit the training data better but may also
|
||||
cause overfitting.
|
||||
!*/
|
||||
|
||||
void set_nu (
|
||||
double nu
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- nu > 0
|
||||
ensures
|
||||
- #get_nu() == nu
|
||||
!*/
|
||||
|
||||
std::string get_random_seed (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the random seed used by the internal random number generator.
|
||||
Since this algorithm is a random forest style algorithm it relies on a
|
||||
random number generator for generating the trees. So each setting of the
|
||||
random seed will produce slightly different outputs.
|
||||
!*/
|
||||
|
||||
void set_random_seed (
|
||||
const std::string& seed
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_random_seed() == seed
|
||||
!*/
|
||||
|
||||
unsigned long get_oversampling_amount (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- You give annotated images to this object as training examples. You
|
||||
can effectively increase the amount of training data by adding in each
|
||||
training example multiple times but with a randomly selected deformation
|
||||
applied to it. That is what this parameter controls. That is, if you
|
||||
supply N training samples to train() then the algorithm runs internally
|
||||
with N*get_oversampling_amount() training samples. So the bigger this
|
||||
parameter the better (excepting that larger values make training take
|
||||
longer). In terms of the Kazemi paper, this parameter is the number of
|
||||
randomly selected initial starting points sampled for each training
|
||||
example.
|
||||
!*/
|
||||
|
||||
void set_oversampling_amount (
|
||||
unsigned long amount
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- amount > 0
|
||||
ensures
|
||||
- #get_oversampling_amount() == amount
|
||||
!*/
|
||||
|
||||
unsigned long get_feature_pool_size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- At each level of the cascade we randomly sample get_feature_pool_size()
|
||||
pixels from the image. These pixels are used to generate features for
|
||||
the random trees. So in general larger settings of this parameter give
|
||||
better accuracy but make the algorithm run slower.
|
||||
!*/
|
||||
|
||||
void set_feature_pool_size (
|
||||
unsigned long size
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- size > 1
|
||||
ensures
|
||||
- #get_feature_pool_size() == size
|
||||
!*/
|
||||
|
||||
double get_feature_pool_region_padding (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When we randomly sample the pixels for the feature pool we do so in a box
|
||||
fit around the provided training landmarks. By default, this box is the
|
||||
tightest box that contains the landmarks (i.e. this is what happens when
|
||||
get_feature_pool_region_padding()==0). However, you can expand or shrink
|
||||
the size of the pixel sampling region by setting a different value of
|
||||
get_feature_pool_region_padding().
|
||||
|
||||
To explain this precisely, for a padding of 0 we say that the pixels are
|
||||
sampled from a box of size 1x1. The padding value is added to each side
|
||||
of the box. So a padding of 0.5 would cause the algorithm to sample
|
||||
pixels from a box that was 2x2, effectively multiplying the area pixels
|
||||
are sampled from by 4. Similarly, setting the padding to -0.2 would
|
||||
cause it to sample from a box 0.6x0.6 in size.
|
||||
!*/
|
||||
|
||||
void set_feature_pool_region_padding (
|
||||
double padding
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_feature_pool_region_padding() == padding
|
||||
!*/
|
||||
|
||||
|
||||
double get_lambda (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- To decide how to split nodes in the regression trees the algorithm looks
|
||||
at pairs of pixels in the image. These pixel pairs are sampled randomly
|
||||
but with a preference for selecting pixels that are near each other.
|
||||
get_lambda() controls this "nearness" preference. In particular, smaller
|
||||
values of get_lambda() will make the algorithm prefer to select pixels
|
||||
close together and larger values of get_lambda() will make it care less
|
||||
about picking nearby pixel pairs.
|
||||
|
||||
Note that this is the inverse of how it is defined in the Kazemi paper.
|
||||
For this object, you should think of lambda as "the fraction of the
|
||||
bounding box we will traverse to find a neighboring pixel".  Nominally,
|
||||
this is normalized between 0 and 1. So reasonable settings of lambda are
|
||||
values in the range 0 < lambda < 1.
|
||||
!*/
|
||||
|
||||
void set_lambda (
|
||||
double lambda
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- lambda > 0
|
||||
ensures
|
||||
- #get_lambda() == lambda
|
||||
!*/
|
||||
|
||||
unsigned long get_num_test_splits (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When generating the random trees we randomly sample get_num_test_splits()
|
||||
possible split features at each node and pick the one that gives the best
|
||||
split. Larger values of this parameter will usually give more accurate
|
||||
outputs but take longer to train.
|
||||
!*/
|
||||
|
||||
void set_num_test_splits (
|
||||
unsigned long num
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num > 0
|
||||
ensures
|
||||
- #get_num_test_splits() == num
|
||||
!*/
|
||||
|
||||
void be_verbose (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This object will print status messages to standard out so that a
|
||||
user can observe the progress of the algorithm.
|
||||
!*/
|
||||
|
||||
void be_quiet (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This object will not print anything to standard out
|
||||
!*/
|
||||
|
||||
template <typename image_array>
|
||||
shape_predictor train (
|
||||
const image_array& images,
|
||||
const std::vector<std::vector<full_object_detection> >& objects
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- image_array is a dlib::array of image objects where each image object
|
||||
implements the interface defined in dlib/image_processing/generic_image.h
|
||||
- images.size() == objects.size()
|
||||
- images.size() > 0
|
||||
- for some i: objects[i].size() != 0
|
||||
(i.e. there has to be at least one full_object_detection in the training set)
|
||||
- for all valid i,j,k,l:
|
||||
- objects[i][j].num_parts() == objects[k][l].num_parts()
|
||||
(i.e. all objects must agree on the number of parts)
|
||||
- objects[i][j].num_parts() > 0
|
||||
ensures
|
||||
- This object will try to learn to predict the locations of an object's parts
|
||||
based on the object bounding box (i.e. full_object_detection::get_rect())
|
||||
and the image pixels in that box. That is, we will try to learn a
|
||||
shape_predictor, SP, such that:
|
||||
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
|
||||
This learned SP object is then returned.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array
|
||||
>
|
||||
double test_shape_predictor (
|
||||
const shape_predictor& sp,
|
||||
const image_array& images,
|
||||
const std::vector<std::vector<full_object_detection> >& objects,
|
||||
const std::vector<std::vector<double> >& scales
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array is a dlib::array of image objects where each image object
|
||||
implements the interface defined in dlib/image_processing/generic_image.h
|
||||
- images.size() == objects.size()
|
||||
- for all valid i and j:
|
||||
- objects[i][j].num_parts() == sp.num_parts()
|
||||
- if (scales.size() != 0) then
|
||||
- There must be a scale value for each full_object_detection in objects.
|
||||
That is, it must be the case that:
|
||||
- scales.size() == objects.size()
|
||||
- for all valid i:
|
||||
- scales[i].size() == objects[i].size()
|
||||
ensures
|
||||
- Tests the given shape_predictor by running it on each of the given objects and
|
||||
checking how well it recovers the part positions. In particular, for all
|
||||
valid i and j we perform:
|
||||
sp(images[i], objects[i][j].get_rect())
|
||||
and compare the result with the truth part positions in objects[i][j]. We
|
||||
then return the average distance (measured in pixels) between a predicted
|
||||
part location and its true position.
|
||||
- if (scales.size() != 0) then
|
||||
- Each time we compute the distance between a predicted part location and
|
||||
its true location in objects[i][j] we divide the distance by
|
||||
scales[i][j]. Therefore, if you want the reported error to be the
|
||||
average pixel distance then give an empty scales vector, but if you want
|
||||
the returned value to be something else like the average distance
|
||||
normalized by some feature of each object (e.g. the interocular distance)
|
||||
then you can supply those normalizing values via scales.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_array
|
||||
>
|
||||
double test_shape_predictor (
|
||||
const shape_predictor& sp,
|
||||
const image_array& images,
|
||||
const std::vector<std::vector<full_object_detection> >& objects
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array is a dlib::array of image objects where each image object
|
||||
implements the interface defined in dlib/image_processing/generic_image.h
|
||||
- images.size() == objects.size()
|
||||
- for all valid i and j:
|
||||
- objects[i][j].num_parts() == sp.num_parts()
|
||||
ensures
|
||||
- returns test_shape_predictor(sp, images, objects, no_scales) where no_scales
|
||||
is an empty vector. So this is just a convenience function for calling the
|
||||
above test_shape_predictor() routine without a scales argument.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_
|
||||
|
||||
Reference in New Issue
Block a user