cimport cython
import numpy as np
cimport numpy as np

DTYPE = np.float32
ctypedef np.float32_t DTYPE_t


def anchors_cython(int height, int width, int stride,
                   np.ndarray[DTYPE_t, ndim=2] base_anchors):
    """
    Replicate a set of base anchors at every cell of a feature plane.

    Parameters
    ----------
    height: number of rows of the plane
    width: number of columns of the plane
    stride: offset added per cell; cell (ih, iw) shifts x by iw * stride
        and y by ih * stride
    base_anchors: (A, 4) float32 array of base anchors (x1, y1, x2, y2)
    Returns
    -------
    all_anchors: (height, width, A, 4) float32 ndarray, the base anchors
        shifted to every cell of the plane
    """
    cdef unsigned int num_anchors = base_anchors.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=4] plane = np.zeros(
        (height, width, num_anchors, 4), dtype=DTYPE)
    cdef unsigned int row, col
    cdef unsigned int a
    cdef unsigned int shift_x
    cdef unsigned int shift_y
    for row in range(height):
        shift_y = row * stride
        for col in range(width):
            shift_x = col * stride
            for a in range(num_anchors):
                # x coordinates move with the column, y coordinates with the row
                plane[row, col, a, 0] = base_anchors[a, 0] + shift_x
                plane[row, col, a, 1] = base_anchors[a, 1] + shift_y
                plane[row, col, a, 2] = base_anchors[a, 2] + shift_x
                plane[row, col, a, 3] = base_anchors[a, 3] + shift_y
    return plane
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------

cimport cython
import numpy as np
cimport numpy as np

# ``np.float`` was only an alias of the builtin ``float`` (float64). It was
# deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named
# explicitly; callers that passed float64 arrays are unaffected.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t


def bbox_overlaps_cython(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise intersection-over-union of two box sets.

    Box extents are inclusive: width is ``x2 - x1 + 1``.

    Parameters
    ----------
    boxes: (N, 4) float64 ndarray of (x1, y1, x2, y2)
    query_boxes: (K, 4) float64 ndarray of (x1, y1, x2, y2)
    Returns
    -------
    overlaps: (N, K) float64 ndarray; entry [n, k] is the IoU of boxes[n]
        and query_boxes[k], 0 where they do not intersect
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # area of the query box is invariant over the inner loop
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # union = area(box) + area(query) - intersection
                    ua = (
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
import numpy as np
cimport numpy as np

cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, double thresh):
    """
    Greedy non-maximum suppression on the CPU.

    Boxes are visited in order of decreasing score; a box is dropped when its
    IoU with an already kept box is ``>= thresh``.

    :param dets: (N, 5) float32 array of [x1, y1, x2, y2, score]
    :param thresh: IoU at or above which the lower scoring box is suppressed
    :return: list of kept row indexes into ``dets``, highest score first
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort returns intp indexes; np.int_t is a 32-bit C long on 64-bit
    # Windows and triggers a "Buffer dtype mismatch" there.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int (alias of the builtin int) was removed in NumPy 1.24; a byte per
    # box is all that is needed for a 0/1 flag.
    cdef np.ndarray[np.uint8_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.uint8)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh,
            np.int32_t device_id=0):
    """
    Non-maximum suppression through the CUDA ``_nms`` routine.

    :param dets: (N, 5) float32 array of [x1, y1, x2, y2, score]
    :param thresh: IoU threshold forwarded to ``_nms`` (a C ``float``)
    :param device_id: id of the CUDA device handed to ``_nms``
    :return: list of kept row indexes into ``dets``, highest score first
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out = 0
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # argsort yields intp indexes; np.int_t is 32-bit on 64-bit Windows and
    # would fail the buffer dtype check there.
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    if boxes_num == 0:
        # &keep[0] / &sorted_dets[0, 0] are out of bounds for empty input
        return []
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    # _nms reports positions in score order; map them back to rows of dets
    return list(order[keep])
// Intersection-over-union of two boxes stored as (x1, y1, x2, y2, ...).
// Extents are inclusive, hence the "+ 1" in every width/height.
__device__ inline float devIoU(float const * const a, float const * const b) {
  const float ix1 = max(a[0], b[0]);
  const float ix2 = min(a[2], b[2]);
  const float iy1 = max(a[1], b[1]);
  const float iy2 = min(a[3], b[3]);
  // clamp at zero so disjoint boxes contribute no intersection
  const float iw = max(ix2 - ix1 + 1, 0.f);
  const float ih = max(iy2 - iy1 + 1, 0.f);
  const float inter = iw * ih;
  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter / (area_a + area_b - inter);
}
// Make `device_id` the current CUDA device unless it already is.
void _set_device(int device_id) {
  int current_device = -1;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}

// Greedy NMS over `boxes_num` boxes that the caller has already sorted by
// descending score.
//   keep_out  : receives the indexes (into boxes_host) of the kept boxes;
//               must have room for boxes_num entries
//   num_out   : receives the number of kept boxes
//   boxes_host: boxes_num * boxes_dim floats; nms_kernel reads 5 floats per
//               box, so boxes_dim is expected to be 5
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  if (boxes_num <= 0) {
    // A zero-sized grid is an invalid launch configuration and
    // &mask_host[0] on an empty vector is undefined, so stop early.
    *num_out = 0;
    return;
  }

  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // Launch failures are only reported through the sticky error state.
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // One bit per box, set once the box has been suppressed (starts all zero).
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // box i survives: everything it overlaps too much is removed
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
def customize_compiler_for_nvcc(self):
    """Patch a distutils compiler instance so ``.cu`` sources go to nvcc.

    The instance's ``_compile`` is replaced by a wrapper that, per source
    file, selects the executable and the matching entry of the
    ``extra_postargs`` dict (key ``'nvcc'`` for ``.cu`` files, ``'gcc'`` for
    everything else) before delegating to the original ``_compile``.
    Patching the instance avoids having to inject a UnixCCompiler subclass
    into distutils.
    """
    # let distutils accept CUDA sources
    self.src_extensions.append('.cu')

    # remember the stock settings so they can be reused and restored
    stock_compiler_so = self.compiler_so
    stock_compile = self._compile

    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        is_cuda = os.path.splitext(src)[1] == '.cu'
        if is_cuda:
            # hand .cu files to nvcc instead of the default compiler
            self.set_executable('compiler_so', CUDA['nvcc'])
        # extra_postargs mirrors the dict given as extra_compile_args
        postargs = extra_postargs['nvcc' if is_cuda else 'gcc']

        stock_compile(obj, src, ext, cc_args, postargs, pp_opts)
        # undo the nvcc switch, if any, for the next source file
        self.compiler_so = stock_compiler_so

    # install the wrapper on this compiler instance
    self._compile = _compile
def compute_bbox_regression_targets(rois, overlaps, labels):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :return: targets[i][class, dx, dy, dw, dh] k * 5; rows that are not
        regression examples, and every row when there is no ground-truth
        roi, stay all zero
    """
    # Ensure ROIs are floats. np.float (an alias of the builtin float, i.e.
    # float64) was removed in NumPy 1.24, so spell the dtype out.
    rois = rois.astype(np.float64, copy=False)

    # Sanity check
    if len(rois) != len(overlaps):
        logger.warning('bbox regression: len(rois) != len(overlaps)')

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Indices of ground-truth ROIs (a gt roi overlaps itself completely)
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        logger.warning('bbox regression: len(gt_inds) == 0')
        # Nothing to regress towards; argmax over the empty gt axis below
        # would raise ValueError, so return the all-zero targets instead.
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
def add_bbox_regression_targets(roidb):
    """
    given roidb, add ['bbox_targets'] and normalize bounding box regression targets
    :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
    :return: means, std variances of targets, each flattened from
        (num_classes, 4) to a vector of length 4 * num_classes
    """
    logger.info('bbox regression: add bounding box regression targets')
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0]

    num_images = len(roidb)
    num_classes = roidb[0]['gt_overlaps'].shape[1]
    for im_i in range(num_images):
        rois = roidb[im_i]['boxes']
        max_overlaps = roidb[im_i]['max_overlaps']
        max_classes = roidb[im_i]['max_classes']
        roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes)

    if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        # use fixed / precomputed means and stds instead of empirical values
        means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1))
        stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1))
    else:
        # compute mean, std values; the tiny offset keeps classes without
        # any sample from dividing by zero
        class_counts = np.zeros((num_classes, 1)) + 1e-14
        sums = np.zeros((num_classes, 4))
        squared_sums = np.zeros((num_classes, 4))
        for im_i in range(num_images):
            targets = roidb[im_i]['bbox_targets']
            # class 0 is skipped: only foreground rows carry targets
            for cls in range(1, num_classes):
                cls_indexes = np.where(targets[:, 0] == cls)[0]
                if cls_indexes.size > 0:
                    class_counts[cls] += cls_indexes.size
                    sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
                    squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)

        means = sums / class_counts
        # var(x) = E(x^2) - E(x)^2; rounding can push this slightly below
        # zero, which would turn the std (and every normalized target of
        # that class) into NaN, so clamp at 0 before the square root
        variances = np.maximum(squared_sums / class_counts - means ** 2, 0.0)
        stds = np.sqrt(variances)

    # normalized targets
    for im_i in range(num_images):
        targets = roidb[im_i]['bbox_targets']
        for cls in range(1, num_classes):
            cls_indexes = np.where(targets[:, 0] == cls)[0]
            roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :]
            roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :]

    return means.ravel(), stds.ravel()
def expand_bbox_regression_targets(bbox_targets_data, num_classes):
    """
    Spread compact [class, dx, dy, dw, dh] rows over per-class target slots.

    Only the 4 columns belonging to a row's own class are filled; rows whose
    class is not > 0 (background) stay zero in both outputs.
    :param bbox_targets_data: [k * 5]
    :param num_classes: number of classes
    :return: bbox targets [k * 4 num_classes] and bbox weights of the same
        shape, set to config.TRAIN.BBOX_WEIGHTS in the filled slots
    """
    labels = bbox_targets_data[:, 0]
    out_shape = (labels.size, 4 * num_classes)
    bbox_targets = np.zeros(out_shape, dtype=np.float32)
    bbox_weights = np.zeros(out_shape, dtype=np.float32)
    for row in np.where(labels > 0)[0]:
        # columns [4 * cls, 4 * cls + 4) hold the deltas of class cls
        first_col = int(4 * labels[row])
        last_col = first_col + 4
        bbox_targets[row, first_col:last_col] = bbox_targets_data[row, 1:]
        bbox_weights[row, first_col:last_col] = config.TRAIN.BBOX_WEIGHTS
    return bbox_targets, bbox_weights
def nonlinear_pred(boxes, box_deltas):
    """
    Transform the set of class-agnostic boxes into class-specific boxes
    by applying the predicted offsets (box_deltas)

    Centre offsets (dx, dy) are scaled by the box width/height; size offsets
    (dw, dh) are applied as exp(d) * size. Extents are inclusive (+1).
    :param boxes: !important [N 4]
    :param box_deltas: [N, 4 * num_classes]
    :return: [N 4 * num_classes]
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))

    # np.float (alias of the builtin float, i.e. float64) was removed in
    # NumPy 1.24; name the dtype explicitly.
    boxes = boxes.astype(np.float64, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

    # every 4th column belongs to the same coordinate of successive classes
    dx = box_deltas[:, 0::4]
    dy = box_deltas[:, 1::4]
    dw = box_deltas[:, 2::4]
    dh = box_deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(box_deltas.shape)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)

    return pred_boxes
def generate_anchors_fpn(base_size=(64, 32, 16, 8, 4), ratios=(0.5, 1, 2),
                         scales=8):
    """
    Generate one set of anchors per pyramid level.

    ``ratios`` and ``scales`` are flattened sequences that are split evenly,
    in order, across the levels: level i receives row i of
    ``ratios.reshape(len(base_size), -1)`` and of
    ``scales.reshape(len(base_size), -1)`` and passes them, together with
    ``base_size[i]``, to generate_anchors.

    :param base_size: sequence with the base window size of every level
    :param ratios: array-like whose size is a multiple of len(base_size)
    :param scales: array-like whose size is a multiple of len(base_size)
    :return: list with one anchors array per entry of base_size
    :raises ValueError: if ratios or scales cannot be split evenly across
        the levels
    """
    num_levels = len(base_size)
    # accept plain lists / scalars as well as ndarrays (the former used to
    # fail on the missing .reshape attribute)
    ratios = np.asarray(ratios)
    scales = np.asarray(scales)
    for arg_name, values in (('ratios', ratios), ('scales', scales)):
        if num_levels == 0 or values.size % num_levels != 0:
            raise ValueError(
                '%s has %d entries, which cannot be split evenly over %d levels'
                % (arg_name, values.size, num_levels))

    level_ratios = ratios.reshape((num_levels, -1))
    level_scales = scales.reshape((num_levels, -1))
    return [generate_anchors(bs, level_ratios[i], level_scales[i])
            for i, bs in enumerate(base_size)]
def nms(dets, thresh):
    """
    Greedy non-maximum suppression in pure NumPy.

    The highest scoring remaining box is kept, then only the boxes whose IoU
    with it is <= thresh stay in the running; repeat until none are left.
    :param dets: [[x1, y1, x2, y2 score]]
    :param thresh: boxes overlapping a kept box by more than this are dropped
    :return: indexes to keep
    """
    left = dets[:, 0]
    top = dets[:, 1]
    right = dets[:, 2]
    bottom = dets[:, 3]
    confidences = dets[:, 4]

    # inclusive extents, hence the +1
    box_areas = (right - left + 1) * (bottom - top + 1)
    remaining = confidences.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best = remaining[0]
        rest = remaining[1:]
        selected.append(best)

        # intersection rectangle of the best box with every other candidate
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, ix2 - ix1 + 1)
        inter_h = np.maximum(0.0, iy2 - iy1 + 1)
        inter_area = inter_w * inter_h
        iou = inter_area / (box_areas[best] + box_areas[rest] - inter_area)

        remaining = rest[iou <= thresh]

    return selected
self._rpn_pre_nms_top_n = 1000 + #self._rpn_post_nms_top_n = rpn_post_nms_top_n + #self.score_threshold = 0.05 + self.nms_threshold = 0.3 + self._bbox_pred = nonlinear_pred + sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) + #all_layers = sym.get_internals() + #outs = [] + #for stride in self._feat_stride_fpn: + # _name = 'rpn_cls_score_stride%s_output' % stride + # rpn_cls_score = all_layers[_name] + # rpn_cls_score_reshape = mx.symbol.Reshape(data=rpn_cls_score, + # shape=(0, 2, -1, 0), + # name="rpn_cls_score_reshape_stride%d" % stride) + + # rpn_cls_prob = mx.symbol.SoftmaxActivation(data=rpn_cls_score_reshape, + # mode="channel", + # name="rpn_cls_prob_stride%d" % stride) + # rpn_cls_prob_reshape = mx.symbol.Reshape(data=rpn_cls_prob, + # shape=(0, 2 * num_anchors, -1, 0), + # name='rpn_cls_prob_reshape_stride%d' % stride) + # outs.append(rpn_cls_prob_reshape) + # _name = 'rpn_bbox_pred_stride%s_output' % stride + # rpn_bbox_pred = all_layers[_name] + # outs.append(rpn_bbox_pred) + #sym = mx.sym.Group(outs) + + self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names = None) + image_size = (640, 640) + self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False) + self.model.set_params(arg_params, aux_params) + self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id) + pass + + def detect(self,img, threshold=0.05): + image_size = (img.shape[0], img.shape[1]) + #self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False) + im_info = [image_size[0], image_size[1], 1.0] + data = nd.zeros( (1 ,3, image_size[0], image_size[1]) ) + nimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + nimg = np.transpose(nimg, (2,0,1)) + nimg = nd.array(nimg) + data[0] = nimg + db = mx.io.DataBatch(data=(data,)) + self.model.forward(db, is_train=False) + net_out = self.model.get_outputs() + + + pre_nms_topN = self._rpn_pre_nms_top_n + #post_nms_topN = self._rpn_post_nms_top_n + 
#min_size_dict = self._rpn_min_size_fpn + + proposals_list = [] + scores_list = [] + idx = 0 + for s in self._feat_stride_fpn: + _key = 'stride%s'%s + stride = int(s) + scores = net_out[idx].asnumpy() + #print(scores.shape) + idx+=1 + #print('scores',stride, scores.shape, file=sys.stderr) + scores = scores[:, self._num_anchors['stride%s'%s]:, :, :] + bbox_deltas = net_out[idx].asnumpy() + idx+=1 + + #if DEBUG: + # print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + # print 'scale: {}'.format(im_info[2]) + + _height, _width = int(im_info[0] / stride), int(im_info[1] / stride) + height, width = bbox_deltas.shape[2], bbox_deltas.shape[3] + + A = self._num_anchors['stride%s'%s] + K = height * width + + anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s'%s].astype(np.float32)) + print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr) + anchors = anchors.reshape((K * A, 4)) + + #print('pre', bbox_deltas.shape, height, width) + bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) + #print('after', bbox_deltas.shape, height, width) + bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) + + scores = self._clip_pad(scores, (height, width)) + scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) + + #print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr) + proposals = self._bbox_pred(anchors, bbox_deltas) + #proposals = anchors + + proposals = clip_boxes(proposals, im_info[:2]) + + #keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2]) + #proposals = proposals[keep, :] + #scores = scores[keep] + #print('333', proposals.shape) + + scores_ravel = scores.ravel() + order = scores_ravel.argsort()[::-1] + if pre_nms_topN > 0: + order = order[:pre_nms_topN] + proposals = proposals[order, :] + scores = scores[order] + + proposals_list.append(proposals) + scores_list.append(scores) + + proposals = np.vstack(proposals_list) + scores = 
np.vstack(scores_list) + scores_ravel = scores.ravel() + order = scores_ravel.argsort()[::-1] + #if config.TEST.SCORE_THRESH>0.0: + # _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH) + # order = order[:_count] + #if pre_nms_topN > 0: + # order = order[:pre_nms_topN] + proposals = proposals[order, :] + scores = scores[order] + + det = np.hstack((proposals, scores)).astype(np.float32) + + #if np.shape(det)[0] == 0: + # print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.") + # proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32) + # scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32) + # det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32) + + + if self.nms_threshold<1.0: + keep = self.nms(det) + det = det[keep, :] + if threshold>0.0: + keep = np.where(det[:, 4] >= threshold)[0] + det = det[keep, :] + return det + + @staticmethod + def _filter_boxes(boxes, min_size): + """ Remove all boxes with any side smaller than min_size """ + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep + + @staticmethod + def _clip_pad(tensor, pad_shape): + """ + Clip boxes of the pad area. 
+ :param tensor: [n, c, H, W] + :param pad_shape: [h, w] + :return: [n, c, h, w] + """ + H, W = tensor.shape[2:] + h, w = pad_shape + + if h < H or w < W: + tensor = tensor[:, :, :h, :w].copy() + + return tensor + diff --git a/SSH/t2.jpg b/SSH/t2.jpg new file mode 100644 index 0000000..dcca930 Binary files /dev/null and b/SSH/t2.jpg differ diff --git a/SSH/test.py b/SSH/test.py new file mode 100644 index 0000000..4c3c755 --- /dev/null +++ b/SSH/test.py @@ -0,0 +1,29 @@ +import cv2 +import sys +import numpy as np +import datetime +#sys.path.append('.') +from ssh_detector import SSHDetector + +long_max = 1200 +t = 2 + + +f = 't2.jpg' +if len(sys.argv)>1: + f = sys.argv[1] +img = cv2.imread(f) +print(img.shape) +if img.shape[0]>long_max or img.shape[1]>long_max: + scale = float(long_max) / max(img.shape[0], img.shape[1]) + img = cv2.resize(img, (0,0), fx=scale, fy=scale) + print('resize to', img.shape) +detector = SSHDetector('./model/e2ef', 0) +for i in xrange(t-1): #warmup + faces = detector.detect(img) +timea = datetime.datetime.now() +faces = detector.detect(img) +timeb = datetime.datetime.now() +diff = timeb - timea +print('detection uses', diff.total_seconds(), 'seconds') +print('find', faces.shape[0], 'faces')