mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2025-12-30 13:02:30 +00:00
Updating OpenFace to version 2.2.0 (#741)
Change log: Moving to C++17. This means that the code can only be build using C++17 compilers (e.g. g++ >8 and Visual Studio 2017, clang > 5), fixing related bugs - (#698, #629, #641) Removing an explicit dependency on boost (all the filesystem operations are performed using std::filesystem or boost::filesysteme). If boost is available it will used boost:filesystem, otherwise std::filesystem (this requires C++17) Visual Studio 2017 is now the main version for Visual Studio builds, VS 2015 is no longer supported Updating OpenCV to 4.1 version (#511) Fixing a bug with output images when using GUI (#694) Updating RAWImage - #609, so it can be initialized from System.Drawing.Bitmap directly Fixing overlap detection for multi face tracking (#693)
This commit is contained in:
committed by
GitHub
parent
f023667fee
commit
e4a57e11d2
@@ -1,3 +1,5 @@
|
||||
.git
|
||||
docker-compose.yml
|
||||
build
|
||||
matlab_runners
|
||||
matlab_version
|
||||
@@ -5,4 +7,4 @@ python_scripts
|
||||
test-dump
|
||||
samples
|
||||
model_training
|
||||
gui
|
||||
gui
|
||||
|
||||
6
.env
Normal file
6
.env
Normal file
@@ -0,0 +1,6 @@
|
||||
## This is read by docker-compose. You can overwrite these at runtime, e.g.:
|
||||
## DOCKERUSER=bobfoo docker-compose build
|
||||
|
||||
DOCKERUSER=openface
|
||||
DOCKERTAG=latest
|
||||
DATA_MOUNT=/tmp/openface
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
lib/local/LandmarkDetector/model/patch_experts/cen_patches_*.dat
|
||||
|
||||
matlab_version/experiments_menpo/out_semifrontal/
|
||||
/x64/Release/
|
||||
/x64/Debug/
|
||||
@@ -100,3 +102,6 @@ lib/local/Utilities/Debug/
|
||||
lib/3rdParty/CameraEnumerator/Debug/
|
||||
lib/local/CppInerop/Debug/
|
||||
*.user
|
||||
|
||||
# IDE-generated folders
|
||||
.idea
|
||||
|
||||
30
.travis.yml
30
.travis.yml
@@ -1,12 +1,13 @@
|
||||
language: cpp
|
||||
|
||||
dist: trusty
|
||||
dist: xenial
|
||||
sudo: required
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
- feature/opencv4
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
@@ -16,14 +17,17 @@ os:
|
||||
|
||||
before_install:
|
||||
|
||||
# OpenCV dependencies, dlib and boost
|
||||
# G++ 8 compiler, OpenCV dependencies, dlib, openblas, and tbb
|
||||
- if [ ${TRAVIS_OS_NAME} = linux ]; then
|
||||
sudo apt-get update;
|
||||
sudo apt-get install libopenblas-dev;
|
||||
cd ../;
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test;
|
||||
sudo apt-get update -qq;
|
||||
sudo apt-get install -qq g++-8;
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 90;
|
||||
sudo apt-get install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev;
|
||||
sudo apt-get install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev;
|
||||
sudo apt-get install cmake;
|
||||
sudo apt-get install libboost-all-dev;
|
||||
sudo apt-get install libopenblas-dev;
|
||||
wget http://dlib.net/files/dlib-19.13.tar.bz2;
|
||||
tar xf dlib-19.13.tar.bz2;
|
||||
cd dlib-19.13;
|
||||
@@ -34,14 +38,14 @@ before_install:
|
||||
sudo make install;
|
||||
sudo ldconfig;
|
||||
cd ../..;
|
||||
wget https://github.com/opencv/opencv/archive/3.4.0.zip;
|
||||
unzip -qq 3.4.0.zip;
|
||||
cd opencv-3.4.0;
|
||||
wget https://github.com/opencv/opencv/archive/4.0.0.zip;
|
||||
unzip -qq 4.0.0.zip;
|
||||
cd opencv-4.0.0;
|
||||
mkdir build;
|
||||
cd build;
|
||||
fi
|
||||
|
||||
# g++4.8.1
|
||||
# g++ TODO these should not be separated?
|
||||
- if [ "$CXX" = "g++" ]; then
|
||||
if [ ${TRAVIS_OS_NAME} = linux ]; then
|
||||
$CXX --version;
|
||||
@@ -52,7 +56,7 @@ before_install:
|
||||
fi
|
||||
fi
|
||||
|
||||
# clang 3.4
|
||||
# clang
|
||||
- if [ "$CXX" = "clang++" ]; then
|
||||
if [ ${TRAVIS_OS_NAME} = linux ]; then
|
||||
$CXX --version;
|
||||
@@ -68,16 +72,18 @@ before_install:
|
||||
brew install tbb;
|
||||
brew install openblas;
|
||||
brew install dlib;
|
||||
brew install opencv3;
|
||||
brew install opencv;
|
||||
cd ../;
|
||||
fi
|
||||
|
||||
script:
|
||||
- cd OpenFace
|
||||
- $CXX --version
|
||||
- chmod 777 ./download_models.sh
|
||||
- ./download_models.sh
|
||||
- mkdir build
|
||||
- cd build
|
||||
- cmake -D CMAKE_BUILD_TYPE=RELEASE CMAKE_CXX_FLAGS="-std=c++11" -D CMAKE_EXE_LINKER_FLAGS="-std=c++11" ..
|
||||
- cmake -D CMAKE_BUILD_TYPE=RELEASE ..
|
||||
- make
|
||||
- ../build/bin/FeatureExtraction -f "../samples/2015-10-15-15-14.avi" -q -mloc model/main_clm_general.txt
|
||||
- ../build/bin/FaceLandmarkImg -fdir ../samples -out_dir data -multi_view 1 -wild -q
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
cmake_minimum_required (VERSION 3.2)
|
||||
cmake_minimum_required (VERSION 3.8)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
project(OpenFace VERSION 2.0.2)
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
|
||||
@@ -27,7 +29,7 @@ else()
|
||||
MESSAGE(" OpenBLAS_INCLUDE: ${OpenBLAS_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
find_package( OpenCV 3.3 REQUIRED COMPONENTS core imgproc calib3d highgui objdetect)
|
||||
find_package( OpenCV 4.0 REQUIRED COMPONENTS core imgproc calib3d highgui objdetect)
|
||||
if(${OpenCV_FOUND})
|
||||
MESSAGE("OpenCV information:")
|
||||
MESSAGE(" OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}")
|
||||
@@ -37,7 +39,7 @@ else()
|
||||
MESSAGE(FATAL_ERROR "OpenCV not found in the system.")
|
||||
endif()
|
||||
|
||||
find_package( Boost 1.5.9 REQUIRED COMPONENTS filesystem system)
|
||||
find_package( Boost 1.5.9 COMPONENTS filesystem system)
|
||||
if(${Boost_FOUND})
|
||||
MESSAGE("Boost information:")
|
||||
MESSAGE(" Boost_VERSION: ${Boost_VERSION}")
|
||||
@@ -45,9 +47,10 @@ if(${Boost_FOUND})
|
||||
MESSAGE(" Boost_LIBRARIES: ${Boost_LIBRARIES}")
|
||||
MESSAGE(" Boost_LIBRARY_DIRS: ${Boost_LIBRARY_DIRS}")
|
||||
else()
|
||||
MESSAGE(FATAL_ERROR "Boost not found in the system.")
|
||||
MESSAGE("Boost not found in the system.")
|
||||
endif()
|
||||
|
||||
|
||||
# Move LandmarkDetector model
|
||||
file(GLOB files "lib/local/LandmarkDetector/model/*.txt")
|
||||
foreach(file ${files})
|
||||
@@ -134,13 +137,11 @@ endforeach()
|
||||
|
||||
if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (GCC_VERSION VERSION_LESS 4.7)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -msse -msse2 -msse3")
|
||||
if (GCC_VERSION VERSION_LESS 8.0)
|
||||
MESSAGE(FATAL_ERROR "Need a 8.0 or newer GCC compiler. Current GCC: ${GCC_VERSION}")
|
||||
else ()
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse -msse2 -msse3")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 -msse3")
|
||||
endif ()
|
||||
else ()
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse -msse2 -msse3")
|
||||
endif ()
|
||||
|
||||
# dlib
|
||||
|
||||
63
Dockerfile
63
Dockerfile
@@ -1,63 +0,0 @@
|
||||
FROM ubuntu:14.04 as build
|
||||
|
||||
LABEL maintainer="Edgar Aroutiounian <edgar.factorial@gmail.com>"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ARG BUILD_DIR=/home/build-dep
|
||||
|
||||
ARG OPENFACE_DIR=/home/openface-build
|
||||
|
||||
RUN mkdir ${OPENFACE_DIR}
|
||||
WORKDIR ${OPENFACE_DIR}
|
||||
|
||||
COPY ./CMakeLists.txt ${OPENFACE_DIR}
|
||||
|
||||
COPY ./cmake ${OPENFACE_DIR}/cmake
|
||||
|
||||
COPY ./exe ${OPENFACE_DIR}/exe
|
||||
|
||||
COPY ./lib ${OPENFACE_DIR}/lib
|
||||
|
||||
ADD https://www.dropbox.com/s/7na5qsjzz8yfoer/cen_patches_0.25_of.dat?dl=1 \
|
||||
${OPENFACE_DIR}/lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.25_of.dat
|
||||
|
||||
ADD https://www.dropbox.com/s/k7bj804cyiu474t/cen_patches_0.35_of.dat?dl=1 \
|
||||
${OPENFACE_DIR}/lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.35_of.dat
|
||||
|
||||
ADD https://www.dropbox.com/s/ixt4vkbmxgab1iu/cen_patches_0.50_of.dat?dl=1 \
|
||||
${OPENFACE_DIR}/lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.50_of.dat
|
||||
|
||||
ADD https://www.dropbox.com/s/2t5t1sdpshzfhpj/cen_patches_1.00_of.dat?dl=1 \
|
||||
${OPENFACE_DIR}/lib/local/LandmarkDetector/model/patch_experts/cen_patches_1.00_of.dat
|
||||
|
||||
RUN mkdir ${BUILD_DIR}
|
||||
|
||||
ADD https://github.com/opencv/opencv/archive/3.4.0.zip ${BUILD_DIR}
|
||||
|
||||
RUN apt-get update && apt-get install -qq -y \
|
||||
curl build-essential llvm clang-3.7 libc++-dev \
|
||||
libc++abi-dev cmake libopenblas-dev liblapack-dev git libgtk2.0-dev \
|
||||
pkg-config libavcodec-dev libavformat-dev libswscale-dev \
|
||||
python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev \
|
||||
libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev checkinstall \
|
||||
libboost-all-dev wget unzip && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN cd ${BUILD_DIR} && unzip 3.4.0.zip && \
|
||||
cd opencv-3.4.0 && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake -D CMAKE_BUILD_TYPE=RELEASE \
|
||||
-D CMAKE_INSTALL_PREFIX=/usr/local \
|
||||
-D WITH_TBB=ON -D WITH_CUDA=OFF \
|
||||
-D BUILD_SHARED_LIBS=OFF .. && \
|
||||
make -j4 && \
|
||||
make install
|
||||
|
||||
RUN cd ${OPENFACE_DIR} && mkdir -p build && cd build && \
|
||||
cmake -D CMAKE_BUILD_TYPE=RELEASE .. && \
|
||||
make -j4
|
||||
|
||||
RUN ln /dev/null /dev/raw1394
|
||||
ENTRYPOINT ["/bin/bash"]
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
# Visual Studio 17
|
||||
VisualStudioVersion = 15.8.2
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LandmarkDetector", "lib\local\LandmarkDetector\LandmarkDetector.vcxproj", "{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}"
|
||||
EndProject
|
||||
@@ -179,4 +179,7 @@ Global
|
||||
{78196985-EE54-411F-822B-5A23EDF80642} = {652CCE53-4997-4B43-9A99-28D075199C99}
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC} = {652CCE53-4997-4B43-9A99-28D075199C99}
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {228609CD-6688-47E7-8D5B-5EE684F8A7A1}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
||||
@@ -1,182 +0,0 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LandmarkDetector", "lib\local\LandmarkDetector\LandmarkDetector.vcxproj", "{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FaceAnalyser", "lib\local\FaceAnalyser\FaceAnalyser.vcxproj", "{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FeatureExtraction", "exe\FeatureExtraction\FeatureExtraction.vcxproj", "{8A23C00D-767D-422D-89A3-CF225E3DAB4B}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Libraries", "Libraries", "{99FEBA13-BDDF-4076-B57E-D8EF4076E20D}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Executables", "Executables", "{9961DDAC-BE6E-4A6E-8EEF-FFC7D67BD631}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FaceLandmarkVidMulti", "exe\FaceLandmarkVidMulti\FaceLandmarkVidMulti.vcxproj", "{C3FAF36F-44BC-4454-87C2-C5106575FE50}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Recording", "exe\Recording\Recording.vcxproj", "{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FaceLandmarkVid", "exe\FaceLandmarkVid\FaceLandmarkVid.vcxproj", "{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FaceLandmarkImg", "exe\FaceLandmarkImg\FaceLandmarkImg.vcxproj", "{DDC3535E-526C-44EC-9DF4-739E2D3A323B}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GazeAnalyser", "lib\local\GazeAnalyser\GazeAnalyser.vcxproj", "{5F915541-F531-434F-9C81-79F5DB58012B}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "UtilLibs", "UtilLibs", "{652CCE53-4997-4B43-9A99-28D075199C99}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Utilities", "lib\local\Utilities\Utilities.vcxproj", "{8E741EA2-9386-4CF2-815E-6F9B08991EAC}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gui", "gui", "{E59CF005-539F-484F-9AA6-9F08AC2DB31E}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeadPoseLive", "gui\HeadPose-live\HeadPoseLive.csproj", "{F396362D-821E-4EA6-9BBF-1F6050844118}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenFaceDemo", "gui\OpenFaceDemo\OpenFaceDemo.csproj", "{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenFaceOffline", "gui\OpenFaceOffline\OpenFaceOffline.csproj", "{A4760F41-2B1F-4144-B7B2-62785AFFE79B}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CppInerop", "lib\local\CppInerop\CppInerop.vcxproj", "{78196985-EE54-411F-822B-5A23EDF80642}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CameraEnumerator", "lib\3rdParty\CameraEnumerator\CameraEnumerator.vcxproj", "{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
Release|Win32 = Release|Win32
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Debug|x64.Build.0 = Debug|x64
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Release|Win32.Build.0 = Release|Win32
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Release|x64.ActiveCfg = Release|x64
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8}.Release|x64.Build.0 = Release|x64
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Debug|x64.Build.0 = Debug|x64
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Release|Win32.Build.0 = Release|Win32
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Release|x64.ActiveCfg = Release|x64
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}.Release|x64.Build.0 = Release|x64
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Debug|x64.Build.0 = Debug|x64
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Release|Win32.Build.0 = Release|Win32
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Release|x64.ActiveCfg = Release|x64
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B}.Release|x64.Build.0 = Release|x64
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Debug|x64.Build.0 = Debug|x64
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Release|Win32.Build.0 = Release|Win32
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Release|x64.ActiveCfg = Release|x64
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50}.Release|x64.Build.0 = Release|x64
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Debug|x64.Build.0 = Debug|x64
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Release|Win32.Build.0 = Release|Win32
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Release|x64.ActiveCfg = Release|x64
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}.Release|x64.Build.0 = Release|x64
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Debug|x64.Build.0 = Debug|x64
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Release|Win32.Build.0 = Release|Win32
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Release|x64.ActiveCfg = Release|x64
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}.Release|x64.Build.0 = Release|x64
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Debug|x64.Build.0 = Debug|x64
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Release|Win32.Build.0 = Release|Win32
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Release|x64.ActiveCfg = Release|x64
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B}.Release|x64.Build.0 = Release|x64
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Debug|x64.Build.0 = Debug|x64
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Release|Win32.Build.0 = Release|Win32
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Release|x64.ActiveCfg = Release|x64
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B}.Release|x64.Build.0 = Release|x64
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Debug|x64.Build.0 = Debug|x64
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Release|Win32.Build.0 = Release|Win32
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Release|x64.ActiveCfg = Release|x64
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC}.Release|x64.Build.0 = Release|x64
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Debug|Win32.ActiveCfg = Debug|x86
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Debug|Win32.Build.0 = Debug|x86
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Release|Win32.ActiveCfg = Release|x86
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Release|Win32.Build.0 = Release|x86
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118}.Release|x64.Build.0 = Release|Any CPU
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Debug|Win32.ActiveCfg = Debug|x86
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Debug|Win32.Build.0 = Debug|x86
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Release|Win32.ActiveCfg = Release|x86
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Release|Win32.Build.0 = Release|x86
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90}.Release|x64.Build.0 = Release|Any CPU
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Debug|Win32.ActiveCfg = Debug|x86
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Debug|Win32.Build.0 = Debug|x86
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Release|Win32.ActiveCfg = Release|x86
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Release|Win32.Build.0 = Release|x86
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B}.Release|x64.Build.0 = Release|Any CPU
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Debug|x64.Build.0 = Debug|x64
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Release|Win32.Build.0 = Release|Win32
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Release|x64.ActiveCfg = Release|x64
|
||||
{78196985-EE54-411F-822B-5A23EDF80642}.Release|x64.Build.0 = Release|x64
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Debug|x64.Build.0 = Debug|x64
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Release|Win32.Build.0 = Release|Win32
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Release|x64.ActiveCfg = Release|x64
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(NestedProjects) = preSolution
|
||||
{BDC1D107-DE17-4705-8E7B-CDDE8BFB2BF8} = {99FEBA13-BDDF-4076-B57E-D8EF4076E20D}
|
||||
{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7} = {99FEBA13-BDDF-4076-B57E-D8EF4076E20D}
|
||||
{8A23C00D-767D-422D-89A3-CF225E3DAB4B} = {9961DDAC-BE6E-4A6E-8EEF-FFC7D67BD631}
|
||||
{C3FAF36F-44BC-4454-87C2-C5106575FE50} = {9961DDAC-BE6E-4A6E-8EEF-FFC7D67BD631}
|
||||
{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC} = {9961DDAC-BE6E-4A6E-8EEF-FFC7D67BD631}
|
||||
{34032CF2-1B99-4A25-9050-E9C13DD4CD0A} = {9961DDAC-BE6E-4A6E-8EEF-FFC7D67BD631}
|
||||
{DDC3535E-526C-44EC-9DF4-739E2D3A323B} = {9961DDAC-BE6E-4A6E-8EEF-FFC7D67BD631}
|
||||
{5F915541-F531-434F-9C81-79F5DB58012B} = {99FEBA13-BDDF-4076-B57E-D8EF4076E20D}
|
||||
{8E741EA2-9386-4CF2-815E-6F9B08991EAC} = {652CCE53-4997-4B43-9A99-28D075199C99}
|
||||
{F396362D-821E-4EA6-9BBF-1F6050844118} = {E59CF005-539F-484F-9AA6-9F08AC2DB31E}
|
||||
{E143A2AA-312E-4DFE-B61D-9A87CCBC8E90} = {E59CF005-539F-484F-9AA6-9F08AC2DB31E}
|
||||
{A4760F41-2B1F-4144-B7B2-62785AFFE79B} = {E59CF005-539F-484F-9AA6-9F08AC2DB31E}
|
||||
{78196985-EE54-411F-822B-5A23EDF80642} = {652CCE53-4997-4B43-9A99-28D075199C99}
|
||||
{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC} = {652CCE53-4997-4B43-9A99-28D075199C99}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
19
appveyor.yml
19
appveyor.yml
@@ -1,8 +1,11 @@
|
||||
version: 1.0.{build}
|
||||
image: Visual Studio 2017
|
||||
branches:
|
||||
only:
|
||||
- develop
|
||||
- feature/opencv4
|
||||
- master
|
||||
- feature/boost_removal
|
||||
max_jobs: 4
|
||||
configuration:
|
||||
- Release
|
||||
@@ -12,19 +15,9 @@ platform:
|
||||
- Win32
|
||||
# scripts that run after cloning repository
|
||||
install:
|
||||
- ps: $source = "https://www.dropbox.com/s/7na5qsjzz8yfoer/cen_patches_0.25_of.dat?dl=1"
|
||||
- ps: $destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.25_of.dat"
|
||||
- ps: Invoke-WebRequest $source -OutFile $destination
|
||||
- ps: $source = "https://www.dropbox.com/s/k7bj804cyiu474t/cen_patches_0.35_of.dat?dl=1"
|
||||
- ps: $destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.35_of.dat"
|
||||
- ps: Invoke-WebRequest $source -OutFile $destination
|
||||
- ps: $source = "https://www.dropbox.com/s/ixt4vkbmxgab1iu/cen_patches_0.50_of.dat?dl=1"
|
||||
- ps: $destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.50_of.dat"
|
||||
- ps: Invoke-WebRequest $source -OutFile $destination
|
||||
- ps: $source = "https://www.dropbox.com/s/2t5t1sdpshzfhpj/cen_patches_1.00_of.dat?dl=1"
|
||||
- ps: $destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_1.00_of.dat"
|
||||
- ps: Invoke-WebRequest $source -OutFile $destination
|
||||
|
||||
- ps: '& ".\download_libraries.ps1"'
|
||||
- ps: '& ".\download_models.ps1"'
|
||||
|
||||
build:
|
||||
project: OpenFace.sln
|
||||
verbosity: minimal
|
||||
|
||||
24
docker-compose.yml
Normal file
24
docker-compose.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
---
|
||||
# Variables can be set from the shell: `DOCKERTAG=foo docker-compose build`
|
||||
# or from a .env file. See docker-compose documentation for details
|
||||
# Variable DOCKERUSER should be set to your dockerhub user
|
||||
# Alternatively, use a docker registry url as the image name
|
||||
version: '3.7'
|
||||
services:
|
||||
## Image runtime service
|
||||
## This can be used to add volume mounts or pass environment variables
|
||||
## Todo: make a service which can use the container interactively
|
||||
openface:
|
||||
container_name: openface
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile
|
||||
image: "${DOCKERUSER}/openface:${DOCKERTAG}"
|
||||
tty: true
|
||||
volumes:
|
||||
- "${DATA_MOUNT}:${DATA_MOUNT}"
|
||||
environment:
|
||||
DATA_MOUNT: "${DATA_MOUNT}"
|
||||
command: ["bash"]
|
||||
|
||||
...
|
||||
110
docker/Dockerfile
Normal file
110
docker/Dockerfile
Normal file
@@ -0,0 +1,110 @@
|
||||
# ==================== Building Model Layer ===========================
|
||||
# This is a little trick to improve caching and minimize rebuild time
|
||||
# and bandwidth. Note that RUN commands only cache-miss if the prior layers
|
||||
# miss, or the dockerfile changes prior to this step.
|
||||
# To update these patch files, be sure to run build with --no-cache
|
||||
FROM alpine as model_data
|
||||
RUN apk --no-cache --update-cache add wget
|
||||
WORKDIR /data/patch_experts
|
||||
|
||||
RUN wget -q https://www.dropbox.com/s/7na5qsjzz8yfoer/cen_patches_0.25_of.dat &&\
|
||||
wget -q https://www.dropbox.com/s/k7bj804cyiu474t/cen_patches_0.35_of.dat &&\
|
||||
wget -q https://www.dropbox.com/s/ixt4vkbmxgab1iu/cen_patches_0.50_of.dat &&\
|
||||
wget -q https://www.dropbox.com/s/2t5t1sdpshzfhpj/cen_patches_1.00_of.dat
|
||||
|
||||
## ==================== Install Ubuntu Base libs ===========================
|
||||
## This will be our base image for OpenFace, and also the base for the compiler
|
||||
## image. We only need packages which are linked
|
||||
|
||||
FROM ubuntu:18.04 as ubuntu_base
|
||||
|
||||
LABEL maintainer="Michael McDermott <mikemcdermott23@gmail.com>"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# todo: minimize this even more
|
||||
RUN apt-get update -qq &&\
|
||||
apt-get install -qq curl &&\
|
||||
apt-get install -qq --no-install-recommends \
|
||||
libopenblas-dev liblapack-dev \
|
||||
libavcodec-dev libavformat-dev libswscale-dev \
|
||||
libtbb2 libtbb-dev libjpeg-dev \
|
||||
libpng-dev libtiff-dev &&\
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
## ==================== Build-time dependency libs ======================
|
||||
## This will build and install opencv and dlib into an additional dummy
|
||||
## directory, /root/diff, so we can later copy in these artifacts,
|
||||
## minimizing docker layer size
|
||||
## Protip: ninja is faster than `make -j` and less likely to lock up system
|
||||
FROM ubuntu_base as cv_deps
|
||||
|
||||
WORKDIR /root/build-dep
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update -qq && apt-get install -qq -y \
|
||||
cmake ninja-build pkg-config build-essential checkinstall\
|
||||
g++-8 &&\
|
||||
rm -rf /var/lib/apt/lists/* &&\
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
|
||||
|
||||
## llvm clang-3.7 libc++-dev libc++abi-dev \
|
||||
## ==================== Building dlib ===========================
|
||||
|
||||
RUN curl http://dlib.net/files/dlib-19.13.tar.bz2 -LO &&\
|
||||
tar xf dlib-19.13.tar.bz2 && \
|
||||
rm dlib-19.13.tar.bz2 &&\
|
||||
mv dlib-19.13 dlib &&\
|
||||
mkdir -p dlib/build &&\
|
||||
cd dlib/build &&\
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -G Ninja .. &&\
|
||||
ninja && \
|
||||
ninja install && \
|
||||
DESTDIR=/root/diff ninja install &&\
|
||||
ldconfig
|
||||
|
||||
## ==================== Building OpenCV ======================
|
||||
ENV OPENCV_VERSION=4.1.0
|
||||
|
||||
RUN curl https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.tar.gz -LO &&\
|
||||
tar xf ${OPENCV_VERSION}.tar.gz && \
|
||||
rm ${OPENCV_VERSION}.tar.gz &&\
|
||||
mv opencv-${OPENCV_VERSION} opencv && \
|
||||
mkdir -p opencv/build && \
|
||||
cd opencv/build && \
|
||||
cmake -D CMAKE_BUILD_TYPE=RELEASE \
|
||||
-D CMAKE_INSTALL_PREFIX=/usr/local \
|
||||
-D WITH_TBB=ON -D WITH_CUDA=OFF \
|
||||
-DWITH_QT=OFF -DWITH_GTK=OFF\
|
||||
-G Ninja .. && \
|
||||
ninja && \
|
||||
ninja install &&\
|
||||
DESTDIR=/root/diff ninja install
|
||||
|
||||
## ==================== Building OpenFace ===========================
|
||||
FROM cv_deps as openface
|
||||
WORKDIR /root/openface
|
||||
|
||||
COPY ./ ./
|
||||
|
||||
COPY --from=model_data /data/patch_experts/* \
|
||||
/root/openface/lib/local/LandmarkDetector/model/patch_experts/
|
||||
|
||||
RUN mkdir -p build && cd build && \
|
||||
cmake -D CMAKE_BUILD_TYPE=RELEASE -G Ninja .. && \
|
||||
ninja &&\
|
||||
DESTDIR=/root/diff ninja install
|
||||
|
||||
|
||||
## ==================== Streamline container ===========================
|
||||
## Clean up - start fresh and only copy in necessary stuff
|
||||
## This shrinks the image from ~8 GB to ~1.6 GB
|
||||
FROM ubuntu_base as final
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
# Copy in only necessary libraries
|
||||
COPY --from=openface /root/diff /
|
||||
|
||||
# Since we "imported" the build artifacts, we need to reconfigure ld
|
||||
RUN ldconfig
|
||||
57
docker/README.md
Normal file
57
docker/README.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# Docker building instructions
|
||||
|
||||
This image can be build with just `docker`, but it is highly recommend to use
|
||||
`docker-compose` as this greatly simplifies and improves the process.
|
||||
|
||||
## Quick start
|
||||
|
||||
To start with the container hosted by the repo maintainer, run
|
||||
|
||||
`docker run -it --rm --name openface algebr/openface:latest`
|
||||
|
||||
This will drop you into a shell with binaries such as FaceLandmarkImg. For example,
|
||||
try this code (in the container):
|
||||
|
||||
```bash
|
||||
curl -o tesla.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/b/b7/Nicola_Tesla_LCCN2014684845.jpg/559px-Nicola_Tesla_LCCN2014684845.jpg
|
||||
FaceLandmarkImg -f tesla.jpg
|
||||
```
|
||||
|
||||
Then, copy the output to the host system (from host terminal):
|
||||
|
||||
```bash
|
||||
docker cp openface:/root/processed /tmp/
|
||||
cd /tmp/processed
|
||||
```
|
||||
|
||||
Tip: On Ubuntu and other *nixes with X running, you can open a file directly
|
||||
like this:
|
||||
```bash
|
||||
xdg-open /tmp/processed/tesla.jpg
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
In repo root, run `docker-compose build` to automatically build and tag.
|
||||
There are two variables which can be used to modify the tag, `$DOCKERUSER` and
|
||||
`$DOCKERTAG`. DC will automatically tag image as
|
||||
`${DOCKERUSER}/openface:${DOCKERTAG}`
|
||||
|
||||
## OpenFace service (in progress)
|
||||
|
||||
To run OpenFace like a service, you can start the container with bind mounts
|
||||
in order to pass data into and out of the container easily.
|
||||
`$DATA_MOUNT` by default is set to `/tmp/openface`. This can be overridden by
|
||||
modifying `.env` file, setting it in your shell environment, or passing in
|
||||
before `docker-compose` at runtime. Note: output will be `root` owner.
|
||||
|
||||
```bash
|
||||
export DATA_MOUNT=/tmp/openface
|
||||
mkdir -p $DATA_MOUNT/tesla # this is just to ensure this is writable by user
|
||||
docker-compose up -d openface && sync # sync is to wait till service starts
|
||||
curl -o $DATA_MOUNT/tesla.jpg \
|
||||
https://upload.wikimedia.org/wikipedia/commons/thumb/b/b7/Nicola_Tesla_LCCN2014684845.jpg/559px-Nicola_Tesla_LCCN2014684845.jpg
|
||||
docker exec -it openface FaceLandmarkImg -f $DATA_MOUNT/tesla.jpg -out_dir $DATA_MOUNT/tesla
|
||||
docker exec -it openface chown -R $UID:$UID $DATA_MOUNT # chown to current user
|
||||
docker-compose down # stop service if you wish
|
||||
```
|
||||
127
download_libraries.ps1
Normal file
127
download_libraries.ps1
Normal file
@@ -0,0 +1,127 @@
|
||||
# Download the OpenCV libraries from the cloud (stored in Dropbox and OneDrive)
|
||||
|
||||
# ffmpeg x64 dll
|
||||
$destination = "lib/3rdParty/OpenCV/bin/opencv_ffmpeg410_64.dll"
|
||||
|
||||
$out_dir = Join-Path (Get-Location) "lib/3rdParty/OpenCV/bin"
|
||||
if(!(Test-Path $out_dir))
|
||||
{
|
||||
New-Item -ItemType directory -Path $out_dir
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://www.dropbox.com/s/gvkd4549wsjvn3u/opencv_ffmpeg410_64.dll?dl=1"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153244&authkey=AEuLAF197Sy7S3M"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
# Release x64 dll
|
||||
$destination = "lib/3rdParty/OpenCV/x64/v141/bin/Release/opencv_world410.dll"
|
||||
|
||||
$out_dir = Join-Path (Get-Location) "lib/3rdParty/OpenCV/x64/v141/bin/Release"
|
||||
if(!(Test-Path $out_dir))
|
||||
{
|
||||
New-Item -ItemType directory -Path $out_dir
|
||||
}
|
||||
|
||||
#if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
#{
|
||||
# $source = "https://www.dropbox.com/s/c81shi8br57xytv/opencv_world410.dll?dl=1"
|
||||
# Invoke-WebRequest $source -OutFile $destination
|
||||
#}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153246&authkey=AJkwseAGKqL3PpU"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
# Debug x64 dll
|
||||
$destination = "lib/3rdParty/OpenCV/x64/v141/bin/Debug/opencv_world410d.dll"
|
||||
|
||||
$out_dir = Join-Path (Get-Location) "lib/3rdParty/OpenCV/x64/v141/bin/Debug"
|
||||
if(!(Test-Path $out_dir))
|
||||
{
|
||||
New-Item -ItemType directory -Path $out_dir
|
||||
}
|
||||
|
||||
#if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
#{
|
||||
# $source = "https://www.dropbox.com/s/8a4kmvpj5a09jdz/opencv_world410d.dll?dl=1"
|
||||
# Invoke-WebRequest $source -OutFile $destination
|
||||
#}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153247&authkey=AJE4pLhzjtkPDWs"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
# ffmpeg x32 dll
|
||||
$destination = "lib/3rdParty/OpenCV/bin/opencv_ffmpeg410.dll"
|
||||
|
||||
$out_dir = Join-Path (Get-Location) "lib/3rdParty/OpenCV/bin"
|
||||
if(!(Test-Path $out_dir))
|
||||
{
|
||||
New-Item -ItemType directory -Path $out_dir
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://www.dropbox.com/s/7pada6etpui97f7/opencv_ffmpeg410.dll?dl=1"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153252&authkey=AEwNPWYZ7sOOhXo"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
# Release x32 dll
|
||||
$destination = "lib/3rdParty/OpenCV/x86/v141/bin/Release/opencv_world410.dll"
|
||||
|
||||
$out_dir = Join-Path (Get-Location) "lib/3rdParty/OpenCV/x86/v141/bin/Release"
|
||||
if(!(Test-Path $out_dir))
|
||||
{
|
||||
New-Item -ItemType directory -Path $out_dir
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://www.dropbox.com/s/qf4rqphvrj2k4d1/opencv_world410.dll?dl=1"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153255&authkey=AJ0S3GY4fCYKFXo"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
# Debug x32 dll
|
||||
$destination = "lib/3rdParty/OpenCV/x86/v141/bin/Debug/opencv_world410d.dll"
|
||||
|
||||
$out_dir = Join-Path (Get-Location) "lib/3rdParty/OpenCV/x86/v141/bin/Debug"
|
||||
if(!(Test-Path $out_dir))
|
||||
{
|
||||
New-Item -ItemType directory -Path $out_dir
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://www.dropbox.com/s/kafps88dbdlg5y2/opencv_world410d.dll?dl=1"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
|
||||
if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
|
||||
{
|
||||
$source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153256&authkey=ALbDTeRByHmWO-M"
|
||||
Invoke-WebRequest $source -OutFile $destination
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
# Download the models from the cloud (stored in Dropbox, OneDrive, and Google Drive)
|
||||
# Download the models from the cloud (stored in Dropbox and OneDrive)
|
||||
|
||||
# Determine correct path to the model files
|
||||
if([System.IO.Directory]::Exists( (Join-Path (Get-Location) 'lib') ))
|
||||
|
||||
@@ -52,16 +52,14 @@
|
||||
#define CONFIG_DIR "~"
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
vector<string> get_arguments(int argc, char **argv)
|
||||
std::vector<std::string> get_arguments(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments;
|
||||
std::vector<std::string> arguments;
|
||||
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
arguments.push_back(string(argv[i]));
|
||||
arguments.push_back(std::string(argv[i]));
|
||||
}
|
||||
return arguments;
|
||||
}
|
||||
@@ -70,13 +68,13 @@ int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
//Convert arguments to more convenient vector form
|
||||
vector<string> arguments = get_arguments(argc, argv);
|
||||
std::vector<std::string> arguments = get_arguments(argc, argv);
|
||||
|
||||
// no arguments: output usage
|
||||
if (arguments.size() == 1)
|
||||
{
|
||||
cout << "For command line arguments see:" << endl;
|
||||
cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
std::cout << "For command line arguments see:" << std::endl;
|
||||
std::cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -86,7 +84,7 @@ int main(int argc, char **argv)
|
||||
// The sequence reader chooses what to open based on command line arguments provided
|
||||
if (!image_reader.Open(arguments))
|
||||
{
|
||||
cout << "Could not open any images" << endl;
|
||||
std::cout << "Could not open any images" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -94,16 +92,16 @@ int main(int argc, char **argv)
|
||||
LandmarkDetector::FaceModelParameters det_parameters(arguments);
|
||||
|
||||
// The modules that are being used for tracking
|
||||
cout << "Loading the model" << endl;
|
||||
std::cout << "Loading the model" << std::endl;
|
||||
LandmarkDetector::CLNF face_model(det_parameters.model_location);
|
||||
|
||||
if (!face_model.loaded_successfully)
|
||||
{
|
||||
cout << "ERROR: Could not load the landmark detector" << endl;
|
||||
std::cout << "ERROR: Could not load the landmark detector" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
cout << "Model loaded" << endl;
|
||||
std::cout << "Model loaded" << std::endl;
|
||||
|
||||
// Load facial feature extractor and AU analyser (make sure it is static)
|
||||
FaceAnalysis::FaceAnalyserParameters face_analysis_params(arguments);
|
||||
@@ -118,7 +116,7 @@ int main(int argc, char **argv)
|
||||
// If can't find MTCNN face detector, default to HOG one
|
||||
if (det_parameters.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && face_detector_mtcnn.empty())
|
||||
{
|
||||
cout << "INFO: defaulting to HOG-SVM face detector" << endl;
|
||||
std::cout << "INFO: defaulting to HOG-SVM face detector" << std::endl;
|
||||
det_parameters.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
|
||||
}
|
||||
|
||||
@@ -131,15 +129,15 @@ int main(int argc, char **argv)
|
||||
|
||||
if (!face_model.eye_model)
|
||||
{
|
||||
cout << "WARNING: no eye model found" << endl;
|
||||
std::cout << "WARNING: no eye model found" << std::endl;
|
||||
}
|
||||
|
||||
if (face_analyser.GetAUClassNames().size() == 0 && face_analyser.GetAUClassNames().size() == 0)
|
||||
{
|
||||
cout << "WARNING: no Action Unit models found" << endl;
|
||||
std::cout << "WARNING: no Action Unit models found" << std::endl;
|
||||
}
|
||||
|
||||
cout << "Starting tracking" << endl;
|
||||
std::cout << "Starting tracking" << std::endl;
|
||||
while (!rgb_image.empty())
|
||||
{
|
||||
|
||||
@@ -158,7 +156,7 @@ int main(int argc, char **argv)
|
||||
cv::Mat_<uchar> grayscale_image = image_reader.GetGrayFrame();
|
||||
|
||||
// Detect faces in an image
|
||||
vector<cv::Rect_<float> > face_detections;
|
||||
std::vector<cv::Rect_<float> > face_detections;
|
||||
|
||||
if (image_reader.has_bounding_boxes)
|
||||
{
|
||||
@@ -168,7 +166,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
if (det_parameters.curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR)
|
||||
{
|
||||
vector<float> confidences;
|
||||
std::vector<float> confidences;
|
||||
LandmarkDetector::DetectFacesHOG(face_detections, grayscale_image, face_detector_hog, confidences);
|
||||
}
|
||||
else if (det_parameters.curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR)
|
||||
@@ -177,7 +175,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
else
|
||||
{
|
||||
vector<float> confidences;
|
||||
std::vector<float> confidences;
|
||||
LandmarkDetector::DetectFacesMTCNN(face_detections, rgb_image, face_detector_mtcnn, confidences);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,34 +23,34 @@
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>FaceLandmarkImg</RootNamespace>
|
||||
<ProjectName>FaceLandmarkImg</ProjectName>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
@@ -58,29 +58,25 @@
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
@@ -111,6 +107,7 @@
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\FaceAnalyser\include;$(SolutionDir)\lib\local\GazeAnalyser\include;$(SolutionDir)\lib\local\Utilities\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -127,6 +124,7 @@
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -147,6 +145,7 @@
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -170,6 +169,7 @@
|
||||
</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
||||
@@ -59,16 +59,14 @@ static void printErrorAndAbort(const std::string & error)
|
||||
#define FATAL_STREAM( stream ) \
|
||||
printErrorAndAbort( std::string( "Fatal error: " ) + stream )
|
||||
|
||||
using namespace std;
|
||||
|
||||
vector<string> get_arguments(int argc, char **argv)
|
||||
std::vector<std::string> get_arguments(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments;
|
||||
std::vector<std::string> arguments;
|
||||
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
arguments.push_back(string(argv[i]));
|
||||
arguments.push_back(std::string(argv[i]));
|
||||
}
|
||||
return arguments;
|
||||
}
|
||||
@@ -76,13 +74,13 @@ vector<string> get_arguments(int argc, char **argv)
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments = get_arguments(argc, argv);
|
||||
std::vector<std::string> arguments = get_arguments(argc, argv);
|
||||
|
||||
// no arguments: output usage
|
||||
if (arguments.size() == 1)
|
||||
{
|
||||
cout << "For command line arguments see:" << endl;
|
||||
cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
std::cout << "For command line arguments see:" << std::endl;
|
||||
std::cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -92,13 +90,13 @@ int main(int argc, char **argv)
|
||||
LandmarkDetector::CLNF face_model(det_parameters.model_location);
|
||||
if (!face_model.loaded_successfully)
|
||||
{
|
||||
cout << "ERROR: Could not load the landmark detector" << endl;
|
||||
std::cout << "ERROR: Could not load the landmark detector" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!face_model.eye_model)
|
||||
{
|
||||
cout << "WARNING: no eye model found" << endl;
|
||||
std::cout << "WARNING: no eye model found" << std::endl;
|
||||
}
|
||||
|
||||
// Open a sequence
|
||||
|
||||
@@ -23,34 +23,34 @@
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>FaceLandmarkVid</RootNamespace>
|
||||
<ProjectName>FaceLandmarkVid</ProjectName>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
@@ -58,29 +58,25 @@
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
@@ -112,6 +108,7 @@
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -129,6 +126,7 @@
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -149,6 +147,7 @@
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -172,6 +171,7 @@
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
||||
@@ -62,21 +62,55 @@ static void printErrorAndAbort(const std::string & error)
|
||||
#define FATAL_STREAM( stream ) \
|
||||
printErrorAndAbort( std::string( "Fatal error: " ) + stream )
|
||||
|
||||
using namespace std;
|
||||
|
||||
vector<string> get_arguments(int argc, char **argv)
|
||||
std::vector<std::string> get_arguments(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments;
|
||||
std::vector<std::string> arguments;
|
||||
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
arguments.push_back(string(argv[i]));
|
||||
arguments.push_back(std::string(argv[i]));
|
||||
}
|
||||
return arguments;
|
||||
}
|
||||
|
||||
void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models, vector<cv::Rect_<float> >& face_detections)
|
||||
double IOU(cv::Rect_<float> rect1, cv::Rect_<float> rect2)
|
||||
{
|
||||
double intersection_area = (rect1 & rect2).area();
|
||||
double union_area = rect1.area() + rect2.area() - intersection_area;
|
||||
return intersection_area / union_area;
|
||||
}
|
||||
|
||||
void RemoveOverlapingModels(std::vector<LandmarkDetector::CLNF>& face_models, std::vector<bool>& active_models)
|
||||
{
|
||||
// Go over the model and eliminate detections that are not informative (there already is a tracker there)
|
||||
for (size_t model1 = 0; model1 < active_models.size(); ++model1)
|
||||
{
|
||||
if (active_models[model1])
|
||||
{
|
||||
// See if the detections intersect
|
||||
cv::Rect_<float> model1_rect = face_models[model1].GetBoundingBox();
|
||||
|
||||
for (int model2 = model1 + 1; model2 < active_models.size(); ++model2)
|
||||
{
|
||||
if(active_models[model2])
|
||||
{
|
||||
cv::Rect_<float> model2_rect = face_models[model2].GetBoundingBox();
|
||||
|
||||
// If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
|
||||
if (IOU(model1_rect, model2_rect) > 0.5)
|
||||
{
|
||||
active_models[model1] = false;
|
||||
face_models[model1].Reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void NonOverlapingDetections(const std::vector<LandmarkDetector::CLNF>& clnf_models, std::vector<cv::Rect_<float> >& face_detections)
|
||||
{
|
||||
|
||||
// Go over the model and eliminate detections that are not informative (there already is a tracker there)
|
||||
@@ -88,11 +122,8 @@ void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models,
|
||||
|
||||
for (int detection = face_detections.size() - 1; detection >= 0; --detection)
|
||||
{
|
||||
double intersection_area = (model_rect & face_detections[detection]).area();
|
||||
double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area;
|
||||
|
||||
// If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
|
||||
if (intersection_area / union_area > 0.5)
|
||||
if (IOU(model_rect, face_detections[detection]) > 0.5)
|
||||
{
|
||||
face_detections.erase(face_detections.begin() + detection);
|
||||
}
|
||||
@@ -103,13 +134,13 @@ void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models,
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments = get_arguments(argc, argv);
|
||||
std::vector<std::string> arguments = get_arguments(argc, argv);
|
||||
|
||||
// no arguments: output usage
|
||||
if (arguments.size() == 1)
|
||||
{
|
||||
cout << "For command line arguments see:" << endl;
|
||||
cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
std::cout << "For command line arguments see:" << std::endl;
|
||||
std::cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -119,12 +150,12 @@ int main(int argc, char **argv)
|
||||
|
||||
det_params.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR;
|
||||
|
||||
vector<LandmarkDetector::FaceModelParameters> det_parameters;
|
||||
std::vector<LandmarkDetector::FaceModelParameters> det_parameters;
|
||||
det_parameters.push_back(det_params);
|
||||
|
||||
// The modules that are being used for tracking
|
||||
vector<LandmarkDetector::CLNF> face_models;
|
||||
vector<bool> active_models;
|
||||
std::vector<LandmarkDetector::CLNF> face_models;
|
||||
std::vector<bool> active_models;
|
||||
|
||||
int num_faces_max = 4;
|
||||
|
||||
@@ -132,7 +163,7 @@ int main(int argc, char **argv)
|
||||
|
||||
if (!face_model.loaded_successfully)
|
||||
{
|
||||
cout << "ERROR: Could not load the landmark detector" << endl;
|
||||
std::cout << "ERROR: Could not load the landmark detector" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -145,7 +176,7 @@ int main(int argc, char **argv)
|
||||
// If can't find MTCNN face detector, default to HOG one
|
||||
if (det_parameters[0].curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && face_model.face_detector_MTCNN.empty())
|
||||
{
|
||||
cout << "INFO: defaulting to HOG-SVM face detector" << endl;
|
||||
std::cout << "INFO: defaulting to HOG-SVM face detector" << std::endl;
|
||||
det_parameters[0].curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
|
||||
}
|
||||
|
||||
@@ -168,12 +199,12 @@ int main(int argc, char **argv)
|
||||
|
||||
if (!face_model.eye_model)
|
||||
{
|
||||
cout << "WARNING: no eye model found" << endl;
|
||||
std::cout << "WARNING: no eye model found" << std::endl;
|
||||
}
|
||||
|
||||
if (face_analyser.GetAUClassNames().size() == 0 && face_analyser.GetAUClassNames().size() == 0)
|
||||
{
|
||||
cout << "WARNING: no Action Unit models found" << endl;
|
||||
std::cout << "WARNING: no Action Unit models found" << std::endl;
|
||||
}
|
||||
|
||||
// Open a sequence
|
||||
@@ -232,7 +263,7 @@ int main(int argc, char **argv)
|
||||
// Reading the images
|
||||
cv::Mat_<uchar> grayscale_image = sequence_reader.GetGrayFrame();
|
||||
|
||||
vector<cv::Rect_<float> > face_detections;
|
||||
std::vector<cv::Rect_<float> > face_detections;
|
||||
|
||||
bool all_models_active = true;
|
||||
for (unsigned int model = 0; model < face_models.size(); ++model)
|
||||
@@ -248,7 +279,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
if (det_parameters[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR)
|
||||
{
|
||||
vector<float> confidences;
|
||||
std::vector<float> confidences;
|
||||
LandmarkDetector::DetectFacesHOG(face_detections, grayscale_image, face_models[0].face_detector_HOG, confidences);
|
||||
}
|
||||
else if (det_parameters[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR)
|
||||
@@ -257,7 +288,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
else
|
||||
{
|
||||
vector<float> confidences;
|
||||
std::vector<float> confidences;
|
||||
LandmarkDetector::DetectFacesMTCNN(face_detections, rgb_image, face_models[0].face_detector_MTCNN, confidences);
|
||||
}
|
||||
|
||||
@@ -314,6 +345,10 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
// Remove models that end up tracking overlapping faces
|
||||
// even if initial bounding boxes were not overlapping, they could have ended up converging to the same face
|
||||
RemoveOverlapingModels(face_models, active_models);
|
||||
|
||||
// Keeping track of FPS
|
||||
fps_tracker.AddFrame();
|
||||
|
||||
@@ -402,10 +437,10 @@ int main(int argc, char **argv)
|
||||
// Reporting progress
|
||||
if (sequence_reader.GetProgress() >= reported_completion / 10.0)
|
||||
{
|
||||
cout << reported_completion * 10 << "% ";
|
||||
std::cout << reported_completion * 10 << "% ";
|
||||
if (reported_completion == 10)
|
||||
{
|
||||
cout << endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
reported_completion = reported_completion + 1;
|
||||
}
|
||||
|
||||
@@ -22,32 +22,32 @@
|
||||
<ProjectGuid>{C3FAF36F-44BC-4454-87C2-C5106575FE50}</ProjectGuid>
|
||||
<RootNamespace>FaceLandmarkVidMulti</RootNamespace>
|
||||
<ProjectName>FaceLandmarkVidMulti</ProjectName>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
@@ -57,29 +57,25 @@
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
@@ -104,6 +100,7 @@
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)\lib\local\LandmarkDetector\include;$(SolutionDir)\lib\local\Utilities\include;$(SolutionDir)\lib\local\FaceAnalyser\include;$(SolutionDir)\lib\local\GazeAnalyser\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
@@ -117,6 +114,7 @@
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
@@ -134,6 +132,7 @@
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
@@ -155,6 +154,7 @@
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
|
||||
@@ -68,17 +68,15 @@ static void printErrorAndAbort(const std::string & error)
|
||||
#define FATAL_STREAM( stream ) \
|
||||
printErrorAndAbort( std::string( "Fatal error: " ) + stream )
|
||||
|
||||
using namespace std;
|
||||
|
||||
vector<string> get_arguments(int argc, char **argv)
|
||||
std::vector<std::string> get_arguments(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments;
|
||||
std::vector<std::string> arguments;
|
||||
|
||||
// First argument is reserved for the name of the executable
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
arguments.push_back(string(argv[i]));
|
||||
arguments.push_back(std::string(argv[i]));
|
||||
}
|
||||
return arguments;
|
||||
}
|
||||
@@ -86,13 +84,13 @@ vector<string> get_arguments(int argc, char **argv)
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments = get_arguments(argc, argv);
|
||||
std::vector<std::string> arguments = get_arguments(argc, argv);
|
||||
|
||||
// no arguments: output usage
|
||||
if (arguments.size() == 1)
|
||||
{
|
||||
cout << "For command line arguments see:" << endl;
|
||||
cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
std::cout << "For command line arguments see:" << std::endl;
|
||||
std::cout << " https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments";
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -104,7 +102,7 @@ int main(int argc, char **argv)
|
||||
|
||||
if (!face_model.loaded_successfully)
|
||||
{
|
||||
cout << "ERROR: Could not load the landmark detector" << endl;
|
||||
std::cout << "ERROR: Could not load the landmark detector" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -114,12 +112,12 @@ int main(int argc, char **argv)
|
||||
|
||||
if (!face_model.eye_model)
|
||||
{
|
||||
cout << "WARNING: no eye model found" << endl;
|
||||
std::cout << "WARNING: no eye model found" << std::endl;
|
||||
}
|
||||
|
||||
if (face_analyser.GetAUClassNames().size() == 0 && face_analyser.GetAUClassNames().size() == 0)
|
||||
{
|
||||
cout << "WARNING: no Action Unit models found" << endl;
|
||||
std::cout << "WARNING: no Action Unit models found" << std::endl;
|
||||
}
|
||||
|
||||
Utilities::SequenceCapture sequence_reader;
|
||||
@@ -158,7 +156,7 @@ int main(int argc, char **argv)
|
||||
Utilities::RecorderOpenFace open_face_rec(sequence_reader.name, recording_params, arguments);
|
||||
|
||||
if (recording_params.outputGaze() && !face_model.eye_model)
|
||||
cout << "WARNING: no eye model defined, but outputting gaze" << endl;
|
||||
std::cout << "WARNING: no eye model defined, but outputting gaze" << std::endl;
|
||||
|
||||
captured_image = sequence_reader.GetNextFrame();
|
||||
|
||||
@@ -240,10 +238,10 @@ int main(int argc, char **argv)
|
||||
// Reporting progress
|
||||
if (sequence_reader.GetProgress() >= reported_completion / 10.0)
|
||||
{
|
||||
cout << reported_completion * 10 << "% ";
|
||||
std::cout << reported_completion * 10 << "% ";
|
||||
if (reported_completion == 10)
|
||||
{
|
||||
cout << endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
reported_completion = reported_completion + 1;
|
||||
}
|
||||
|
||||
@@ -22,34 +22,34 @@
|
||||
<ProjectGuid>{8A23C00D-767D-422D-89A3-CF225E3DAB4B}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>FeatureExtraction</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
@@ -57,29 +57,25 @@
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_x86.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\dlib\dlib.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenBLAS\OpenBLAS_64.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
@@ -111,6 +107,7 @@
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -128,6 +125,7 @@
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -149,6 +147,7 @@
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -173,6 +172,7 @@
|
||||
</EnableEnhancedInstructionSet>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
||||
@@ -43,14 +43,11 @@
|
||||
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/videoio/videoio.hpp> // Video write
|
||||
#include <opencv2/videoio/videoio_c.h> // Video write
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <filesystem.hpp>
|
||||
#include <filesystem/fstream.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
#define INFO_STREAM( stream ) \
|
||||
std::cout << stream << std::endl
|
||||
@@ -70,8 +67,6 @@ static void printErrorAndAbort( const std::string & error )
|
||||
#define FATAL_STREAM( stream ) \
|
||||
printErrorAndAbort( std::string( "Fatal error: " ) + stream )
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Get current date/time, format is YYYY-MM-DD.HH:mm:ss
|
||||
const std::string currentDateTime() {
|
||||
time_t now = time(0);
|
||||
@@ -85,14 +80,14 @@ const std::string currentDateTime() {
|
||||
return buf;
|
||||
}
|
||||
|
||||
vector<string> get_arguments(int argc, char **argv)
|
||||
std::vector<std::string> get_arguments(int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments;
|
||||
std::vector<std::string> arguments;
|
||||
|
||||
for(int i = 1; i < argc; ++i)
|
||||
{
|
||||
arguments.push_back(string(argv[i]));
|
||||
arguments.push_back(std::string(argv[i]));
|
||||
}
|
||||
return arguments;
|
||||
}
|
||||
@@ -100,10 +95,10 @@ vector<string> get_arguments(int argc, char **argv)
|
||||
int main (int argc, char **argv)
|
||||
{
|
||||
|
||||
vector<string> arguments = get_arguments(argc, argv);
|
||||
std::vector<std::string> arguments = get_arguments(argc, argv);
|
||||
|
||||
// Some initial parameters that can be overriden from command line
|
||||
string outroot, outfile;
|
||||
std::string outroot, outfile;
|
||||
|
||||
TCHAR NPath[200];
|
||||
GetCurrentDirectory(200, NPath);
|
||||
@@ -152,16 +147,16 @@ int main (int argc, char **argv)
|
||||
cv::Mat img;
|
||||
vCap >> img;
|
||||
|
||||
boost::filesystem::path dir(outroot);
|
||||
boost::filesystem::create_directory(dir);
|
||||
std::filesystem::path dir(outroot);
|
||||
std::filesystem::create_directory(dir);
|
||||
|
||||
string out_file = outroot + outfile;
|
||||
std::string out_file = outroot + outfile;
|
||||
// saving the videos
|
||||
cv::VideoWriter video_writer(out_file, CV_FOURCC('D','I','V','X'), 30, img.size(), true);
|
||||
cv::VideoWriter video_writer(out_file, cv::VideoWriter::fourcc('D','I','V','X'), 30, img.size(), true);
|
||||
|
||||
ofstream outlog;
|
||||
outlog.open((outroot + outfile + ".log").c_str(), ios_base::out);
|
||||
outlog << "frame, time(ms)" << endl;
|
||||
std::ofstream outlog;
|
||||
outlog.open((outroot + outfile + ".log").c_str(), std::ios_base::out);
|
||||
outlog << "frame, time(ms)" << std::endl;
|
||||
|
||||
double freq = cv::getTickFrequency();
|
||||
|
||||
@@ -180,7 +175,7 @@ int main (int argc, char **argv)
|
||||
video_writer << img;
|
||||
|
||||
outlog << frameProc + 1 << " " << curr_time;
|
||||
outlog << endl;
|
||||
outlog << std::endl;
|
||||
|
||||
|
||||
cv::imshow("rec", img);
|
||||
|
||||
@@ -21,57 +21,53 @@
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{2D80FA0B-2DE8-4475-BA5A-C08A9E1EDAAC}</ProjectGuid>
|
||||
<RootNamespace>Recording</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost_d.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="..\..\lib\3rdParty\boost\boost.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV3.4\openCV3.4.props" />
|
||||
<Import Project="..\..\lib\3rdParty\OpenCV\openCV.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
@@ -86,6 +82,7 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
@@ -96,6 +93,7 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
@@ -110,6 +108,7 @@
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
<CallingConvention>Cdecl</CallingConvention>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
@@ -128,6 +127,7 @@
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
<CallingConvention>Cdecl</CallingConvention>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
clear;
|
||||
version = '2.1.0';
|
||||
version = '2.2.0';
|
||||
|
||||
out_x86 = sprintf('OpenFace_%s_win_x86', version);
|
||||
out_x64 = sprintf('OpenFace_%s_win_x64', version);
|
||||
|
||||
@@ -68,6 +68,7 @@
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Drawing" />
|
||||
<Reference Include="System.Windows.Forms" />
|
||||
<Reference Include="System.Xml" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
|
||||
68
install.sh
68
install.sh
@@ -1,16 +1,13 @@
|
||||
#!/bin/bash
|
||||
#==============================================================================
|
||||
# Title: install.sh
|
||||
# Description: Install everything necessary for OpenFace to compile.
|
||||
# Author: Daniyal Shahrokhian <daniyal@kth.se>
|
||||
# Date: 20170428
|
||||
# Version : 1.02
|
||||
# Description: Install everything necessary for OpenFace to compile.
|
||||
# Will install all required dependencies, only use if you do not have the dependencies
|
||||
# already installed or if you don't mind specific versions of gcc,g++,cmake,opencv etc. installed
|
||||
# Author: Daniyal Shahrokhian <daniyal@kth.se>, Tadas Baltrusaitis <tadyla@gmail.com>
|
||||
# Date: 20190630
|
||||
# Version : 1.03
|
||||
# Usage: bash install.sh
|
||||
# NOTES: There are certain steps to be taken in the system before installing
|
||||
# via this script (refer to README): Run
|
||||
# `sudo gedit /etc/apt/sources.list` and change the line
|
||||
# `deb http://us.archive.ubuntu.com/ubuntu/ xenial main restricted` to
|
||||
# `deb http://us.archive.ubuntu.com/ubuntu/ xenial main universe`
|
||||
#==============================================================================
|
||||
|
||||
# Exit script if any command fails
|
||||
@@ -25,19 +22,49 @@ fi
|
||||
|
||||
# Essential Dependencies
|
||||
echo "Installing Essential dependencies..."
|
||||
|
||||
# If we're not on 18.04
|
||||
sudo apt-get -y update
|
||||
|
||||
if [[ `lsb_release -rs` != "18.04" ]]
|
||||
then
|
||||
echo "Adding ppa:ubuntu-toolchain-r/test apt-repository "
|
||||
sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
|
||||
sudo apt-get -y update
|
||||
fi
|
||||
|
||||
sudo apt-get -y install build-essential
|
||||
sudo apt-get -y install cmake
|
||||
sudo apt-get -y install gcc-8 g++-8
|
||||
|
||||
# Ubuntu 16.04 does not have newest CMake so need to build it manually
|
||||
if [[ `lsb_release -rs` != "18.04" ]]; then
|
||||
sudo apt-get --purge remove cmake-qt-gui -y
|
||||
sudo apt-get --purge remove cmake -y
|
||||
mkdir -p cmake_tmp
|
||||
cd cmake_tmp
|
||||
wget https://cmake.org/files/v3.10/cmake-3.10.1.tar.gz
|
||||
tar -xzvf cmake-3.10.1.tar.gz
|
||||
cd cmake-3.10.1/
|
||||
./bootstrap
|
||||
make -j4
|
||||
sudo make install
|
||||
cd ../..
|
||||
sudo rm -r cmake_tmp
|
||||
else
|
||||
sudo apt-get -y install cmake
|
||||
fi
|
||||
|
||||
sudo apt-get -y install zip
|
||||
sudo apt-get -y install libopenblas-dev liblapack-dev
|
||||
sudo apt-get -y install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
|
||||
sudo apt-get -y install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
|
||||
sudo apt-get -y install libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev
|
||||
sudo apt-get -y install libtbb2 libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
|
||||
echo "Essential dependencies installed."
|
||||
|
||||
# OpenCV Dependency
|
||||
echo "Downloading OpenCV..."
|
||||
wget https://github.com/opencv/opencv/archive/3.4.0.zip
|
||||
unzip 3.4.0.zip
|
||||
cd opencv-3.4.0
|
||||
wget https://github.com/opencv/opencv/archive/4.1.0.zip
|
||||
unzip 4.1.0.zip
|
||||
cd opencv-4.1.0
|
||||
mkdir -p build
|
||||
cd build
|
||||
echo "Installing OpenCV..."
|
||||
@@ -45,8 +72,8 @@ cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB
|
||||
make -j4
|
||||
sudo make install
|
||||
cd ../..
|
||||
rm 3.4.0.zip
|
||||
sudo rm -r opencv-3.4.0
|
||||
rm 4.1.0.zip
|
||||
sudo rm -r opencv-4.1.0
|
||||
echo "OpenCV installed."
|
||||
|
||||
# dlib dependecy
|
||||
@@ -65,16 +92,11 @@ cd ../..;
|
||||
rm -r dlib-19.13.tar.bz2
|
||||
echo "dlib installed"
|
||||
|
||||
# Boost C++ Dependency
|
||||
echo "Installing Boost..."
|
||||
sudo apt-get install libboost-all-dev
|
||||
echo "Boost installed."
|
||||
|
||||
# OpenFace installation
|
||||
echo "Installing OpenFace..."
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -D CMAKE_BUILD_TYPE=RELEASE ..
|
||||
cmake -D CMAKE_CXX_COMPILER=g++-8 -D CMAKE_C_COMPILER=gcc-8 -D CMAKE_BUILD_TYPE=RELEASE ..
|
||||
make
|
||||
cd ..
|
||||
echo "OpenFace successfully installed."
|
||||
@@ -22,32 +22,32 @@
|
||||
<ProjectGuid>{50B7D4BF-E33B-41D0-AA89-76BBA57BF5CC}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>CameraEnumerator</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
@@ -78,6 +78,7 @@
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
@@ -92,6 +93,7 @@
|
||||
<PreprocessorDefinitions>_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
@@ -108,6 +110,7 @@
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
@@ -127,6 +130,7 @@
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<EnableEnhancedInstructionSet>
|
||||
</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
150
lib/3rdParty/OpenCV/include/opencv2/calib3d/calib3d_c.h
vendored
Normal file
150
lib/3rdParty/OpenCV/include/opencv2/calib3d/calib3d_c.h
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CALIB3D_C_H
|
||||
#define OPENCV_CALIB3D_C_H
|
||||
|
||||
#include "opencv2/core/types_c.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Calculates fundamental matrix given a set of corresponding points */
|
||||
#define CV_FM_7POINT 1
|
||||
#define CV_FM_8POINT 2
|
||||
|
||||
#define CV_LMEDS 4
|
||||
#define CV_RANSAC 8
|
||||
|
||||
#define CV_FM_LMEDS_ONLY CV_LMEDS
|
||||
#define CV_FM_RANSAC_ONLY CV_RANSAC
|
||||
#define CV_FM_LMEDS CV_LMEDS
|
||||
#define CV_FM_RANSAC CV_RANSAC
|
||||
|
||||
enum
|
||||
{
|
||||
CV_ITERATIVE = 0,
|
||||
CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
|
||||
CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
|
||||
CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
|
||||
};
|
||||
|
||||
#define CV_CALIB_CB_ADAPTIVE_THRESH 1
|
||||
#define CV_CALIB_CB_NORMALIZE_IMAGE 2
|
||||
#define CV_CALIB_CB_FILTER_QUADS 4
|
||||
#define CV_CALIB_CB_FAST_CHECK 8
|
||||
|
||||
#define CV_CALIB_USE_INTRINSIC_GUESS 1
|
||||
#define CV_CALIB_FIX_ASPECT_RATIO 2
|
||||
#define CV_CALIB_FIX_PRINCIPAL_POINT 4
|
||||
#define CV_CALIB_ZERO_TANGENT_DIST 8
|
||||
#define CV_CALIB_FIX_FOCAL_LENGTH 16
|
||||
#define CV_CALIB_FIX_K1 32
|
||||
#define CV_CALIB_FIX_K2 64
|
||||
#define CV_CALIB_FIX_K3 128
|
||||
#define CV_CALIB_FIX_K4 2048
|
||||
#define CV_CALIB_FIX_K5 4096
|
||||
#define CV_CALIB_FIX_K6 8192
|
||||
#define CV_CALIB_RATIONAL_MODEL 16384
|
||||
#define CV_CALIB_THIN_PRISM_MODEL 32768
|
||||
#define CV_CALIB_FIX_S1_S2_S3_S4 65536
|
||||
#define CV_CALIB_TILTED_MODEL 262144
|
||||
#define CV_CALIB_FIX_TAUX_TAUY 524288
|
||||
#define CV_CALIB_FIX_TANGENT_DIST 2097152
|
||||
|
||||
#define CV_CALIB_NINTRINSIC 18
|
||||
|
||||
#define CV_CALIB_FIX_INTRINSIC 256
|
||||
#define CV_CALIB_SAME_FOCAL_LENGTH 512
|
||||
|
||||
#define CV_CALIB_ZERO_DISPARITY 1024
|
||||
|
||||
/* stereo correspondence parameters and functions */
|
||||
#define CV_STEREO_BM_NORMALIZED_RESPONSE 0
|
||||
#define CV_STEREO_BM_XSOBEL 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CV_EXPORTS CvLevMarq
|
||||
{
|
||||
public:
|
||||
CvLevMarq();
|
||||
CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
|
||||
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
|
||||
bool completeSymmFlag=false );
|
||||
~CvLevMarq();
|
||||
void init( int nparams, int nerrs, CvTermCriteria criteria=
|
||||
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
|
||||
bool completeSymmFlag=false );
|
||||
bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
|
||||
bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
|
||||
|
||||
void clear();
|
||||
void step();
|
||||
enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };
|
||||
|
||||
cv::Ptr<CvMat> mask;
|
||||
cv::Ptr<CvMat> prevParam;
|
||||
cv::Ptr<CvMat> param;
|
||||
cv::Ptr<CvMat> J;
|
||||
cv::Ptr<CvMat> err;
|
||||
cv::Ptr<CvMat> JtJ;
|
||||
cv::Ptr<CvMat> JtJN;
|
||||
cv::Ptr<CvMat> JtErr;
|
||||
cv::Ptr<CvMat> JtJV;
|
||||
cv::Ptr<CvMat> JtJW;
|
||||
double prevErrNorm, errNorm;
|
||||
int lambdaLg10;
|
||||
CvTermCriteria criteria;
|
||||
int state;
|
||||
int iters;
|
||||
bool completeSymmFlag;
|
||||
int solveMethod;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* OPENCV_CALIB3D_C_H */
|
||||
@@ -75,6 +75,7 @@
|
||||
@defgroup core_utils_sse SSE utilities
|
||||
@defgroup core_utils_neon NEON utilities
|
||||
@defgroup core_utils_softfloat Softfloat support
|
||||
@defgroup core_utils_samples Utility functions for OpenCV samples
|
||||
@}
|
||||
@defgroup core_opengl OpenGL interoperability
|
||||
@defgroup core_ipp Intel IPP Asynchronous C/C++ Converters
|
||||
@@ -91,6 +92,7 @@
|
||||
@{
|
||||
@defgroup core_hal_intrin_impl Private implementation helpers
|
||||
@}
|
||||
@defgroup core_lowlevel_api Low-level API for external libraries / plugins
|
||||
@}
|
||||
@}
|
||||
*/
|
||||
@@ -124,7 +126,7 @@ public:
|
||||
/*!
|
||||
\return the error description and the context as a text string.
|
||||
*/
|
||||
virtual const char *what() const throw();
|
||||
virtual const char *what() const throw() CV_OVERRIDE;
|
||||
void formatMessage();
|
||||
|
||||
String msg; ///< the formatted error message
|
||||
@@ -140,11 +142,11 @@ public:
|
||||
|
||||
By default the function prints information about the error to stderr,
|
||||
then it either stops if cv::setBreakOnError() had been called before or raises the exception.
|
||||
It is possible to alternate error processing by using cv::redirectError().
|
||||
It is possible to alternate error processing by using #redirectError().
|
||||
@param exc the exception raisen.
|
||||
@deprecated drop this version
|
||||
*/
|
||||
CV_EXPORTS void error( const Exception& exc );
|
||||
CV_EXPORTS CV_NORETURN void error(const Exception& exc);
|
||||
|
||||
enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently
|
||||
SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
|
||||
@@ -175,7 +177,7 @@ enum CovarFlags {
|
||||
/**The output covariance matrix is calculated as:
|
||||
\f[\texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...] \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T,\f]
|
||||
covar will be a square matrix of the same size as the total number of elements in each input
|
||||
vector. One and only one of COVAR_SCRAMBLED and COVAR_NORMAL must be specified.*/
|
||||
vector. One and only one of #COVAR_SCRAMBLED and #COVAR_NORMAL must be specified.*/
|
||||
COVAR_NORMAL = 1,
|
||||
/** If the flag is specified, the function does not calculate mean from
|
||||
the input vectors but, instead, uses the passed mean vector. This is useful if mean has been
|
||||
@@ -211,28 +213,6 @@ enum KmeansFlags {
|
||||
KMEANS_USE_INITIAL_LABELS = 1
|
||||
};
|
||||
|
||||
//! type of line
|
||||
enum LineTypes {
|
||||
FILLED = -1,
|
||||
LINE_4 = 4, //!< 4-connected line
|
||||
LINE_8 = 8, //!< 8-connected line
|
||||
LINE_AA = 16 //!< antialiased line
|
||||
};
|
||||
|
||||
//! Only a subset of Hershey fonts
|
||||
//! <http://sources.isc.org/utils/misc/hershey-font.txt> are supported
|
||||
enum HersheyFonts {
|
||||
FONT_HERSHEY_SIMPLEX = 0, //!< normal size sans-serif font
|
||||
FONT_HERSHEY_PLAIN = 1, //!< small size sans-serif font
|
||||
FONT_HERSHEY_DUPLEX = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX)
|
||||
FONT_HERSHEY_COMPLEX = 3, //!< normal size serif font
|
||||
FONT_HERSHEY_TRIPLEX = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX)
|
||||
FONT_HERSHEY_COMPLEX_SMALL = 5, //!< smaller version of FONT_HERSHEY_COMPLEX
|
||||
FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font
|
||||
FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX
|
||||
FONT_ITALIC = 16 //!< flag for italic font
|
||||
};
|
||||
|
||||
enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix.
|
||||
REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix.
|
||||
REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix.
|
||||
@@ -266,17 +246,19 @@ Normally, the function is not called directly. It is used inside filtering funct
|
||||
copyMakeBorder.
|
||||
@param p 0-based coordinate of the extrapolated pixel along one of the axes, likely \<0 or \>= len
|
||||
@param len Length of the array along the corresponding axis.
|
||||
@param borderType Border type, one of the cv::BorderTypes, except for cv::BORDER_TRANSPARENT and
|
||||
cv::BORDER_ISOLATED . When borderType==cv::BORDER_CONSTANT , the function always returns -1, regardless
|
||||
@param borderType Border type, one of the #BorderTypes, except for #BORDER_TRANSPARENT and
|
||||
#BORDER_ISOLATED . When borderType==#BORDER_CONSTANT , the function always returns -1, regardless
|
||||
of p and len.
|
||||
|
||||
@sa copyMakeBorder
|
||||
*/
|
||||
CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType);
|
||||
|
||||
/** @example copyMakeBorder_demo.cpp
|
||||
An example using copyMakeBorder function
|
||||
*/
|
||||
/** @example samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
|
||||
An example using copyMakeBorder function.
|
||||
Check @ref tutorial_copyMakeBorder "the corresponding tutorial" for more details
|
||||
*/
|
||||
|
||||
/** @brief Forms a border around an image.
|
||||
|
||||
The function copies the source image into the middle of the destination image. The areas to the
|
||||
@@ -304,7 +286,7 @@ function does not copy src itself but simply constructs the border, for example:
|
||||
@endcode
|
||||
@note When the source image is a part (ROI) of a bigger image, the function will try to use the
|
||||
pixels outside of the ROI to form a border. To disable this feature and always do extrapolation, as
|
||||
if src was not a ROI, use borderType | BORDER_ISOLATED.
|
||||
if src was not a ROI, use borderType | #BORDER_ISOLATED.
|
||||
|
||||
@param src Source image.
|
||||
@param dst Destination image of the same type as src and the size Size(src.cols+left+right,
|
||||
@@ -435,8 +417,13 @@ The function cv::divide divides one array by another:
|
||||
or a scalar by an array when there is no src1 :
|
||||
\f[\texttt{dst(I) = saturate(scale/src2(I))}\f]
|
||||
|
||||
When src2(I) is zero, dst(I) will also be zero. Different channels of
|
||||
multi-channel arrays are processed independently.
|
||||
Different channels of multi-channel arrays are processed independently.
|
||||
|
||||
For integer types when src2(I) is zero, dst(I) will also be zero.
|
||||
|
||||
@note In case of floating point data there is no special defined behavior for zero src2(I) values.
|
||||
Regular floating-point division is used.
|
||||
Expect correct IEEE-754 behaviour for floating-point data (with NaN, Inf result values).
|
||||
|
||||
@note Saturation is not applied when the output array has the depth CV_32S. You may even get
|
||||
result of an incorrect sign in the case of overflow.
|
||||
@@ -475,9 +462,10 @@ The function can also be emulated with a matrix expression, for example:
|
||||
*/
|
||||
CV_EXPORTS_W void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst);
|
||||
|
||||
/** @example AddingImagesTrackbar.cpp
|
||||
/** @example samples/cpp/tutorial_code/HighGUI/AddingImagesTrackbar.cpp
|
||||
Check @ref tutorial_trackbar "the corresponding tutorial" for more details
|
||||
*/
|
||||
|
||||
*/
|
||||
/** @brief Calculates the weighted sum of two arrays.
|
||||
|
||||
The function addWeighted calculates the weighted sum of two arrays as follows:
|
||||
@@ -533,8 +521,9 @@ CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
|
||||
|
||||
/** @brief Converts an array to half precision floating number.
|
||||
|
||||
This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). The input array has to have type of CV_32F or
|
||||
CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error.
|
||||
This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data.
|
||||
There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or
|
||||
CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error.
|
||||
The format of half precision floating point is defined in IEEE 754-2008.
|
||||
|
||||
@param src input array.
|
||||
@@ -599,7 +588,7 @@ or
|
||||
// access pixel coordinates
|
||||
Point pnt = locations[i];
|
||||
@endcode
|
||||
@param src single-channel array (type CV_8UC1)
|
||||
@param src single-channel array
|
||||
@param idx the output array, type of cv::Mat or std::vector<Point>, corresponding to non-zero indices in the input
|
||||
*/
|
||||
CV_EXPORTS_W void findNonZero( InputArray src, OutputArray idx );
|
||||
@@ -642,7 +631,7 @@ CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray mean, OutputArray stdde
|
||||
|
||||
/** @brief Calculates the absolute norm of an array.
|
||||
|
||||
This version of cv::norm calculates the absolute norm of src1. The type of norm to calculate is specified using cv::NormTypes.
|
||||
This version of #norm calculates the absolute norm of src1. The type of norm to calculate is specified using #NormTypes.
|
||||
|
||||
As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
|
||||
The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
|
||||
@@ -664,7 +653,7 @@ It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty}
|
||||
|
||||
When the mask parameter is specified and it is not empty, the norm is
|
||||
|
||||
If normType is not specified, NORM_L2 is used.
|
||||
If normType is not specified, #NORM_L2 is used.
|
||||
calculated only over the region specified by the mask.
|
||||
|
||||
Multi-channel input arrays are treated as single-channel arrays, that is,
|
||||
@@ -673,7 +662,7 @@ the results for all channels are combined.
|
||||
Hamming norms can only be calculated with CV_8U depth arrays.
|
||||
|
||||
@param src1 first input array.
|
||||
@param normType type of the norm (see cv::NormTypes).
|
||||
@param normType type of the norm (see #NormTypes).
|
||||
@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
|
||||
*/
|
||||
CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mask = noArray());
|
||||
@@ -682,24 +671,25 @@ CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mas
|
||||
|
||||
This version of cv::norm calculates the absolute difference norm
|
||||
or the relative difference norm of arrays src1 and src2.
|
||||
The type of norm to calculate is specified using cv::NormTypes.
|
||||
The type of norm to calculate is specified using #NormTypes.
|
||||
|
||||
@param src1 first input array.
|
||||
@param src2 second input array of the same size and the same type as src1.
|
||||
@param normType type of the norm (cv::NormTypes).
|
||||
@param normType type of the norm (see #NormTypes).
|
||||
@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
|
||||
*/
|
||||
CV_EXPORTS_W double norm(InputArray src1, InputArray src2,
|
||||
int normType = NORM_L2, InputArray mask = noArray());
|
||||
/** @overload
|
||||
@param src first input array.
|
||||
@param normType type of the norm (see cv::NormTypes).
|
||||
@param normType type of the norm (see #NormTypes).
|
||||
*/
|
||||
CV_EXPORTS double norm( const SparseMat& src, int normType );
|
||||
|
||||
/** @brief Computes the Peak Signal-to-Noise Ratio (PSNR) image quality metric.
|
||||
|
||||
This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB), between two input arrays src1 and src2. Arrays must have depth CV_8U.
|
||||
This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB),
|
||||
between two input arrays src1 and src2. The arrays must have the same type.
|
||||
|
||||
The PSNR is calculated as follows:
|
||||
|
||||
@@ -707,13 +697,15 @@ The PSNR is calculated as follows:
|
||||
\texttt{PSNR} = 10 \cdot \log_{10}{\left( \frac{R^2}{MSE} \right) }
|
||||
\f]
|
||||
|
||||
where R is the maximum integer value of depth CV_8U (255) and MSE is the mean squared error between the two arrays.
|
||||
where R is the maximum integer value of depth (e.g. 255 in the case of CV_8U data)
|
||||
and MSE is the mean squared error between the two arrays.
|
||||
|
||||
@param src1 first input array.
|
||||
@param src2 second input array of the same size as src1.
|
||||
@param R the maximum pixel value (255 by default)
|
||||
|
||||
*/
|
||||
CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2);
|
||||
CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2, double R=255.);
|
||||
|
||||
/** @brief naive nearest neighbor finder
|
||||
|
||||
@@ -859,11 +851,11 @@ CV_EXPORTS void minMaxLoc(const SparseMat& a, double* minVal,
|
||||
|
||||
/** @brief Reduces a matrix to a vector.
|
||||
|
||||
The function cv::reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of
|
||||
The function #reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of
|
||||
1D vectors and performing the specified operation on the vectors until a single row/column is
|
||||
obtained. For example, the function can be used to compute horizontal and vertical projections of a
|
||||
raster image. In case of REDUCE_MAX and REDUCE_MIN , the output image should have the same type as the source one.
|
||||
In case of REDUCE_SUM and REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy.
|
||||
raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one.
|
||||
In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy.
|
||||
And multi-channel arrays are also supported in these two reduction modes.
|
||||
|
||||
The following code demonstrates its usage for a single channel matrix.
|
||||
@@ -876,7 +868,7 @@ And the following code demonstrates its usage for a two-channel matrix.
|
||||
@param dst output vector. Its size and type is defined by dim and dtype parameters.
|
||||
@param dim dimension index along which the matrix is reduced. 0 means that the matrix is reduced to
|
||||
a single row. 1 means that the matrix is reduced to a single column.
|
||||
@param rtype reduction operation that could be one of cv::ReduceTypes
|
||||
@param rtype reduction operation that could be one of #ReduceTypes
|
||||
@param dtype when negative, the output vector will have the same type as the input matrix,
|
||||
otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()).
|
||||
@sa repeat
|
||||
@@ -1063,19 +1055,19 @@ around both axes.
|
||||
CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode);
|
||||
|
||||
enum RotateFlags {
|
||||
ROTATE_90_CLOCKWISE = 0, //Rotate 90 degrees clockwise
|
||||
ROTATE_180 = 1, //Rotate 180 degrees clockwise
|
||||
ROTATE_90_COUNTERCLOCKWISE = 2, //Rotate 270 degrees clockwise
|
||||
ROTATE_90_CLOCKWISE = 0, //!<Rotate 90 degrees clockwise
|
||||
ROTATE_180 = 1, //!<Rotate 180 degrees clockwise
|
||||
ROTATE_90_COUNTERCLOCKWISE = 2, //!<Rotate 270 degrees clockwise
|
||||
};
|
||||
/** @brief Rotates a 2D array in multiples of 90 degrees.
|
||||
The function rotate rotates the array in one of three different ways:
|
||||
* Rotate by 90 degrees clockwise (rotateCode = ROTATE_90).
|
||||
The function cv::rotate rotates the array in one of three different ways:
|
||||
* Rotate by 90 degrees clockwise (rotateCode = ROTATE_90_CLOCKWISE).
|
||||
* Rotate by 180 degrees clockwise (rotateCode = ROTATE_180).
|
||||
* Rotate by 270 degrees clockwise (rotateCode = ROTATE_270).
|
||||
* Rotate by 270 degrees clockwise (rotateCode = ROTATE_90_COUNTERCLOCKWISE).
|
||||
@param src input array.
|
||||
@param dst output array of the same type as src. The size is the same with ROTATE_180,
|
||||
and the rows and cols are switched for ROTATE_90 and ROTATE_270.
|
||||
@param rotateCode an enum to specify how to rotate the array; see the enum RotateFlags
|
||||
and the rows and cols are switched for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE.
|
||||
@param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags
|
||||
@sa transpose , repeat , completeSymm, flip, RotateFlags
|
||||
*/
|
||||
CV_EXPORTS_W void rotate(InputArray src, OutputArray dst, int rotateCode);
|
||||
@@ -1356,6 +1348,17 @@ You may even get a negative value in the case of overflow.
|
||||
*/
|
||||
CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst);
|
||||
|
||||
/** @brief This is an overloaded member function, provided for convenience (python)
|
||||
Copies the matrix to another one.
|
||||
When the operation mask is specified, if the Mat::create call shown above reallocates the matrix, the newly allocated matrix is initialized with all zeros before copying the data.
|
||||
@param src source matrix.
|
||||
@param dst Destination matrix. If it does not have a proper size or type before the operation, it is
|
||||
reallocated.
|
||||
@param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix
|
||||
elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels.
|
||||
*/
|
||||
|
||||
void CV_EXPORTS_W copyTo(InputArray src, OutputArray dst, InputArray mask);
|
||||
/** @brief Checks if array elements lie between the elements of two other arrays.
|
||||
|
||||
The function checks the range as follows:
|
||||
@@ -1739,20 +1742,21 @@ element is a 2D/3D vector to be transformed.
|
||||
*/
|
||||
CV_EXPORTS_W void perspectiveTransform(InputArray src, OutputArray dst, InputArray m );
|
||||
|
||||
/** @brief Copies the lower or the upper half of a square matrix to another half.
|
||||
/** @brief Copies the lower or the upper half of a square matrix to its another half.
|
||||
|
||||
The function cv::completeSymm copies the lower half of a square matrix to
|
||||
The function cv::completeSymm copies the lower or the upper half of a square matrix to
|
||||
its another half. The matrix diagonal remains unchanged:
|
||||
* \f$\texttt{mtx}_{ij}=\texttt{mtx}_{ji}\f$ for \f$i > j\f$ if
|
||||
- \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i > j\f$ if
|
||||
lowerToUpper=false
|
||||
* \f$\texttt{mtx}_{ij}=\texttt{mtx}_{ji}\f$ for \f$i < j\f$ if
|
||||
- \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i < j\f$ if
|
||||
lowerToUpper=true
|
||||
@param mtx input-output floating-point square matrix.
|
||||
|
||||
@param m input-output floating-point square matrix.
|
||||
@param lowerToUpper operation flag; if true, the lower half is copied to
|
||||
the upper half. Otherwise, the upper half is copied to the lower half.
|
||||
@sa flip, transpose
|
||||
*/
|
||||
CV_EXPORTS_W void completeSymm(InputOutputArray mtx, bool lowerToUpper = false);
|
||||
CV_EXPORTS_W void completeSymm(InputOutputArray m, bool lowerToUpper = false);
|
||||
|
||||
/** @brief Initializes a scaled identity matrix.
|
||||
|
||||
@@ -1802,15 +1806,15 @@ The function cv::invert inverts the matrix src and stores the result in dst
|
||||
the pseudo-inverse matrix (the dst matrix) so that norm(src\*dst - I) is
|
||||
minimal, where I is an identity matrix.
|
||||
|
||||
In case of the DECOMP_LU method, the function returns non-zero value if
|
||||
In case of the #DECOMP_LU method, the function returns non-zero value if
|
||||
the inverse has been successfully calculated and 0 if src is singular.
|
||||
|
||||
In case of the DECOMP_SVD method, the function returns the inverse
|
||||
In case of the #DECOMP_SVD method, the function returns the inverse
|
||||
condition number of src (the ratio of the smallest singular value to the
|
||||
largest singular value) and 0 if src is singular. The SVD method
|
||||
calculates a pseudo-inverse matrix if src is singular.
|
||||
|
||||
Similarly to DECOMP_LU, the method DECOMP_CHOLESKY works only with
|
||||
Similarly to #DECOMP_LU, the method #DECOMP_CHOLESKY works only with
|
||||
non-singular square matrices that should also be symmetrical and
|
||||
positively defined. In this case, the function stores the inverted
|
||||
matrix in dst and returns non-zero. Otherwise, it returns 0.
|
||||
@@ -1826,10 +1830,10 @@ CV_EXPORTS_W double invert(InputArray src, OutputArray dst, int flags = DECOMP_L
|
||||
|
||||
The function cv::solve solves a linear system or least-squares problem (the
|
||||
latter is possible with SVD or QR methods, or by specifying the flag
|
||||
DECOMP_NORMAL ):
|
||||
#DECOMP_NORMAL ):
|
||||
\f[\texttt{dst} = \arg \min _X \| \texttt{src1} \cdot \texttt{X} - \texttt{src2} \|\f]
|
||||
|
||||
If DECOMP_LU or DECOMP_CHOLESKY method is used, the function returns 1
|
||||
If #DECOMP_LU or #DECOMP_CHOLESKY method is used, the function returns 1
|
||||
if src1 (or \f$\texttt{src1}^T\texttt{src1}\f$ ) is non-singular. Otherwise,
|
||||
it returns 0. In the latter case, dst is not valid. Other methods find a
|
||||
pseudo-solution in case of a singular left-hand side part.
|
||||
@@ -1841,7 +1845,7 @@ will not do the work. Use SVD::solveZ instead.
|
||||
@param src1 input matrix on the left-hand side of the system.
|
||||
@param src2 input matrix on the right-hand side of the system.
|
||||
@param dst output solution.
|
||||
@param flags solution (matrix inversion) method (cv::DecompTypes)
|
||||
@param flags solution (matrix inversion) method (#DecompTypes)
|
||||
@sa invert, SVD, eigen
|
||||
*/
|
||||
CV_EXPORTS_W bool solve(InputArray src1, InputArray src2,
|
||||
@@ -1857,7 +1861,7 @@ proper comparison predicate.
|
||||
|
||||
@param src input single-channel array.
|
||||
@param dst output array of the same size and type as src.
|
||||
@param flags operation flags, a combination of cv::SortFlags
|
||||
@param flags operation flags, a combination of #SortFlags
|
||||
@sa sortIdx, randShuffle
|
||||
*/
|
||||
CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags);
|
||||
@@ -1893,6 +1897,7 @@ The function solveCubic finds the real roots of a cubic equation:
|
||||
The roots are stored in the roots array.
|
||||
@param coeffs equation coefficients, an array of 3 or 4 elements.
|
||||
@param roots output array of real roots that has 1 or 3 elements.
|
||||
@return number of real roots. It can be 0, 1 or 2.
|
||||
*/
|
||||
CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots);
|
||||
|
||||
@@ -1953,7 +1958,7 @@ the set of input vectors.
|
||||
@param nsamples number of samples
|
||||
@param covar output covariance matrix of the type ctype and square size.
|
||||
@param mean input or output (depending on the flags) array as the average value of the input vectors.
|
||||
@param flags operation flags as a combination of cv::CovarFlags
|
||||
@param flags operation flags as a combination of #CovarFlags
|
||||
@param ctype type of the matrixl; it equals 'CV_64F' by default.
|
||||
@sa PCA, mulTransposed, Mahalanobis
|
||||
@todo InputArrayOfArrays
|
||||
@@ -1962,11 +1967,11 @@ CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, M
|
||||
int flags, int ctype = CV_64F);
|
||||
|
||||
/** @overload
|
||||
@note use cv::COVAR_ROWS or cv::COVAR_COLS flag
|
||||
@note use #COVAR_ROWS or #COVAR_COLS flag
|
||||
@param samples samples stored as rows/columns of a single matrix.
|
||||
@param covar output covariance matrix of the type ctype and square size.
|
||||
@param mean input or output (depending on the flags) array as the average value of the input vectors.
|
||||
@param flags operation flags as a combination of cv::CovarFlags
|
||||
@param flags operation flags as a combination of #CovarFlags
|
||||
@param ctype type of the matrixl; it equals 'CV_64F' by default.
|
||||
*/
|
||||
CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
|
||||
@@ -1976,10 +1981,20 @@ CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
|
||||
CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
|
||||
OutputArray eigenvectors, int maxComponents = 0);
|
||||
|
||||
/** wrap PCA::operator() and add eigenvalues output parameter */
|
||||
CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean,
|
||||
OutputArray eigenvectors, OutputArray eigenvalues,
|
||||
int maxComponents = 0);
|
||||
|
||||
/** wrap PCA::operator() */
|
||||
CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
|
||||
OutputArray eigenvectors, double retainedVariance);
|
||||
|
||||
/** wrap PCA::operator() and add eigenvalues output parameter */
|
||||
CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean,
|
||||
OutputArray eigenvectors, OutputArray eigenvalues,
|
||||
double retainedVariance);
|
||||
|
||||
/** wrap PCA::project */
|
||||
CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean,
|
||||
InputArray eigenvectors, OutputArray result);
|
||||
@@ -1999,8 +2014,8 @@ CV_EXPORTS_W void SVBackSubst( InputArray w, InputArray u, InputArray vt,
|
||||
|
||||
The function cv::Mahalanobis calculates and returns the weighted distance between two vectors:
|
||||
\f[d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar(i,j)}\cdot(\texttt{vec1}(I)-\texttt{vec2}(I))\cdot(\texttt{vec1(j)}-\texttt{vec2(j)})} }\f]
|
||||
The covariance matrix may be calculated using the cv::calcCovarMatrix function and then inverted using
|
||||
the invert function (preferably using the cv::DECOMP_SVD method, as the most accurate).
|
||||
The covariance matrix may be calculated using the #calcCovarMatrix function and then inverted using
|
||||
the invert function (preferably using the #DECOMP_SVD method, as the most accurate).
|
||||
@param v1 first 1D input vector.
|
||||
@param v2 second 1D input vector.
|
||||
@param icovar inverse covariance matrix.
|
||||
@@ -2030,28 +2045,28 @@ is how 2D *CCS* spectrum looks:
|
||||
In case of 1D transform of a real vector, the output looks like the first row of the matrix above.
|
||||
|
||||
So, the function chooses an operation mode depending on the flags and size of the input array:
|
||||
- If DFT_ROWS is set or the input array has a single row or single column, the function
|
||||
performs a 1D forward or inverse transform of each row of a matrix when DFT_ROWS is set.
|
||||
- If #DFT_ROWS is set or the input array has a single row or single column, the function
|
||||
performs a 1D forward or inverse transform of each row of a matrix when #DFT_ROWS is set.
|
||||
Otherwise, it performs a 2D transform.
|
||||
- If the input array is real and DFT_INVERSE is not set, the function performs a forward 1D or
|
||||
- If the input array is real and #DFT_INVERSE is not set, the function performs a forward 1D or
|
||||
2D transform:
|
||||
- When DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as
|
||||
- When #DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as
|
||||
input.
|
||||
- When DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as
|
||||
- When #DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as
|
||||
input. In case of 2D transform, it uses the packed format as shown above. In case of a
|
||||
single 1D transform, it looks like the first row of the matrix above. In case of
|
||||
multiple 1D transforms (when using the DFT_ROWS flag), each row of the output matrix
|
||||
multiple 1D transforms (when using the #DFT_ROWS flag), each row of the output matrix
|
||||
looks like the first row of the matrix above.
|
||||
- If the input array is complex and either DFT_INVERSE or DFT_REAL_OUTPUT are not set, the
|
||||
- If the input array is complex and either #DFT_INVERSE or #DFT_REAL_OUTPUT are not set, the
|
||||
output is a complex array of the same size as input. The function performs a forward or
|
||||
inverse 1D or 2D transform of the whole input array or each row of the input array
|
||||
independently, depending on the flags DFT_INVERSE and DFT_ROWS.
|
||||
- When DFT_INVERSE is set and the input array is real, or it is complex but DFT_REAL_OUTPUT
|
||||
- When #DFT_INVERSE is set and the input array is real, or it is complex but #DFT_REAL_OUTPUT
|
||||
is set, the output is a real array of the same size as input. The function performs a 1D or 2D
|
||||
inverse transformation of the whole input array or each individual row, depending on the flags
|
||||
DFT_INVERSE and DFT_ROWS.
|
||||
#DFT_INVERSE and #DFT_ROWS.
|
||||
|
||||
If DFT_SCALE is set, the scaling is done after the transformation.
|
||||
If #DFT_SCALE is set, the scaling is done after the transformation.
|
||||
|
||||
Unlike dct , the function supports arrays of arbitrary size. But only those arrays are processed
|
||||
efficiently, whose sizes can be factorized in a product of small prime numbers (2, 3, and 5 in the
|
||||
@@ -2117,7 +2132,7 @@ To optimize this sample, consider the following approaches:
|
||||
- If different tiles in C can be calculated in parallel and, thus, the convolution is done by
|
||||
parts, the loop can be threaded.
|
||||
|
||||
All of the above improvements have been implemented in matchTemplate and filter2D . Therefore, by
|
||||
All of the above improvements have been implemented in #matchTemplate and #filter2D . Therefore, by
|
||||
using them, you can get the performance even better than with the above theoretically optimal
|
||||
implementation. Though, those two functions actually calculate cross-correlation, not convolution,
|
||||
so you need to "flip" the second convolution operand B vertically and horizontally using flip .
|
||||
@@ -2130,10 +2145,10 @@ so you need to "flip" the second convolution operand B vertically and horizontal
|
||||
opencv_source/samples/python/dft.py
|
||||
@param src input array that could be real or complex.
|
||||
@param dst output array whose size and type depends on the flags .
|
||||
@param flags transformation flags, representing a combination of the cv::DftFlags
|
||||
@param flags transformation flags, representing a combination of the #DftFlags
|
||||
@param nonzeroRows when the parameter is not zero, the function assumes that only the first
|
||||
nonzeroRows rows of the input array (DFT_INVERSE is not set) or only the first nonzeroRows of the
|
||||
output array (DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the
|
||||
nonzeroRows rows of the input array (#DFT_INVERSE is not set) or only the first nonzeroRows of the
|
||||
output array (#DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the
|
||||
rows more efficiently and save some time; this technique is very useful for calculating array
|
||||
cross-correlation or convolution using DFT.
|
||||
@sa dct , getOptimalDFTSize , mulSpectrums, filter2D , matchTemplate , flip , cartToPolar ,
|
||||
@@ -2143,13 +2158,13 @@ CV_EXPORTS_W void dft(InputArray src, OutputArray dst, int flags = 0, int nonzer
|
||||
|
||||
/** @brief Calculates the inverse Discrete Fourier Transform of a 1D or 2D array.
|
||||
|
||||
idft(src, dst, flags) is equivalent to dft(src, dst, flags | DFT_INVERSE) .
|
||||
@note None of dft and idft scales the result by default. So, you should pass DFT_SCALE to one of
|
||||
idft(src, dst, flags) is equivalent to dft(src, dst, flags | #DFT_INVERSE) .
|
||||
@note None of dft and idft scales the result by default. So, you should pass #DFT_SCALE to one of
|
||||
dft or idft explicitly to make these transforms mutually inverse.
|
||||
@sa dft, dct, idct, mulSpectrums, getOptimalDFTSize
|
||||
@param src input floating-point real or complex array.
|
||||
@param dst output array whose size and type depend on the flags.
|
||||
@param flags operation flags (see dft and cv::DftFlags).
|
||||
@param flags operation flags (see dft and #DftFlags).
|
||||
@param nonzeroRows number of dst rows to process; the rest of the rows have undefined content (see
|
||||
the convolution sample in dft description.
|
||||
*/
|
||||
@@ -2174,9 +2189,9 @@ floating-point array:
|
||||
\f[X = \left (C^{(N)} \right )^T \cdot X \cdot C^{(N)}\f]
|
||||
|
||||
The function chooses the mode of operation by looking at the flags and size of the input array:
|
||||
- If (flags & DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it
|
||||
- If (flags & #DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it
|
||||
is an inverse 1D or 2D transform.
|
||||
- If (flags & DCT_ROWS) != 0 , the function performs a 1D transform of each row.
|
||||
- If (flags & #DCT_ROWS) != 0 , the function performs a 1D transform of each row.
|
||||
- If the array is a single column or a single row, the function performs a 1D transform.
|
||||
- If none of the above is true, the function performs a 2D transform.
|
||||
|
||||
@@ -2515,14 +2530,18 @@ public:
|
||||
Mat mean; //!< mean value subtracted before the projection and added after the back projection
|
||||
};
|
||||
|
||||
/** @example pca.cpp
|
||||
An example using %PCA for dimensionality reduction while maintaining an amount of variance
|
||||
*/
|
||||
/** @example samples/cpp/pca.cpp
|
||||
An example using %PCA for dimensionality reduction while maintaining an amount of variance
|
||||
*/
|
||||
|
||||
/** @example samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp
|
||||
Check @ref tutorial_introduction_to_pca "the corresponding tutorial" for more details
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Linear Discriminant Analysis
|
||||
@todo document this class
|
||||
*/
|
||||
@brief Linear Discriminant Analysis
|
||||
@todo document this class
|
||||
*/
|
||||
class CV_EXPORTS LDA
|
||||
{
|
||||
public:
|
||||
@@ -2584,7 +2603,6 @@ public:
|
||||
static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);
|
||||
|
||||
protected:
|
||||
bool _dataAsRow; // unused, but needed for 3.0 ABI compatibility.
|
||||
int _num_components;
|
||||
Mat _eigenvectors;
|
||||
Mat _eigenvalues;
|
||||
@@ -2629,7 +2647,7 @@ public:
|
||||
|
||||
/** @overload
|
||||
initializes an empty SVD structure and then calls SVD::operator()
|
||||
@param src decomposed matrix.
|
||||
@param src decomposed matrix. The depth has to be CV_32F or CV_64F.
|
||||
@param flags operation flags (SVD::Flags)
|
||||
*/
|
||||
SVD( InputArray src, int flags = 0 );
|
||||
@@ -2642,7 +2660,7 @@ public:
|
||||
different matrices. Each time, if needed, the previous u,`vt` , and w
|
||||
are reclaimed and the new matrices are created, which is all handled by
|
||||
Mat::create.
|
||||
@param src decomposed matrix.
|
||||
@param src decomposed matrix. The depth has to be CV_32F or CV_64F.
|
||||
@param flags operation flags (SVD::Flags)
|
||||
*/
|
||||
SVD& operator ()( InputArray src, int flags = 0 );
|
||||
@@ -2658,18 +2676,18 @@ public:
|
||||
SVD::compute(A, w, u, vt);
|
||||
@endcode
|
||||
|
||||
@param src decomposed matrix
|
||||
@param src decomposed matrix. The depth has to be CV_32F or CV_64F.
|
||||
@param w calculated singular values
|
||||
@param u calculated left singular vectors
|
||||
@param vt transposed matrix of right singular values
|
||||
@param flags operation flags - see SVD::SVD.
|
||||
@param vt transposed matrix of right singular vectors
|
||||
@param flags operation flags - see SVD::Flags.
|
||||
*/
|
||||
static void compute( InputArray src, OutputArray w,
|
||||
OutputArray u, OutputArray vt, int flags = 0 );
|
||||
|
||||
/** @overload
|
||||
computes singular values of a matrix
|
||||
@param src decomposed matrix
|
||||
@param src decomposed matrix. The depth has to be CV_32F or CV_64F.
|
||||
@param w calculated singular values
|
||||
@param flags operation flags - see SVD::Flags.
|
||||
*/
|
||||
@@ -2713,7 +2731,7 @@ public:
|
||||
if you need to solve many linear systems with the same left-hand side
|
||||
(for example, src ). If all you need is to solve a single system
|
||||
(possibly with multiple rhs immediately available), simply call solve
|
||||
add pass DECOMP_SVD there. It does absolutely the same thing.
|
||||
add pass #DECOMP_SVD there. It does absolutely the same thing.
|
||||
*/
|
||||
void backSubst( InputArray rhs, OutputArray dst ) const;
|
||||
|
||||
@@ -2838,7 +2856,7 @@ public:
|
||||
use explicit type cast operators, as in the a1 initialization above.
|
||||
@param a lower inclusive boundary of the returned random number.
|
||||
@param b upper non-inclusive boundary of the returned random number.
|
||||
*/
|
||||
*/
|
||||
int uniform(int a, int b);
|
||||
/** @overload */
|
||||
float uniform(float a, float b);
|
||||
@@ -2900,7 +2918,7 @@ public:
|
||||
|
||||
Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
|
||||
@todo document
|
||||
*/
|
||||
*/
|
||||
class CV_EXPORTS RNG_MT19937
|
||||
{
|
||||
public:
|
||||
@@ -2918,17 +2936,11 @@ public:
|
||||
unsigned operator ()(unsigned N);
|
||||
unsigned operator ()();
|
||||
|
||||
/** @brief returns uniformly distributed integer random number from [a,b) range
|
||||
|
||||
*/
|
||||
/** @brief returns uniformly distributed integer random number from [a,b) range*/
|
||||
int uniform(int a, int b);
|
||||
/** @brief returns uniformly distributed floating-point random number from [a,b) range
|
||||
|
||||
*/
|
||||
/** @brief returns uniformly distributed floating-point random number from [a,b) range*/
|
||||
float uniform(float a, float b);
|
||||
/** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range
|
||||
|
||||
*/
|
||||
/** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range*/
|
||||
double uniform(double a, double b);
|
||||
|
||||
private:
|
||||
@@ -2942,14 +2954,14 @@ private:
|
||||
//! @addtogroup core_cluster
|
||||
//! @{
|
||||
|
||||
/** @example kmeans.cpp
|
||||
An example on K-means clustering
|
||||
/** @example samples/cpp/kmeans.cpp
|
||||
An example on K-means clustering
|
||||
*/
|
||||
|
||||
/** @brief Finds centers of clusters and groups input samples around the clusters.
|
||||
|
||||
The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters
|
||||
and groups the input samples around the clusters. As an output, \f$\texttt{labels}_i\f$ contains a
|
||||
and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ contains a
|
||||
0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix.
|
||||
|
||||
@note
|
||||
@@ -2976,7 +2988,7 @@ function parameter).
|
||||
after every attempt. The best (minimum) value is chosen and the corresponding labels and the
|
||||
compactness value are returned by the function. Basically, you can use only the core of the
|
||||
function, set the number of attempts to 1, initialize labels each time using a custom algorithm,
|
||||
pass them with the ( flags = KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best
|
||||
pass them with the ( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best
|
||||
(most-compact) clustering.
|
||||
*/
|
||||
CV_EXPORTS_W double kmeans( InputArray data, int K, InputOutputArray bestLabels,
|
||||
@@ -3003,7 +3015,8 @@ public:
|
||||
class CV_EXPORTS Formatter
|
||||
{
|
||||
public:
|
||||
enum { FMT_DEFAULT = 0,
|
||||
enum FormatType {
|
||||
FMT_DEFAULT = 0,
|
||||
FMT_MATLAB = 1,
|
||||
FMT_CSV = 2,
|
||||
FMT_PYTHON = 3,
|
||||
@@ -3015,11 +3028,12 @@ public:
|
||||
|
||||
virtual Ptr<Formatted> format(const Mat& mtx) const = 0;
|
||||
|
||||
virtual void set16fPrecision(int p = 4) = 0;
|
||||
virtual void set32fPrecision(int p = 8) = 0;
|
||||
virtual void set64fPrecision(int p = 16) = 0;
|
||||
virtual void setMultiline(bool ml = true) = 0;
|
||||
|
||||
static Ptr<Formatter> get(int fmt = FMT_DEFAULT);
|
||||
static Ptr<Formatter> get(Formatter::FormatType fmt = FMT_DEFAULT);
|
||||
|
||||
};
|
||||
|
||||
@@ -3042,7 +3056,7 @@ String& operator << (String& out, const Mat& mtx)
|
||||
|
||||
class CV_EXPORTS Algorithm;
|
||||
|
||||
template<typename _Tp> struct ParamType {};
|
||||
template<typename _Tp, typename _EnumTp = void> struct ParamType {};
|
||||
|
||||
|
||||
/** @brief This is a base class for all more or less complex algorithms in OpenCV
|
||||
@@ -3053,32 +3067,9 @@ matching, graph-cut etc.), background subtraction (which can be done using mixtu
|
||||
models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck
|
||||
etc.).
|
||||
|
||||
Here is example of SIFT use in your application via Algorithm interface:
|
||||
@code
|
||||
#include "opencv2/opencv.hpp"
|
||||
#include "opencv2/xfeatures2d.hpp"
|
||||
using namespace cv::xfeatures2d;
|
||||
|
||||
Ptr<Feature2D> sift = SIFT::create();
|
||||
FileStorage fs("sift_params.xml", FileStorage::READ);
|
||||
if( fs.isOpened() ) // if we have file with parameters, read them
|
||||
{
|
||||
sift->read(fs["sift_params"]);
|
||||
fs.release();
|
||||
}
|
||||
else // else modify the parameters and store them; user can later edit the file to use different parameters
|
||||
{
|
||||
sift->setContrastThreshold(0.01f); // lower the contrast threshold, compared to the default value
|
||||
{
|
||||
WriteStructContext ws(fs, "sift_params", CV_NODE_MAP);
|
||||
sift->write(fs);
|
||||
}
|
||||
}
|
||||
Mat image = imread("myimage.png", 0), descriptors;
|
||||
vector<KeyPoint> keypoints;
|
||||
sift->detectAndCompute(image, noArray(), keypoints, descriptors);
|
||||
@endcode
|
||||
*/
|
||||
Here is example of SimpleBlobDetector use in your application via Algorithm interface:
|
||||
@snippet snippets/core_various.cpp Algorithm
|
||||
*/
|
||||
class CV_EXPORTS_W Algorithm
|
||||
{
|
||||
public:
|
||||
@@ -3091,32 +3082,32 @@ public:
|
||||
|
||||
/** @brief Stores algorithm parameters in a file storage
|
||||
*/
|
||||
virtual void write(FileStorage& fs) const { (void)fs; }
|
||||
virtual void write(FileStorage& fs) const { CV_UNUSED(fs); }
|
||||
|
||||
/** @brief simplified API for language bindings
|
||||
* @overload
|
||||
*/
|
||||
* @overload
|
||||
*/
|
||||
CV_WRAP void write(const Ptr<FileStorage>& fs, const String& name = String()) const;
|
||||
|
||||
/** @brief Reads algorithm parameters from a file storage
|
||||
*/
|
||||
CV_WRAP virtual void read(const FileNode& fn) { (void)fn; }
|
||||
CV_WRAP virtual void read(const FileNode& fn) { CV_UNUSED(fn); }
|
||||
|
||||
/** @brief Returns true if the Algorithm is empty (e.g. in the very beginning or after unsuccessful read
|
||||
*/
|
||||
*/
|
||||
CV_WRAP virtual bool empty() const { return false; }
|
||||
|
||||
/** @brief Reads algorithm from the file node
|
||||
|
||||
This is static template method of Algorithm. It's usage is following (in the case of SVM):
|
||||
@code
|
||||
cv::FileStorage fsRead("example.xml", FileStorage::READ);
|
||||
Ptr<SVM> svm = Algorithm::read<SVM>(fsRead.root());
|
||||
@endcode
|
||||
In order to make this method work, the derived class must overwrite Algorithm::read(const
|
||||
FileNode& fn) and also have static create() method without parameters
|
||||
(or with all the optional parameters)
|
||||
*/
|
||||
This is static template method of Algorithm. It's usage is following (in the case of SVM):
|
||||
@code
|
||||
cv::FileStorage fsRead("example.xml", FileStorage::READ);
|
||||
Ptr<SVM> svm = Algorithm::read<SVM>(fsRead.root());
|
||||
@endcode
|
||||
In order to make this method work, the derived class must overwrite Algorithm::read(const
|
||||
FileNode& fn) and also have static create() method without parameters
|
||||
(or with all the optional parameters)
|
||||
*/
|
||||
template<typename _Tp> static Ptr<_Tp> read(const FileNode& fn)
|
||||
{
|
||||
Ptr<_Tp> obj = _Tp::create();
|
||||
@@ -3126,16 +3117,16 @@ public:
|
||||
|
||||
/** @brief Loads algorithm from the file
|
||||
|
||||
@param filename Name of the file to read.
|
||||
@param objname The optional name of the node to read (if empty, the first top-level node will be used)
|
||||
@param filename Name of the file to read.
|
||||
@param objname The optional name of the node to read (if empty, the first top-level node will be used)
|
||||
|
||||
This is static template method of Algorithm. It's usage is following (in the case of SVM):
|
||||
@code
|
||||
Ptr<SVM> svm = Algorithm::load<SVM>("my_svm_model.xml");
|
||||
@endcode
|
||||
In order to make this method work, the derived class must overwrite Algorithm::read(const
|
||||
FileNode& fn).
|
||||
*/
|
||||
This is static template method of Algorithm. It's usage is following (in the case of SVM):
|
||||
@code
|
||||
Ptr<SVM> svm = Algorithm::load<SVM>("my_svm_model.xml");
|
||||
@endcode
|
||||
In order to make this method work, the derived class must overwrite Algorithm::read(const
|
||||
FileNode& fn).
|
||||
*/
|
||||
template<typename _Tp> static Ptr<_Tp> load(const String& filename, const String& objname=String())
|
||||
{
|
||||
FileStorage fs(filename, FileStorage::READ);
|
||||
@@ -3149,14 +3140,14 @@ public:
|
||||
|
||||
/** @brief Loads algorithm from a String
|
||||
|
||||
@param strModel The string variable containing the model you want to load.
|
||||
@param objname The optional name of the node to read (if empty, the first top-level node will be used)
|
||||
@param strModel The string variable containing the model you want to load.
|
||||
@param objname The optional name of the node to read (if empty, the first top-level node will be used)
|
||||
|
||||
This is static template method of Algorithm. It's usage is following (in the case of SVM):
|
||||
@code
|
||||
Ptr<SVM> svm = Algorithm::loadFromString<SVM>(myStringModel);
|
||||
@endcode
|
||||
*/
|
||||
This is static template method of Algorithm. It's usage is following (in the case of SVM):
|
||||
@code
|
||||
Ptr<SVM> svm = Algorithm::loadFromString<SVM>(myStringModel);
|
||||
@endcode
|
||||
*/
|
||||
template<typename _Tp> static Ptr<_Tp> loadFromString(const String& strModel, const String& objname=String())
|
||||
{
|
||||
FileStorage fs(strModel, FileStorage::READ + FileStorage::MEMORY);
|
||||
@@ -3167,20 +3158,20 @@ public:
|
||||
}
|
||||
|
||||
/** Saves the algorithm to a file.
|
||||
In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
|
||||
In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
|
||||
CV_WRAP virtual void save(const String& filename) const;
|
||||
|
||||
/** Returns the algorithm string identifier.
|
||||
This string is used as top level xml/yml node tag when the object is saved to a file or string. */
|
||||
This string is used as top level xml/yml node tag when the object is saved to a file or string. */
|
||||
CV_WRAP virtual String getDefaultName() const;
|
||||
|
||||
protected:
|
||||
void writeFormat(FileStorage& fs) const;
|
||||
};
|
||||
|
||||
struct Param {
|
||||
enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
|
||||
UNSIGNED_INT=8, UINT64=9, UCHAR=11 };
|
||||
enum struct Param {
|
||||
INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
|
||||
UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12
|
||||
};
|
||||
|
||||
|
||||
@@ -3190,7 +3181,7 @@ template<> struct ParamType<bool>
|
||||
typedef bool const_param_type;
|
||||
typedef bool member_type;
|
||||
|
||||
enum { type = Param::BOOLEAN };
|
||||
static const Param type = Param::BOOLEAN;
|
||||
};
|
||||
|
||||
template<> struct ParamType<int>
|
||||
@@ -3198,7 +3189,7 @@ template<> struct ParamType<int>
|
||||
typedef int const_param_type;
|
||||
typedef int member_type;
|
||||
|
||||
enum { type = Param::INT };
|
||||
static const Param type = Param::INT;
|
||||
};
|
||||
|
||||
template<> struct ParamType<double>
|
||||
@@ -3206,7 +3197,7 @@ template<> struct ParamType<double>
|
||||
typedef double const_param_type;
|
||||
typedef double member_type;
|
||||
|
||||
enum { type = Param::REAL };
|
||||
static const Param type = Param::REAL;
|
||||
};
|
||||
|
||||
template<> struct ParamType<String>
|
||||
@@ -3214,7 +3205,7 @@ template<> struct ParamType<String>
|
||||
typedef const String& const_param_type;
|
||||
typedef String member_type;
|
||||
|
||||
enum { type = Param::STRING };
|
||||
static const Param type = Param::STRING;
|
||||
};
|
||||
|
||||
template<> struct ParamType<Mat>
|
||||
@@ -3222,7 +3213,7 @@ template<> struct ParamType<Mat>
|
||||
typedef const Mat& const_param_type;
|
||||
typedef Mat member_type;
|
||||
|
||||
enum { type = Param::MAT };
|
||||
static const Param type = Param::MAT;
|
||||
};
|
||||
|
||||
template<> struct ParamType<std::vector<Mat> >
|
||||
@@ -3230,7 +3221,7 @@ template<> struct ParamType<std::vector<Mat> >
|
||||
typedef const std::vector<Mat>& const_param_type;
|
||||
typedef std::vector<Mat> member_type;
|
||||
|
||||
enum { type = Param::MAT_VECTOR };
|
||||
static const Param type = Param::MAT_VECTOR;
|
||||
};
|
||||
|
||||
template<> struct ParamType<Algorithm>
|
||||
@@ -3238,7 +3229,7 @@ template<> struct ParamType<Algorithm>
|
||||
typedef const Ptr<Algorithm>& const_param_type;
|
||||
typedef Ptr<Algorithm> member_type;
|
||||
|
||||
enum { type = Param::ALGORITHM };
|
||||
static const Param type = Param::ALGORITHM;
|
||||
};
|
||||
|
||||
template<> struct ParamType<float>
|
||||
@@ -3246,7 +3237,7 @@ template<> struct ParamType<float>
|
||||
typedef float const_param_type;
|
||||
typedef float member_type;
|
||||
|
||||
enum { type = Param::FLOAT };
|
||||
static const Param type = Param::FLOAT;
|
||||
};
|
||||
|
||||
template<> struct ParamType<unsigned>
|
||||
@@ -3254,7 +3245,7 @@ template<> struct ParamType<unsigned>
|
||||
typedef unsigned const_param_type;
|
||||
typedef unsigned member_type;
|
||||
|
||||
enum { type = Param::UNSIGNED_INT };
|
||||
static const Param type = Param::UNSIGNED_INT;
|
||||
};
|
||||
|
||||
template<> struct ParamType<uint64>
|
||||
@@ -3262,7 +3253,7 @@ template<> struct ParamType<uint64>
|
||||
typedef uint64 const_param_type;
|
||||
typedef uint64 member_type;
|
||||
|
||||
enum { type = Param::UINT64 };
|
||||
static const Param type = Param::UINT64;
|
||||
};
|
||||
|
||||
template<> struct ParamType<uchar>
|
||||
@@ -3270,7 +3261,24 @@ template<> struct ParamType<uchar>
|
||||
typedef uchar const_param_type;
|
||||
typedef uchar member_type;
|
||||
|
||||
enum { type = Param::UCHAR };
|
||||
static const Param type = Param::UCHAR;
|
||||
};
|
||||
|
||||
template<> struct ParamType<Scalar>
|
||||
{
|
||||
typedef const Scalar& const_param_type;
|
||||
typedef Scalar member_type;
|
||||
|
||||
static const Param type = Param::SCALAR;
|
||||
};
|
||||
|
||||
template<typename _Tp>
|
||||
struct ParamType<_Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type>
|
||||
{
|
||||
typedef typename std::underlying_type<_Tp>::type const_param_type;
|
||||
typedef typename std::underlying_type<_Tp>::type member_type;
|
||||
|
||||
static const Param type = Param::INT;
|
||||
};
|
||||
|
||||
//! @} core_basic
|
||||
@@ -55,7 +55,72 @@ namespace cv
|
||||
//! @{
|
||||
|
||||
/** @brief Affine transform
|
||||
@todo document
|
||||
*
|
||||
* It represents a 4x4 homogeneous transformation matrix \f$T\f$
|
||||
*
|
||||
* \f[T =
|
||||
* \begin{bmatrix}
|
||||
* R & t\\
|
||||
* 0 & 1\\
|
||||
* \end{bmatrix}
|
||||
* \f]
|
||||
*
|
||||
* where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
|
||||
*
|
||||
* You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
|
||||
* which is converted to a 3x3 rotation matrix by the Rodrigues formula.
|
||||
*
|
||||
* To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
|
||||
* angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use
|
||||
*
|
||||
* @code
|
||||
* cv::Vec3f r, t;
|
||||
* cv::Affine3f T(r, t);
|
||||
* @endcode
|
||||
*
|
||||
* If you already have the rotation matrix \f$R\f$, then you can use
|
||||
*
|
||||
* @code
|
||||
* cv::Matx33f R;
|
||||
* cv::Affine3f T(R, t);
|
||||
* @endcode
|
||||
*
|
||||
* To extract the rotation matrix \f$R\f$ from \f$T\f$, use
|
||||
*
|
||||
* @code
|
||||
* cv::Matx33f R = T.rotation();
|
||||
* @endcode
|
||||
*
|
||||
* To extract the translation vector \f$t\f$ from \f$T\f$, use
|
||||
*
|
||||
* @code
|
||||
* cv::Vec3f t = T.translation();
|
||||
* @endcode
|
||||
*
|
||||
* To extract the rotation vector \f$r\f$ from \f$T\f$, use
|
||||
*
|
||||
* @code
|
||||
* cv::Vec3f r = T.rvec();
|
||||
* @endcode
|
||||
*
|
||||
* Note that since the mapping from rotation vectors to rotation matrices
|
||||
* is many to one. The returned rotation vector is not necessarily the one
|
||||
* you used before to set the matrix.
|
||||
*
|
||||
* If you have two transformations \f$T = T_1 * T_2\f$, use
|
||||
*
|
||||
* @code
|
||||
* cv::Affine3f T, T1, T2;
|
||||
* T = T2.concatenate(T1);
|
||||
* @endcode
|
||||
*
|
||||
* To get the inverse transform of \f$T\f$, use
|
||||
*
|
||||
* @code
|
||||
* cv::Affine3f T, T_inv;
|
||||
* T_inv = T.inv();
|
||||
* @endcode
|
||||
*
|
||||
*/
|
||||
template<typename T>
|
||||
class Affine3
|
||||
@@ -66,45 +131,127 @@ namespace cv
|
||||
typedef Matx<float_type, 4, 4> Mat4;
|
||||
typedef Vec<float_type, 3> Vec3;
|
||||
|
||||
//! Default constructor. It represents a 4x4 identity matrix.
|
||||
Affine3();
|
||||
|
||||
//! Augmented affine matrix
|
||||
Affine3(const Mat4& affine);
|
||||
|
||||
//! Rotation matrix
|
||||
/**
|
||||
* The resulting 4x4 matrix is
|
||||
*
|
||||
* \f[
|
||||
* \begin{bmatrix}
|
||||
* R & t\\
|
||||
* 0 & 1\\
|
||||
* \end{bmatrix}
|
||||
* \f]
|
||||
*
|
||||
* @param R 3x3 rotation matrix.
|
||||
* @param t 3x1 translation vector.
|
||||
*/
|
||||
Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
|
||||
|
||||
//! Rodrigues vector
|
||||
/**
|
||||
* Rodrigues vector.
|
||||
*
|
||||
* The last row of the current matrix is set to [0,0,0,1].
|
||||
*
|
||||
* @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
|
||||
* indicates the rotation angle in radian (using right hand rule).
|
||||
* @param t 3x1 translation vector.
|
||||
*/
|
||||
Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
|
||||
|
||||
//! Combines all contructors above. Supports 4x4, 4x3, 3x3, 1x3, 3x1 sizes of data matrix
|
||||
/**
|
||||
* Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
|
||||
*
|
||||
* The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
|
||||
*
|
||||
* @param data 1-channel matrix.
|
||||
* when it is 4x4, it is copied to the current matrix and t is not used.
|
||||
* When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
|
||||
* When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
|
||||
* When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
|
||||
* to compute a 3x3 rotation matrix.
|
||||
* @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
|
||||
*/
|
||||
explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
|
||||
|
||||
//! From 16th element array
|
||||
//! From 16-element array
|
||||
explicit Affine3(const float_type* vals);
|
||||
|
||||
//! Create identity transform
|
||||
//! Create an 4x4 identity transform
|
||||
static Affine3 Identity();
|
||||
|
||||
//! Rotation matrix
|
||||
/**
|
||||
* Rotation matrix.
|
||||
*
|
||||
* Copy the rotation matrix to the upper left 3x3 part of the current matrix.
|
||||
* The remaining elements of the current matrix are not changed.
|
||||
*
|
||||
* @param R 3x3 rotation matrix.
|
||||
*
|
||||
*/
|
||||
void rotation(const Mat3& R);
|
||||
|
||||
//! Rodrigues vector
|
||||
/**
|
||||
* Rodrigues vector.
|
||||
*
|
||||
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
|
||||
*
|
||||
* @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
|
||||
* its length indicates the rotation angle in radian (using the right thumb convention).
|
||||
*/
|
||||
void rotation(const Vec3& rvec);
|
||||
|
||||
//! Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
|
||||
/**
|
||||
* Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
|
||||
*
|
||||
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
|
||||
*
|
||||
* @param data 1-channel matrix.
|
||||
* When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
|
||||
* When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
|
||||
* is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
|
||||
*/
|
||||
void rotation(const Mat& data);
|
||||
|
||||
/**
|
||||
* Copy the 3x3 matrix L to the upper left part of the current matrix
|
||||
*
|
||||
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
|
||||
*
|
||||
* @param L 3x3 matrix.
|
||||
*/
|
||||
void linear(const Mat3& L);
|
||||
|
||||
/**
|
||||
* Copy t to the first three elements of the last column of the current matrix
|
||||
*
|
||||
* It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
|
||||
*
|
||||
* @param t 3x1 translation vector.
|
||||
*/
|
||||
void translation(const Vec3& t);
|
||||
|
||||
//! @return the upper left 3x3 part
|
||||
Mat3 rotation() const;
|
||||
|
||||
//! @return the upper left 3x3 part
|
||||
Mat3 linear() const;
|
||||
|
||||
//! @return the upper right 3x1 part
|
||||
Vec3 translation() const;
|
||||
|
||||
//! Rodrigues vector
|
||||
//! Rodrigues vector.
|
||||
//! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
|
||||
//! @warning Since the mapping between rotation vectors and rotation matrices is many to one,
|
||||
//! this function returns only one rotation vector that represents the current rotation matrix,
|
||||
//! which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
|
||||
Vec3 rvec() const;
|
||||
|
||||
//! @return the inverse of the current matrix.
|
||||
Affine3 inv(int method = cv::DECOMP_SVD) const;
|
||||
|
||||
//! a.rotate(R) is equivalent to Affine(R, 0) * a;
|
||||
@@ -113,7 +260,7 @@ namespace cv
|
||||
//! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
|
||||
Affine3 rotate(const Vec3& rvec) const;
|
||||
|
||||
//! a.translate(t) is equivalent to Affine(E, t) * a;
|
||||
//! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
|
||||
Affine3 translate(const Vec3& t) const;
|
||||
|
||||
//! a.concatenate(affine) is equivalent to affine * a;
|
||||
@@ -136,6 +283,7 @@ namespace cv
|
||||
template<typename T> static
|
||||
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
|
||||
|
||||
//! V is a 3-element vector with member fields x, y and z
|
||||
template<typename T, typename V> static
|
||||
V operator*(const Affine3<T>& affine, const V& vector);
|
||||
|
||||
@@ -178,7 +326,7 @@ namespace cv
|
||||
//! @cond IGNORED
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementaiton
|
||||
// Implementation
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3()
|
||||
@@ -212,6 +360,7 @@ template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
|
||||
{
|
||||
CV_Assert(data.type() == cv::traits::Type<T>::value);
|
||||
CV_Assert(data.channels() == 1);
|
||||
|
||||
if (data.cols == 4 && data.rows == 4)
|
||||
{
|
||||
@@ -276,11 +425,12 @@ void cv::Affine3<T>::rotation(const Vec3& _rvec)
|
||||
}
|
||||
}
|
||||
|
||||
//Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
|
||||
//Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix;
|
||||
template<typename T> inline
|
||||
void cv::Affine3<T>::rotation(const cv::Mat& data)
|
||||
{
|
||||
CV_Assert(data.type() == cv::traits::Type<T>::value);
|
||||
CV_Assert(data.channels() == 1);
|
||||
|
||||
if (data.cols == 3 && data.rows == 3)
|
||||
{
|
||||
@@ -295,7 +445,7 @@ void cv::Affine3<T>::rotation(const cv::Mat& data)
|
||||
rotation(_rvec);
|
||||
}
|
||||
else
|
||||
CV_Assert(!"Input marix can be 3x3, 1x3 or 3x1");
|
||||
CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
@@ -271,7 +271,7 @@ enum BorderTypes {
|
||||
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
|
||||
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
|
||||
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
|
||||
BORDER_TRANSPARENT = 5, //!< `uvwxyz|absdefgh|ijklmno`
|
||||
BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
|
||||
|
||||
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
|
||||
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
|
||||
@@ -283,84 +283,6 @@ enum BorderTypes {
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
//////////////// static assert /////////////////
|
||||
#define CVAUX_CONCAT_EXP(a, b) a##b
|
||||
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
|
||||
|
||||
#if defined(__clang__)
|
||||
# ifndef __has_extension
|
||||
# define __has_extension __has_feature /* compatibility, for older versions of clang */
|
||||
# endif
|
||||
# if __has_extension(cxx_static_assert)
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# elif __has_extension(c_static_assert)
|
||||
# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#elif defined(__GNUC__)
|
||||
# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#elif defined(_MSC_VER)
|
||||
# if _MSC_VER >= 1600 /* MSVC 10 */
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_StaticAssert
|
||||
# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
|
||||
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
|
||||
# else
|
||||
template <bool x> struct CV_StaticAssert_failed;
|
||||
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
|
||||
template<int x> struct CV_StaticAssert_test {};
|
||||
# define CV_StaticAssert(condition, reason)\
|
||||
typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Suppress warning "-Wdeprecated-declarations" / C4996
|
||||
#if defined(_MSC_VER)
|
||||
#define CV_DO_PRAGMA(x) __pragma(x)
|
||||
#elif defined(__GNUC__)
|
||||
#define CV_DO_PRAGMA(x) _Pragma (#x)
|
||||
#else
|
||||
#define CV_DO_PRAGMA(x)
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define CV_SUPPRESS_DEPRECATED_START \
|
||||
CV_DO_PRAGMA(warning(push)) \
|
||||
CV_DO_PRAGMA(warning(disable: 4996))
|
||||
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
|
||||
#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
|
||||
#define CV_SUPPRESS_DEPRECATED_START \
|
||||
CV_DO_PRAGMA(GCC diagnostic push) \
|
||||
CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
|
||||
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
|
||||
#else
|
||||
#define CV_SUPPRESS_DEPRECATED_START
|
||||
#define CV_SUPPRESS_DEPRECATED_END
|
||||
#endif
|
||||
|
||||
#define CV_UNUSED(name) (void)name
|
||||
|
||||
#if defined __GNUC__ && !defined __EXCEPTIONS
|
||||
#define CV_TRY
|
||||
#define CV_CATCH(A, B) for (A B; false; )
|
||||
#define CV_CATCH_ALL if (false)
|
||||
#define CV_THROW(A) abort()
|
||||
#define CV_RETHROW() abort()
|
||||
#else
|
||||
#define CV_TRY try
|
||||
#define CV_CATCH(A, B) catch(const A & B)
|
||||
#define CV_CATCH_ALL catch(...)
|
||||
#define CV_THROW(A) throw A
|
||||
#define CV_RETHROW() throw
|
||||
#endif
|
||||
|
||||
//! @endcond
|
||||
|
||||
/*! @brief Signals an error and raises the exception.
|
||||
|
||||
By default the function prints information about the error to stderr,
|
||||
@@ -371,51 +293,17 @@ It is possible to alternate error processing by using redirectError().
|
||||
@param _func - function name. Available only when the compiler supports getting it
|
||||
@param _file - source file name where the error has occurred
|
||||
@param _line - line number in the source file where the error has occurred
|
||||
@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert
|
||||
@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert
|
||||
*/
|
||||
CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
|
||||
|
||||
#ifdef __GNUC__
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Winvalid-noreturn"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/** same as cv::error, but does not return */
|
||||
CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
|
||||
{
|
||||
error(_code, _err, _func, _file, _line);
|
||||
#ifdef __GNUC__
|
||||
# if !defined __clang__ && !defined __APPLE__
|
||||
// this suppresses this warning: "noreturn" function does return [enabled by default]
|
||||
__builtin_trap();
|
||||
// or use infinite loop: for (;;) {}
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
#ifdef __GNUC__
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic pop
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#define CV_Func __func__
|
||||
#elif defined _MSC_VER
|
||||
#define CV_Func __FUNCTION__
|
||||
#else
|
||||
#define CV_Func ""
|
||||
#endif
|
||||
CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
|
||||
|
||||
#ifdef CV_STATIC_ANALYSIS
|
||||
|
||||
// In practice, some macro are not processed correctly (noreturn is not detected).
|
||||
// We need to use simplified definition for them.
|
||||
#define CV_Error(...) do { abort(); } while (0)
|
||||
#define CV_Error_(...) do { abort(); } while (0)
|
||||
#define CV_Assert(cond) do { if (!(cond)) abort(); } while (0)
|
||||
#define CV_ErrorNoReturn(...) do { abort(); } while (0)
|
||||
#define CV_ErrorNoReturn_(...) do { abort(); } while (0)
|
||||
#define CV_Error_( code, args ) do { cv::format args; abort(); } while (0)
|
||||
#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
|
||||
|
||||
#else // CV_STATIC_ANALYSIS
|
||||
|
||||
@@ -437,7 +325,7 @@ This macro can be used to construct an error message on-fly to include some dyna
|
||||
for example:
|
||||
@code
|
||||
// note the extra parentheses around the formatted text message
|
||||
CV_Error_( CV_StsOutOfRange,
|
||||
CV_Error_(Error::StsOutOfRange,
|
||||
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
|
||||
@endcode
|
||||
@param code one of Error::Code
|
||||
@@ -451,35 +339,39 @@ The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression.
|
||||
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
|
||||
configurations while CV_DbgAssert is only retained in the Debug configuration.
|
||||
*/
|
||||
|
||||
#define CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
|
||||
#define CV_VA_NUM_ARGS(...) CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
|
||||
|
||||
#define CV_Assert_1( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
|
||||
#define CV_Assert_2( expr1, expr2 ) CV_Assert_1(expr1); CV_Assert_1(expr2)
|
||||
#define CV_Assert_3( expr1, expr2, expr3 ) CV_Assert_2(expr1, expr2); CV_Assert_1(expr3)
|
||||
#define CV_Assert_4( expr1, expr2, expr3, expr4 ) CV_Assert_3(expr1, expr2, expr3); CV_Assert_1(expr4)
|
||||
#define CV_Assert_5( expr1, expr2, expr3, expr4, expr5 ) CV_Assert_4(expr1, expr2, expr3, expr4); CV_Assert_1(expr5)
|
||||
#define CV_Assert_6( expr1, expr2, expr3, expr4, expr5, expr6 ) CV_Assert_5(expr1, expr2, expr3, expr4, expr5); CV_Assert_1(expr6)
|
||||
#define CV_Assert_7( expr1, expr2, expr3, expr4, expr5, expr6, expr7 ) CV_Assert_6(expr1, expr2, expr3, expr4, expr5, expr6 ); CV_Assert_1(expr7)
|
||||
#define CV_Assert_8( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ) CV_Assert_7(expr1, expr2, expr3, expr4, expr5, expr6, expr7 ); CV_Assert_1(expr8)
|
||||
#define CV_Assert_9( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ) CV_Assert_8(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ); CV_Assert_1(expr9)
|
||||
#define CV_Assert_10( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9, expr10 ) CV_Assert_9(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ); CV_Assert_1(expr10)
|
||||
|
||||
#define CV_Assert(...) CVAUX_CONCAT(CV_Assert_, CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)
|
||||
|
||||
/** same as CV_Error(code,msg), but does not return */
|
||||
#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
|
||||
|
||||
/** same as CV_Error_(code,args), but does not return */
|
||||
#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
|
||||
#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
|
||||
|
||||
#endif // CV_STATIC_ANALYSIS
|
||||
|
||||
/** replaced with CV_Assert(expr) in Debug configuration */
|
||||
#ifdef _DEBUG
|
||||
//! @cond IGNORED
|
||||
#if !defined(__OPENCV_BUILD) // TODO: backward compatibility only
|
||||
#ifndef CV_ErrorNoReturn
|
||||
#define CV_ErrorNoReturn CV_Error
|
||||
#endif
|
||||
#ifndef CV_ErrorNoReturn_
|
||||
#define CV_ErrorNoReturn_ CV_Error_
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CV_Assert_1 CV_Assert
|
||||
#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ ))
|
||||
#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ ))
|
||||
#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ ))
|
||||
#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ ))
|
||||
#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ ))
|
||||
#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ ))
|
||||
#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ ))
|
||||
#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ ))
|
||||
#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ ))
|
||||
|
||||
#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0)
|
||||
|
||||
//! @endcond
|
||||
|
||||
#if defined _DEBUG || defined CV_STATIC_ANALYSIS
|
||||
# define CV_DbgAssert(expr) CV_Assert(expr)
|
||||
#else
|
||||
/** replaced with CV_Assert(expr) in Debug configuration */
|
||||
# define CV_DbgAssert(expr)
|
||||
#endif
|
||||
|
||||
@@ -489,7 +381,7 @@ configurations while CV_DbgAssert is only retained in the Debug configuration.
|
||||
*/
|
||||
struct CV_EXPORTS Hamming
|
||||
{
|
||||
enum { normType = NORM_HAMMING };
|
||||
static const NormTypes normType = NORM_HAMMING;
|
||||
typedef unsigned char ValueType;
|
||||
typedef int ResultType;
|
||||
|
||||
@@ -726,11 +618,7 @@ namespace cudev
|
||||
|
||||
namespace ipp
|
||||
{
|
||||
#if OPENCV_ABI_COMPATIBILITY > 300
|
||||
CV_EXPORTS unsigned long long getIppFeatures();
|
||||
#else
|
||||
CV_EXPORTS int getIppFeatures();
|
||||
#endif
|
||||
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
|
||||
int line = 0);
|
||||
CV_EXPORTS int getIppStatus();
|
||||
@@ -740,9 +628,13 @@ CV_EXPORTS_W void setUseIPP(bool flag);
|
||||
CV_EXPORTS_W String getIppVersion();
|
||||
|
||||
// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results
|
||||
// but have internal accuracy differences which have to much direct or indirect impact on accuracy tests.
|
||||
CV_EXPORTS_W bool useIPP_NE();
|
||||
CV_EXPORTS_W void setUseIPP_NE(bool flag);
|
||||
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
|
||||
CV_EXPORTS_W bool useIPP_NotExact();
|
||||
CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
|
||||
#ifndef DISABLE_OPENCV_3_COMPATIBILITY
|
||||
static inline bool useIPP_NE() { return useIPP_NotExact(); }
|
||||
static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); }
|
||||
#endif
|
||||
|
||||
} // ipp
|
||||
|
||||
@@ -757,5 +649,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
|
||||
|
||||
#include "opencv2/core/neon_utils.hpp"
|
||||
#include "opencv2/core/vsx_utils.hpp"
|
||||
#include "opencv2/core/check.hpp"
|
||||
|
||||
#endif //OPENCV_CORE_BASE_HPP
|
||||
23
lib/3rdParty/OpenCV/include/opencv2/core/bindings_utils.hpp
vendored
Normal file
23
lib/3rdParty/OpenCV/include/opencv2/core/bindings_utils.hpp
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP
|
||||
#define OPENCV_CORE_BINDINGS_UTILS_HPP
|
||||
|
||||
namespace cv { namespace utils {
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
CV_EXPORTS_W String dumpInputArray(InputArray argument);
|
||||
|
||||
CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument);
|
||||
|
||||
CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument);
|
||||
|
||||
CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument);
|
||||
|
||||
//! @}
|
||||
}} // namespace
|
||||
|
||||
#endif // OPENCV_CORE_BINDINGS_UTILS_HPP
|
||||
159
lib/3rdParty/OpenCV/include/opencv2/core/check.hpp
vendored
Normal file
159
lib/3rdParty/OpenCV/include/opencv2/core/check.hpp
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_CORE_CHECK_HPP
|
||||
#define OPENCV_CORE_CHECK_HPP
|
||||
|
||||
#include <opencv2/core/base.hpp>
|
||||
|
||||
namespace cv {
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
|
||||
CV_EXPORTS const char* depthToString(int depth);
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
|
||||
CV_EXPORTS const String typeToString(int type);
|
||||
|
||||
|
||||
//! @cond IGNORED
|
||||
namespace detail {
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
|
||||
CV_EXPORTS const char* depthToString_(int depth);
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */
|
||||
CV_EXPORTS const cv::String typeToString_(int type);
|
||||
|
||||
enum TestOp {
|
||||
TEST_CUSTOM = 0,
|
||||
TEST_EQ = 1,
|
||||
TEST_NE = 2,
|
||||
TEST_LE = 3,
|
||||
TEST_LT = 4,
|
||||
TEST_GE = 5,
|
||||
TEST_GT = 6,
|
||||
CV__LAST_TEST_OP
|
||||
};
|
||||
|
||||
struct CheckContext {
|
||||
const char* func;
|
||||
const char* file;
|
||||
int line;
|
||||
enum TestOp testOp;
|
||||
const char* message;
|
||||
const char* p1_str;
|
||||
const char* p2_str;
|
||||
};
|
||||
|
||||
#ifndef CV__CHECK_FILENAME
|
||||
# define CV__CHECK_FILENAME __FILE__
|
||||
#endif
|
||||
|
||||
#ifndef CV__CHECK_FUNCTION
|
||||
# if defined _MSC_VER
|
||||
# define CV__CHECK_FUNCTION __FUNCSIG__
|
||||
# elif defined __GNUC__
|
||||
# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
|
||||
# else
|
||||
# define CV__CHECK_FUNCTION "<unknown>"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
|
||||
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
|
||||
static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
|
||||
{ CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
|
||||
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v1, const Size_<int> v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
|
||||
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
|
||||
|
||||
|
||||
#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
|
||||
#define CV__TEST_NE(v1, v2) ((v1) != (v2))
|
||||
#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
|
||||
#define CV__TEST_LT(v1, v2) ((v1) < (v2))
|
||||
#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
|
||||
#define CV__TEST_GT(v1, v2) ((v1) > (v2))
|
||||
|
||||
#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
|
||||
if(CV__TEST_##op((v1), (v2))) ; else { \
|
||||
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
|
||||
cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
|
||||
if(!!(test_expr)) ; else { \
|
||||
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
|
||||
cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
} // namespace
|
||||
//! @endcond
|
||||
|
||||
|
||||
/// Supported values of these types: int, float, double
|
||||
#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
|
||||
|
||||
/// Check with additional "decoding" of type values in error message
|
||||
#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
|
||||
/// Check with additional "decoding" of depth values in error message
|
||||
#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
|
||||
|
||||
#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
|
||||
|
||||
/// Example: type == CV_8UC1 || type == CV_8UC3
|
||||
#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
|
||||
|
||||
/// Example: depth == CV_32F || depth == CV_64F
|
||||
#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
|
||||
|
||||
/// Example: v == A || v == B
|
||||
#define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
|
||||
|
||||
/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
|
||||
// TODO define pretty-printers
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
|
||||
#define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
|
||||
#else
|
||||
#define CV_DbgCheck(v, test_expr, msg) do { } while (0)
|
||||
#define CV_DbgCheckEQ(v1, v2, msg) do { } while (0)
|
||||
#define CV_DbgCheckNE(v1, v2, msg) do { } while (0)
|
||||
#define CV_DbgCheckLE(v1, v2, msg) do { } while (0)
|
||||
#define CV_DbgCheckLT(v1, v2, msg) do { } while (0)
|
||||
#define CV_DbgCheckGE(v1, v2, msg) do { } while (0)
|
||||
#define CV_DbgCheckGT(v1, v2, msg) do { } while (0)
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // OPENCV_CORE_CHECK_HPP
|
||||
@@ -1576,8 +1576,8 @@ CVAPI(void) cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos
|
||||
CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size );
|
||||
|
||||
/** Allocates string in memory storage */
|
||||
CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
|
||||
int len CV_DEFAULT(-1) );
|
||||
//CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
|
||||
// int len CV_DEFAULT(-1) );
|
||||
|
||||
/** Creates new empty sequence that will reside in the specified storage */
|
||||
CVAPI(CvSeq*) cvCreateSeq( int seq_flags, size_t header_size,
|
||||
@@ -1788,7 +1788,7 @@ CVAPI(int) cvGraphRemoveVtx( CvGraph* graph, int index );
|
||||
CVAPI(int) cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx );
|
||||
|
||||
|
||||
/** Link two vertices specifed by indices or pointers if they
|
||||
/** Link two vertices specified by indices or pointers if they
|
||||
are not connected or return pointer to already existing edge
|
||||
connecting the vertices.
|
||||
Functions return 1 if a new edge was created, 0 otherwise */
|
||||
@@ -1970,6 +1970,7 @@ CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header,
|
||||
* Data Persistence *
|
||||
\****************************************************************************************/
|
||||
|
||||
#if 0
|
||||
/********************************** High-level functions ********************************/
|
||||
|
||||
/** @brief Opens file storage for reading or writing data.
|
||||
@@ -1978,11 +1979,7 @@ The function opens file storage for reading or writing data. In the latter case,
|
||||
created or an existing file is rewritten. The type of the read or written file is determined by the
|
||||
filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON.
|
||||
|
||||
At the same time, it also supports adding parameters like "example.xml?base64". The three ways
|
||||
are the same:
|
||||
@snippet samples/cpp/filestorage_base64.cpp suffix_in_file_name
|
||||
@snippet samples/cpp/filestorage_base64.cpp flag_write_base64
|
||||
@snippet samples/cpp/filestorage_base64.cpp flag_write_and_flag_base64
|
||||
At the same time, it also supports adding parameters like "example.xml?base64".
|
||||
|
||||
The function returns a pointer to the CvFileStorage structure.
|
||||
If the file cannot be opened then the function returns NULL.
|
||||
@@ -2206,11 +2203,6 @@ in plain text.
|
||||
|
||||
This function can only be used to write a sequence with a type "binary".
|
||||
|
||||
Consider the following two examples where their output is the same:
|
||||
@snippet samples/cpp/filestorage_base64.cpp without_base64_flag
|
||||
and
|
||||
@snippet samples/cpp/filestorage_base64.cpp with_write_base64_flag
|
||||
|
||||
@param fs File storage
|
||||
@param src Pointer to the written array
|
||||
@param len Number of the array elements to write
|
||||
@@ -2565,10 +2557,12 @@ returns NULL.
|
||||
*/
|
||||
CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr );
|
||||
|
||||
#endif
|
||||
|
||||
/** @brief Releases an object.
|
||||
|
||||
The function finds the type of a given object and calls release with the double pointer.
|
||||
@param struct_ptr Double pointer to the object
|
||||
The function finds the type of a given object and calls release with the double pointer.
|
||||
@param struct_ptr Double pointer to the object
|
||||
*/
|
||||
CVAPI(void) cvRelease( void** struct_ptr );
|
||||
|
||||
@@ -2581,41 +2575,6 @@ function, like cvCloneMat.
|
||||
*/
|
||||
CVAPI(void*) cvClone( const void* struct_ptr );
|
||||
|
||||
/** @brief Saves an object to a file.
|
||||
|
||||
The function saves an object to a file. It provides a simple interface to cvWrite .
|
||||
@param filename File name
|
||||
@param struct_ptr Object to save
|
||||
@param name Optional object name. If it is NULL, the name will be formed from filename .
|
||||
@param comment Optional comment to put in the beginning of the file
|
||||
@param attributes Optional attributes passed to cvWrite
|
||||
*/
|
||||
CVAPI(void) cvSave( const char* filename, const void* struct_ptr,
|
||||
const char* name CV_DEFAULT(NULL),
|
||||
const char* comment CV_DEFAULT(NULL),
|
||||
CvAttrList attributes CV_DEFAULT(cvAttrList()));
|
||||
|
||||
/** @brief Loads an object from a file.
|
||||
|
||||
The function loads an object from a file. It basically reads the specified file, find the first
|
||||
top-level node and calls cvRead for that node. If the file node does not have type information or
|
||||
the type information can not be found by the type name, the function returns NULL. After the object
|
||||
is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a
|
||||
dynamic structure, such as a sequence, contour, or graph, one should pass a valid memory storage
|
||||
destination to the function.
|
||||
@param filename File name
|
||||
@param memstorage Memory storage for dynamic structures, such as CvSeq or CvGraph . It is not used
|
||||
for matrices or images.
|
||||
@param name Optional object name. If it is NULL, the first top-level object in the storage will be
|
||||
loaded.
|
||||
@param real_name Optional output parameter that will contain the name of the loaded object
|
||||
(useful if name=NULL )
|
||||
*/
|
||||
CVAPI(void*) cvLoad( const char* filename,
|
||||
CvMemStorage* memstorage CV_DEFAULT(NULL),
|
||||
const char* name CV_DEFAULT(NULL),
|
||||
const char** real_name CV_DEFAULT(NULL) );
|
||||
|
||||
/*********************************** Measuring Execution Time ***************************/
|
||||
|
||||
/** helper functions for RNG initialization and accurate time measurement:
|
||||
@@ -2648,7 +2607,7 @@ CVAPI(void) cvSetErrStatus( int status );
|
||||
#define CV_ErrModeParent 1 /* Print error and continue */
|
||||
#define CV_ErrModeSilent 2 /* Don't print and continue */
|
||||
|
||||
/** Retrives current error processing mode */
|
||||
/** Retrieves current error processing mode */
|
||||
CVAPI(int) cvGetErrMode( void );
|
||||
|
||||
/** Sets error processing mode, returns previously used mode */
|
||||
@@ -2738,7 +2697,7 @@ static char cvFuncName[] = Name
|
||||
/**
|
||||
CV_CALL macro calls CV (or IPL) function, checks error status and
|
||||
signals a error if the function failed. Useful in "parent node"
|
||||
error procesing mode
|
||||
error processing mode
|
||||
*/
|
||||
#define CV_CALL( Func ) \
|
||||
{ \
|
||||
@@ -2766,24 +2725,6 @@ static char cvFuncName[] = Name
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
//! @addtogroup core_c_glue
|
||||
//! @{
|
||||
|
||||
//! class for automatic module/RTTI data registration/unregistration
|
||||
struct CV_EXPORTS CvType
|
||||
{
|
||||
CvType( const char* type_name,
|
||||
CvIsInstanceFunc is_instance, CvReleaseFunc release=0,
|
||||
CvReadFunc read=0, CvWriteFunc write=0, CvCloneFunc clone=0 );
|
||||
~CvType();
|
||||
CvTypeInfo* info;
|
||||
|
||||
static CvTypeInfo* first;
|
||||
static CvTypeInfo* last;
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
||||
#include "opencv2/core/utility.hpp"
|
||||
|
||||
namespace cv
|
||||
@@ -2814,11 +2755,11 @@ CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
|
||||
|
||||
////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
|
||||
|
||||
template<> CV_EXPORTS void DefaultDeleter<CvMat>::operator ()(CvMat* obj) const;
|
||||
template<> CV_EXPORTS void DefaultDeleter<IplImage>::operator ()(IplImage* obj) const;
|
||||
template<> CV_EXPORTS void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const;
|
||||
template<> CV_EXPORTS void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const;
|
||||
template<> CV_EXPORTS void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const;
|
||||
template<> struct DefaultDeleter<CvMat>{ CV_EXPORTS void operator ()(CvMat* obj) const; };
|
||||
template<> struct DefaultDeleter<IplImage>{ CV_EXPORTS void operator ()(IplImage* obj) const; };
|
||||
template<> struct DefaultDeleter<CvMatND>{ CV_EXPORTS void operator ()(CvMatND* obj) const; };
|
||||
template<> struct DefaultDeleter<CvSparseMat>{ CV_EXPORTS void operator ()(CvSparseMat* obj) const; };
|
||||
template<> struct DefaultDeleter<CvMemStorage>{ CV_EXPORTS void operator ()(CvMemStorage* obj) const; };
|
||||
|
||||
////////////// convenient wrappers for operating old-style dynamic structures //////////////
|
||||
|
||||
@@ -3073,7 +3014,7 @@ template<typename _Tp> inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const
|
||||
size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start;
|
||||
vec.resize(len);
|
||||
if( seq && len )
|
||||
cvCvtSeqToArray(seq, &vec[0], range);
|
||||
cvCvtSeqToArray(seq, &vec[0], cvSlice(range));
|
||||
}
|
||||
|
||||
template<typename _Tp> inline Seq<_Tp>::operator std::vector<_Tp>() const
|
||||
@@ -56,7 +56,7 @@
|
||||
@{
|
||||
@defgroup cudacore Core part
|
||||
@{
|
||||
@defgroup cudacore_init Initalization and Information
|
||||
@defgroup cudacore_init Initialization and Information
|
||||
@defgroup cudacore_struct Data Structures
|
||||
@}
|
||||
@}
|
||||
@@ -91,12 +91,21 @@ aligned to a size depending on the hardware. Single-row GpuMat is always a conti
|
||||
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
|
||||
release function returns error if the CUDA context has been destroyed before.
|
||||
|
||||
Some member functions are described as a "Blocking Call" while some are described as a
|
||||
"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU
|
||||
operation is finished when the function returns. However, non-blocking functions are asynchronous to
|
||||
host. Those functions may return even if the GPU operation is not finished.
|
||||
|
||||
Compared to their blocking counterpart, non-blocking functions accept Stream as an additional
|
||||
argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
|
||||
streams.
|
||||
|
||||
@sa Mat
|
||||
*/
|
||||
class CV_EXPORTS GpuMat
|
||||
class CV_EXPORTS_W GpuMat
|
||||
{
|
||||
public:
|
||||
class CV_EXPORTS Allocator
|
||||
class CV_EXPORTS_W Allocator
|
||||
{
|
||||
public:
|
||||
virtual ~Allocator() {}
|
||||
@@ -107,33 +116,33 @@ public:
|
||||
};
|
||||
|
||||
//! default allocator
|
||||
static Allocator* defaultAllocator();
|
||||
static void setDefaultAllocator(Allocator* allocator);
|
||||
CV_WRAP static GpuMat::Allocator* defaultAllocator();
|
||||
CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);
|
||||
|
||||
//! default constructor
|
||||
explicit GpuMat(Allocator* allocator = defaultAllocator());
|
||||
CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
|
||||
|
||||
//! constructs GpuMat of the specified size and type
|
||||
GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
|
||||
GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
|
||||
CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
|
||||
CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
|
||||
|
||||
//! constucts GpuMat and fills it with the specified value _s
|
||||
GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
|
||||
GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
|
||||
CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
|
||||
CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
|
||||
|
||||
//! copy constructor
|
||||
GpuMat(const GpuMat& m);
|
||||
CV_WRAP GpuMat(const GpuMat& m);
|
||||
|
||||
//! constructor for GpuMat headers pointing to user-allocated data
|
||||
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
|
||||
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
|
||||
|
||||
//! creates a GpuMat header for a part of the bigger matrix
|
||||
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
|
||||
GpuMat(const GpuMat& m, Rect roi);
|
||||
CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
|
||||
CV_WRAP GpuMat(const GpuMat& m, Rect roi);
|
||||
|
||||
//! builds GpuMat from host memory (Blocking call)
|
||||
explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
|
||||
CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
|
||||
|
||||
//! destructor - calls release()
|
||||
~GpuMat();
|
||||
@@ -142,70 +151,92 @@ public:
|
||||
GpuMat& operator =(const GpuMat& m);
|
||||
|
||||
//! allocates new GpuMat data unless the GpuMat already has specified size and type
|
||||
void create(int rows, int cols, int type);
|
||||
void create(Size size, int type);
|
||||
CV_WRAP void create(int rows, int cols, int type);
|
||||
CV_WRAP void create(Size size, int type);
|
||||
|
||||
//! decreases reference counter, deallocate the data when reference counter reaches 0
|
||||
void release();
|
||||
|
||||
//! swaps with other smart pointer
|
||||
void swap(GpuMat& mat);
|
||||
CV_WRAP void swap(GpuMat& mat);
|
||||
|
||||
//! pefroms upload data to GpuMat (Blocking call)
|
||||
void upload(InputArray arr);
|
||||
/** @brief Performs data upload to GpuMat (Blocking call)
|
||||
|
||||
//! pefroms upload data to GpuMat (Non-Blocking call)
|
||||
void upload(InputArray arr, Stream& stream);
|
||||
This function copies data from host memory to device memory. As being a blocking call, it is
|
||||
guaranteed that the copy operation is finished when this function returns.
|
||||
*/
|
||||
CV_WRAP void upload(InputArray arr);
|
||||
|
||||
//! pefroms download data from device to host memory (Blocking call)
|
||||
void download(OutputArray dst) const;
|
||||
/** @brief Performs data upload to GpuMat (Non-Blocking call)
|
||||
|
||||
//! pefroms download data from device to host memory (Non-Blocking call)
|
||||
void download(OutputArray dst, Stream& stream) const;
|
||||
This function copies data from host memory to device memory. As being a non-blocking call, this
|
||||
function may return even if the copy operation is not finished.
|
||||
|
||||
The copy operation may be overlapped with operations in other non-default streams if \p stream is
|
||||
not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
|
||||
*/
|
||||
CV_WRAP void upload(InputArray arr, Stream& stream);
|
||||
|
||||
/** @brief Performs data download from GpuMat (Blocking call)
|
||||
|
||||
This function copies data from device memory to host memory. As being a blocking call, it is
|
||||
guaranteed that the copy operation is finished when this function returns.
|
||||
*/
|
||||
CV_WRAP void download(OutputArray dst) const;
|
||||
|
||||
/** @brief Performs data download from GpuMat (Non-Blocking call)
|
||||
|
||||
This function copies data from device memory to host memory. As being a non-blocking call, this
|
||||
function may return even if the copy operation is not finished.
|
||||
|
||||
The copy operation may be overlapped with operations in other non-default streams if \p stream is
|
||||
not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
|
||||
*/
|
||||
CV_WRAP void download(OutputArray dst, Stream& stream) const;
|
||||
|
||||
//! returns deep copy of the GpuMat, i.e. the data is copied
|
||||
GpuMat clone() const;
|
||||
CV_WRAP GpuMat clone() const;
|
||||
|
||||
//! copies the GpuMat content to device memory (Blocking call)
|
||||
void copyTo(OutputArray dst) const;
|
||||
CV_WRAP void copyTo(OutputArray dst) const;
|
||||
|
||||
//! copies the GpuMat content to device memory (Non-Blocking call)
|
||||
void copyTo(OutputArray dst, Stream& stream) const;
|
||||
CV_WRAP void copyTo(OutputArray dst, Stream& stream) const;
|
||||
|
||||
//! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
|
||||
void copyTo(OutputArray dst, InputArray mask) const;
|
||||
CV_WRAP void copyTo(OutputArray dst, InputArray mask) const;
|
||||
|
||||
//! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
|
||||
void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
|
||||
CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
|
||||
|
||||
//! sets some of the GpuMat elements to s (Blocking call)
|
||||
GpuMat& setTo(Scalar s);
|
||||
CV_WRAP GpuMat& setTo(Scalar s);
|
||||
|
||||
//! sets some of the GpuMat elements to s (Non-Blocking call)
|
||||
GpuMat& setTo(Scalar s, Stream& stream);
|
||||
CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
|
||||
|
||||
//! sets some of the GpuMat elements to s, according to the mask (Blocking call)
|
||||
GpuMat& setTo(Scalar s, InputArray mask);
|
||||
CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
|
||||
|
||||
//! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
|
||||
GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
|
||||
CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
|
||||
|
||||
//! converts GpuMat to another datatype (Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype) const;
|
||||
CV_WRAP void convertTo(OutputArray dst, int rtype) const;
|
||||
|
||||
//! converts GpuMat to another datatype (Non-Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, Stream& stream) const;
|
||||
CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const;
|
||||
|
||||
//! converts GpuMat to another datatype with scaling (Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
|
||||
CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
|
||||
|
||||
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
|
||||
CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
|
||||
|
||||
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
|
||||
CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
|
||||
|
||||
void assignTo(GpuMat& m, int type=-1) const;
|
||||
CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
|
||||
|
||||
//! returns pointer to y-th row
|
||||
uchar* ptr(int y = 0);
|
||||
@@ -219,18 +250,18 @@ public:
|
||||
template <typename _Tp> operator PtrStep<_Tp>() const;
|
||||
|
||||
//! returns a new GpuMat header for the specified row
|
||||
GpuMat row(int y) const;
|
||||
CV_WRAP GpuMat row(int y) const;
|
||||
|
||||
//! returns a new GpuMat header for the specified column
|
||||
GpuMat col(int x) const;
|
||||
CV_WRAP GpuMat col(int x) const;
|
||||
|
||||
//! ... for the specified row span
|
||||
GpuMat rowRange(int startrow, int endrow) const;
|
||||
GpuMat rowRange(Range r) const;
|
||||
CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
|
||||
CV_WRAP GpuMat rowRange(Range r) const;
|
||||
|
||||
//! ... for the specified column span
|
||||
GpuMat colRange(int startcol, int endcol) const;
|
||||
GpuMat colRange(Range r) const;
|
||||
CV_WRAP GpuMat colRange(int startcol, int endcol) const;
|
||||
CV_WRAP GpuMat colRange(Range r) const;
|
||||
|
||||
//! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
|
||||
GpuMat operator ()(Range rowRange, Range colRange) const;
|
||||
@@ -238,41 +269,44 @@ public:
|
||||
|
||||
//! creates alternative GpuMat header for the same data, with different
|
||||
//! number of channels and/or different number of rows
|
||||
GpuMat reshape(int cn, int rows = 0) const;
|
||||
CV_WRAP GpuMat reshape(int cn, int rows = 0) const;
|
||||
|
||||
//! locates GpuMat header within a parent GpuMat
|
||||
void locateROI(Size& wholeSize, Point& ofs) const;
|
||||
CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;
|
||||
|
||||
//! moves/resizes the current GpuMat ROI inside the parent GpuMat
|
||||
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
|
||||
CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
|
||||
|
||||
//! returns true iff the GpuMat data is continuous
|
||||
//! (i.e. when there are no gaps between successive rows)
|
||||
bool isContinuous() const;
|
||||
CV_WRAP bool isContinuous() const;
|
||||
|
||||
//! returns element size in bytes
|
||||
size_t elemSize() const;
|
||||
CV_WRAP size_t elemSize() const;
|
||||
|
||||
//! returns the size of element channel in bytes
|
||||
size_t elemSize1() const;
|
||||
CV_WRAP size_t elemSize1() const;
|
||||
|
||||
//! returns element type
|
||||
int type() const;
|
||||
CV_WRAP int type() const;
|
||||
|
||||
//! returns element type
|
||||
int depth() const;
|
||||
CV_WRAP int depth() const;
|
||||
|
||||
//! returns number of channels
|
||||
int channels() const;
|
||||
CV_WRAP int channels() const;
|
||||
|
||||
//! returns step/elemSize1()
|
||||
size_t step1() const;
|
||||
CV_WRAP size_t step1() const;
|
||||
|
||||
//! returns GpuMat size : width == number of columns, height == number of rows
|
||||
Size size() const;
|
||||
CV_WRAP Size size() const;
|
||||
|
||||
//! returns true if GpuMat data is NULL
|
||||
bool empty() const;
|
||||
CV_WRAP bool empty() const;
|
||||
|
||||
//! internal use method: updates the continuity flag
|
||||
CV_WRAP void updateContinuityFlag();
|
||||
|
||||
/*! includes several bit-fields:
|
||||
- the magic signature
|
||||
@@ -286,7 +320,7 @@ public:
|
||||
int rows, cols;
|
||||
|
||||
//! a distance between successive rows in bytes; includes the gap if any
|
||||
size_t step;
|
||||
CV_PROP size_t step;
|
||||
|
||||
//! pointer to the data
|
||||
uchar* data;
|
||||
@@ -314,7 +348,7 @@ public:
|
||||
Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
|
||||
end of each row.
|
||||
*/
|
||||
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
|
||||
CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);
|
||||
|
||||
/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
|
||||
|
||||
@@ -325,17 +359,126 @@ CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
|
||||
|
||||
The function does not reallocate memory if the matrix has proper attributes already.
|
||||
*/
|
||||
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
|
||||
CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
|
||||
|
||||
/** @brief BufferPool for use with CUDA streams
|
||||
|
||||
* BufferPool utilizes cuda::Stream's allocator to create new buffers. It is
|
||||
* particularly useful when BufferPoolUsage is set to true, or a custom
|
||||
* allocator is specified for the cuda::Stream, and you want to implement your
|
||||
* own stream based functions utilizing the same underlying GPU memory
|
||||
* management.
|
||||
BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
|
||||
only useful when enabled with #setBufferPoolUsage.
|
||||
|
||||
@code
|
||||
setBufferPoolUsage(true);
|
||||
@endcode
|
||||
|
||||
@note #setBufferPoolUsage must be called \em before any Stream declaration.
|
||||
|
||||
Users may specify custom allocator for Stream and may implement their own stream based
|
||||
functions utilizing the same underlying GPU memory management.
|
||||
|
||||
If custom allocator is not specified, BufferPool utilizes StackAllocator by
|
||||
default. StackAllocator allocates a chunk of GPU device memory beforehand,
|
||||
and when GpuMat is declared later on, it is given the pre-allocated memory.
|
||||
This kind of strategy reduces the number of calls for memory allocating APIs
|
||||
such as cudaMalloc or cudaMallocPitch.
|
||||
|
||||
Below is an example that utilizes BufferPool with StackAllocator:
|
||||
|
||||
@code
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda
|
||||
|
||||
int main()
|
||||
{
|
||||
setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
|
||||
setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
|
||||
|
||||
Stream stream1, stream2; // Each stream uses 1 stack
|
||||
BufferPool pool1(stream1), pool2(stream2);
|
||||
|
||||
GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
|
||||
GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
|
||||
|
||||
GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
|
||||
GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
|
||||
|
||||
cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
|
||||
cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
|
||||
}
|
||||
@endcode
|
||||
|
||||
If we allocate another GpuMat on pool1 in the above example, it will be carried out by
|
||||
the DefaultAllocator since the stack for pool1 is full.
|
||||
|
||||
@code
|
||||
GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
|
||||
@endcode
|
||||
|
||||
If a third stream is declared in the above example, allocating with #getBuffer
|
||||
within that stream will also be carried out by the DefaultAllocator because we've run out of
|
||||
stacks.
|
||||
|
||||
@code
|
||||
Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
|
||||
BufferPool pool3(stream3);
|
||||
GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
|
||||
@endcode
|
||||
|
||||
@warning When utilizing StackAllocator, deallocation order is important.
|
||||
|
||||
Just like a stack, deallocation must be done in LIFO order. Below is an example of
|
||||
erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this
|
||||
sample code will emit CV_Assert error.
|
||||
|
||||
@code
|
||||
int main()
|
||||
{
|
||||
setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
|
||||
Stream stream; // A default size (10 MB) stack is allocated to this stream
|
||||
BufferPool pool(stream);
|
||||
|
||||
GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
|
||||
GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
|
||||
|
||||
mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
|
||||
}
|
||||
@endcode
|
||||
|
||||
Since C++ local variables are destroyed in the reverse order of construction,
|
||||
the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
|
||||
and the corresponding memory is automatically returned to the pool for later usage.
|
||||
|
||||
@code
|
||||
int main()
|
||||
{
|
||||
setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
|
||||
setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
|
||||
|
||||
Stream stream1, stream2; // Each stream uses 1 stack
|
||||
BufferPool pool1(stream1), pool2(stream2);
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
|
||||
GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
|
||||
|
||||
GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
|
||||
GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
|
||||
|
||||
d_src1.setTo(Scalar(i), stream1);
|
||||
d_src2.setTo(Scalar(i), stream2);
|
||||
|
||||
cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
|
||||
cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
|
||||
// The order of destruction of the local variables is:
|
||||
// d_dst2 => d_src2 => d_dst1 => d_src1
|
||||
// LIFO rule is satisfied, this code runs without error
|
||||
}
|
||||
}
|
||||
@endcode
|
||||
*/
|
||||
class CV_EXPORTS BufferPool
|
||||
class CV_EXPORTS_W BufferPool
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -343,21 +486,21 @@ public:
|
||||
explicit BufferPool(Stream& stream);
|
||||
|
||||
//! Allocates a new GpuMat of given size and type.
|
||||
GpuMat getBuffer(int rows, int cols, int type);
|
||||
CV_WRAP GpuMat getBuffer(int rows, int cols, int type);
|
||||
|
||||
//! Allocates a new GpuMat of given size and type.
|
||||
GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
|
||||
CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
|
||||
|
||||
//! Returns the allocator associated with the stream.
|
||||
Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
|
||||
CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
|
||||
|
||||
private:
|
||||
Ptr<GpuMat::Allocator> allocator_;
|
||||
};
|
||||
|
||||
//! BufferPool management (must be called before Stream creation)
|
||||
CV_EXPORTS void setBufferPoolUsage(bool on);
|
||||
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
|
||||
CV_EXPORTS_W void setBufferPoolUsage(bool on);
|
||||
CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
|
||||
|
||||
//===================================================================================
|
||||
// HostMem
|
||||
@@ -378,46 +521,46 @@ Its interface is also Mat-like but with additional memory type parameters.
|
||||
@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
|
||||
Pinned Memory APIs* document or *CUDA C Programming Guide*.
|
||||
*/
|
||||
class CV_EXPORTS HostMem
|
||||
class CV_EXPORTS_W HostMem
|
||||
{
|
||||
public:
|
||||
enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
|
||||
|
||||
static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
|
||||
static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
|
||||
|
||||
explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
|
||||
CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
|
||||
|
||||
HostMem(const HostMem& m);
|
||||
|
||||
HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
|
||||
HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
|
||||
CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
|
||||
CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
|
||||
|
||||
//! creates from host memory with coping data
|
||||
explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
|
||||
CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
|
||||
|
||||
~HostMem();
|
||||
|
||||
HostMem& operator =(const HostMem& m);
|
||||
|
||||
//! swaps with other smart pointer
|
||||
void swap(HostMem& b);
|
||||
CV_WRAP void swap(HostMem& b);
|
||||
|
||||
//! returns deep copy of the matrix, i.e. the data is copied
|
||||
HostMem clone() const;
|
||||
CV_WRAP HostMem clone() const;
|
||||
|
||||
//! allocates new matrix data unless the matrix already has specified size and type.
|
||||
void create(int rows, int cols, int type);
|
||||
CV_WRAP void create(int rows, int cols, int type);
|
||||
void create(Size size, int type);
|
||||
|
||||
//! creates alternative HostMem header for the same data, with different
|
||||
//! number of channels and/or different number of rows
|
||||
HostMem reshape(int cn, int rows = 0) const;
|
||||
CV_WRAP HostMem reshape(int cn, int rows = 0) const;
|
||||
|
||||
//! decrements reference counter and released memory if needed.
|
||||
void release();
|
||||
|
||||
//! returns matrix header with disabled reference counting for HostMem data.
|
||||
Mat createMatHeader() const;
|
||||
CV_WRAP Mat createMatHeader() const;
|
||||
|
||||
/** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
|
||||
for it.
|
||||
@@ -429,20 +572,20 @@ public:
|
||||
GpuMat createGpuMatHeader() const;
|
||||
|
||||
// Please see cv::Mat for descriptions
|
||||
bool isContinuous() const;
|
||||
size_t elemSize() const;
|
||||
size_t elemSize1() const;
|
||||
int type() const;
|
||||
int depth() const;
|
||||
int channels() const;
|
||||
size_t step1() const;
|
||||
Size size() const;
|
||||
bool empty() const;
|
||||
CV_WRAP bool isContinuous() const;
|
||||
CV_WRAP size_t elemSize() const;
|
||||
CV_WRAP size_t elemSize1() const;
|
||||
CV_WRAP int type() const;
|
||||
CV_WRAP int depth() const;
|
||||
CV_WRAP int channels() const;
|
||||
CV_WRAP size_t step1() const;
|
||||
CV_WRAP Size size() const;
|
||||
CV_WRAP bool empty() const;
|
||||
|
||||
// Please see cv::Mat for descriptions
|
||||
int flags;
|
||||
int rows, cols;
|
||||
size_t step;
|
||||
CV_PROP size_t step;
|
||||
|
||||
uchar* data;
|
||||
int* refcount;
|
||||
@@ -457,13 +600,13 @@ public:
|
||||
|
||||
@param m Input matrix.
|
||||
*/
|
||||
CV_EXPORTS void registerPageLocked(Mat& m);
|
||||
CV_EXPORTS_W void registerPageLocked(Mat& m);
|
||||
|
||||
/** @brief Unmaps the memory of matrix and makes it pageable again.
|
||||
|
||||
@param m Input matrix.
|
||||
*/
|
||||
CV_EXPORTS void unregisterPageLocked(Mat& m);
|
||||
CV_EXPORTS_W void unregisterPageLocked(Mat& m);
|
||||
|
||||
//===================================================================================
|
||||
// Stream
|
||||
@@ -496,7 +639,7 @@ void thread2()
|
||||
@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
|
||||
In multi-threading environment the stream objects must be passed explicitly (see previous note).
|
||||
*/
|
||||
class CV_EXPORTS Stream
|
||||
class CV_EXPORTS_W Stream
|
||||
{
|
||||
typedef void (Stream::*bool_type)() const;
|
||||
void this_type_does_not_support_comparisons() const {}
|
||||
@@ -505,22 +648,22 @@ public:
|
||||
typedef void (*StreamCallback)(int status, void* userData);
|
||||
|
||||
//! creates a new asynchronous stream
|
||||
Stream();
|
||||
CV_WRAP Stream();
|
||||
|
||||
//! creates a new asynchronous stream with custom allocator
|
||||
Stream(const Ptr<GpuMat::Allocator>& allocator);
|
||||
CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);
|
||||
|
||||
/** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
|
||||
*/
|
||||
bool queryIfComplete() const;
|
||||
CV_WRAP bool queryIfComplete() const;
|
||||
|
||||
/** @brief Blocks the current CPU thread until all operations in the stream are complete.
|
||||
*/
|
||||
void waitForCompletion();
|
||||
CV_WRAP void waitForCompletion();
|
||||
|
||||
/** @brief Makes a compute stream wait on an event.
|
||||
*/
|
||||
void waitEvent(const Event& event);
|
||||
CV_WRAP void waitEvent(const Event& event);
|
||||
|
||||
/** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
|
||||
completed.
|
||||
@@ -533,7 +676,7 @@ public:
|
||||
void enqueueHostCallback(StreamCallback callback, void* userData);
|
||||
|
||||
//! return Stream object for default CUDA stream
|
||||
static Stream& Null();
|
||||
CV_WRAP static Stream& Null();
|
||||
|
||||
//! returns true if stream object is not default (!= 0)
|
||||
operator bool_type() const;
|
||||
@@ -549,7 +692,7 @@ private:
|
||||
friend class DefaultDeviceInitializer;
|
||||
};
|
||||
|
||||
class CV_EXPORTS Event
|
||||
class CV_EXPORTS_W Event
|
||||
{
|
||||
public:
|
||||
enum CreateFlags
|
||||
@@ -560,19 +703,19 @@ public:
|
||||
INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
|
||||
};
|
||||
|
||||
explicit Event(CreateFlags flags = DEFAULT);
|
||||
CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
|
||||
|
||||
//! records an event
|
||||
void record(Stream& stream = Stream::Null());
|
||||
CV_WRAP void record(Stream& stream = Stream::Null());
|
||||
|
||||
//! queries an event's status
|
||||
bool queryIfComplete() const;
|
||||
CV_WRAP bool queryIfComplete() const;
|
||||
|
||||
//! waits for an event to complete
|
||||
void waitForCompletion();
|
||||
CV_WRAP void waitForCompletion();
|
||||
|
||||
//! computes the elapsed time between events
|
||||
static float elapsedTime(const Event& start, const Event& end);
|
||||
CV_WRAP static float elapsedTime(const Event& start, const Event& end);
|
||||
|
||||
class Impl;
|
||||
|
||||
@@ -598,7 +741,7 @@ Use this function before any other CUDA functions calls. If OpenCV is compiled w
|
||||
this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
|
||||
returns -1.
|
||||
*/
|
||||
CV_EXPORTS int getCudaEnabledDeviceCount();
|
||||
CV_EXPORTS_W int getCudaEnabledDeviceCount();
|
||||
|
||||
/** @brief Sets a device and initializes it for the current thread.
|
||||
|
||||
@@ -606,18 +749,18 @@ CV_EXPORTS int getCudaEnabledDeviceCount();
|
||||
|
||||
If the call of this function is omitted, a default device is initialized at the fist CUDA usage.
|
||||
*/
|
||||
CV_EXPORTS void setDevice(int device);
|
||||
CV_EXPORTS_W void setDevice(int device);
|
||||
|
||||
/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
|
||||
*/
|
||||
CV_EXPORTS int getDevice();
|
||||
CV_EXPORTS_W int getDevice();
|
||||
|
||||
/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
|
||||
process.
|
||||
|
||||
Any subsequent API call to this device will reinitialize the device.
|
||||
*/
|
||||
CV_EXPORTS void resetDevice();
|
||||
CV_EXPORTS_W void resetDevice();
|
||||
|
||||
/** @brief Enumeration providing CUDA computing features.
|
||||
*/
|
||||
@@ -650,7 +793,7 @@ built for.
|
||||
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
|
||||
capability can always be compiled to binary code of greater or equal compute capability".
|
||||
*/
|
||||
class CV_EXPORTS TargetArchs
|
||||
class CV_EXPORTS_W TargetArchs
|
||||
{
|
||||
public:
|
||||
/** @brief The following method checks whether the module was built with the support of the given feature:
|
||||
@@ -665,23 +808,23 @@ public:
|
||||
@param major Major compute capability version.
|
||||
@param minor Minor compute capability version.
|
||||
*/
|
||||
static bool has(int major, int minor);
|
||||
static bool hasPtx(int major, int minor);
|
||||
static bool hasBin(int major, int minor);
|
||||
CV_WRAP static bool has(int major, int minor);
|
||||
CV_WRAP static bool hasPtx(int major, int minor);
|
||||
CV_WRAP static bool hasBin(int major, int minor);
|
||||
|
||||
static bool hasEqualOrLessPtx(int major, int minor);
|
||||
static bool hasEqualOrGreater(int major, int minor);
|
||||
static bool hasEqualOrGreaterPtx(int major, int minor);
|
||||
static bool hasEqualOrGreaterBin(int major, int minor);
|
||||
CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
|
||||
CV_WRAP static bool hasEqualOrGreater(int major, int minor);
|
||||
CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
|
||||
CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
|
||||
};
|
||||
|
||||
/** @brief Class providing functionality for querying the specified GPU properties.
|
||||
*/
|
||||
class CV_EXPORTS DeviceInfo
|
||||
class CV_EXPORTS_W DeviceInfo
|
||||
{
|
||||
public:
|
||||
//! creates DeviceInfo object for the current GPU
|
||||
DeviceInfo();
|
||||
CV_WRAP DeviceInfo();
|
||||
|
||||
/** @brief The constructors.
|
||||
|
||||
@@ -690,68 +833,68 @@ public:
|
||||
Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it
|
||||
constructs an object for the current device.
|
||||
*/
|
||||
DeviceInfo(int device_id);
|
||||
CV_WRAP DeviceInfo(int device_id);
|
||||
|
||||
/** @brief Returns system index of the CUDA device starting with 0.
|
||||
*/
|
||||
int deviceID() const;
|
||||
CV_WRAP int deviceID() const;
|
||||
|
||||
//! ASCII string identifying device
|
||||
const char* name() const;
|
||||
|
||||
//! global memory available on device in bytes
|
||||
size_t totalGlobalMem() const;
|
||||
CV_WRAP size_t totalGlobalMem() const;
|
||||
|
||||
//! shared memory available per block in bytes
|
||||
size_t sharedMemPerBlock() const;
|
||||
CV_WRAP size_t sharedMemPerBlock() const;
|
||||
|
||||
//! 32-bit registers available per block
|
||||
int regsPerBlock() const;
|
||||
CV_WRAP int regsPerBlock() const;
|
||||
|
||||
//! warp size in threads
|
||||
int warpSize() const;
|
||||
CV_WRAP int warpSize() const;
|
||||
|
||||
//! maximum pitch in bytes allowed by memory copies
|
||||
size_t memPitch() const;
|
||||
CV_WRAP size_t memPitch() const;
|
||||
|
||||
//! maximum number of threads per block
|
||||
int maxThreadsPerBlock() const;
|
||||
CV_WRAP int maxThreadsPerBlock() const;
|
||||
|
||||
//! maximum size of each dimension of a block
|
||||
Vec3i maxThreadsDim() const;
|
||||
CV_WRAP Vec3i maxThreadsDim() const;
|
||||
|
||||
//! maximum size of each dimension of a grid
|
||||
Vec3i maxGridSize() const;
|
||||
CV_WRAP Vec3i maxGridSize() const;
|
||||
|
||||
//! clock frequency in kilohertz
|
||||
int clockRate() const;
|
||||
CV_WRAP int clockRate() const;
|
||||
|
||||
//! constant memory available on device in bytes
|
||||
size_t totalConstMem() const;
|
||||
CV_WRAP size_t totalConstMem() const;
|
||||
|
||||
//! major compute capability
|
||||
int majorVersion() const;
|
||||
CV_WRAP int majorVersion() const;
|
||||
|
||||
//! minor compute capability
|
||||
int minorVersion() const;
|
||||
CV_WRAP int minorVersion() const;
|
||||
|
||||
//! alignment requirement for textures
|
||||
size_t textureAlignment() const;
|
||||
CV_WRAP size_t textureAlignment() const;
|
||||
|
||||
//! pitch alignment requirement for texture references bound to pitched memory
|
||||
size_t texturePitchAlignment() const;
|
||||
CV_WRAP size_t texturePitchAlignment() const;
|
||||
|
||||
//! number of multiprocessors on device
|
||||
int multiProcessorCount() const;
|
||||
CV_WRAP int multiProcessorCount() const;
|
||||
|
||||
//! specified whether there is a run time limit on kernels
|
||||
bool kernelExecTimeoutEnabled() const;
|
||||
CV_WRAP bool kernelExecTimeoutEnabled() const;
|
||||
|
||||
//! device is integrated as opposed to discrete
|
||||
bool integrated() const;
|
||||
CV_WRAP bool integrated() const;
|
||||
|
||||
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
|
||||
bool canMapHostMemory() const;
|
||||
CV_WRAP bool canMapHostMemory() const;
|
||||
|
||||
enum ComputeMode
|
||||
{
|
||||
@@ -762,108 +905,108 @@ public:
|
||||
};
|
||||
|
||||
//! compute mode
|
||||
ComputeMode computeMode() const;
|
||||
CV_WRAP DeviceInfo::ComputeMode computeMode() const;
|
||||
|
||||
//! maximum 1D texture size
|
||||
int maxTexture1D() const;
|
||||
CV_WRAP int maxTexture1D() const;
|
||||
|
||||
//! maximum 1D mipmapped texture size
|
||||
int maxTexture1DMipmap() const;
|
||||
CV_WRAP int maxTexture1DMipmap() const;
|
||||
|
||||
//! maximum size for 1D textures bound to linear memory
|
||||
int maxTexture1DLinear() const;
|
||||
CV_WRAP int maxTexture1DLinear() const;
|
||||
|
||||
//! maximum 2D texture dimensions
|
||||
Vec2i maxTexture2D() const;
|
||||
CV_WRAP Vec2i maxTexture2D() const;
|
||||
|
||||
//! maximum 2D mipmapped texture dimensions
|
||||
Vec2i maxTexture2DMipmap() const;
|
||||
CV_WRAP Vec2i maxTexture2DMipmap() const;
|
||||
|
||||
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
|
||||
Vec3i maxTexture2DLinear() const;
|
||||
CV_WRAP Vec3i maxTexture2DLinear() const;
|
||||
|
||||
//! maximum 2D texture dimensions if texture gather operations have to be performed
|
||||
Vec2i maxTexture2DGather() const;
|
||||
CV_WRAP Vec2i maxTexture2DGather() const;
|
||||
|
||||
//! maximum 3D texture dimensions
|
||||
Vec3i maxTexture3D() const;
|
||||
CV_WRAP Vec3i maxTexture3D() const;
|
||||
|
||||
//! maximum Cubemap texture dimensions
|
||||
int maxTextureCubemap() const;
|
||||
CV_WRAP int maxTextureCubemap() const;
|
||||
|
||||
//! maximum 1D layered texture dimensions
|
||||
Vec2i maxTexture1DLayered() const;
|
||||
CV_WRAP Vec2i maxTexture1DLayered() const;
|
||||
|
||||
//! maximum 2D layered texture dimensions
|
||||
Vec3i maxTexture2DLayered() const;
|
||||
CV_WRAP Vec3i maxTexture2DLayered() const;
|
||||
|
||||
//! maximum Cubemap layered texture dimensions
|
||||
Vec2i maxTextureCubemapLayered() const;
|
||||
CV_WRAP Vec2i maxTextureCubemapLayered() const;
|
||||
|
||||
//! maximum 1D surface size
|
||||
int maxSurface1D() const;
|
||||
CV_WRAP int maxSurface1D() const;
|
||||
|
||||
//! maximum 2D surface dimensions
|
||||
Vec2i maxSurface2D() const;
|
||||
CV_WRAP Vec2i maxSurface2D() const;
|
||||
|
||||
//! maximum 3D surface dimensions
|
||||
Vec3i maxSurface3D() const;
|
||||
CV_WRAP Vec3i maxSurface3D() const;
|
||||
|
||||
//! maximum 1D layered surface dimensions
|
||||
Vec2i maxSurface1DLayered() const;
|
||||
CV_WRAP Vec2i maxSurface1DLayered() const;
|
||||
|
||||
//! maximum 2D layered surface dimensions
|
||||
Vec3i maxSurface2DLayered() const;
|
||||
CV_WRAP Vec3i maxSurface2DLayered() const;
|
||||
|
||||
//! maximum Cubemap surface dimensions
|
||||
int maxSurfaceCubemap() const;
|
||||
CV_WRAP int maxSurfaceCubemap() const;
|
||||
|
||||
//! maximum Cubemap layered surface dimensions
|
||||
Vec2i maxSurfaceCubemapLayered() const;
|
||||
CV_WRAP Vec2i maxSurfaceCubemapLayered() const;
|
||||
|
||||
//! alignment requirements for surfaces
|
||||
size_t surfaceAlignment() const;
|
||||
CV_WRAP size_t surfaceAlignment() const;
|
||||
|
||||
//! device can possibly execute multiple kernels concurrently
|
||||
bool concurrentKernels() const;
|
||||
CV_WRAP bool concurrentKernels() const;
|
||||
|
||||
//! device has ECC support enabled
|
||||
bool ECCEnabled() const;
|
||||
CV_WRAP bool ECCEnabled() const;
|
||||
|
||||
//! PCI bus ID of the device
|
||||
int pciBusID() const;
|
||||
CV_WRAP int pciBusID() const;
|
||||
|
||||
//! PCI device ID of the device
|
||||
int pciDeviceID() const;
|
||||
CV_WRAP int pciDeviceID() const;
|
||||
|
||||
//! PCI domain ID of the device
|
||||
int pciDomainID() const;
|
||||
CV_WRAP int pciDomainID() const;
|
||||
|
||||
//! true if device is a Tesla device using TCC driver, false otherwise
|
||||
bool tccDriver() const;
|
||||
CV_WRAP bool tccDriver() const;
|
||||
|
||||
//! number of asynchronous engines
|
||||
int asyncEngineCount() const;
|
||||
CV_WRAP int asyncEngineCount() const;
|
||||
|
||||
//! device shares a unified address space with the host
|
||||
bool unifiedAddressing() const;
|
||||
CV_WRAP bool unifiedAddressing() const;
|
||||
|
||||
//! peak memory clock frequency in kilohertz
|
||||
int memoryClockRate() const;
|
||||
CV_WRAP int memoryClockRate() const;
|
||||
|
||||
//! global memory bus width in bits
|
||||
int memoryBusWidth() const;
|
||||
CV_WRAP int memoryBusWidth() const;
|
||||
|
||||
//! size of L2 cache in bytes
|
||||
int l2CacheSize() const;
|
||||
CV_WRAP int l2CacheSize() const;
|
||||
|
||||
//! maximum resident threads per multiprocessor
|
||||
int maxThreadsPerMultiProcessor() const;
|
||||
CV_WRAP int maxThreadsPerMultiProcessor() const;
|
||||
|
||||
//! gets free and total device memory
|
||||
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
|
||||
size_t freeMemory() const;
|
||||
size_t totalMemory() const;
|
||||
CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
|
||||
CV_WRAP size_t freeMemory() const;
|
||||
CV_WRAP size_t totalMemory() const;
|
||||
|
||||
/** @brief Provides information on CUDA feature support.
|
||||
|
||||
@@ -878,14 +1021,14 @@ public:
|
||||
This function returns true if the CUDA module can be run on the specified device. Otherwise, it
|
||||
returns false .
|
||||
*/
|
||||
bool isCompatible() const;
|
||||
CV_WRAP bool isCompatible() const;
|
||||
|
||||
private:
|
||||
int device_id_;
|
||||
};
|
||||
|
||||
CV_EXPORTS void printCudaDeviceInfo(int device);
|
||||
CV_EXPORTS void printShortCudaDeviceInfo(int device);
|
||||
CV_EXPORTS_W void printCudaDeviceInfo(int device);
|
||||
CV_EXPORTS_W void printShortCudaDeviceInfo(int device);
|
||||
|
||||
/** @brief Converts an array to half precision floating number.
|
||||
|
||||
211
lib/3rdParty/OpenCV/include/opencv2/core/cuda/block.hpp
vendored
Normal file
211
lib/3rdParty/OpenCV/include/opencv2/core/cuda/block.hpp
vendored
Normal file
@@ -0,0 +1,211 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
|
||||
#define OPENCV_CUDA_DEVICE_BLOCK_HPP
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct Block
|
||||
{
|
||||
static __device__ __forceinline__ unsigned int id()
|
||||
{
|
||||
return blockIdx.x;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int stride()
|
||||
{
|
||||
return blockDim.x * blockDim.y * blockDim.z;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ void sync()
|
||||
{
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int flattenedThreadId()
|
||||
{
|
||||
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
|
||||
}
|
||||
|
||||
template<typename It, typename T>
|
||||
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
It t = beg + flattenedThreadId();
|
||||
|
||||
for(; t < end; t += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
|
||||
template<typename OutIt, typename T>
|
||||
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
int tid = flattenedThreadId();
|
||||
value += tid;
|
||||
|
||||
for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt>
|
||||
static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
InIt t = beg + flattenedThreadId();
|
||||
OutIt o = out + (t - beg);
|
||||
|
||||
for(; t < end; t += STRIDE, o += STRIDE)
|
||||
*o = *t;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt, class UnOp>
|
||||
static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
InIt t = beg + flattenedThreadId();
|
||||
OutIt o = out + (t - beg);
|
||||
|
||||
for(; t < end; t += STRIDE, o += STRIDE)
|
||||
*o = op(*t);
|
||||
}
|
||||
|
||||
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
|
||||
static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
InIt1 t1 = beg1 + flattenedThreadId();
|
||||
InIt2 t2 = beg2 + flattenedThreadId();
|
||||
OutIt o = out + (t1 - beg1);
|
||||
|
||||
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
|
||||
*o = op(*t1, *t2);
|
||||
}
|
||||
|
||||
template<int CTA_SIZE, typename T, class BinOp>
|
||||
static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
|
||||
{
|
||||
int tid = flattenedThreadId();
|
||||
T val = buffer[tid];
|
||||
|
||||
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
|
||||
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
|
||||
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
|
||||
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
|
||||
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
|
||||
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
|
||||
}
|
||||
}
|
||||
|
||||
template<int CTA_SIZE, typename T, class BinOp>
|
||||
static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
|
||||
{
|
||||
int tid = flattenedThreadId();
|
||||
T val = buffer[tid] = init;
|
||||
__syncthreads();
|
||||
|
||||
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
|
||||
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
|
||||
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
|
||||
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
|
||||
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
|
||||
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
|
||||
}
|
||||
__syncthreads();
|
||||
return buffer[0];
|
||||
}
|
||||
|
||||
template <typename T, class BinOp>
|
||||
static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
|
||||
{
|
||||
int ftid = flattenedThreadId();
|
||||
int sft = stride();
|
||||
|
||||
if (sft < n)
|
||||
{
|
||||
for (unsigned int i = sft + ftid; i < n; i += sft)
|
||||
data[ftid] = op(data[ftid], data[i]);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
n = sft;
|
||||
}
|
||||
|
||||
while (n > 1)
|
||||
{
|
||||
unsigned int half = n/2;
|
||||
|
||||
if (ftid < half)
|
||||
data[ftid] = op(data[ftid], data[n - ftid - 1]);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
n = n - half;
|
||||
}
|
||||
}
|
||||
};
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */
|
||||
722
lib/3rdParty/OpenCV/include/opencv2/core/cuda/border_interpolate.hpp
vendored
Normal file
722
lib/3rdParty/OpenCV/include/opencv2/core/cuda/border_interpolate.hpp
vendored
Normal file
@@ -0,0 +1,722 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
|
||||
#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "vec_math.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdConstant
|
||||
|
||||
template <typename D> struct BrdRowConstant
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return x >= 0 ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return x < width ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
int width;
|
||||
D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColConstant
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
int height;
|
||||
D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdConstant
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||
}
|
||||
|
||||
int height;
|
||||
int width;
|
||||
D val;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdReplicate
|
||||
|
||||
template <typename D> struct BrdRowReplicate
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::max(x, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::min(x, last_col);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReplicate
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::max(y, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::min(y, last_row);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReplicate
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::max(y, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::min(y, last_row);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::max(x, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::min(x, last_col);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdReflect101
|
||||
|
||||
template <typename D> struct BrdRowReflect101
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::abs(x) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReflect101
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::abs(y) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReflect101
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::abs(y) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::abs(x) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdReflect
|
||||
|
||||
template <typename D> struct BrdRowReflect
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (::abs(x) - (x < 0)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_high(::abs(x) - (x < 0));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReflect
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (::abs(y) - (y < 0)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_high(::abs(y) - (y < 0));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReflect
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (::abs(y) - (y < 0)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (::abs(x) - (x < 0)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return (last_col - ::abs(last_col - x) + (x > last_col));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdWrap
|
||||
|
||||
template <typename D> struct BrdRowWrap
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return (x < width) * x + (x >= width) * (x % width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_high(idx_col_low(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int width;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColWrap
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return (y < height) * y + (y >= height) * (y % height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_high(idx_row_low(y));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int height;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdWrap
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
|
||||
height(height_), width(width_)
|
||||
{
|
||||
}
|
||||
template <typename U>
|
||||
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
|
||||
height(height_), width(width_)
|
||||
{
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (y >= 0) ? y : (y - ((y - height + 1) / height) * height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return (y < height) ? y : (y % height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_high(idx_row_low(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (x >= 0) ? x : (x - ((x - width + 1) / width) * width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return (x < width) ? x : (x % width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_high(idx_col_low(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int height;
|
||||
int width;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BorderReader
|
||||
|
||||
template <typename Ptr2D, typename B> struct BorderReader
|
||||
{
|
||||
typedef typename B::result_type elem_type;
|
||||
typedef typename Ptr2D::index_type index_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
|
||||
{
|
||||
return b.at(y, x, ptr);
|
||||
}
|
||||
|
||||
Ptr2D ptr;
|
||||
B b;
|
||||
};
|
||||
|
||||
// under win32 there is some bug with templated types that passed as kernel parameters
|
||||
// with this specialization all works fine
|
||||
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
|
||||
{
|
||||
typedef typename BrdConstant<D>::result_type elem_type;
|
||||
typedef typename Ptr2D::index_type index_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
|
||||
src(src_), height(b.height), width(b.width), val(b.val)
|
||||
{
|
||||
}
|
||||
|
||||
__device__ __forceinline__ D operator ()(index_type y, index_type x) const
|
||||
{
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
int height;
|
||||
int width;
|
||||
D val;
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP
|
||||
309
lib/3rdParty/OpenCV/include/opencv2/core/cuda/color.hpp
vendored
Normal file
309
lib/3rdParty/OpenCV/include/opencv2/core/cuda/color.hpp
vendored
Normal file
@@ -0,0 +1,309 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_COLOR_HPP
|
||||
#define OPENCV_CUDA_COLOR_HPP
|
||||
|
||||
#include "detail/color_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
|
||||
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
|
||||
// {
|
||||
// typedef ... functor_type;
|
||||
// static __host__ __device__ functor_type create_functor();
|
||||
// };
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_COLOR_HPP
|
||||
109
lib/3rdParty/OpenCV/include/opencv2/core/cuda/common.hpp
vendored
Normal file
109
lib/3rdParty/OpenCV/include/opencv2/core/cuda/common.hpp
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_COMMON_HPP
|
||||
#define OPENCV_CUDA_COMMON_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "opencv2/core/cuda_types.hpp"
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/base.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
#ifndef CV_PI_F
|
||||
#ifndef CV_PI
|
||||
#define CV_PI_F 3.14159265f
|
||||
#else
|
||||
#define CV_PI_F ((float)CV_PI)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
|
||||
{
|
||||
if (cudaSuccess != err)
|
||||
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
|
||||
}
|
||||
}}
|
||||
|
||||
#ifndef cudaSafeCall
|
||||
#define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
|
||||
#endif
|
||||
|
||||
namespace cv { namespace cuda
|
||||
{
|
||||
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
|
||||
{
|
||||
return reinterpret_cast<size_t>(ptr) % size == 0;
|
||||
}
|
||||
|
||||
static inline bool isAligned(size_t step, size_t size)
|
||||
{
|
||||
return step % size == 0;
|
||||
}
|
||||
}}
|
||||
|
||||
namespace cv { namespace cuda
|
||||
{
|
||||
namespace device
|
||||
{
|
||||
__host__ __device__ __forceinline__ int divUp(int total, int grain)
|
||||
{
|
||||
return (total + grain - 1) / grain;
|
||||
}
|
||||
|
||||
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
|
||||
{
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||
}
|
||||
}
|
||||
}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_COMMON_HPP
|
||||
113
lib/3rdParty/OpenCV/include/opencv2/core/cuda/datamov_utils.hpp
vendored
Normal file
113
lib/3rdParty/OpenCV/include/opencv2/core/cuda/datamov_utils.hpp
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
|
||||
#define OPENCV_CUDA_DATAMOV_UTILS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
|
||||
|
||||
// for Fermi memory space is detected automatically
|
||||
template <typename T> struct ForceGlob
|
||||
{
|
||||
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
|
||||
};
|
||||
|
||||
#else // __CUDA_ARCH__ >= 200
|
||||
|
||||
#if defined(_WIN64) || defined(__LP64__)
|
||||
// 64-bit register modifier for inlined asm
|
||||
#define OPENCV_CUDA_ASM_PTR "l"
|
||||
#else
|
||||
// 32-bit register modifier for inlined asm
|
||||
#define OPENCV_CUDA_ASM_PTR "r"
|
||||
#endif
|
||||
|
||||
template<class T> struct ForceGlob;
|
||||
|
||||
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
|
||||
template <> struct ForceGlob<base_type> \
|
||||
{ \
|
||||
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||
{ \
|
||||
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
|
||||
template <> struct ForceGlob<base_type> \
|
||||
{ \
|
||||
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||
{ \
|
||||
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)
|
||||
|
||||
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
|
||||
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
|
||||
#undef OPENCV_CUDA_ASM_PTR
|
||||
|
||||
#endif // __CUDA_ARCH__ >= 200
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
|
||||
2018
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/color_detail.hpp
vendored
Normal file
2018
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/color_detail.hpp
vendored
Normal file
File diff suppressed because one or more lines are too long
365
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/reduce.hpp
vendored
Normal file
365
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/reduce.hpp
vendored
Normal file
@@ -0,0 +1,365 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
|
||||
#define OPENCV_CUDA_REDUCE_DETAIL_HPP
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "../warp.hpp"
|
||||
#include "../warp_shuffle.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace reduce_detail
|
||||
{
|
||||
template <typename T> struct GetType;
|
||||
template <typename T> struct GetType<T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<volatile T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<T&>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
template <unsigned int I, unsigned int N>
|
||||
struct For
|
||||
{
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(smem)[tid] = thrust::get<I>(val);
|
||||
|
||||
For<I + 1, N>::loadToSmem(smem, val, tid);
|
||||
}
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(val) = thrust::get<I>(smem)[tid];
|
||||
|
||||
For<I + 1, N>::loadFromSmem(smem, val, tid);
|
||||
}
|
||||
|
||||
template <class PointerTuple, class ValTuple, class OpTuple>
|
||||
static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
|
||||
thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
|
||||
|
||||
For<I + 1, N>::merge(smem, val, tid, delta, op);
|
||||
}
|
||||
template <class ValTuple, class OpTuple>
|
||||
static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
|
||||
thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
|
||||
|
||||
For<I + 1, N>::mergeShfl(val, delta, width, op);
|
||||
}
|
||||
};
|
||||
template <unsigned int N>
|
||||
struct For<N, N>
|
||||
{
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
|
||||
template <class PointerTuple, class ValTuple, class OpTuple>
|
||||
static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
|
||||
{
|
||||
}
|
||||
template <class ValTuple, class OpTuple>
|
||||
static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
|
||||
{
|
||||
smem[tid] = val;
|
||||
}
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
|
||||
{
|
||||
val = smem[tid];
|
||||
}
|
||||
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
|
||||
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
|
||||
}
|
||||
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
|
||||
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
|
||||
}
|
||||
|
||||
template <typename T, class Op>
|
||||
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
|
||||
{
|
||||
T reg = smem[tid + delta];
|
||||
smem[tid] = val = op(val, reg);
|
||||
}
|
||||
template <typename T, class Op>
|
||||
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
|
||||
{
|
||||
T reg = shfl_down(val, delta, width);
|
||||
val = op(val, reg);
|
||||
}
|
||||
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||
__device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid,
|
||||
unsigned int delta,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
|
||||
}
|
||||
template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int delta,
|
||||
unsigned int width,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
|
||||
}
|
||||
|
||||
template <unsigned int N> struct Generic
|
||||
{
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
loadToSmem(smem, val, tid);
|
||||
if (N >= 32)
|
||||
__syncthreads();
|
||||
|
||||
if (N >= 2048)
|
||||
{
|
||||
if (tid < 1024)
|
||||
merge(smem, val, tid, 1024, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 1024)
|
||||
{
|
||||
if (tid < 512)
|
||||
merge(smem, val, tid, 512, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 512)
|
||||
{
|
||||
if (tid < 256)
|
||||
merge(smem, val, tid, 256, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 256)
|
||||
{
|
||||
if (tid < 128)
|
||||
merge(smem, val, tid, 128, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 128)
|
||||
{
|
||||
if (tid < 64)
|
||||
merge(smem, val, tid, 64, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 64)
|
||||
{
|
||||
if (tid < 32)
|
||||
merge(smem, val, tid, 32, op);
|
||||
}
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
merge(smem, val, tid, 16, op);
|
||||
merge(smem, val, tid, 8, op);
|
||||
merge(smem, val, tid, 4, op);
|
||||
merge(smem, val, tid, 2, op);
|
||||
merge(smem, val, tid, 1, op);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int I, typename Pointer, typename Reference, class Op>
|
||||
struct Unroll
|
||||
{
|
||||
static __device__ void loopShfl(Reference val, Op op, unsigned int N)
|
||||
{
|
||||
mergeShfl(val, I, N, op);
|
||||
Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
|
||||
}
|
||||
static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
merge(smem, val, tid, I, op);
|
||||
Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
}
|
||||
};
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
struct Unroll<0, Pointer, Reference, Op>
|
||||
{
|
||||
static __device__ void loopShfl(Reference, Op, unsigned int)
|
||||
{
|
||||
}
|
||||
static __device__ void loop(Pointer, Reference, unsigned int, Op)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct WarpOptimized
|
||||
{
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
CV_UNUSED(smem);
|
||||
CV_UNUSED(tid);
|
||||
|
||||
Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
|
||||
#else
|
||||
loadToSmem(smem, val, tid);
|
||||
|
||||
if (tid < N / 2)
|
||||
Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct GenericOptimized32
|
||||
{
|
||||
enum { M = N / 32 };
|
||||
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
const unsigned int laneId = Warp::laneId();
|
||||
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
|
||||
|
||||
if (laneId == 0)
|
||||
loadToSmem(smem, val, tid / 32);
|
||||
#else
|
||||
loadToSmem(smem, val, tid);
|
||||
|
||||
if (laneId < 16)
|
||||
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (laneId == 0)
|
||||
loadToSmem(smem, val, tid / 32);
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
loadFromSmem(smem, val, tid);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
|
||||
#else
|
||||
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool val, class T1, class T2> struct StaticIf;
|
||||
template <class T1, class T2> struct StaticIf<true, T1, T2>
|
||||
{
|
||||
typedef T1 type;
|
||||
};
|
||||
template <class T1, class T2> struct StaticIf<false, T1, T2>
|
||||
{
|
||||
typedef T2 type;
|
||||
};
|
||||
|
||||
template <unsigned int N> struct IsPowerOf2
|
||||
{
|
||||
enum { value = ((N != 0) && !(N & (N - 1))) };
|
||||
};
|
||||
|
||||
template <unsigned int N> struct Dispatcher
|
||||
{
|
||||
typedef typename StaticIf<
|
||||
(N <= 32) && IsPowerOf2<N>::value,
|
||||
WarpOptimized<N>,
|
||||
typename StaticIf<
|
||||
(N <= 1024) && IsPowerOf2<N>::value,
|
||||
GenericOptimized32<N>,
|
||||
Generic<N>
|
||||
>::type
|
||||
>::type reductor;
|
||||
};
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
|
||||
502
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/reduce_key_val.hpp
vendored
Normal file
502
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/reduce_key_val.hpp
vendored
Normal file
@@ -0,0 +1,502 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
|
||||
#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "../warp.hpp"
|
||||
#include "../warp_shuffle.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace reduce_key_val_detail
|
||||
{
|
||||
template <typename T> struct GetType;
|
||||
template <typename T> struct GetType<T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<volatile T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<T&>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
template <unsigned int I, unsigned int N>
|
||||
struct For
|
||||
{
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(smem)[tid] = thrust::get<I>(data);
|
||||
|
||||
For<I + 1, N>::loadToSmem(smem, data, tid);
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(data) = thrust::get<I>(smem)[tid];
|
||||
|
||||
For<I + 1, N>::loadFromSmem(smem, data, tid);
|
||||
}
|
||||
|
||||
template <class ReferenceTuple>
|
||||
static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
|
||||
{
|
||||
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
|
||||
|
||||
For<I + 1, N>::copyShfl(val, delta, width);
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
|
||||
|
||||
For<I + 1, N>::copy(svals, val, tid, delta);
|
||||
}
|
||||
|
||||
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
|
||||
|
||||
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
|
||||
{
|
||||
thrust::get<I>(key) = reg;
|
||||
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
|
||||
}
|
||||
|
||||
For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
|
||||
}
|
||||
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
|
||||
const ValPointerTuple& svals, const ValReferenceTuple& val,
|
||||
const CmpTuple& cmp,
|
||||
unsigned int tid, unsigned int delta)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
|
||||
|
||||
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
|
||||
{
|
||||
thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
|
||||
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
|
||||
}
|
||||
|
||||
For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
|
||||
}
|
||||
};
|
||||
template <unsigned int N>
|
||||
struct For<N, N>
|
||||
{
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
|
||||
template <class ReferenceTuple>
|
||||
static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
|
||||
{
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
|
||||
{
|
||||
}
|
||||
|
||||
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
|
||||
{
|
||||
}
|
||||
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
|
||||
const ValPointerTuple&, const ValReferenceTuple&,
|
||||
const CmpTuple&,
|
||||
unsigned int, unsigned int)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// loadToSmem
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
|
||||
{
|
||||
smem[tid] = data;
|
||||
}
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
|
||||
{
|
||||
data = smem[tid];
|
||||
}
|
||||
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
|
||||
}
|
||||
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// copyVals
|
||||
|
||||
template <typename V>
|
||||
__device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
|
||||
{
|
||||
val = shfl_down(val, delta, width);
|
||||
}
|
||||
template <typename V>
|
||||
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
svals[tid] = val = svals[tid + delta];
|
||||
}
|
||||
template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int delta,
|
||||
int width)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
|
||||
}
|
||||
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int tid, unsigned int delta)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// merge
|
||||
|
||||
template <typename K, typename V, class Cmp>
|
||||
__device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
|
||||
{
|
||||
K reg = shfl_down(key, delta, width);
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
key = reg;
|
||||
copyValsShfl(val, delta, width);
|
||||
}
|
||||
}
|
||||
template <typename K, typename V, class Cmp>
|
||||
__device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
K reg = skeys[tid + delta];
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
skeys[tid] = key = reg;
|
||||
copyVals(svals, val, tid, delta);
|
||||
}
|
||||
}
|
||||
template <typename K,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp>
|
||||
__device__ __forceinline__ void mergeShfl(K& key,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const Cmp& cmp,
|
||||
unsigned int delta, int width)
|
||||
{
|
||||
K reg = shfl_down(key, delta, width);
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
key = reg;
|
||||
copyValsShfl(val, delta, width);
|
||||
}
|
||||
}
|
||||
template <typename K,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp>
|
||||
__device__ __forceinline__ void merge(volatile K* skeys, K& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const Cmp& cmp, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
K reg = skeys[tid + delta];
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
skeys[tid] = key = reg;
|
||||
copyVals(svals, val, tid, delta);
|
||||
}
|
||||
}
|
||||
template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||
__device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
|
||||
unsigned int delta, int width)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
|
||||
}
|
||||
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
|
||||
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||
__device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
|
||||
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
|
||||
unsigned int tid, unsigned int delta)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Generic
|
||||
|
||||
template <unsigned int N> struct Generic
|
||||
{
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
loadToSmem(skeys, key, tid);
|
||||
loadValsToSmem(svals, val, tid);
|
||||
if (N >= 32)
|
||||
__syncthreads();
|
||||
|
||||
if (N >= 2048)
|
||||
{
|
||||
if (tid < 1024)
|
||||
merge(skeys, key, svals, val, cmp, tid, 1024);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 1024)
|
||||
{
|
||||
if (tid < 512)
|
||||
merge(skeys, key, svals, val, cmp, tid, 512);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 512)
|
||||
{
|
||||
if (tid < 256)
|
||||
merge(skeys, key, svals, val, cmp, tid, 256);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 256)
|
||||
{
|
||||
if (tid < 128)
|
||||
merge(skeys, key, svals, val, cmp, tid, 128);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 128)
|
||||
{
|
||||
if (tid < 64)
|
||||
merge(skeys, key, svals, val, cmp, tid, 64);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 64)
|
||||
{
|
||||
if (tid < 32)
|
||||
merge(skeys, key, svals, val, cmp, tid, 32);
|
||||
}
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
merge(skeys, key, svals, val, cmp, tid, 16);
|
||||
merge(skeys, key, svals, val, cmp, tid, 8);
|
||||
merge(skeys, key, svals, val, cmp, tid, 4);
|
||||
merge(skeys, key, svals, val, cmp, tid, 2);
|
||||
merge(skeys, key, svals, val, cmp, tid, 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
|
||||
struct Unroll
|
||||
{
|
||||
static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
|
||||
{
|
||||
mergeShfl(key, val, cmp, I, N);
|
||||
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
|
||||
}
|
||||
static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
merge(skeys, key, svals, val, cmp, tid, I);
|
||||
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
};
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
struct Unroll<0, KP, KR, VP, VR, Cmp>
|
||||
{
|
||||
static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
|
||||
{
|
||||
}
|
||||
static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct WarpOptimized
|
||||
{
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
#if 0 // __CUDA_ARCH__ >= 300
|
||||
CV_UNUSED(skeys);
|
||||
CV_UNUSED(svals);
|
||||
CV_UNUSED(tid);
|
||||
|
||||
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
|
||||
#else
|
||||
loadToSmem(skeys, key, tid);
|
||||
loadToSmem(svals, val, tid);
|
||||
|
||||
if (tid < N / 2)
|
||||
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct GenericOptimized32
|
||||
{
|
||||
enum { M = N / 32 };
|
||||
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
const unsigned int laneId = Warp::laneId();
|
||||
|
||||
#if 0 // __CUDA_ARCH__ >= 300
|
||||
Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
|
||||
|
||||
if (laneId == 0)
|
||||
{
|
||||
loadToSmem(skeys, key, tid / 32);
|
||||
loadToSmem(svals, val, tid / 32);
|
||||
}
|
||||
#else
|
||||
loadToSmem(skeys, key, tid);
|
||||
loadToSmem(svals, val, tid);
|
||||
|
||||
if (laneId < 16)
|
||||
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (laneId == 0)
|
||||
{
|
||||
loadToSmem(skeys, key, tid / 32);
|
||||
loadToSmem(svals, val, tid / 32);
|
||||
}
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
loadFromSmem(skeys, key, tid);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
#if 0 // __CUDA_ARCH__ >= 300
|
||||
loadFromSmem(svals, val, tid);
|
||||
|
||||
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
|
||||
#else
|
||||
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool val, class T1, class T2> struct StaticIf;
|
||||
template <class T1, class T2> struct StaticIf<true, T1, T2>
|
||||
{
|
||||
typedef T1 type;
|
||||
};
|
||||
template <class T1, class T2> struct StaticIf<false, T1, T2>
|
||||
{
|
||||
typedef T2 type;
|
||||
};
|
||||
|
||||
template <unsigned int N> struct IsPowerOf2
|
||||
{
|
||||
enum { value = ((N != 0) && !(N & (N - 1))) };
|
||||
};
|
||||
|
||||
template <unsigned int N> struct Dispatcher
|
||||
{
|
||||
typedef typename StaticIf<
|
||||
(N <= 32) && IsPowerOf2<N>::value,
|
||||
WarpOptimized<N>,
|
||||
typename StaticIf<
|
||||
(N <= 1024) && IsPowerOf2<N>::value,
|
||||
GenericOptimized32<N>,
|
||||
Generic<N>
|
||||
>::type
|
||||
>::type reductor;
|
||||
};
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
|
||||
392
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/transform_detail.hpp
vendored
Normal file
392
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/transform_detail.hpp
vendored
Normal file
@@ -0,0 +1,392 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
|
||||
#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../vec_traits.hpp"
|
||||
#include "../functional.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace transform_detail
|
||||
{
|
||||
//! Read Write Traits
|
||||
|
||||
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
|
||||
{
|
||||
typedef typename TypeVec<T, shift>::vec_type read_type;
|
||||
typedef typename TypeVec<D, shift>::vec_type write_type;
|
||||
};
|
||||
|
||||
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
|
||||
{
|
||||
typedef typename TypeVec<T1, shift>::vec_type read_type1;
|
||||
typedef typename TypeVec<T2, shift>::vec_type read_type2;
|
||||
typedef typename TypeVec<D, shift>::vec_type write_type;
|
||||
};
|
||||
|
||||
//! Transform kernels
|
||||
|
||||
template <int shift> struct OpUnroller;
|
||||
template <> struct OpUnroller<1>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<2>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<3>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src.z);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src1.z, src2.z);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<4>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src.z);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.w = op(src.w);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src1.z, src2.z);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.w = op(src1.w, src2.w);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<8>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.a0 = op(src.a0);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.a1 = op(src.a1);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.a2 = op(src.a2);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.a3 = op(src.a3);
|
||||
if (mask(y, x_shifted + 4))
|
||||
dst.a4 = op(src.a4);
|
||||
if (mask(y, x_shifted + 5))
|
||||
dst.a5 = op(src.a5);
|
||||
if (mask(y, x_shifted + 6))
|
||||
dst.a6 = op(src.a6);
|
||||
if (mask(y, x_shifted + 7))
|
||||
dst.a7 = op(src.a7);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.a0 = op(src1.a0, src2.a0);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.a1 = op(src1.a1, src2.a1);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.a2 = op(src1.a2, src2.a2);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.a3 = op(src1.a3, src2.a3);
|
||||
if (mask(y, x_shifted + 4))
|
||||
dst.a4 = op(src1.a4, src2.a4);
|
||||
if (mask(y, x_shifted + 5))
|
||||
dst.a5 = op(src1.a5, src2.a5);
|
||||
if (mask(y, x_shifted + 6))
|
||||
dst.a6 = op(src1.a6, src2.a6);
|
||||
if (mask(y, x_shifted + 7))
|
||||
dst.a7 = op(src1.a7, src2.a7);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
|
||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * ft::smart_shift;
|
||||
|
||||
if (y < src_.rows)
|
||||
{
|
||||
const T* src = src_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + ft::smart_shift - 1 < src_.cols)
|
||||
{
|
||||
const read_type src_n_el = ((const read_type*)src)[x];
|
||||
OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < src.cols && y < src.rows && mask(y, x))
|
||||
{
|
||||
dst.ptr(y)[x] = op(src.ptr(y)[x]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
|
||||
const Mask mask, const BinOp op)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * ft::smart_shift;
|
||||
|
||||
if (y < src1_.rows)
|
||||
{
|
||||
const T1* src1 = src1_.ptr(y);
|
||||
const T2* src2 = src2_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
|
||||
{
|
||||
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
|
||||
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
|
||||
|
||||
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src1[real_x], src2[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
|
||||
const Mask mask, const BinOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < src1.cols && y < src1.rows && mask(y, x))
|
||||
{
|
||||
const T1 src1_data = src1.ptr(y)[x];
|
||||
const T2 src2_data = src2.ptr(y)[x];
|
||||
dst.ptr(y)[x] = op(src1_data, src2_data);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool UseSmart> struct TransformDispatcher;
|
||||
template<> struct TransformDispatcher<false>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
|
||||
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
|
||||
|
||||
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
|
||||
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
|
||||
|
||||
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
template<> struct TransformDispatcher<true>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
|
||||
CV_StaticAssert(ft::smart_shift != 1, "");
|
||||
|
||||
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
|
||||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
|
||||
{
|
||||
TransformDispatcher<false>::call(src, dst, op, mask, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
|
||||
|
||||
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
|
||||
CV_StaticAssert(ft::smart_shift != 1, "");
|
||||
|
||||
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
|
||||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
|
||||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
|
||||
{
|
||||
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
|
||||
|
||||
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
} // namespace transform_detail
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
|
||||
191
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/type_traits_detail.hpp
vendored
Normal file
191
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/type_traits_detail.hpp
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
|
||||
#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../vec_traits.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace type_traits_detail
|
||||
{
|
||||
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
|
||||
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
|
||||
|
||||
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
|
||||
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
|
||||
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
|
||||
template <> struct IsIntegral<char> { enum {value = 1}; };
|
||||
template <> struct IsIntegral<bool> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsFloat { enum {value = 0}; };
|
||||
template <> struct IsFloat<float> { enum {value = 1}; };
|
||||
template <> struct IsFloat<double> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsVec { enum {value = 0}; };
|
||||
template <> struct IsVec<uchar1> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar2> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar3> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar4> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar8> { enum {value = 1}; };
|
||||
template <> struct IsVec<char1> { enum {value = 1}; };
|
||||
template <> struct IsVec<char2> { enum {value = 1}; };
|
||||
template <> struct IsVec<char3> { enum {value = 1}; };
|
||||
template <> struct IsVec<char4> { enum {value = 1}; };
|
||||
template <> struct IsVec<char8> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort1> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort2> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort3> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort4> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort8> { enum {value = 1}; };
|
||||
template <> struct IsVec<short1> { enum {value = 1}; };
|
||||
template <> struct IsVec<short2> { enum {value = 1}; };
|
||||
template <> struct IsVec<short3> { enum {value = 1}; };
|
||||
template <> struct IsVec<short4> { enum {value = 1}; };
|
||||
template <> struct IsVec<short8> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint1> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint2> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint3> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint4> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint8> { enum {value = 1}; };
|
||||
template <> struct IsVec<int1> { enum {value = 1}; };
|
||||
template <> struct IsVec<int2> { enum {value = 1}; };
|
||||
template <> struct IsVec<int3> { enum {value = 1}; };
|
||||
template <> struct IsVec<int4> { enum {value = 1}; };
|
||||
template <> struct IsVec<int8> { enum {value = 1}; };
|
||||
template <> struct IsVec<float1> { enum {value = 1}; };
|
||||
template <> struct IsVec<float2> { enum {value = 1}; };
|
||||
template <> struct IsVec<float3> { enum {value = 1}; };
|
||||
template <> struct IsVec<float4> { enum {value = 1}; };
|
||||
template <> struct IsVec<float8> { enum {value = 1}; };
|
||||
template <> struct IsVec<double1> { enum {value = 1}; };
|
||||
template <> struct IsVec<double2> { enum {value = 1}; };
|
||||
template <> struct IsVec<double3> { enum {value = 1}; };
|
||||
template <> struct IsVec<double4> { enum {value = 1}; };
|
||||
template <> struct IsVec<double8> { enum {value = 1}; };
|
||||
|
||||
template <class U> struct AddParameterType { typedef const U& type; };
|
||||
template <class U> struct AddParameterType<U&> { typedef U& type; };
|
||||
template <> struct AddParameterType<void> { typedef void type; };
|
||||
|
||||
template <class U> struct ReferenceTraits
|
||||
{
|
||||
enum { value = false };
|
||||
typedef U type;
|
||||
};
|
||||
template <class U> struct ReferenceTraits<U&>
|
||||
{
|
||||
enum { value = true };
|
||||
typedef U type;
|
||||
};
|
||||
|
||||
template <class U> struct PointerTraits
|
||||
{
|
||||
enum { value = false };
|
||||
typedef void type;
|
||||
};
|
||||
template <class U> struct PointerTraits<U*>
|
||||
{
|
||||
enum { value = true };
|
||||
typedef U type;
|
||||
};
|
||||
template <class U> struct PointerTraits<U*&>
|
||||
{
|
||||
enum { value = true };
|
||||
typedef U type;
|
||||
};
|
||||
|
||||
template <class U> struct UnConst
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 0 };
|
||||
};
|
||||
template <class U> struct UnConst<const U>
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
template <class U> struct UnConst<const U&>
|
||||
{
|
||||
typedef U& type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
|
||||
template <class U> struct UnVolatile
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 0 };
|
||||
};
|
||||
template <class U> struct UnVolatile<volatile U>
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
template <class U> struct UnVolatile<volatile U&>
|
||||
{
|
||||
typedef U& type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
} // namespace type_traits_detail
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
|
||||
121
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
vendored
Normal file
121
lib/3rdParty/OpenCV/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
|
||||
#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
|
||||
|
||||
#include "../datamov_utils.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace vec_distance_detail
|
||||
{
|
||||
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
|
||||
{
|
||||
if (ind < len)
|
||||
{
|
||||
T1 val1 = *vecCached++;
|
||||
|
||||
T2 val2;
|
||||
ForceGlob<T2>::Load(vecGlob, ind, val2);
|
||||
|
||||
dist.reduceIter(val1, val2);
|
||||
|
||||
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
|
||||
{
|
||||
T1 val1 = *vecCached++;
|
||||
|
||||
T2 val2;
|
||||
ForceGlob<T2>::Load(vecGlob, 0, val2);
|
||||
vecGlob += THREAD_DIM;
|
||||
|
||||
dist.reduceIter(val1, val2);
|
||||
|
||||
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
|
||||
}
|
||||
};
|
||||
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
|
||||
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
|
||||
{
|
||||
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
|
||||
}
|
||||
};
|
||||
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
|
||||
{
|
||||
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
|
||||
}
|
||||
};
|
||||
} // namespace vec_distance_detail
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
|
||||
88
lib/3rdParty/OpenCV/include/opencv2/core/cuda/dynamic_smem.hpp
vendored
Normal file
88
lib/3rdParty/OpenCV/include/opencv2/core/cuda/dynamic_smem.hpp
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
|
||||
#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<class T> struct DynamicSharedMem
|
||||
{
|
||||
__device__ __forceinline__ operator T*()
|
||||
{
|
||||
extern __shared__ int __smem[];
|
||||
return (T*)__smem;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator const T*() const
|
||||
{
|
||||
extern __shared__ int __smem[];
|
||||
return (T*)__smem;
|
||||
}
|
||||
};
|
||||
|
||||
// specialize for double to avoid unaligned memory access compile errors
|
||||
template<> struct DynamicSharedMem<double>
|
||||
{
|
||||
__device__ __forceinline__ operator double*()
|
||||
{
|
||||
extern __shared__ double __smem_d[];
|
||||
return (double*)__smem_d;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator const double*() const
|
||||
{
|
||||
extern __shared__ double __smem_d[];
|
||||
return (double*)__smem_d;
|
||||
}
|
||||
};
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP
|
||||
269
lib/3rdParty/OpenCV/include/opencv2/core/cuda/emulation.hpp
vendored
Normal file
269
lib/3rdParty/OpenCV/include/opencv2/core/cuda/emulation.hpp
vendored
Normal file
@@ -0,0 +1,269 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_EMULATION_HPP_
|
||||
#define OPENCV_CUDA_EMULATION_HPP_
|
||||
|
||||
#include "common.hpp"
|
||||
#include "warp_reduce.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct Emulation
|
||||
{
|
||||
|
||||
static __device__ __forceinline__ int syncthreadsOr(int pred)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
|
||||
// just campilation stab
|
||||
return 0;
|
||||
#else
|
||||
return __syncthreads_or(pred);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int CTA_SIZE>
|
||||
static __forceinline__ __device__ int Ballot(int predicate)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||
return __ballot(predicate);
|
||||
#else
|
||||
__shared__ volatile int cta_buffer[CTA_SIZE];
|
||||
|
||||
int tid = threadIdx.x;
|
||||
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
|
||||
return warp_reduce(cta_buffer);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct smem
|
||||
{
|
||||
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
|
||||
|
||||
template<typename T>
|
||||
static __device__ __forceinline__ T atomicInc(T* address, T val)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
T count;
|
||||
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
|
||||
do
|
||||
{
|
||||
count = *address & TAG_MASK;
|
||||
count = tag | (count + 1);
|
||||
*address = count;
|
||||
} while (*address != count);
|
||||
|
||||
return (count & TAG_MASK) - 1;
|
||||
#else
|
||||
return ::atomicInc(address, val);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __device__ __forceinline__ T atomicAdd(T* address, T val)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
T count;
|
||||
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
|
||||
do
|
||||
{
|
||||
count = *address & TAG_MASK;
|
||||
count = tag | (count + val);
|
||||
*address = count;
|
||||
} while (*address != count);
|
||||
|
||||
return (count & TAG_MASK) - val;
|
||||
#else
|
||||
return ::atomicAdd(address, val);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __device__ __forceinline__ T atomicMin(T* address, T val)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
T count = ::min(*address, val);
|
||||
do
|
||||
{
|
||||
*address = count;
|
||||
} while (*address > count);
|
||||
|
||||
return count;
|
||||
#else
|
||||
return ::atomicMin(address, val);
|
||||
#endif
|
||||
}
|
||||
}; // struct cmem
|
||||
|
||||
struct glob
|
||||
{
|
||||
static __device__ __forceinline__ int atomicAdd(int* address, int val)
|
||||
{
|
||||
return ::atomicAdd(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return ::atomicAdd(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicAdd(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
return ::atomicAdd(address, val);
|
||||
#else
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(val + __int_as_float(assumed)));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicAdd(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(val + __longlong_as_double(assumed)));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
CV_UNUSED(address);
|
||||
CV_UNUSED(val);
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int atomicMin(int* address, int val)
|
||||
{
|
||||
return ::atomicMin(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicMin(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 120
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(::fminf(val, __int_as_float(assumed))));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#else
|
||||
CV_UNUSED(address);
|
||||
CV_UNUSED(val);
|
||||
return 0.0f;
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicMin(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
CV_UNUSED(address);
|
||||
CV_UNUSED(val);
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int atomicMax(int* address, int val)
|
||||
{
|
||||
return ::atomicMax(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicMax(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 120
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#else
|
||||
CV_UNUSED(address);
|
||||
CV_UNUSED(val);
|
||||
return 0.0f;
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicMax(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
CV_UNUSED(address);
|
||||
CV_UNUSED(val);
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}; //struct Emulation
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_EMULATION_HPP_ */
|
||||
286
lib/3rdParty/OpenCV/include/opencv2/core/cuda/filters.hpp
vendored
Normal file
286
lib/3rdParty/OpenCV/include/opencv2/core/cuda/filters.hpp
vendored
Normal file
@@ -0,0 +1,286 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_FILTERS_HPP
|
||||
#define OPENCV_CUDA_FILTERS_HPP
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "vec_math.hpp"
|
||||
#include "type_traits.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename Ptr2D> struct PointFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
CV_UNUSED(fx);
|
||||
CV_UNUSED(fy);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
return src(__float2int_rz(y), __float2int_rz(x));
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct LinearFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
CV_UNUSED(fx);
|
||||
CV_UNUSED(fy);
|
||||
}
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
|
||||
work_type out = VecTraits<work_type>::all(0);
|
||||
|
||||
const int x1 = __float2int_rd(x);
|
||||
const int y1 = __float2int_rd(y);
|
||||
const int x2 = x1 + 1;
|
||||
const int y2 = y1 + 1;
|
||||
|
||||
elem_type src_reg = src(y1, x1);
|
||||
out = out + src_reg * ((x2 - x) * (y2 - y));
|
||||
|
||||
src_reg = src(y1, x2);
|
||||
out = out + src_reg * ((x - x1) * (y2 - y));
|
||||
|
||||
src_reg = src(y2, x1);
|
||||
out = out + src_reg * ((x2 - x) * (y - y1));
|
||||
|
||||
src_reg = src(y2, x2);
|
||||
out = out + src_reg * ((x - x1) * (y - y1));
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct CubicFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
CV_UNUSED(fx);
|
||||
CV_UNUSED(fy);
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ float bicubicCoeff(float x_)
|
||||
{
|
||||
float x = fabsf(x_);
|
||||
if (x <= 1.0f)
|
||||
{
|
||||
return x * x * (1.5f * x - 2.5f) + 1.0f;
|
||||
}
|
||||
else if (x < 2.0f)
|
||||
{
|
||||
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
const float xmin = ::ceilf(x - 2.0f);
|
||||
const float xmax = ::floorf(x + 2.0f);
|
||||
|
||||
const float ymin = ::ceilf(y - 2.0f);
|
||||
const float ymax = ::floorf(y + 2.0f);
|
||||
|
||||
work_type sum = VecTraits<work_type>::all(0);
|
||||
float wsum = 0.0f;
|
||||
|
||||
for (float cy = ymin; cy <= ymax; cy += 1.0f)
|
||||
{
|
||||
for (float cx = xmin; cx <= xmax; cx += 1.0f)
|
||||
{
|
||||
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
|
||||
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
|
||||
wsum += w;
|
||||
}
|
||||
}
|
||||
|
||||
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
|
||||
|
||||
return saturate_cast<elem_type>(res);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
};
|
||||
// for integer scaling
|
||||
template <typename Ptr2D> struct IntegerAreaFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
float fsx1 = x * scale_x;
|
||||
float fsx2 = fsx1 + scale_x;
|
||||
|
||||
int sx1 = __float2int_ru(fsx1);
|
||||
int sx2 = __float2int_rd(fsx2);
|
||||
|
||||
float fsy1 = y * scale_y;
|
||||
float fsy2 = fsy1 + scale_y;
|
||||
|
||||
int sy1 = __float2int_ru(fsy1);
|
||||
int sy2 = __float2int_rd(fsy2);
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
work_type out = VecTraits<work_type>::all(0.f);
|
||||
|
||||
for(int dy = sy1; dy < sy2; ++dy)
|
||||
for(int dx = sx1; dx < sx2; ++dx)
|
||||
{
|
||||
out = out + src(dy, dx) * scale;
|
||||
}
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
float scale_x, scale_y ,scale;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct AreaFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
float fsx1 = x * scale_x;
|
||||
float fsx2 = fsx1 + scale_x;
|
||||
|
||||
int sx1 = __float2int_ru(fsx1);
|
||||
int sx2 = __float2int_rd(fsx2);
|
||||
|
||||
float fsy1 = y * scale_y;
|
||||
float fsy2 = fsy1 + scale_y;
|
||||
|
||||
int sy1 = __float2int_ru(fsy1);
|
||||
int sy2 = __float2int_rd(fsy2);
|
||||
|
||||
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
work_type out = VecTraits<work_type>::all(0.f);
|
||||
|
||||
for (int dy = sy1; dy < sy2; ++dy)
|
||||
{
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src(dy, dx) * scale;
|
||||
|
||||
if (sx1 > fsx1)
|
||||
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
|
||||
|
||||
if (sx2 < fsx2)
|
||||
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
|
||||
}
|
||||
|
||||
if (sy1 > fsy1)
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
|
||||
|
||||
if (sy2 < fsy2)
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
|
||||
|
||||
if ((sy1 > fsy1) && (sx1 > fsx1))
|
||||
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
|
||||
|
||||
if ((sy1 > fsy1) && (sx2 < fsx2))
|
||||
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
|
||||
|
||||
if ((sy2 < fsy2) && (sx2 < fsx2))
|
||||
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
|
||||
|
||||
if ((sy2 < fsy2) && (sx1 > fsx1))
|
||||
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
float scale_x, scale_y;
|
||||
int width, haight;
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_FILTERS_HPP
|
||||
@@ -40,34 +40,40 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_OLD_CV_H
|
||||
#define OPENCV_OLD_CV_H
|
||||
#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
|
||||
#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define CV_DO_PRAGMA(x) __pragma(x)
|
||||
#define __CVSTR2__(x) #x
|
||||
#define __CVSTR1__(x) __CVSTR2__(x)
|
||||
#define __CVMSVCLOC__ __FILE__ "("__CVSTR1__(__LINE__)") : "
|
||||
#define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (__CVMSVCLOC__ _msg))
|
||||
#elif defined(__GNUC__)
|
||||
#define CV_DO_PRAGMA(x) _Pragma (#x)
|
||||
#define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (_msg))
|
||||
#else
|
||||
#define CV_DO_PRAGMA(x)
|
||||
#define CV_MSG_PRAGMA(_msg)
|
||||
#endif
|
||||
#define CV_WARNING(x) CV_MSG_PRAGMA("Warning: " #x)
|
||||
#include <cstdio>
|
||||
|
||||
//CV_WARNING("This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module")
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
#include "opencv2/core/core_c.h"
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
#include "opencv2/photo/photo_c.h"
|
||||
#include "opencv2/video/tracking_c.h"
|
||||
#include "opencv2/objdetect/objdetect_c.h"
|
||||
//! @cond IGNORED
|
||||
|
||||
#if !defined(CV_IMPL)
|
||||
#define CV_IMPL extern "C"
|
||||
#endif //CV_IMPL
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<class Func>
|
||||
void printFuncAttrib(Func& func)
|
||||
{
|
||||
|
||||
#endif // __OPENCV_OLD_CV_H_
|
||||
cudaFuncAttributes attrs;
|
||||
cudaFuncGetAttributes(&attrs, func);
|
||||
|
||||
printf("=== Function stats ===\n");
|
||||
printf("Name: \n");
|
||||
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
|
||||
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
|
||||
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
|
||||
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
|
||||
printf("numRegs = %d\n", attrs.numRegs);
|
||||
printf("ptxVersion = %d\n", attrs.ptxVersion);
|
||||
printf("binaryVersion = %d\n", attrs.binaryVersion);
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */
|
||||
805
lib/3rdParty/OpenCV/include/opencv2/core/cuda/functional.hpp
vendored
Normal file
805
lib/3rdParty/OpenCV/include/opencv2/core/cuda/functional.hpp
vendored
Normal file
@@ -0,0 +1,805 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
|
||||
#define OPENCV_CUDA_FUNCTIONAL_HPP
|
||||
|
||||
#include <functional>
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "type_traits.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// Function Objects
|
||||
template<typename Argument, typename Result> struct unary_function
|
||||
{
|
||||
typedef Argument argument_type;
|
||||
typedef Result result_type;
|
||||
};
|
||||
template<typename Argument1, typename Argument2, typename Result> struct binary_function
|
||||
{
|
||||
typedef Argument1 first_argument_type;
|
||||
typedef Argument2 second_argument_type;
|
||||
typedef Result result_type;
|
||||
};
|
||||
|
||||
// Arithmetic Operations
|
||||
template <typename T> struct plus : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ plus() {}
|
||||
__host__ __device__ __forceinline__ plus(const plus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct minus : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a - b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ minus() {}
|
||||
__host__ __device__ __forceinline__ minus(const minus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct multiplies : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a * b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ multiplies() {}
|
||||
__host__ __device__ __forceinline__ multiplies(const multiplies&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct divides : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a / b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ divides() {}
|
||||
__host__ __device__ __forceinline__ divides(const divides&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct modulus : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a % b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ modulus() {}
|
||||
__host__ __device__ __forceinline__ modulus(const modulus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct negate : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||
{
|
||||
return -a;
|
||||
}
|
||||
__host__ __device__ __forceinline__ negate() {}
|
||||
__host__ __device__ __forceinline__ negate(const negate&) {}
|
||||
};
|
||||
|
||||
// Comparison Operations
|
||||
template <typename T> struct equal_to : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ equal_to() {}
|
||||
__host__ __device__ __forceinline__ equal_to(const equal_to&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct not_equal_to : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a != b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ not_equal_to() {}
|
||||
__host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct greater : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a > b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ greater() {}
|
||||
__host__ __device__ __forceinline__ greater(const greater&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct less : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a < b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ less() {}
|
||||
__host__ __device__ __forceinline__ less(const less&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct greater_equal : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a >= b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ greater_equal() {}
|
||||
__host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct less_equal : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a <= b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ less_equal() {}
|
||||
__host__ __device__ __forceinline__ less_equal(const less_equal&) {}
|
||||
};
|
||||
|
||||
// Logical Operations
|
||||
template <typename T> struct logical_and : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a && b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ logical_and() {}
|
||||
__host__ __device__ __forceinline__ logical_and(const logical_and&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct logical_or : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a || b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ logical_or() {}
|
||||
__host__ __device__ __forceinline__ logical_or(const logical_or&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct logical_not : unary_function<T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||
{
|
||||
return !a;
|
||||
}
|
||||
__host__ __device__ __forceinline__ logical_not() {}
|
||||
__host__ __device__ __forceinline__ logical_not(const logical_not&) {}
|
||||
};
|
||||
|
||||
// Bitwise Operations
|
||||
template <typename T> struct bit_and : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a & b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_and() {}
|
||||
__host__ __device__ __forceinline__ bit_and(const bit_and&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_or : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a | b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_or() {}
|
||||
__host__ __device__ __forceinline__ bit_or(const bit_or&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_xor : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a ^ b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_xor() {}
|
||||
__host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_not : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||
{
|
||||
return ~v;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_not() {}
|
||||
__host__ __device__ __forceinline__ bit_not(const bit_not&) {}
|
||||
};
|
||||
|
||||
// Generalized Identity Operations
|
||||
template <typename T> struct identity : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
__host__ __device__ __forceinline__ identity() {}
|
||||
__host__ __device__ __forceinline__ identity(const identity&) {}
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
|
||||
{
|
||||
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||
{
|
||||
return lhs;
|
||||
}
|
||||
__host__ __device__ __forceinline__ project1st() {}
|
||||
__host__ __device__ __forceinline__ project1st(const project1st&) {}
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
|
||||
{
|
||||
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||
{
|
||||
return rhs;
|
||||
}
|
||||
__host__ __device__ __forceinline__ project2nd() {}
|
||||
__host__ __device__ __forceinline__ project2nd(const project2nd&) {}
|
||||
};
|
||||
|
||||
// Min/Max Operations
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
|
||||
template <> struct name<type> : binary_function<type, type, type> \
|
||||
{ \
|
||||
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
|
||||
__host__ __device__ __forceinline__ name() {}\
|
||||
__host__ __device__ __forceinline__ name(const name&) {}\
|
||||
};
|
||||
|
||||
template <typename T> struct maximum : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
|
||||
{
|
||||
return max(lhs, rhs);
|
||||
}
|
||||
__host__ __device__ __forceinline__ maximum() {}
|
||||
__host__ __device__ __forceinline__ maximum(const maximum&) {}
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
|
||||
|
||||
template <typename T> struct minimum : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
|
||||
{
|
||||
return min(lhs, rhs);
|
||||
}
|
||||
__host__ __device__ __forceinline__ minimum() {}
|
||||
__host__ __device__ __forceinline__ minimum(const minimum&) {}
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
|
||||
|
||||
// Math functions
|
||||
|
||||
template <typename T> struct abs_func : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
|
||||
{
|
||||
return abs(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
|
||||
{
|
||||
__device__ __forceinline__ unsigned char operator ()(unsigned char x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
|
||||
{
|
||||
__device__ __forceinline__ signed char operator ()(signed char x) const
|
||||
{
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<char> : unary_function<char, char>
|
||||
{
|
||||
__device__ __forceinline__ char operator ()(char x) const
|
||||
{
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
|
||||
{
|
||||
__device__ __forceinline__ unsigned short operator ()(unsigned short x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<short> : unary_function<short, short>
|
||||
{
|
||||
__device__ __forceinline__ short operator ()(short x) const
|
||||
{
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
|
||||
{
|
||||
__device__ __forceinline__ unsigned int operator ()(unsigned int x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<int> : unary_function<int, int>
|
||||
{
|
||||
__device__ __forceinline__ int operator ()(int x) const
|
||||
{
|
||||
return ::abs(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<float> : unary_function<float, float>
|
||||
{
|
||||
__device__ __forceinline__ float operator ()(float x) const
|
||||
{
|
||||
return ::fabsf(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<double> : unary_function<double, double>
|
||||
{
|
||||
__device__ __forceinline__ double operator ()(double x) const
|
||||
{
|
||||
return ::fabs(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
|
||||
template <typename T> struct name ## _func : unary_function<T, float> \
|
||||
{ \
|
||||
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
|
||||
{ \
|
||||
return func ## f(v); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
}; \
|
||||
template <> struct name ## _func<double> : unary_function<double, double> \
|
||||
{ \
|
||||
__device__ __forceinline__ double operator ()(double v) const \
|
||||
{ \
|
||||
return func(v); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
};
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
|
||||
template <typename T> struct name ## _func : binary_function<T, T, float> \
|
||||
{ \
|
||||
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
|
||||
{ \
|
||||
return func ## f(v1, v2); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
}; \
|
||||
template <> struct name ## _func<double> : binary_function<double, double, double> \
|
||||
{ \
|
||||
__device__ __forceinline__ double operator ()(double v1, double v2) const \
|
||||
{ \
|
||||
return func(v1, v2); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
|
||||
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
|
||||
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
|
||||
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
|
||||
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
|
||||
|
||||
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
|
||||
{
|
||||
return src1 * src1 + src2 * src2;
|
||||
}
|
||||
__host__ __device__ __forceinline__ hypot_sqr_func() {}
|
||||
__host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
|
||||
};
|
||||
|
||||
// Saturate Cast Functor
|
||||
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||
{
|
||||
return saturate_cast<D>(v);
|
||||
}
|
||||
__host__ __device__ __forceinline__ saturate_cast_func() {}
|
||||
__host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
|
||||
};
|
||||
|
||||
// Threshold Functors
|
||||
template <typename T> struct thresh_binary_func : unary_function<T, T>
|
||||
{
|
||||
__host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src > thresh) * maxVal;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_binary_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
|
||||
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||
|
||||
T thresh;
|
||||
T maxVal;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
|
||||
{
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src <= thresh) * maxVal;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
|
||||
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||
|
||||
T thresh;
|
||||
T maxVal;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_trunc_func : unary_function<T, T>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return minimum<T>()(src, thresh);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_trunc_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
T thresh;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src > thresh) * src;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
T thresh;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src <= thresh) * src;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
T thresh;
|
||||
};
|
||||
|
||||
// Function Object Adaptors
|
||||
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
|
||||
|
||||
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
|
||||
{
|
||||
return !pred(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ unary_negate() {}
|
||||
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
|
||||
|
||||
Predicate pred;
|
||||
};
|
||||
|
||||
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
|
||||
{
|
||||
return unary_negate<Predicate>(pred);
|
||||
}
|
||||
|
||||
template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
|
||||
|
||||
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
|
||||
typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
|
||||
{
|
||||
return !pred(x,y);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ binary_negate() {}
|
||||
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
|
||||
|
||||
Predicate pred;
|
||||
};
|
||||
|
||||
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
|
||||
{
|
||||
return binary_negate<BinaryPredicate>(pred);
|
||||
}
|
||||
|
||||
template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
|
||||
{
|
||||
__host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
|
||||
|
||||
__device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
|
||||
{
|
||||
return op(arg1, a);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ binder1st() {}
|
||||
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
|
||||
|
||||
Op op;
|
||||
typename Op::first_argument_type arg1;
|
||||
};
|
||||
|
||||
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
|
||||
{
|
||||
return binder1st<Op>(op, typename Op::first_argument_type(x));
|
||||
}
|
||||
|
||||
template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
|
||||
{
|
||||
__host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
|
||||
|
||||
__forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
|
||||
{
|
||||
return op(a, arg2);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ binder2nd() {}
|
||||
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
|
||||
|
||||
Op op;
|
||||
typename Op::second_argument_type arg2;
|
||||
};
|
||||
|
||||
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
|
||||
{
|
||||
return binder2nd<Op>(op, typename Op::second_argument_type(x));
|
||||
}
|
||||
|
||||
// Functor Traits
|
||||
template <typename F> struct IsUnaryFunction
|
||||
{
|
||||
typedef char Yes;
|
||||
struct No {Yes a[2];};
|
||||
|
||||
template <typename T, typename D> static Yes check(unary_function<T, D>);
|
||||
static No check(...);
|
||||
|
||||
static F makeF();
|
||||
|
||||
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
|
||||
};
|
||||
|
||||
template <typename F> struct IsBinaryFunction
|
||||
{
|
||||
typedef char Yes;
|
||||
struct No {Yes a[2];};
|
||||
|
||||
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
|
||||
static No check(...);
|
||||
|
||||
static F makeF();
|
||||
|
||||
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
|
||||
};
|
||||
|
||||
namespace functional_detail
|
||||
{
|
||||
template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
|
||||
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
|
||||
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
|
||||
|
||||
template <typename T, typename D> struct DefaultUnaryShift
|
||||
{
|
||||
enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
|
||||
};
|
||||
|
||||
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
|
||||
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
|
||||
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
|
||||
|
||||
template <typename T1, typename T2, typename D> struct DefaultBinaryShift
|
||||
{
|
||||
enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
|
||||
};
|
||||
|
||||
template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
|
||||
template <typename Func> struct ShiftDispatcher<Func, true>
|
||||
{
|
||||
enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
|
||||
};
|
||||
template <typename Func> struct ShiftDispatcher<Func, false>
|
||||
{
|
||||
enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
|
||||
};
|
||||
}
|
||||
|
||||
template <typename Func> struct DefaultTransformShift
|
||||
{
|
||||
enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
|
||||
};
|
||||
|
||||
template <typename Func> struct DefaultTransformFunctorTraits
|
||||
{
|
||||
enum { simple_block_dim_x = 16 };
|
||||
enum { simple_block_dim_y = 16 };
|
||||
|
||||
enum { smart_block_dim_x = 16 };
|
||||
enum { smart_block_dim_y = 16 };
|
||||
enum { smart_shift = DefaultTransformShift<Func>::shift };
|
||||
};
|
||||
|
||||
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
|
||||
|
||||
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
|
||||
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_FUNCTIONAL_HPP
|
||||
128
lib/3rdParty/OpenCV/include/opencv2/core/cuda/limits.hpp
vendored
Normal file
128
lib/3rdParty/OpenCV/include/opencv2/core/cuda/limits.hpp
vendored
Normal file
@@ -0,0 +1,128 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_LIMITS_HPP
|
||||
#define OPENCV_CUDA_LIMITS_HPP
|
||||
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <class T> struct numeric_limits;
|
||||
|
||||
template <> struct numeric_limits<bool>
|
||||
{
|
||||
__device__ __forceinline__ static bool min() { return false; }
|
||||
__device__ __forceinline__ static bool max() { return true; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<signed char>
|
||||
{
|
||||
__device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
|
||||
__device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<unsigned char>
|
||||
{
|
||||
__device__ __forceinline__ static unsigned char min() { return 0; }
|
||||
__device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<short>
|
||||
{
|
||||
__device__ __forceinline__ static short min() { return SHRT_MIN; }
|
||||
__device__ __forceinline__ static short max() { return SHRT_MAX; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<unsigned short>
|
||||
{
|
||||
__device__ __forceinline__ static unsigned short min() { return 0; }
|
||||
__device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<int>
|
||||
{
|
||||
__device__ __forceinline__ static int min() { return INT_MIN; }
|
||||
__device__ __forceinline__ static int max() { return INT_MAX; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<unsigned int>
|
||||
{
|
||||
__device__ __forceinline__ static unsigned int min() { return 0; }
|
||||
__device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<float>
|
||||
{
|
||||
__device__ __forceinline__ static float min() { return FLT_MIN; }
|
||||
__device__ __forceinline__ static float max() { return FLT_MAX; }
|
||||
__device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<double>
|
||||
{
|
||||
__device__ __forceinline__ static double min() { return DBL_MIN; }
|
||||
__device__ __forceinline__ static double max() { return DBL_MAX; }
|
||||
__device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev {
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_LIMITS_HPP
|
||||
209
lib/3rdParty/OpenCV/include/opencv2/core/cuda/reduce.hpp
vendored
Normal file
209
lib/3rdParty/OpenCV/include/opencv2/core/cuda/reduce.hpp
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_REDUCE_HPP
|
||||
#define OPENCV_CUDA_REDUCE_HPP
|
||||
|
||||
#ifndef THRUST_DEBUG // eliminate -Wundef warning
|
||||
#define THRUST_DEBUG 0
|
||||
#endif
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "detail/reduce.hpp"
|
||||
#include "detail/reduce_key_val.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <int N, typename T, class Op>
|
||||
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
|
||||
{
|
||||
reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
|
||||
}
|
||||
template <int N,
|
||||
typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||
{
|
||||
reduce_detail::Dispatcher<N>::reductor::template reduce<
|
||||
const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
|
||||
}
|
||||
|
||||
template <unsigned int N, typename K, typename V, class Cmp>
|
||||
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
|
||||
{
|
||||
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
template <unsigned int N,
|
||||
typename K,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp>
|
||||
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int tid, const Cmp& cmp)
|
||||
{
|
||||
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
|
||||
const Cmp&>(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
template <unsigned int N,
|
||||
typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
|
||||
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
|
||||
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int tid,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
|
||||
{
|
||||
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
|
||||
const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
|
||||
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
|
||||
>(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
|
||||
// smem_tuple
|
||||
|
||||
template <typename T0>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*>
|
||||
smem_tuple(T0* t0)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*>
|
||||
smem_tuple(T0* t0, T1* t1)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_REDUCE_HPP
|
||||
292
lib/3rdParty/OpenCV/include/opencv2/core/cuda/saturate_cast.hpp
vendored
Normal file
292
lib/3rdParty/OpenCV/include/opencv2/core/cuda/saturate_cast.hpp
vendored
Normal file
@@ -0,0 +1,292 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
|
||||
#define OPENCV_CUDA_SATURATE_CAST_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
|
||||
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
|
||||
{
|
||||
uint res = 0;
|
||||
int vi = v;
|
||||
asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<uchar>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
|
||||
{
|
||||
uint res = 0;
|
||||
uint vi = v;
|
||||
asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<schar>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
|
||||
{
|
||||
ushort res = 0;
|
||||
int vi = v;
|
||||
asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
ushort res = 0;
|
||||
asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<ushort>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
short res = 0;
|
||||
asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<short>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
|
||||
{
|
||||
int res = 0;
|
||||
asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
|
||||
{
|
||||
return __float2int_rn(v);
|
||||
}
|
||||
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
return __double2int_rn(v);
|
||||
#else
|
||||
return saturate_cast<int>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
|
||||
{
|
||||
uint res = 0;
|
||||
int vi = v;
|
||||
asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
|
||||
{
|
||||
return __float2uint_rn(v);
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
return __double2uint_rn(v);
|
||||
#else
|
||||
return saturate_cast<uint>((float)v);
|
||||
#endif
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */
|
||||
258
lib/3rdParty/OpenCV/include/opencv2/core/cuda/scan.hpp
vendored
Normal file
258
lib/3rdParty/OpenCV/include/opencv2/core/cuda/scan.hpp
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_SCAN_HPP
|
||||
#define OPENCV_CUDA_SCAN_HPP
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
#include "opencv2/core/cuda/warp.hpp"
|
||||
#include "opencv2/core/cuda/warp_shuffle.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
|
||||
|
||||
template <ScanKind Kind, typename T, typename F> struct WarpScan
|
||||
{
|
||||
__device__ __forceinline__ WarpScan() {}
|
||||
__device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
|
||||
|
||||
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
|
||||
{
|
||||
const unsigned int lane = idx & 31;
|
||||
F op;
|
||||
|
||||
if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
|
||||
if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
|
||||
if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
|
||||
if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
|
||||
if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
|
||||
|
||||
if( Kind == INCLUSIVE )
|
||||
return ptr [idx];
|
||||
else
|
||||
return (lane > 0) ? ptr [idx - 1] : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ unsigned int index(const unsigned int tid)
|
||||
{
|
||||
return tid;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void init(volatile T *ptr){}
|
||||
|
||||
static const int warp_offset = 0;
|
||||
|
||||
typedef WarpScan<INCLUSIVE, T, F> merge;
|
||||
};
|
||||
|
||||
template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
|
||||
{
|
||||
__device__ __forceinline__ WarpScanNoComp() {}
|
||||
__device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
|
||||
|
||||
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
|
||||
{
|
||||
const unsigned int lane = threadIdx.x & 31;
|
||||
F op;
|
||||
|
||||
ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
|
||||
|
||||
if( Kind == INCLUSIVE )
|
||||
return ptr [idx];
|
||||
else
|
||||
return (lane > 0) ? ptr [idx - 1] : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ unsigned int index(const unsigned int tid)
|
||||
{
|
||||
return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void init(volatile T *ptr)
|
||||
{
|
||||
ptr[threadIdx.x] = 0;
|
||||
}
|
||||
|
||||
static const int warp_smem_stride = 32 + 16 + 1;
|
||||
static const int warp_offset = 16;
|
||||
static const int warp_log = 5;
|
||||
static const int warp_mask = 31;
|
||||
|
||||
typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
|
||||
};
|
||||
|
||||
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
|
||||
{
|
||||
__device__ __forceinline__ BlockScan() {}
|
||||
__device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
|
||||
|
||||
__device__ __forceinline__ T operator()(volatile T *ptr)
|
||||
{
|
||||
const unsigned int tid = threadIdx.x;
|
||||
const unsigned int lane = tid & warp_mask;
|
||||
const unsigned int warp = tid >> warp_log;
|
||||
|
||||
Sc scan;
|
||||
typename Sc::merge merge_scan;
|
||||
const unsigned int idx = scan.index(tid);
|
||||
|
||||
T val = scan(ptr, idx);
|
||||
__syncthreads ();
|
||||
|
||||
if( warp == 0)
|
||||
scan.init(ptr);
|
||||
__syncthreads ();
|
||||
|
||||
if( lane == 31 )
|
||||
ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
|
||||
__syncthreads ();
|
||||
|
||||
if( warp == 0 )
|
||||
merge_scan(ptr, idx);
|
||||
__syncthreads();
|
||||
|
||||
if ( warp > 0)
|
||||
val = ptr [scan.warp_offset + warp - 1] + val;
|
||||
__syncthreads ();
|
||||
|
||||
ptr[idx] = val;
|
||||
__syncthreads ();
|
||||
|
||||
return val ;
|
||||
}
|
||||
|
||||
static const int warp_log = 5;
|
||||
static const int warp_mask = 31;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
const unsigned int laneId = cv::cuda::device::Warp::laneId();
|
||||
|
||||
// scan on shuffl functions
|
||||
#pragma unroll
|
||||
for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
|
||||
{
|
||||
const T n = cv::cuda::device::shfl_up(idata, i);
|
||||
if (laneId >= i)
|
||||
idata += n;
|
||||
}
|
||||
|
||||
return idata;
|
||||
#else
|
||||
unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
|
||||
s_Data[pos] = 0;
|
||||
pos += OPENCV_CUDA_WARP_SIZE;
|
||||
s_Data[pos] = idata;
|
||||
|
||||
s_Data[pos] += s_Data[pos - 1];
|
||||
s_Data[pos] += s_Data[pos - 2];
|
||||
s_Data[pos] += s_Data[pos - 4];
|
||||
s_Data[pos] += s_Data[pos - 8];
|
||||
s_Data[pos] += s_Data[pos - 16];
|
||||
|
||||
return s_Data[pos];
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
|
||||
{
|
||||
return warpScanInclusive(idata, s_Data, tid) - idata;
|
||||
}
|
||||
|
||||
template <int tiNumScanThreads, typename T>
|
||||
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
|
||||
{
|
||||
if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
|
||||
{
|
||||
//Bottom-level inclusive warp scan
|
||||
T warpResult = warpScanInclusive(idata, s_Data, tid);
|
||||
|
||||
//Save top elements of each warp for exclusive warp scan
|
||||
//sync to wait for warp scans to complete (because s_Data is being overwritten)
|
||||
__syncthreads();
|
||||
if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
|
||||
{
|
||||
s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
|
||||
}
|
||||
|
||||
//wait for warp scans to complete
|
||||
__syncthreads();
|
||||
|
||||
if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
|
||||
{
|
||||
//grab top warp elements
|
||||
T val = s_Data[tid];
|
||||
//calculate exclusive scan and write back to shared memory
|
||||
s_Data[tid] = warpScanExclusive(val, s_Data, tid);
|
||||
}
|
||||
|
||||
//return updated warp scans with exclusive scan results
|
||||
__syncthreads();
|
||||
|
||||
return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
|
||||
}
|
||||
else
|
||||
{
|
||||
return warpScanInclusive(idata, s_Data, tid);
|
||||
}
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_SCAN_HPP
|
||||
869
lib/3rdParty/OpenCV/include/opencv2/core/cuda/simd_functions.hpp
vendored
Normal file
869
lib/3rdParty/OpenCV/include/opencv2/core/cuda/simd_functions.hpp
vendored
Normal file
@@ -0,0 +1,869 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the name of NVIDIA Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
|
||||
#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// 2
|
||||
|
||||
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = a ^ b; // sum bits
|
||||
r = a + b; // actual sum
|
||||
s = s ^ r; // determine carry-ins for each bit position
|
||||
s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
|
||||
r = r - s; // subtract out carry-out from low word
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = a ^ b; // sum bits
|
||||
r = a - b; // actual sum
|
||||
s = s ^ r; // determine carry-ins for each bit position
|
||||
s = s & 0x00010000; // borrow to high word
|
||||
r = r + s; // compensate for borrow from low word
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t, u, v;
|
||||
s = a & 0x0000ffff; // extract low halfword
|
||||
r = b & 0x0000ffff; // extract low halfword
|
||||
u = ::max(r, s); // maximum of low halfwords
|
||||
v = ::min(r, s); // minimum of low halfwords
|
||||
s = a & 0xffff0000; // extract high halfword
|
||||
r = b & 0xffff0000; // extract high halfword
|
||||
t = ::max(r, s); // maximum of high halfwords
|
||||
s = ::min(r, s); // minimum of high halfwords
|
||||
r = u | t; // maximum of both halfwords
|
||||
s = v | s; // minimum of both halfwords
|
||||
r = r - s; // |a - b| = max(a,b) - min(a,b);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, s;
|
||||
|
||||
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
|
||||
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
|
||||
s = a ^ b;
|
||||
r = a & b;
|
||||
s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
|
||||
s = s >> 1;
|
||||
s = r + s;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
|
||||
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
|
||||
unsigned int s;
|
||||
s = a ^ b;
|
||||
r = a | b;
|
||||
s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
|
||||
s = s >> 1;
|
||||
r = r - s;
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r & ~c; // msb = 1, if r was 0x0000
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vseteq2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r & ~c; // msb = 1, if r was 0x0000
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetge2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msbs = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetgt2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msbs = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetle2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetlt2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r | c; // msb = 1, if r was not 0x0000
|
||||
c = c & 0x80008000; // extract msbs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetne2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r | c; // msb = 1, if r was not 0x0000
|
||||
c = c & 0x80008000; // extract msbs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t, u;
|
||||
r = a & 0x0000ffff; // extract low halfword
|
||||
s = b & 0x0000ffff; // extract low halfword
|
||||
t = ::max(r, s); // maximum of low halfwords
|
||||
r = a & 0xffff0000; // extract high halfword
|
||||
s = b & 0xffff0000; // extract high halfword
|
||||
u = ::max(r, s); // maximum of high halfwords
|
||||
r = t | u; // combine halfword maximums
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t, u;
|
||||
r = a & 0x0000ffff; // extract low halfword
|
||||
s = b & 0x0000ffff; // extract low halfword
|
||||
t = ::min(r, s); // minimum of low halfwords
|
||||
r = a & 0xffff0000; // extract high halfword
|
||||
s = b & 0xffff0000; // extract high halfword
|
||||
u = ::min(r, s); // minimum of high halfwords
|
||||
r = t | u; // combine halfword minimums
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// 4
|
||||
|
||||
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t;
|
||||
s = a ^ b; // sum bits
|
||||
r = a & 0x7f7f7f7f; // clear msbs
|
||||
t = b & 0x7f7f7f7f; // clear msbs
|
||||
s = s & 0x80808080; // msb sum bits
|
||||
r = r + t; // add without msbs, record carry-out in msbs
|
||||
r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out
|
||||
#endif /* __CUDA_ARCH__ >= 300 */
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t;
|
||||
s = a ^ ~b; // inverted sum bits
|
||||
r = a | 0x80808080; // set msbs
|
||||
t = b & 0x7f7f7f7f; // clear msbs
|
||||
s = s & 0x80808080; // inverted msb sum bits
|
||||
r = r - t; // subtract w/o msbs, record inverted borrows in msb
|
||||
r = r ^ s; // combine inverted msb sum bits and borrows
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, s;
|
||||
|
||||
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
|
||||
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
|
||||
s = a ^ b;
|
||||
r = a & b;
|
||||
s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
|
||||
s = s >> 1;
|
||||
s = r + s;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
|
||||
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
|
||||
unsigned int c;
|
||||
c = a ^ b;
|
||||
r = a | b;
|
||||
c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
|
||||
c = c >> 1;
|
||||
r = r - c;
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x00 if a == b
|
||||
c = r | 0x80808080; // set msbs, to catch carry out
|
||||
r = r ^ c; // extract msbs, msb = 1 if r < 0x80
|
||||
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||
c = r & ~c; // msb = 1, if r was 0x00
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, t;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vseteq4(a, b);
|
||||
t = r << 8; // convert bool
|
||||
r = t - r; // to mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
t = a ^ b; // 0x00 if a == b
|
||||
r = t | 0x80808080; // set msbs, to catch carry out
|
||||
t = t ^ r; // extract msbs, msb = 1 if t < 0x80
|
||||
r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
|
||||
r = t & ~r; // msb = 1, if t was 0x00
|
||||
t = r >> 7; // build mask
|
||||
t = r - t; // from
|
||||
r = t | r; // msbs
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetle4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80808080; // msbs = carry-outs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetlt4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msbs = carry-outs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, s;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetge4(a, b);
|
||||
s = r << 8; // convert bool
|
||||
r = s - r; // to mask
|
||||
#else
|
||||
asm ("not.b32 %0,%0;" : "+r"(b));
|
||||
r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
r = r & 0x80808080; // msb = carry-outs
|
||||
s = r >> 7; // build mask
|
||||
s = r - s; // from
|
||||
r = s | r; // msbs
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetgt4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x00 if a == b
|
||||
c = r | 0x80808080; // set msbs, to catch carry out
|
||||
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||
c = r | c; // msb = 1, if r was not 0x00
|
||||
c = c & 0x80808080; // extract msbs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetne4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
r = a ^ b; // 0x00 if a == b
|
||||
c = r | 0x80808080; // set msbs, to catch carry out
|
||||
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||
c = r | c; // msb = 1, if r was not 0x00
|
||||
c = c & 0x80808080; // extract msbs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = vcmpge4(a, b); // mask = 0xff if a >= b
|
||||
r = a ^ b; //
|
||||
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
|
||||
r = s ^ r; // select a when b >= a, else select b => min(a,b)
|
||||
r = s - r; // |a - b| = max(a,b) - min(a,b);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = vcmpge4(a, b); // mask = 0xff if a >= b
|
||||
r = a & s; // select a when b >= a
|
||||
s = b & ~s; // select b when b < a
|
||||
r = r | s; // combine byte selections
|
||||
#endif
|
||||
|
||||
return r; // byte-wise unsigned maximum
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = vcmpge4(b, a); // mask = 0xff if a >= b
|
||||
r = a & s; // select a when b >= a
|
||||
s = b & ~s; // select b when b < a
|
||||
r = r | s; // combine byte selections
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
|
||||
75
lib/3rdParty/OpenCV/include/opencv2/core/cuda/transform.hpp
vendored
Normal file
75
lib/3rdParty/OpenCV/include/opencv2/core/cuda/transform.hpp
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_TRANSFORM_HPP
|
||||
#define OPENCV_CUDA_TRANSFORM_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "utility.hpp"
|
||||
#include "detail/transform_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_TRANSFORM_HPP
|
||||
90
lib/3rdParty/OpenCV/include/opencv2/core/cuda/type_traits.hpp
vendored
Normal file
90
lib/3rdParty/OpenCV/include/opencv2/core/cuda/type_traits.hpp
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
|
||||
#define OPENCV_CUDA_TYPE_TRAITS_HPP
|
||||
|
||||
#include "detail/type_traits_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T> struct IsSimpleParameter
|
||||
{
|
||||
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
|
||||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
|
||||
};
|
||||
|
||||
template <typename T> struct TypeTraits
|
||||
{
|
||||
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
|
||||
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
|
||||
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
|
||||
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
|
||||
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
|
||||
|
||||
enum { isConst = type_traits_detail::UnConst<T>::value };
|
||||
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
|
||||
|
||||
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
|
||||
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
|
||||
|
||||
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
|
||||
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
|
||||
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
|
||||
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
|
||||
enum { isArith = isIntegral || isFloat };
|
||||
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
|
||||
|
||||
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
|
||||
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
|
||||
};
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_TYPE_TRAITS_HPP
|
||||
230
lib/3rdParty/OpenCV/include/opencv2/core/cuda/utility.hpp
vendored
Normal file
230
lib/3rdParty/OpenCV/include/opencv2/core/cuda/utility.hpp
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_UTILITY_HPP
|
||||
#define OPENCV_CUDA_UTILITY_HPP
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "datamov_utils.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct CV_EXPORTS ThrustAllocator
|
||||
{
|
||||
typedef uchar value_type;
|
||||
virtual ~ThrustAllocator();
|
||||
virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
|
||||
virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
|
||||
static ThrustAllocator& getAllocator();
|
||||
static void setAllocator(ThrustAllocator* allocator);
|
||||
};
|
||||
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
|
||||
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
|
||||
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
|
||||
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// swap
|
||||
|
||||
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
|
||||
{
|
||||
const T temp = a;
|
||||
a = b;
|
||||
b = temp;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Mask Reader
|
||||
|
||||
struct SingleMask
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
|
||||
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
return mask.ptr(y)[x] != 0;
|
||||
}
|
||||
|
||||
PtrStepb mask;
|
||||
};
|
||||
|
||||
struct SingleMaskChannels
|
||||
{
|
||||
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
|
||||
: mask(mask_), channels(channels_) {}
|
||||
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
|
||||
:mask(mask_.mask), channels(mask_.channels){}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
return mask.ptr(y)[x / channels] != 0;
|
||||
}
|
||||
|
||||
PtrStepb mask;
|
||||
int channels;
|
||||
};
|
||||
|
||||
struct MaskCollection
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
|
||||
: maskCollection(maskCollection_) {}
|
||||
|
||||
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
|
||||
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
|
||||
|
||||
__device__ __forceinline__ void next()
|
||||
{
|
||||
curMask = *maskCollection++;
|
||||
}
|
||||
__device__ __forceinline__ void setMask(int z)
|
||||
{
|
||||
curMask = maskCollection[z];
|
||||
}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
uchar val;
|
||||
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
|
||||
}
|
||||
|
||||
const PtrStepb* maskCollection;
|
||||
PtrStepb curMask;
|
||||
};
|
||||
|
||||
struct WithOutMask
|
||||
{
|
||||
__host__ __device__ __forceinline__ WithOutMask(){}
|
||||
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
|
||||
|
||||
__device__ __forceinline__ void next() const
|
||||
{
|
||||
}
|
||||
__device__ __forceinline__ void setMask(int) const
|
||||
{
|
||||
}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int, int) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int, int, int) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ bool check(int, int)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ bool check(int, int, int)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Solve linear system
|
||||
|
||||
// solve 2x2 linear system Ax=b
|
||||
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
|
||||
{
|
||||
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
|
||||
|
||||
if (det != 0)
|
||||
{
|
||||
double invdet = 1.0 / det;
|
||||
|
||||
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
|
||||
|
||||
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// solve 3x3 linear system Ax=b
|
||||
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
|
||||
{
|
||||
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
|
||||
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
|
||||
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
|
||||
|
||||
if (det != 0)
|
||||
{
|
||||
double invdet = 1.0 / det;
|
||||
|
||||
x[0] = saturate_cast<T>(invdet *
|
||||
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
|
||||
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
|
||||
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
|
||||
|
||||
x[1] = saturate_cast<T>(invdet *
|
||||
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
|
||||
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
|
||||
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
|
||||
|
||||
x[2] = saturate_cast<T>(invdet *
|
||||
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
|
||||
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
|
||||
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_UTILITY_HPP
|
||||
232
lib/3rdParty/OpenCV/include/opencv2/core/cuda/vec_distance.hpp
vendored
Normal file
232
lib/3rdParty/OpenCV/include/opencv2/core/cuda/vec_distance.hpp
vendored
Normal file
@@ -0,0 +1,232 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
|
||||
#define OPENCV_CUDA_VEC_DISTANCE_HPP
|
||||
|
||||
#include "reduce.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "detail/vec_distance_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T> struct L1Dist
|
||||
{
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
|
||||
__device__ __forceinline__ L1Dist() : mySum(0) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(int val1, int val2)
|
||||
{
|
||||
mySum = __sad(val1, val2, mySum);
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator int() const
|
||||
{
|
||||
return mySum;
|
||||
}
|
||||
|
||||
int mySum;
|
||||
};
|
||||
template <> struct L1Dist<float>
|
||||
{
|
||||
typedef float value_type;
|
||||
typedef float result_type;
|
||||
|
||||
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(float val1, float val2)
|
||||
{
|
||||
mySum += ::fabs(val1 - val2);
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator float() const
|
||||
{
|
||||
return mySum;
|
||||
}
|
||||
|
||||
float mySum;
|
||||
};
|
||||
|
||||
struct L2Dist
|
||||
{
|
||||
typedef float value_type;
|
||||
typedef float result_type;
|
||||
|
||||
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(float val1, float val2)
|
||||
{
|
||||
float reg = val1 - val2;
|
||||
mySum += reg * reg;
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator float() const
|
||||
{
|
||||
return sqrtf(mySum);
|
||||
}
|
||||
|
||||
float mySum;
|
||||
};
|
||||
|
||||
struct HammingDist
|
||||
{
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
|
||||
__device__ __forceinline__ HammingDist() : mySum(0) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(int val1, int val2)
|
||||
{
|
||||
mySum += __popc(val1 ^ val2);
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator int() const
|
||||
{
|
||||
return mySum;
|
||||
}
|
||||
|
||||
int mySum;
|
||||
};
|
||||
|
||||
// calc distance between two vectors in global memory
|
||||
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
|
||||
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
|
||||
{
|
||||
for (int i = tid; i < len; i += THREAD_DIM)
|
||||
{
|
||||
T1 val1;
|
||||
ForceGlob<T1>::Load(vec1, i, val1);
|
||||
|
||||
T2 val2;
|
||||
ForceGlob<T2>::Load(vec2, i, val2);
|
||||
|
||||
dist.reduceIter(val1, val2);
|
||||
}
|
||||
|
||||
dist.reduceAll<THREAD_DIM>(smem, tid);
|
||||
}
|
||||
|
||||
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
|
||||
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
|
||||
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
|
||||
{
|
||||
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
|
||||
|
||||
dist.reduceAll<THREAD_DIM>(smem, tid);
|
||||
}
|
||||
|
||||
// calc distance between two vectors in global memory
|
||||
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
|
||||
{
|
||||
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
|
||||
{
|
||||
vec1 = vec1_;
|
||||
}
|
||||
|
||||
template <typename T2, typename Dist>
|
||||
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
|
||||
{
|
||||
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
|
||||
}
|
||||
|
||||
const T1* vec1;
|
||||
};
|
||||
|
||||
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
|
||||
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
|
||||
{
|
||||
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
|
||||
{
|
||||
if (glob_tid < len)
|
||||
smem[glob_tid] = vec1[glob_tid];
|
||||
__syncthreads();
|
||||
|
||||
U* vec1ValsPtr = vec1Vals;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
|
||||
*vec1ValsPtr++ = smem[i];
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
template <typename T2, typename Dist>
|
||||
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
|
||||
{
|
||||
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
|
||||
}
|
||||
|
||||
U vec1Vals[MAX_LEN / THREAD_DIM];
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_VEC_DISTANCE_HPP
|
||||
923
lib/3rdParty/OpenCV/include/opencv2/core/cuda/vec_math.hpp
vendored
Normal file
923
lib/3rdParty/OpenCV/include/opencv2/core/cuda/vec_math.hpp
vendored
Normal file
@@ -0,0 +1,923 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_VECMATH_HPP
|
||||
#define OPENCV_CUDA_VECMATH_HPP
|
||||
|
||||
#include "vec_traits.hpp"
|
||||
#include "saturate_cast.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
|
||||
// saturate_cast
|
||||
|
||||
namespace vec_math_detail
|
||||
{
|
||||
template <int cn, typename VecD> struct SatCastHelper;
|
||||
template <typename VecD> struct SatCastHelper<1, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCastHelper<2, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCastHelper<3, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCastHelper<4, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
|
||||
{
|
||||
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
// unary operators
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(op (a.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
|
||||
|
||||
// unary functions
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func (a.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
|
||||
|
||||
// binary operators (vec & vec)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(a.x op b.x); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
|
||||
|
||||
// binary operators (vec & scalar)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(a.x op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(s op b.x); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
|
||||
|
||||
// binary function (vec & vec)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
|
||||
|
||||
// binary function (vec & scalar)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
|
||||
|
||||
}}} // namespace cv { namespace cuda { namespace device
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_VECMATH_HPP
|
||||
288
lib/3rdParty/OpenCV/include/opencv2/core/cuda/vec_traits.hpp
vendored
Normal file
288
lib/3rdParty/OpenCV/include/opencv2/core/cuda/vec_traits.hpp
vendored
Normal file
@@ -0,0 +1,288 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
|
||||
#define OPENCV_CUDA_VEC_TRAITS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<typename T, int N> struct TypeVec;
|
||||
|
||||
struct __align__(8) uchar8
|
||||
{
|
||||
uchar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
|
||||
{
|
||||
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(8) char8
|
||||
{
|
||||
schar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
|
||||
{
|
||||
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(16) ushort8
|
||||
{
|
||||
ushort a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
|
||||
{
|
||||
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(16) short8
|
||||
{
|
||||
short a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
|
||||
{
|
||||
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(32) uint8
|
||||
{
|
||||
uint a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
|
||||
{
|
||||
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(32) int8
|
||||
{
|
||||
int a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
|
||||
{
|
||||
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(32) float8
|
||||
{
|
||||
float a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
|
||||
{
|
||||
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct double8
|
||||
{
|
||||
double a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
|
||||
{
|
||||
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
|
||||
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
|
||||
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
|
||||
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
|
||||
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
|
||||
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
|
||||
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
|
||||
|
||||
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
|
||||
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
|
||||
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
|
||||
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
|
||||
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
|
||||
|
||||
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
|
||||
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
|
||||
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
|
||||
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
|
||||
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|
||||
|
||||
template<typename T> struct VecTraits;
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
|
||||
template<> struct VecTraits<type> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=1}; \
|
||||
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
|
||||
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
|
||||
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 1> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=1}; \
|
||||
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
|
||||
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 2> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=2}; \
|
||||
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
|
||||
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 3> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=3}; \
|
||||
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
|
||||
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 4> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=4}; \
|
||||
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
|
||||
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 8> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=8}; \
|
||||
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
|
||||
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
|
||||
|
||||
template<> struct VecTraits<char>
|
||||
{
|
||||
typedef char elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char all(char v) {return v;}
|
||||
static __device__ __host__ __forceinline__ char make(char x) {return x;}
|
||||
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
|
||||
};
|
||||
template<> struct VecTraits<schar>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
|
||||
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
|
||||
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
|
||||
};
|
||||
template<> struct VecTraits<char1>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
|
||||
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
|
||||
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
|
||||
};
|
||||
template<> struct VecTraits<char2>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=2};
|
||||
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
|
||||
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
|
||||
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
|
||||
};
|
||||
template<> struct VecTraits<char3>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=3};
|
||||
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
|
||||
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
|
||||
};
|
||||
template<> struct VecTraits<char4>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=4};
|
||||
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
|
||||
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
|
||||
};
|
||||
template<> struct VecTraits<char8>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=8};
|
||||
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
|
||||
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_VEC_TRAITS_HPP
|
||||
139
lib/3rdParty/OpenCV/include/opencv2/core/cuda/warp.hpp
vendored
Normal file
139
lib/3rdParty/OpenCV/include/opencv2/core/cuda/warp.hpp
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
|
||||
#define OPENCV_CUDA_DEVICE_WARP_HPP
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct Warp
|
||||
{
|
||||
enum
|
||||
{
|
||||
LOG_WARP_SIZE = 5,
|
||||
WARP_SIZE = 1 << LOG_WARP_SIZE,
|
||||
STRIDE = WARP_SIZE
|
||||
};
|
||||
|
||||
/** \brief Returns the warp lane ID of the calling thread. */
|
||||
static __device__ __forceinline__ unsigned int laneId()
|
||||
{
|
||||
unsigned int ret;
|
||||
asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename It, typename T>
|
||||
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
|
||||
{
|
||||
for(It t = beg + laneId(); t < end; t += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt>
|
||||
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
|
||||
{
|
||||
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
|
||||
*out = *t;
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt, class UnOp>
|
||||
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
|
||||
{
|
||||
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
|
||||
*out = op(*t);
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
|
||||
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
|
||||
{
|
||||
unsigned int lane = laneId();
|
||||
|
||||
InIt1 t1 = beg1 + lane;
|
||||
InIt2 t2 = beg2 + lane;
|
||||
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
|
||||
*out = op(*t1, *t2);
|
||||
return out;
|
||||
}
|
||||
|
||||
template <class T, class BinOp>
|
||||
static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
|
||||
{
|
||||
const unsigned int lane = laneId();
|
||||
|
||||
if (lane < 16)
|
||||
{
|
||||
T partial = ptr[lane];
|
||||
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 16]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 8]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 4]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 2]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 1]);
|
||||
}
|
||||
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
template<typename OutIt, typename T>
|
||||
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
|
||||
{
|
||||
unsigned int lane = laneId();
|
||||
value += lane;
|
||||
|
||||
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */
|
||||
@@ -40,21 +40,37 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_OLD_CV_HPP
|
||||
#define OPENCV_OLD_CV_HPP
|
||||
#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
|
||||
#define OPENCV_CUDA_WARP_REDUCE_HPP__
|
||||
|
||||
//#if defined(__GNUC__)
|
||||
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
|
||||
//#endif
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
#include "cv.h"
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/photo.hpp"
|
||||
#include "opencv2/video.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include "opencv2/features2d.hpp"
|
||||
#include "opencv2/calib3d.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
//! @cond IGNORED
|
||||
|
||||
#endif
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <class T>
|
||||
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
|
||||
{
|
||||
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
|
||||
|
||||
if (lane < 16)
|
||||
{
|
||||
T partial = ptr[tid];
|
||||
|
||||
ptr[tid] = partial = partial + ptr[tid + 16];
|
||||
ptr[tid] = partial = partial + ptr[tid + 8];
|
||||
ptr[tid] = partial = partial + ptr[tid + 4];
|
||||
ptr[tid] = partial = partial + ptr[tid + 2];
|
||||
ptr[tid] = partial = partial + ptr[tid + 1];
|
||||
}
|
||||
|
||||
return ptr[tid - lane];
|
||||
}
|
||||
}}} // namespace cv { namespace cuda { namespace cudev {
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
|
||||
162
lib/3rdParty/OpenCV/include/opencv2/core/cuda/warp_shuffle.hpp
vendored
Normal file
162
lib/3rdParty/OpenCV/include/opencv2/core/cuda/warp_shuffle.hpp
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
|
||||
#define OPENCV_CUDA_WARP_SHUFFLE_HPP
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
#if __CUDACC_VER_MAJOR__ >= 9
|
||||
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
|
||||
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
|
||||
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
|
||||
#endif
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
return __shfl(val, srcLane, width);
|
||||
#else
|
||||
return T();
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
return (unsigned int) __shfl((int) val, srcLane, width);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
|
||||
lo = __shfl(lo, srcLane, width);
|
||||
hi = __shfl(hi, srcLane, width);
|
||||
|
||||
return __hiloint2double(hi, lo);
|
||||
#else
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
return __shfl_down(val, delta, width);
|
||||
#else
|
||||
return T();
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
return (unsigned int) __shfl_down((int) val, delta, width);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
|
||||
lo = __shfl_down(lo, delta, width);
|
||||
hi = __shfl_down(hi, delta, width);
|
||||
|
||||
return __hiloint2double(hi, lo);
|
||||
#else
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
return __shfl_up(val, delta, width);
|
||||
#else
|
||||
return T();
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
return (unsigned int) __shfl_up((int) val, delta, width);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
|
||||
lo = __shfl_up(lo, delta, width);
|
||||
hi = __shfl_up(hi, delta, width);
|
||||
|
||||
return __hiloint2double(hi, lo);
|
||||
#else
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
}}}
|
||||
|
||||
# undef __shfl
|
||||
# undef __shfl_up
|
||||
# undef __shfl_down
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
|
||||
@@ -47,6 +47,13 @@
|
||||
# error cuda_types.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#if defined(__OPENCV_BUILD) && defined(__clang__)
|
||||
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
|
||||
#endif
|
||||
#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#endif
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
@@ -120,10 +127,12 @@ namespace cv
|
||||
};
|
||||
|
||||
typedef PtrStepSz<unsigned char> PtrStepSzb;
|
||||
typedef PtrStepSz<unsigned short> PtrStepSzus;
|
||||
typedef PtrStepSz<float> PtrStepSzf;
|
||||
typedef PtrStepSz<int> PtrStepSzi;
|
||||
|
||||
typedef PtrStep<unsigned char> PtrStepb;
|
||||
typedef PtrStep<unsigned short> PtrStepus;
|
||||
typedef PtrStep<float> PtrStepf;
|
||||
typedef PtrStep<int> PtrStepi;
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
|
||||
#define CV_CPU_BASELINE_MODE 1
|
||||
#endif
|
||||
|
||||
|
||||
@@ -82,6 +83,14 @@
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX2 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_AVX_512F
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX_512F 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_AVX512_SKX
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX512_SKX 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_FMA3
|
||||
# define CV_FMA3 1
|
||||
#endif
|
||||
@@ -99,7 +108,7 @@
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
|
||||
#ifdef CV_CPU_COMPILE_VSX
|
||||
# include <altivec.h>
|
||||
# undef vector
|
||||
# undef pixel
|
||||
@@ -107,16 +116,24 @@
|
||||
# define CV_VSX 1
|
||||
#endif
|
||||
|
||||
#ifdef CV_CPU_COMPILE_VSX3
|
||||
# define CV_VSX3 1
|
||||
#endif
|
||||
|
||||
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
|
||||
|
||||
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
|
||||
struct VZeroUpperGuard {
|
||||
#ifdef __GNUC__
|
||||
__attribute__((always_inline))
|
||||
#endif
|
||||
inline VZeroUpperGuard() { _mm256_zeroupper(); }
|
||||
#ifdef __GNUC__
|
||||
__attribute__((always_inline))
|
||||
#endif
|
||||
inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
|
||||
};
|
||||
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; (void)__vzeroupper_guard;
|
||||
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
|
||||
#endif
|
||||
|
||||
#ifdef __CV_AVX_GUARD
|
||||
@@ -218,6 +235,9 @@ struct VZeroUpperGuard {
|
||||
#ifndef CV_AVX_512VL
|
||||
# define CV_AVX_512VL 0
|
||||
#endif
|
||||
#ifndef CV_AVX512_SKX
|
||||
# define CV_AVX512_SKX 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_NEON
|
||||
# define CV_NEON 0
|
||||
@@ -226,3 +246,7 @@ struct VZeroUpperGuard {
|
||||
#ifndef CV_VSX
|
||||
# define CV_VSX 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_VSX3
|
||||
# define CV_VSX3 0
|
||||
#endif
|
||||
@@ -2,198 +2,339 @@
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
|
||||
# define CV_TRY_SSE 1
|
||||
# define CV_CPU_FORCE_SSE 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 1
|
||||
# define CV_CPU_CALL_SSE(fn, args) return (opt_SSE::fn args)
|
||||
# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
|
||||
# define CV_TRY_SSE 1
|
||||
# define CV_CPU_FORCE_SSE 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
|
||||
# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
|
||||
# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE 0
|
||||
# define CV_CPU_FORCE_SSE 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 0
|
||||
# define CV_CPU_CALL_SSE(fn, args)
|
||||
# define CV_CPU_CALL_SSE_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
|
||||
# define CV_TRY_SSE2 1
|
||||
# define CV_CPU_FORCE_SSE2 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 1
|
||||
# define CV_CPU_CALL_SSE2(fn, args) return (opt_SSE2::fn args)
|
||||
# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
|
||||
# define CV_TRY_SSE2 1
|
||||
# define CV_CPU_FORCE_SSE2 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
|
||||
# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
|
||||
# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE2 0
|
||||
# define CV_CPU_FORCE_SSE2 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 0
|
||||
# define CV_CPU_CALL_SSE2(fn, args)
|
||||
# define CV_CPU_CALL_SSE2_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
|
||||
# define CV_TRY_SSE3 1
|
||||
# define CV_CPU_FORCE_SSE3 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 1
|
||||
# define CV_CPU_CALL_SSE3(fn, args) return (opt_SSE3::fn args)
|
||||
# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
|
||||
# define CV_TRY_SSE3 1
|
||||
# define CV_CPU_FORCE_SSE3 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
|
||||
# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
|
||||
# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE3 0
|
||||
# define CV_CPU_FORCE_SSE3 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 0
|
||||
# define CV_CPU_CALL_SSE3(fn, args)
|
||||
# define CV_CPU_CALL_SSE3_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
|
||||
# define CV_TRY_SSSE3 1
|
||||
# define CV_CPU_FORCE_SSSE3 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 1
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) return (opt_SSSE3::fn args)
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
|
||||
# define CV_TRY_SSSE3 1
|
||||
# define CV_CPU_FORCE_SSSE3 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
|
||||
# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSSE3 0
|
||||
# define CV_CPU_FORCE_SSSE3 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 0
|
||||
# define CV_CPU_CALL_SSSE3(fn, args)
|
||||
# define CV_CPU_CALL_SSSE3_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
|
||||
# define CV_TRY_SSE4_1 1
|
||||
# define CV_CPU_FORCE_SSE4_1 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) return (opt_SSE4_1::fn args)
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
|
||||
# define CV_TRY_SSE4_1 1
|
||||
# define CV_CPU_FORCE_SSE4_1 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
|
||||
# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE4_1 0
|
||||
# define CV_CPU_FORCE_SSE4_1 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args)
|
||||
# define CV_CPU_CALL_SSE4_1_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
|
||||
# define CV_TRY_SSE4_2 1
|
||||
# define CV_CPU_FORCE_SSE4_2 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) return (opt_SSE4_2::fn args)
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
|
||||
# define CV_TRY_SSE4_2 1
|
||||
# define CV_CPU_FORCE_SSE4_2 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
|
||||
# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE4_2 0
|
||||
# define CV_CPU_FORCE_SSE4_2 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args)
|
||||
# define CV_CPU_CALL_SSE4_2_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
|
||||
# define CV_TRY_POPCNT 1
|
||||
# define CV_CPU_FORCE_POPCNT 1
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 1
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) return (opt_POPCNT::fn args)
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
|
||||
# define CV_TRY_POPCNT 1
|
||||
# define CV_CPU_FORCE_POPCNT 0
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
|
||||
# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
|
||||
#else
|
||||
# define CV_TRY_POPCNT 0
|
||||
# define CV_CPU_FORCE_POPCNT 0
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 0
|
||||
# define CV_CPU_CALL_POPCNT(fn, args)
|
||||
# define CV_CPU_CALL_POPCNT_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
|
||||
# define CV_TRY_AVX 1
|
||||
# define CV_CPU_FORCE_AVX 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 1
|
||||
# define CV_CPU_CALL_AVX(fn, args) return (opt_AVX::fn args)
|
||||
# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
|
||||
# define CV_TRY_AVX 1
|
||||
# define CV_CPU_FORCE_AVX 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
|
||||
# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
|
||||
# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX 0
|
||||
# define CV_CPU_FORCE_AVX 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 0
|
||||
# define CV_CPU_CALL_AVX(fn, args)
|
||||
# define CV_CPU_CALL_AVX_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
|
||||
# define CV_TRY_FP16 1
|
||||
# define CV_CPU_FORCE_FP16 1
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 1
|
||||
# define CV_CPU_CALL_FP16(fn, args) return (opt_FP16::fn args)
|
||||
# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
|
||||
# define CV_TRY_FP16 1
|
||||
# define CV_CPU_FORCE_FP16 0
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
|
||||
# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
|
||||
# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
|
||||
#else
|
||||
# define CV_TRY_FP16 0
|
||||
# define CV_CPU_FORCE_FP16 0
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 0
|
||||
# define CV_CPU_CALL_FP16(fn, args)
|
||||
# define CV_CPU_CALL_FP16_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
|
||||
# define CV_TRY_AVX2 1
|
||||
# define CV_CPU_FORCE_AVX2 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 1
|
||||
# define CV_CPU_CALL_AVX2(fn, args) return (opt_AVX2::fn args)
|
||||
# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
|
||||
# define CV_TRY_AVX2 1
|
||||
# define CV_CPU_FORCE_AVX2 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
|
||||
# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
|
||||
# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX2 0
|
||||
# define CV_CPU_FORCE_AVX2 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 0
|
||||
# define CV_CPU_CALL_AVX2(fn, args)
|
||||
# define CV_CPU_CALL_AVX2_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
|
||||
# define CV_TRY_FMA3 1
|
||||
# define CV_CPU_FORCE_FMA3 1
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 1
|
||||
# define CV_CPU_CALL_FMA3(fn, args) return (opt_FMA3::fn args)
|
||||
# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
|
||||
# define CV_TRY_FMA3 1
|
||||
# define CV_CPU_FORCE_FMA3 0
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
|
||||
# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
|
||||
# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
|
||||
#else
|
||||
# define CV_TRY_FMA3 0
|
||||
# define CV_CPU_FORCE_FMA3 0
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 0
|
||||
# define CV_CPU_CALL_FMA3(fn, args)
|
||||
# define CV_CPU_CALL_FMA3_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F
|
||||
# define CV_TRY_AVX_512F 1
|
||||
# define CV_CPU_FORCE_AVX_512F 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX_512F 1
|
||||
# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F
|
||||
# define CV_TRY_AVX_512F 1
|
||||
# define CV_CPU_FORCE_AVX_512F 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
|
||||
# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
|
||||
# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX_512F 0
|
||||
# define CV_CPU_FORCE_AVX_512F 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX_512F 0
|
||||
# define CV_CPU_CALL_AVX_512F(fn, args)
|
||||
# define CV_CPU_CALL_AVX_512F_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
|
||||
# define CV_TRY_AVX512_SKX 1
|
||||
# define CV_CPU_FORCE_AVX512_SKX 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
|
||||
# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
|
||||
# define CV_TRY_AVX512_SKX 1
|
||||
# define CV_CPU_FORCE_AVX512_SKX 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
|
||||
# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
|
||||
# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX512_SKX 0
|
||||
# define CV_CPU_FORCE_AVX512_SKX 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
|
||||
# define CV_CPU_CALL_AVX512_SKX(fn, args)
|
||||
# define CV_CPU_CALL_AVX512_SKX_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
|
||||
# define CV_TRY_NEON 1
|
||||
# define CV_CPU_FORCE_NEON 1
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 1
|
||||
# define CV_CPU_CALL_NEON(fn, args) return (opt_NEON::fn args)
|
||||
# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
|
||||
# define CV_TRY_NEON 1
|
||||
# define CV_CPU_FORCE_NEON 0
|
||||
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
|
||||
# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
|
||||
# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
|
||||
#else
|
||||
# define CV_TRY_NEON 0
|
||||
# define CV_CPU_FORCE_NEON 0
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 0
|
||||
# define CV_CPU_CALL_NEON(fn, args)
|
||||
# define CV_CPU_CALL_NEON_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
|
||||
# define CV_TRY_VSX 1
|
||||
# define CV_CPU_FORCE_VSX 1
|
||||
# define CV_CPU_HAS_SUPPORT_VSX 1
|
||||
# define CV_CPU_CALL_VSX(fn, args) return (opt_VSX::fn args)
|
||||
# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
|
||||
# define CV_TRY_VSX 1
|
||||
# define CV_CPU_FORCE_VSX 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
|
||||
# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
|
||||
# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
|
||||
#else
|
||||
# define CV_TRY_VSX 0
|
||||
# define CV_CPU_FORCE_VSX 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX 0
|
||||
# define CV_CPU_CALL_VSX(fn, args)
|
||||
# define CV_CPU_CALL_VSX_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
|
||||
# define CV_TRY_VSX3 1
|
||||
# define CV_CPU_FORCE_VSX3 1
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 1
|
||||
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
|
||||
# define CV_TRY_VSX3 1
|
||||
# define CV_CPU_FORCE_VSX3 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
|
||||
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||
#else
|
||||
# define CV_TRY_VSX3 0
|
||||
# define CV_CPU_FORCE_VSX3 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 0
|
||||
# define CV_CPU_CALL_VSX3(fn, args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
|
||||
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
|
||||
857
lib/3rdParty/OpenCV/include/opencv2/core/cvdef.h
vendored
Normal file
857
lib/3rdParty/OpenCV/include/opencv2/core/cvdef.h
vendored
Normal file
@@ -0,0 +1,857 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CORE_CVDEF_H
|
||||
#define OPENCV_CORE_CVDEF_H
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
|
||||
#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
|
||||
(defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
|
||||
// Guard to prevent using of binary incompatible binaries / runtimes
|
||||
// https://github.com/opencv/opencv/pull/9161
|
||||
#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
|
||||
#define CV__DEBUG_NS_END }
|
||||
namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef CV__DEBUG_NS_BEGIN
|
||||
#define CV__DEBUG_NS_BEGIN
|
||||
#define CV__DEBUG_NS_END
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
#include "cvconfig.h"
|
||||
#endif
|
||||
|
||||
#ifndef __CV_EXPAND
|
||||
#define __CV_EXPAND(x) x
|
||||
#endif
|
||||
|
||||
#ifndef __CV_CAT
|
||||
#define __CV_CAT__(x, y) x ## y
|
||||
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
|
||||
#define __CV_CAT(x, y) __CV_CAT_(x, y)
|
||||
#endif
|
||||
|
||||
#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
|
||||
#define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
|
||||
|
||||
#if defined __GNUC__
|
||||
#define CV_Func __func__
|
||||
#elif defined _MSC_VER
|
||||
#define CV_Func __FUNCTION__
|
||||
#else
|
||||
#define CV_Func ""
|
||||
#endif
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
//////////////// static assert /////////////////
|
||||
#define CVAUX_CONCAT_EXP(a, b) a##b
|
||||
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
|
||||
|
||||
#if defined(__clang__)
|
||||
# ifndef __has_extension
|
||||
# define __has_extension __has_feature /* compatibility, for older versions of clang */
|
||||
# endif
|
||||
# if __has_extension(cxx_static_assert)
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# elif __has_extension(c_static_assert)
|
||||
# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#elif defined(__GNUC__)
|
||||
# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#elif defined(_MSC_VER)
|
||||
# if _MSC_VER >= 1600 /* MSVC 10 */
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_StaticAssert
|
||||
# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
|
||||
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
|
||||
# else
|
||||
template <bool x> struct CV_StaticAssert_failed;
|
||||
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
|
||||
template<int x> struct CV_StaticAssert_test {};
|
||||
# define CV_StaticAssert(condition, reason)\
|
||||
typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Suppress warning "-Wdeprecated-declarations" / C4996
|
||||
#if defined(_MSC_VER)
|
||||
#define CV_DO_PRAGMA(x) __pragma(x)
|
||||
#elif defined(__GNUC__)
|
||||
#define CV_DO_PRAGMA(x) _Pragma (#x)
|
||||
#else
|
||||
#define CV_DO_PRAGMA(x)
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define CV_SUPPRESS_DEPRECATED_START \
|
||||
CV_DO_PRAGMA(warning(push)) \
|
||||
CV_DO_PRAGMA(warning(disable: 4996))
|
||||
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
|
||||
#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
|
||||
#define CV_SUPPRESS_DEPRECATED_START \
|
||||
CV_DO_PRAGMA(GCC diagnostic push) \
|
||||
CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
|
||||
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
|
||||
#else
|
||||
#define CV_SUPPRESS_DEPRECATED_START
|
||||
#define CV_SUPPRESS_DEPRECATED_END
|
||||
#endif
|
||||
|
||||
#define CV_UNUSED(name) (void)name
|
||||
|
||||
//! @endcond
|
||||
|
||||
// undef problematic defines sometimes defined by system headers (windows.h in particular)
|
||||
#undef small
|
||||
#undef min
|
||||
#undef max
|
||||
#undef abs
|
||||
#undef Complex
|
||||
|
||||
#include <limits.h>
|
||||
#include "opencv2/core/hal/interface.h"
|
||||
|
||||
#if defined __ICL
|
||||
# define CV_ICC __ICL
|
||||
#elif defined __ICC
|
||||
# define CV_ICC __ICC
|
||||
#elif defined __ECL
|
||||
# define CV_ICC __ECL
|
||||
#elif defined __ECC
|
||||
# define CV_ICC __ECC
|
||||
#elif defined __INTEL_COMPILER
|
||||
# define CV_ICC __INTEL_COMPILER
|
||||
#endif
|
||||
|
||||
#ifndef CV_INLINE
|
||||
# if defined __cplusplus
|
||||
# define CV_INLINE static inline
|
||||
# elif defined _MSC_VER
|
||||
# define CV_INLINE __inline
|
||||
# else
|
||||
# define CV_INLINE static
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CV_ALWAYS_INLINE
|
||||
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
|
||||
#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
|
||||
#elif defined(_MSC_VER)
|
||||
#define CV_ALWAYS_INLINE __forceinline
|
||||
#else
|
||||
#define CV_ALWAYS_INLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
|
||||
# define CV_ENABLE_UNROLLED 0
|
||||
#else
|
||||
# define CV_ENABLE_UNROLLED 1
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
|
||||
#elif defined _MSC_VER
|
||||
# define CV_DECL_ALIGNED(x) __declspec(align(x))
|
||||
#else
|
||||
# define CV_DECL_ALIGNED(x)
|
||||
#endif
|
||||
|
||||
/* CPU features and intrinsics support */
|
||||
#define CV_CPU_NONE 0
|
||||
#define CV_CPU_MMX 1
|
||||
#define CV_CPU_SSE 2
|
||||
#define CV_CPU_SSE2 3
|
||||
#define CV_CPU_SSE3 4
|
||||
#define CV_CPU_SSSE3 5
|
||||
#define CV_CPU_SSE4_1 6
|
||||
#define CV_CPU_SSE4_2 7
|
||||
#define CV_CPU_POPCNT 8
|
||||
#define CV_CPU_FP16 9
|
||||
#define CV_CPU_AVX 10
|
||||
#define CV_CPU_AVX2 11
|
||||
#define CV_CPU_FMA3 12
|
||||
|
||||
#define CV_CPU_AVX_512F 13
|
||||
#define CV_CPU_AVX_512BW 14
|
||||
#define CV_CPU_AVX_512CD 15
|
||||
#define CV_CPU_AVX_512DQ 16
|
||||
#define CV_CPU_AVX_512ER 17
|
||||
#define CV_CPU_AVX_512IFMA512 18 // deprecated
|
||||
#define CV_CPU_AVX_512IFMA 18
|
||||
#define CV_CPU_AVX_512PF 19
|
||||
#define CV_CPU_AVX_512VBMI 20
|
||||
#define CV_CPU_AVX_512VL 21
|
||||
|
||||
#define CV_CPU_NEON 100
|
||||
|
||||
#define CV_CPU_VSX 200
|
||||
#define CV_CPU_VSX3 201
|
||||
|
||||
// CPU features groups
|
||||
#define CV_CPU_AVX512_SKX 256
|
||||
|
||||
// when adding to this list remember to update the following enum
|
||||
#define CV_HARDWARE_MAX_FEATURE 512
|
||||
|
||||
/** @brief Available CPU features.
|
||||
*/
|
||||
enum CpuFeatures {
|
||||
CPU_MMX = 1,
|
||||
CPU_SSE = 2,
|
||||
CPU_SSE2 = 3,
|
||||
CPU_SSE3 = 4,
|
||||
CPU_SSSE3 = 5,
|
||||
CPU_SSE4_1 = 6,
|
||||
CPU_SSE4_2 = 7,
|
||||
CPU_POPCNT = 8,
|
||||
CPU_FP16 = 9,
|
||||
CPU_AVX = 10,
|
||||
CPU_AVX2 = 11,
|
||||
CPU_FMA3 = 12,
|
||||
|
||||
CPU_AVX_512F = 13,
|
||||
CPU_AVX_512BW = 14,
|
||||
CPU_AVX_512CD = 15,
|
||||
CPU_AVX_512DQ = 16,
|
||||
CPU_AVX_512ER = 17,
|
||||
CPU_AVX_512IFMA512 = 18, // deprecated
|
||||
CPU_AVX_512IFMA = 18,
|
||||
CPU_AVX_512PF = 19,
|
||||
CPU_AVX_512VBMI = 20,
|
||||
CPU_AVX_512VL = 21,
|
||||
|
||||
CPU_NEON = 100,
|
||||
|
||||
CPU_VSX = 200,
|
||||
CPU_VSX3 = 201,
|
||||
|
||||
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
|
||||
|
||||
CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE
|
||||
};
|
||||
|
||||
|
||||
#include "cv_cpu_dispatch.h"
|
||||
|
||||
|
||||
/* fundamental constants */
|
||||
#define CV_PI 3.1415926535897932384626433832795
|
||||
#define CV_2PI 6.283185307179586476925286766559
|
||||
#define CV_LOG2 0.69314718055994530941723212145818
|
||||
|
||||
#if defined __ARM_FP16_FORMAT_IEEE \
|
||||
&& !defined __CUDACC__
|
||||
# define CV_FP16_TYPE 1
|
||||
#else
|
||||
# define CV_FP16_TYPE 0
|
||||
#endif
|
||||
|
||||
typedef union Cv16suf
|
||||
{
|
||||
short i;
|
||||
ushort u;
|
||||
#if CV_FP16_TYPE
|
||||
__fp16 h;
|
||||
#endif
|
||||
}
|
||||
Cv16suf;
|
||||
|
||||
typedef union Cv32suf
|
||||
{
|
||||
int i;
|
||||
unsigned u;
|
||||
float f;
|
||||
}
|
||||
Cv32suf;
|
||||
|
||||
typedef union Cv64suf
|
||||
{
|
||||
int64 i;
|
||||
uint64 u;
|
||||
double f;
|
||||
}
|
||||
Cv64suf;
|
||||
|
||||
#define OPENCV_ABI_COMPATIBILITY 400
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
# define DISABLE_OPENCV_3_COMPATIBILITY
|
||||
# define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
|
||||
#endif
|
||||
|
||||
#ifdef CVAPI_EXPORTS
|
||||
# if (defined _WIN32 || defined WINCE || defined __CYGWIN__)
|
||||
# define CV_EXPORTS __declspec(dllexport)
|
||||
# elif defined __GNUC__ && __GNUC__ >= 4
|
||||
# define CV_EXPORTS __attribute__ ((visibility ("default")))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CV_EXPORTS
|
||||
# define CV_EXPORTS
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define CV_EXPORTS_TEMPLATE
|
||||
#else
|
||||
# define CV_EXPORTS_TEMPLATE CV_EXPORTS
|
||||
#endif
|
||||
|
||||
#ifndef CV_DEPRECATED
|
||||
# if defined(__GNUC__)
|
||||
# define CV_DEPRECATED __attribute__ ((deprecated))
|
||||
# elif defined(_MSC_VER)
|
||||
# define CV_DEPRECATED __declspec(deprecated)
|
||||
# else
|
||||
# define CV_DEPRECATED
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CV_DEPRECATED_EXTERNAL
|
||||
# if defined(__OPENCV_BUILD)
|
||||
# define CV_DEPRECATED_EXTERNAL /* nothing */
|
||||
# else
|
||||
# define CV_DEPRECATED_EXTERNAL CV_DEPRECATED
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef CV_EXTERN_C
|
||||
# ifdef __cplusplus
|
||||
# define CV_EXTERN_C extern "C"
|
||||
# else
|
||||
# define CV_EXTERN_C
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* special informative macros for wrapper generators */
|
||||
#define CV_EXPORTS_W CV_EXPORTS
|
||||
#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
|
||||
#define CV_EXPORTS_AS(synonym) CV_EXPORTS
|
||||
#define CV_EXPORTS_W_MAP CV_EXPORTS
|
||||
#define CV_IN_OUT
|
||||
#define CV_OUT
|
||||
#define CV_PROP
|
||||
#define CV_PROP_RW
|
||||
#define CV_WRAP
|
||||
#define CV_WRAP_AS(synonym)
|
||||
#define CV_WRAP_MAPPABLE(mappable)
|
||||
#define CV_WRAP_PHANTOM(phantom_header)
|
||||
#define CV_WRAP_DEFAULT(val)
|
||||
|
||||
/****************************************************************************************\
|
||||
* Matrix type (Mat) *
|
||||
\****************************************************************************************/
|
||||
|
||||
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
|
||||
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
|
||||
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
|
||||
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
|
||||
#define CV_MAT_CONT_FLAG_SHIFT 14
|
||||
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
|
||||
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
|
||||
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
|
||||
#define CV_SUBMAT_FLAG_SHIFT 15
|
||||
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
|
||||
#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG)
|
||||
|
||||
/** Size of each channel item,
|
||||
0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
|
||||
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
|
||||
|
||||
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(a,b) ((a) > (b) ? (b) : (a))
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
# define MAX(a,b) ((a) < (b) ? (b) : (a))
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////// Enum operators ///////////////////////////////////////
|
||||
|
||||
/**
|
||||
|
||||
Provides compatibility operators for both classical and C++11 enum classes,
|
||||
as well as exposing the C++11 enum class members for backwards compatibility
|
||||
|
||||
@code
|
||||
// Provides operators required for flag enums
|
||||
CV_ENUM_FLAGS(AccessFlag)
|
||||
|
||||
// Exposes the listed members of the enum class AccessFlag to the current namespace
|
||||
CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]);
|
||||
@endcode
|
||||
*/
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST) \
|
||||
static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST; \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...) \
|
||||
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
|
||||
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType) \
|
||||
static inline bool operator!(const EnumType& val) \
|
||||
{ \
|
||||
typedef std::underlying_type<EnumType>::type UnderlyingType; \
|
||||
return !static_cast<UnderlyingType>(val); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type) \
|
||||
static inline bool operator!=(const Arg1Type& a, const Arg2Type& b) \
|
||||
{ \
|
||||
return static_cast<int>(a) != static_cast<int>(b); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type) \
|
||||
static inline bool operator==(const Arg1Type& a, const Arg2Type& b) \
|
||||
{ \
|
||||
return static_cast<int>(a) == static_cast<int>(b); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType) \
|
||||
static inline EnumType operator~(const EnumType& val) \
|
||||
{ \
|
||||
typedef std::underlying_type<EnumType>::type UnderlyingType; \
|
||||
return static_cast<EnumType>(~static_cast<UnderlyingType>(val)); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type) \
|
||||
static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b) \
|
||||
{ \
|
||||
typedef std::underlying_type<EnumType>::type UnderlyingType; \
|
||||
return static_cast<EnumType>(static_cast<UnderlyingType>(a) | static_cast<UnderlyingType>(b)); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type) \
|
||||
static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b) \
|
||||
{ \
|
||||
typedef std::underlying_type<EnumType>::type UnderlyingType; \
|
||||
return static_cast<EnumType>(static_cast<UnderlyingType>(a) & static_cast<UnderlyingType>(b)); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type) \
|
||||
static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b) \
|
||||
{ \
|
||||
typedef std::underlying_type<EnumType>::type UnderlyingType; \
|
||||
return static_cast<EnumType>(static_cast<UnderlyingType>(a) ^ static_cast<UnderlyingType>(b)); \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type) \
|
||||
static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val) \
|
||||
{ \
|
||||
_this = static_cast<EnumType>(static_cast<int>(_this) | static_cast<int>(val)); \
|
||||
return _this; \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type) \
|
||||
static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val) \
|
||||
{ \
|
||||
_this = static_cast<EnumType>(static_cast<int>(_this) & static_cast<int>(val)); \
|
||||
return _this; \
|
||||
} \
|
||||
|
||||
#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type) \
|
||||
static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val) \
|
||||
{ \
|
||||
_this = static_cast<EnumType>(static_cast<int>(_this) ^ static_cast<int>(val)); \
|
||||
return _this; \
|
||||
} \
|
||||
|
||||
#define CV_ENUM_CLASS_EXPOSE(EnumType, ...) \
|
||||
__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \
|
||||
|
||||
#define CV_ENUM_FLAGS(EnumType) \
|
||||
__CV_ENUM_FLAGS_LOGICAL_NOT (EnumType) \
|
||||
__CV_ENUM_FLAGS_LOGICAL_EQ (EnumType, int) \
|
||||
__CV_ENUM_FLAGS_LOGICAL_NOT_EQ (EnumType, int) \
|
||||
\
|
||||
__CV_ENUM_FLAGS_BITWISE_NOT (EnumType) \
|
||||
__CV_ENUM_FLAGS_BITWISE_OR (EnumType, EnumType, EnumType) \
|
||||
__CV_ENUM_FLAGS_BITWISE_AND (EnumType, EnumType, EnumType) \
|
||||
__CV_ENUM_FLAGS_BITWISE_XOR (EnumType, EnumType, EnumType) \
|
||||
\
|
||||
__CV_ENUM_FLAGS_BITWISE_OR_EQ (EnumType, EnumType) \
|
||||
__CV_ENUM_FLAGS_BITWISE_AND_EQ (EnumType, EnumType) \
|
||||
__CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType) \
|
||||
|
||||
/****************************************************************************************\
|
||||
* static analysys *
|
||||
\****************************************************************************************/
|
||||
|
||||
// In practice, some macro are not processed correctly (noreturn is not detected).
|
||||
// We need to use simplified definition for them.
|
||||
#ifndef CV_STATIC_ANALYSIS
|
||||
# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
|
||||
# define CV_STATIC_ANALYSIS 1
|
||||
# endif
|
||||
#else
|
||||
# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1) // defined and not empty
|
||||
# if 0 == CV_STATIC_ANALYSIS
|
||||
# undef CV_STATIC_ANALYSIS
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/****************************************************************************************\
|
||||
* Thread sanitizer *
|
||||
\****************************************************************************************/
|
||||
#ifndef CV_THREAD_SANITIZER
|
||||
# if defined(__has_feature)
|
||||
# if __has_feature(thread_sanitizer)
|
||||
# define CV_THREAD_SANITIZER
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/****************************************************************************************\
|
||||
* exchange-add operation for atomic operations on reference counters *
|
||||
\****************************************************************************************/
|
||||
|
||||
#ifdef CV_XADD
|
||||
// allow to use user-defined macro
|
||||
#elif defined __GNUC__ || defined __clang__
|
||||
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) && !defined __INTEL_COMPILER
|
||||
# ifdef __ATOMIC_ACQ_REL
|
||||
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
|
||||
# else
|
||||
# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
|
||||
# endif
|
||||
# else
|
||||
# if defined __ATOMIC_ACQ_REL && !defined __clang__
|
||||
// version for gcc >= 4.7
|
||||
# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
|
||||
# else
|
||||
# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
|
||||
# endif
|
||||
# endif
|
||||
#elif defined _MSC_VER && !defined RC_INVOKED
|
||||
# include <intrin.h>
|
||||
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
|
||||
#else
|
||||
CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* CV_NORETURN attribute *
|
||||
\****************************************************************************************/
|
||||
|
||||
#ifndef CV_NORETURN
|
||||
# if defined(__GNUC__)
|
||||
# define CV_NORETURN __attribute__((__noreturn__))
|
||||
# elif defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
# define CV_NORETURN __declspec(noreturn)
|
||||
# else
|
||||
# define CV_NORETURN /* nothing by default */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* CV_NODISCARD attribute *
|
||||
* encourages the compiler to issue a warning if the return value is discarded (C++17) *
|
||||
\****************************************************************************************/
|
||||
#ifndef CV_NODISCARD
|
||||
# if defined(__GNUC__)
|
||||
# define CV_NODISCARD __attribute__((__warn_unused_result__)) // at least available with GCC 3.4
|
||||
# elif defined(__clang__) && defined(__has_attribute)
|
||||
# if __has_attribute(__warn_unused_result__)
|
||||
# define CV_NODISCARD __attribute__((__warn_unused_result__))
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_NODISCARD
|
||||
# define CV_NODISCARD /* nothing by default */
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* C++ 11 *
|
||||
\****************************************************************************************/
|
||||
#ifndef CV_CXX11
|
||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
||||
# define CV_CXX11 1
|
||||
# endif
|
||||
#else
|
||||
# if CV_CXX11 == 0
|
||||
# undef CV_CXX11
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_CXX11
|
||||
# error "OpenCV 4.x+ requires enabled C++11 support"
|
||||
#endif
|
||||
|
||||
#define CV_CXX_MOVE_SEMANTICS 1
|
||||
#define CV_CXX_STD_ARRAY 1
|
||||
#include <array>
|
||||
#ifndef CV_OVERRIDE
|
||||
# define CV_OVERRIDE override
|
||||
#endif
|
||||
#ifndef CV_FINAL
|
||||
# define CV_FINAL final
|
||||
#endif
|
||||
|
||||
#ifndef CV_NOEXCEPT
|
||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
||||
# define CV_NOEXCEPT noexcept
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_NOEXCEPT
|
||||
# define CV_NOEXCEPT
|
||||
#endif
|
||||
|
||||
#ifndef CV_CONSTEXPR
|
||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
||||
# define CV_CONSTEXPR constexpr
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_CONSTEXPR
|
||||
# define CV_CONSTEXPR
|
||||
#endif
|
||||
|
||||
// Integer types portatibility
|
||||
#ifdef OPENCV_STDINT_HEADER
|
||||
#include OPENCV_STDINT_HEADER
|
||||
#elif defined(__cplusplus)
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
|
||||
namespace cv {
|
||||
typedef signed char int8_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
}
|
||||
#elif defined(_MSC_VER) || __cplusplus >= 201103L
|
||||
#include <cstdint>
|
||||
namespace cv {
|
||||
using std::int8_t;
|
||||
using std::uint8_t;
|
||||
using std::int16_t;
|
||||
using std::uint16_t;
|
||||
using std::int32_t;
|
||||
using std::uint32_t;
|
||||
using std::int64_t;
|
||||
using std::uint64_t;
|
||||
}
|
||||
#else
|
||||
#include <stdint.h>
|
||||
namespace cv {
|
||||
typedef ::int8_t int8_t;
|
||||
typedef ::uint8_t uint8_t;
|
||||
typedef ::int16_t int16_t;
|
||||
typedef ::uint16_t uint16_t;
|
||||
typedef ::int32_t int32_t;
|
||||
typedef ::uint32_t uint32_t;
|
||||
typedef ::int64_t int64_t;
|
||||
typedef ::uint64_t uint64_t;
|
||||
}
|
||||
#endif
|
||||
#else // pure C
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace cv
|
||||
{
|
||||
|
||||
class float16_t
|
||||
{
|
||||
public:
|
||||
#if CV_FP16_TYPE
|
||||
|
||||
float16_t() : h(0) {}
|
||||
explicit float16_t(float x) { h = (__fp16)x; }
|
||||
operator float() const { return (float)h; }
|
||||
static float16_t fromBits(ushort w)
|
||||
{
|
||||
Cv16suf u;
|
||||
u.u = w;
|
||||
float16_t result;
|
||||
result.h = u.h;
|
||||
return result;
|
||||
}
|
||||
static float16_t zero()
|
||||
{
|
||||
float16_t result;
|
||||
result.h = (__fp16)0;
|
||||
return result;
|
||||
}
|
||||
ushort bits() const
|
||||
{
|
||||
Cv16suf u;
|
||||
u.h = h;
|
||||
return u.u;
|
||||
}
|
||||
protected:
|
||||
__fp16 h;
|
||||
|
||||
#else
|
||||
float16_t() : w(0) {}
|
||||
explicit float16_t(float x)
|
||||
{
|
||||
#if CV_AVX2
|
||||
__m128 v = _mm_load_ss(&x);
|
||||
w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
|
||||
#else
|
||||
Cv32suf in;
|
||||
in.f = x;
|
||||
unsigned sign = in.u & 0x80000000;
|
||||
in.u ^= sign;
|
||||
|
||||
if( in.u >= 0x47800000 )
|
||||
w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
|
||||
else
|
||||
{
|
||||
if (in.u < 0x38800000)
|
||||
{
|
||||
in.f += 0.5f;
|
||||
w = (ushort)(in.u - 0x3f000000);
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned t = in.u + 0xc8000fff;
|
||||
w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
|
||||
}
|
||||
}
|
||||
|
||||
w = (ushort)(w | (sign >> 16));
|
||||
#endif
|
||||
}
|
||||
|
||||
operator float() const
|
||||
{
|
||||
#if CV_AVX2
|
||||
float f;
|
||||
_mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
|
||||
return f;
|
||||
#else
|
||||
Cv32suf out;
|
||||
|
||||
unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
|
||||
unsigned sign = (w & 0x8000) << 16;
|
||||
unsigned e = w & 0x7c00;
|
||||
|
||||
out.u = t + (1 << 23);
|
||||
out.u = (e >= 0x7c00 ? t + 0x38000000 :
|
||||
e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
|
||||
return out.f;
|
||||
#endif
|
||||
}
|
||||
|
||||
static float16_t fromBits(ushort b)
|
||||
{
|
||||
float16_t result;
|
||||
result.w = b;
|
||||
return result;
|
||||
}
|
||||
static float16_t zero()
|
||||
{
|
||||
float16_t result;
|
||||
result.w = (ushort)0;
|
||||
return result;
|
||||
}
|
||||
ushort bits() const { return w; }
|
||||
protected:
|
||||
ushort w;
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
//! @}
|
||||
|
||||
#ifndef __cplusplus
|
||||
#include "opencv2/core/fast_math.hpp" // define cvRound(double)
|
||||
#endif
|
||||
|
||||
#endif // OPENCV_CORE_CVDEF_H
|
||||
190
lib/3rdParty/OpenCV/include/opencv2/core/cvstd.hpp
vendored
Normal file
190
lib/3rdParty/OpenCV/include/opencv2/core/cvstd.hpp
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CORE_CVSTD_HPP
|
||||
#define OPENCV_CORE_CVSTD_HPP
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error cvstd.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <cctype>
|
||||
|
||||
#include <string>
|
||||
|
||||
// import useful primitives from stl
|
||||
# include <algorithm>
|
||||
# include <utility>
|
||||
# include <cstdlib> //for abs(int)
|
||||
# include <cmath>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
static inline uchar abs(uchar a) { return a; }
|
||||
static inline ushort abs(ushort a) { return a; }
|
||||
static inline unsigned abs(unsigned a) { return a; }
|
||||
static inline uint64 abs(uint64 a) { return a; }
|
||||
|
||||
using std::min;
|
||||
using std::max;
|
||||
using std::abs;
|
||||
using std::swap;
|
||||
using std::sqrt;
|
||||
using std::exp;
|
||||
using std::pow;
|
||||
using std::log;
|
||||
}
|
||||
|
||||
#include "cvstd_wrapper.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
//////////////////////////// memory management functions ////////////////////////////
|
||||
|
||||
/** @brief Allocates an aligned memory buffer.
|
||||
|
||||
The function allocates the buffer of the specified size and returns it. When the buffer size is 16
|
||||
bytes or more, the returned buffer is aligned to 16 bytes.
|
||||
@param bufSize Allocated buffer size.
|
||||
*/
|
||||
CV_EXPORTS void* fastMalloc(size_t bufSize);
|
||||
|
||||
/** @brief Deallocates a memory buffer.
|
||||
|
||||
The function deallocates the buffer allocated with fastMalloc . If NULL pointer is passed, the
|
||||
function does nothing. C version of the function clears the pointer *pptr* to avoid problems with
|
||||
double memory deallocation.
|
||||
@param ptr Pointer to the allocated buffer.
|
||||
*/
|
||||
CV_EXPORTS void fastFree(void* ptr);
|
||||
|
||||
/*!
|
||||
The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree()
|
||||
*/
|
||||
template<typename _Tp> class Allocator
|
||||
{
|
||||
public:
|
||||
typedef _Tp value_type;
|
||||
typedef value_type* pointer;
|
||||
typedef const value_type* const_pointer;
|
||||
typedef value_type& reference;
|
||||
typedef const value_type& const_reference;
|
||||
typedef size_t size_type;
|
||||
typedef ptrdiff_t difference_type;
|
||||
template<typename U> class rebind { typedef Allocator<U> other; };
|
||||
|
||||
explicit Allocator() {}
|
||||
~Allocator() {}
|
||||
explicit Allocator(Allocator const&) {}
|
||||
template<typename U>
|
||||
explicit Allocator(Allocator<U> const&) {}
|
||||
|
||||
// address
|
||||
pointer address(reference r) { return &r; }
|
||||
const_pointer address(const_reference r) { return &r; }
|
||||
|
||||
pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); }
|
||||
void deallocate(pointer p, size_type) { fastFree(p); }
|
||||
|
||||
void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); }
|
||||
void destroy(pointer p) { p->~_Tp(); }
|
||||
|
||||
size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); }
|
||||
};
|
||||
|
||||
//! @} core_utils
|
||||
|
||||
//! @endcond
|
||||
|
||||
//! @addtogroup core_basic
|
||||
//! @{
|
||||
|
||||
//////////////////////////////// string class ////////////////////////////////
|
||||
|
||||
class CV_EXPORTS FileNode; //for string constructor from FileNode
|
||||
|
||||
typedef std::string String;
|
||||
|
||||
#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
|
||||
|
||||
//! @cond IGNORED
|
||||
namespace details {
|
||||
// std::tolower is int->int
|
||||
static inline char char_tolower(char ch)
|
||||
{
|
||||
return (char)std::tolower((int)ch);
|
||||
}
|
||||
// std::toupper is int->int
|
||||
static inline char char_toupper(char ch)
|
||||
{
|
||||
return (char)std::toupper((int)ch);
|
||||
}
|
||||
} // namespace details
|
||||
//! @endcond
|
||||
|
||||
static inline std::string toLowerCase(const std::string& str)
|
||||
{
|
||||
std::string result(str);
|
||||
std::transform(result.begin(), result.end(), result.begin(), details::char_tolower);
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline std::string toUpperCase(const std::string& str)
|
||||
{
|
||||
std::string result(str);
|
||||
std::transform(result.begin(), result.end(), result.begin(), details::char_toupper);
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
|
||||
|
||||
//! @} core_basic
|
||||
} // cv
|
||||
|
||||
#endif //OPENCV_CORE_CVSTD_HPP
|
||||
@@ -73,95 +73,6 @@ public:
|
||||
typedef Vec<channel_type, channels> vec_type;
|
||||
};
|
||||
|
||||
inline
|
||||
String::String(const std::string& str)
|
||||
: cstr_(0), len_(0)
|
||||
{
|
||||
if (!str.empty())
|
||||
{
|
||||
size_t len = str.size();
|
||||
if (len) memcpy(allocate(len), str.c_str(), len);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
String::String(const std::string& str, size_t pos, size_t len)
|
||||
: cstr_(0), len_(0)
|
||||
{
|
||||
size_t strlen = str.size();
|
||||
pos = min(pos, strlen);
|
||||
len = min(strlen - pos, len);
|
||||
if (!len) return;
|
||||
memcpy(allocate(len), str.c_str() + pos, len);
|
||||
}
|
||||
|
||||
inline
|
||||
String& String::operator = (const std::string& str)
|
||||
{
|
||||
deallocate();
|
||||
if (!str.empty())
|
||||
{
|
||||
size_t len = str.size();
|
||||
if (len) memcpy(allocate(len), str.c_str(), len);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
String& String::operator += (const std::string& str)
|
||||
{
|
||||
*this = *this + str;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
String::operator std::string() const
|
||||
{
|
||||
return std::string(cstr_, len_);
|
||||
}
|
||||
|
||||
inline
|
||||
String operator + (const String& lhs, const std::string& rhs)
|
||||
{
|
||||
String s;
|
||||
size_t rhslen = rhs.size();
|
||||
s.allocate(lhs.len_ + rhslen);
|
||||
if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
|
||||
if (rhslen) memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
|
||||
return s;
|
||||
}
|
||||
|
||||
inline
|
||||
String operator + (const std::string& lhs, const String& rhs)
|
||||
{
|
||||
String s;
|
||||
size_t lhslen = lhs.size();
|
||||
s.allocate(lhslen + rhs.len_);
|
||||
if (lhslen) memcpy(s.cstr_, lhs.c_str(), lhslen);
|
||||
if (rhs.len_) memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
|
||||
return s;
|
||||
}
|
||||
|
||||
inline
|
||||
FileNode::operator std::string() const
|
||||
{
|
||||
String value;
|
||||
read(*this, value, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
template<> inline
|
||||
void operator >> (const FileNode& n, std::string& value)
|
||||
{
|
||||
read(n, value, std::string());
|
||||
}
|
||||
|
||||
template<> inline
|
||||
FileStorage& operator << (FileStorage& fs, const std::string& value)
|
||||
{
|
||||
return fs << cv::String(value);
|
||||
}
|
||||
|
||||
static inline
|
||||
std::ostream& operator << (std::ostream& os, const String& str)
|
||||
{
|
||||
@@ -265,16 +176,21 @@ std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
|
||||
|
||||
static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
|
||||
{
|
||||
int i, dims = msize.p[-1];
|
||||
int i, dims = msize.dims();
|
||||
for( i = 0; i < dims; i++ )
|
||||
{
|
||||
out << msize.p[i];
|
||||
out << msize[i];
|
||||
if( i < dims-1 )
|
||||
out << " x ";
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
|
||||
{
|
||||
return s << "[" << r.start << " : " << r.end << ")";
|
||||
}
|
||||
|
||||
} // cv
|
||||
|
||||
#ifdef _MSC_VER
|
||||
149
lib/3rdParty/OpenCV/include/opencv2/core/cvstd_wrapper.hpp
vendored
Normal file
149
lib/3rdParty/OpenCV/include/opencv2/core/cvstd_wrapper.hpp
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP
|
||||
#define OPENCV_CORE_CVSTD_WRAPPER_HPP
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
#include <string>
|
||||
#include <memory> // std::shared_ptr
|
||||
#include <type_traits> // std::enable_if
|
||||
|
||||
namespace cv {
|
||||
|
||||
using std::nullptr_t;
|
||||
|
||||
//! @addtogroup core_basic
|
||||
//! @{
|
||||
|
||||
#ifdef CV_DOXYGEN
|
||||
|
||||
template <typename _Tp> using Ptr = std::shared_ptr<_Tp>; // In ideal world it should look like this, but we need some compatibility workarounds below
|
||||
|
||||
template<typename _Tp, typename ... A1> static inline
|
||||
Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); }
|
||||
|
||||
#else // cv::Ptr with compatibility workarounds
|
||||
|
||||
// It should be defined for C-API types only.
|
||||
// C++ types should use regular "delete" operator.
|
||||
template<typename Y> struct DefaultDeleter;
|
||||
#if 0
|
||||
{
|
||||
void operator()(Y* p) const;
|
||||
};
|
||||
#endif
|
||||
|
||||
namespace sfinae {
|
||||
template<typename C, typename Ret, typename... Args>
|
||||
struct has_parenthesis_operator
|
||||
{
|
||||
private:
|
||||
template<typename T>
|
||||
static CV_CONSTEXPR std::true_type check(typename std::is_same<typename std::decay<decltype(std::declval<T>().operator()(std::declval<Args>()...))>::type, Ret>::type*);
|
||||
|
||||
template<typename> static CV_CONSTEXPR std::false_type check(...);
|
||||
|
||||
typedef decltype(check<C>(0)) type;
|
||||
|
||||
public:
|
||||
static CV_CONSTEXPR bool value = type::value;
|
||||
};
|
||||
} // namespace sfinae
|
||||
|
||||
template <typename T, typename = void>
|
||||
struct has_custom_delete
|
||||
: public std::false_type {};
|
||||
|
||||
// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files
|
||||
#ifndef __CUDACC__
|
||||
template <typename T>
|
||||
struct has_custom_delete<T, typename std::enable_if< sfinae::has_parenthesis_operator<DefaultDeleter<T>, void, T*>::value >::type >
|
||||
: public std::true_type {};
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
struct Ptr : public std::shared_ptr<T>
|
||||
{
|
||||
#if 0
|
||||
using std::shared_ptr<T>::shared_ptr; // GCC 5.x can't handle this
|
||||
#else
|
||||
inline Ptr() CV_NOEXCEPT : std::shared_ptr<T>() {}
|
||||
inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr<T>(nullptr) {}
|
||||
template<typename Y, typename D> inline Ptr(Y* p, D d) : std::shared_ptr<T>(p, d) {}
|
||||
template<typename D> inline Ptr(nullptr_t, D d) : std::shared_ptr<T>(nullptr, d) {}
|
||||
|
||||
template<typename Y> inline Ptr(const Ptr<Y>& r, T* ptr) CV_NOEXCEPT : std::shared_ptr<T>(r, ptr) {}
|
||||
|
||||
inline Ptr(const Ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
|
||||
inline Ptr(Ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
|
||||
|
||||
template<typename Y> inline Ptr(const Ptr<Y>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
|
||||
template<typename Y> inline Ptr(Ptr<Y>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
|
||||
#endif
|
||||
inline Ptr(const std::shared_ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
|
||||
inline Ptr(std::shared_ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
|
||||
|
||||
// Overload with custom DefaultDeleter: Ptr<IplImage>(...)
|
||||
template<typename Y>
|
||||
inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr<T>(ptr, DefaultDeleter<Y>()) {}
|
||||
|
||||
// Overload without custom deleter: Ptr<std::string>(...);
|
||||
template<typename Y>
|
||||
inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr<T>(ptr) {}
|
||||
|
||||
template<typename Y = T>
|
||||
inline Ptr(Y* ptr) : Ptr(has_custom_delete<Y>(), ptr) {}
|
||||
|
||||
// Overload with custom DefaultDeleter: Ptr<IplImage>(...)
|
||||
template<typename Y>
|
||||
inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr, DefaultDeleter<Y>()); }
|
||||
|
||||
// Overload without custom deleter: Ptr<std::string>(...);
|
||||
template<typename Y>
|
||||
inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr); }
|
||||
|
||||
template<typename Y>
|
||||
inline void reset(Y* ptr) { Ptr<T>::reset(has_custom_delete<Y>(), ptr); }
|
||||
|
||||
template<class Y, class Deleter>
|
||||
void reset(Y* ptr, Deleter d) { std::shared_ptr<T>::reset(ptr, d); }
|
||||
|
||||
void reset() CV_NOEXCEPT { std::shared_ptr<T>::reset(); }
|
||||
|
||||
Ptr& operator=(const Ptr& o) { std::shared_ptr<T>::operator =(o); return *this; }
|
||||
template<typename Y> inline Ptr& operator=(const Ptr<Y>& o) { std::shared_ptr<T>::operator =(o); return *this; }
|
||||
|
||||
T* operator->() const CV_NOEXCEPT { return std::shared_ptr<T>::get();}
|
||||
typename std::add_lvalue_reference<T>::type operator*() const CV_NOEXCEPT { return *std::shared_ptr<T>::get(); }
|
||||
|
||||
// OpenCV 3.x methods (not a part of standard C++ library)
|
||||
inline void release() { std::shared_ptr<T>::reset(); }
|
||||
inline operator T* () const { return std::shared_ptr<T>::get(); }
|
||||
inline bool empty() const { return std::shared_ptr<T>::get() == nullptr; }
|
||||
|
||||
template<typename Y> inline
|
||||
Ptr<Y> staticCast() const CV_NOEXCEPT { return std::static_pointer_cast<Y>(*this); }
|
||||
|
||||
template<typename Y> inline
|
||||
Ptr<Y> constCast() const CV_NOEXCEPT { return std::const_pointer_cast<Y>(*this); }
|
||||
|
||||
template<typename Y> inline
|
||||
Ptr<Y> dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast<Y>(*this); }
|
||||
};
|
||||
|
||||
template<typename _Tp, typename ... A1> static inline
|
||||
Ptr<_Tp> makePtr(const A1&... a1)
|
||||
{
|
||||
static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter");
|
||||
return (Ptr<_Tp>)std::make_shared<_Tp>(a1...);
|
||||
}
|
||||
|
||||
#endif // CV_DOXYGEN
|
||||
|
||||
//! @} core_basic
|
||||
} // cv
|
||||
|
||||
#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP
|
||||
@@ -60,7 +60,7 @@ namespace cv
|
||||
//! @{
|
||||
|
||||
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
|
||||
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
|
||||
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
|
||||
{
|
||||
if( !(src.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
@@ -70,16 +70,12 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
# include "tegra_round.hpp"
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
|
||||
// 1. general scheme
|
||||
#define ARM_ROUND(_value, _asm_string) \
|
||||
int res; \
|
||||
float temp; \
|
||||
(void)temp; \
|
||||
CV_UNUSED(temp); \
|
||||
__asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
|
||||
return res
|
||||
// 2. version for double
|
||||
@@ -112,9 +108,6 @@ cvRound( double value )
|
||||
fistp t;
|
||||
}
|
||||
return t;
|
||||
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
|
||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
||||
TEGRA_ROUND_DBL(value);
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
# if defined ARM_ROUND_DBL
|
||||
ARM_ROUND_DBL(value);
|
||||
@@ -200,9 +193,6 @@ CV_INLINE int cvRound(float value)
|
||||
fistp t;
|
||||
}
|
||||
return t;
|
||||
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
|
||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
||||
TEGRA_ROUND_FLT(value);
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
# if defined ARM_ROUND_FLT
|
||||
ARM_ROUND_FLT(value);
|
||||
@@ -195,6 +195,12 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
|
||||
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
|
||||
|
||||
CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
|
||||
CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
|
||||
|
||||
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
|
||||
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
|
||||
|
||||
struct CV_EXPORTS DFT1D
|
||||
{
|
||||
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
|
||||
@@ -64,6 +64,8 @@ typedef signed char schar;
|
||||
# define CV_BIG_UINT(n) n##ULL
|
||||
#endif
|
||||
|
||||
#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0"
|
||||
|
||||
#define CV_CN_MAX 512
|
||||
#define CV_CN_SHIFT 3
|
||||
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
|
||||
@@ -75,7 +77,7 @@ typedef signed char schar;
|
||||
#define CV_32S 4
|
||||
#define CV_32F 5
|
||||
#define CV_64F 6
|
||||
#define CV_USRTYPE1 7
|
||||
#define CV_16F 7
|
||||
|
||||
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
|
||||
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
|
||||
@@ -124,6 +126,12 @@ typedef signed char schar;
|
||||
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
|
||||
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
|
||||
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
|
||||
|
||||
#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
|
||||
#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
|
||||
#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
|
||||
#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
|
||||
#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
|
||||
//! @}
|
||||
|
||||
//! @name Comparison operation
|
||||
429
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin.hpp
vendored
Normal file
429
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin.hpp
vendored
Normal file
@@ -0,0 +1,429 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_HAL_INTRIN_HPP
|
||||
#define OPENCV_HAL_INTRIN_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <float.h>
|
||||
#include <stdlib.h>
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
|
||||
#define OPENCV_HAL_AND(a, b) ((a) & (b))
|
||||
#define OPENCV_HAL_NOP(a) (a)
|
||||
#define OPENCV_HAL_1ST(a, b) (a)
|
||||
|
||||
// unlike HAL API, which is in cv::hal,
|
||||
// we put intrinsics into cv namespace to make its
|
||||
// access from within opencv code more accessible
|
||||
namespace cv {
|
||||
|
||||
namespace hal {
|
||||
|
||||
enum StoreMode
|
||||
{
|
||||
STORE_UNALIGNED = 0,
|
||||
STORE_ALIGNED = 1,
|
||||
STORE_ALIGNED_NOCACHE = 2
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template<typename _Tp> struct V_TypeTraits
|
||||
{
|
||||
};
|
||||
|
||||
#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
|
||||
template<> struct V_TypeTraits<type> \
|
||||
{ \
|
||||
typedef type value_type; \
|
||||
typedef int_type_ int_type; \
|
||||
typedef abs_type_ abs_type; \
|
||||
typedef uint_type_ uint_type; \
|
||||
typedef w_type_ w_type; \
|
||||
typedef q_type_ q_type; \
|
||||
typedef sum_type_ sum_type; \
|
||||
enum { nlanes128 = nlanes128_ }; \
|
||||
\
|
||||
static inline int_type reinterpret_int(type x) \
|
||||
{ \
|
||||
union { type l; int_type i; } v; \
|
||||
v.l = x; \
|
||||
return v.i; \
|
||||
} \
|
||||
\
|
||||
static inline type reinterpret_from_int(int_type x) \
|
||||
{ \
|
||||
union { type l; int_type i; } v; \
|
||||
v.i = x; \
|
||||
return v.l; \
|
||||
} \
|
||||
}
|
||||
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(unsigned, int, unsigned, unsigned, uint64, void, unsigned, 4);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(int, int, unsigned, unsigned, int64, void, int, 4);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(float, int, unsigned, float, double, void, float, 4);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(uint64, int64, uint64, uint64, void, void, uint64, 2);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(int64, int64, uint64, uint64, void, void, int64, 2);
|
||||
CV_INTRIN_DEF_TYPE_TRAITS(double, int64, uint64, double, void, void, double, 2);
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
|
||||
#ifdef CV_CPU_DISPATCH_MODE
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||
#else
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||
#endif
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CV_DOXYGEN
|
||||
# undef CV_AVX2
|
||||
# undef CV_SSE2
|
||||
# undef CV_NEON
|
||||
# undef CV_VSX
|
||||
# undef CV_FP16
|
||||
#endif
|
||||
|
||||
#if CV_SSE2 || CV_NEON || CV_VSX
|
||||
#define CV__SIMD_FORWARD 128
|
||||
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||
#endif
|
||||
|
||||
#if CV_SSE2
|
||||
|
||||
#include "opencv2/core/hal/intrin_sse_em.hpp"
|
||||
#include "opencv2/core/hal/intrin_sse.hpp"
|
||||
|
||||
#elif CV_NEON
|
||||
|
||||
#include "opencv2/core/hal/intrin_neon.hpp"
|
||||
|
||||
#elif CV_VSX
|
||||
|
||||
#include "opencv2/core/hal/intrin_vsx.hpp"
|
||||
|
||||
#else
|
||||
|
||||
#define CV_SIMD128_CPP 1
|
||||
#include "opencv2/core/hal/intrin_cpp.hpp"
|
||||
|
||||
#endif
|
||||
|
||||
// AVX2 can be used together with SSE2, so
|
||||
// we define those two sets of intrinsics at once.
|
||||
// Most of the intrinsics do not conflict (the proper overloaded variant is
|
||||
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
|
||||
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
|
||||
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
|
||||
// available instruction set) will get vx_ prefix
|
||||
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
|
||||
#if CV_AVX2
|
||||
|
||||
#define CV__SIMD_FORWARD 256
|
||||
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||
#include "opencv2/core/hal/intrin_avx.hpp"
|
||||
|
||||
#endif
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv {
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD128
|
||||
#define CV_SIMD128 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD128_64F
|
||||
#define CV_SIMD128_64F 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD256
|
||||
#define CV_SIMD256 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD256_64F
|
||||
#define CV_SIMD256_64F 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD512
|
||||
#define CV_SIMD512 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD512_64F
|
||||
#define CV_SIMD512_64F 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD128_FP16
|
||||
#define CV_SIMD128_FP16 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD256_FP16
|
||||
#define CV_SIMD256_FP16 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD512_FP16
|
||||
#define CV_SIMD512_FP16 0
|
||||
#endif
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
|
||||
inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \
|
||||
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
|
||||
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
|
||||
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
|
||||
inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
|
||||
inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
|
||||
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
|
||||
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \
|
||||
inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \
|
||||
inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
|
||||
inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
|
||||
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
|
||||
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
|
||||
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix)
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \
|
||||
CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix)
|
||||
|
||||
template<typename _Tp> struct V_RegTraits
|
||||
{
|
||||
};
|
||||
|
||||
#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
|
||||
template<> struct V_RegTraits<_reg> \
|
||||
{ \
|
||||
typedef _reg reg; \
|
||||
typedef _u_reg u_reg; \
|
||||
typedef _w_reg w_reg; \
|
||||
typedef _q_reg q_reg; \
|
||||
typedef _int_reg int_reg; \
|
||||
typedef _round_reg round_reg; \
|
||||
}
|
||||
|
||||
#if CV_SIMD128 || CV_SIMD128_CPP
|
||||
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
|
||||
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
|
||||
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
|
||||
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
|
||||
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
|
||||
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
|
||||
#if CV_SIMD128_64F
|
||||
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
|
||||
#else
|
||||
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
|
||||
#endif
|
||||
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
|
||||
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
|
||||
#if CV_SIMD128_64F
|
||||
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CV_SIMD256
|
||||
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
|
||||
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
|
||||
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
|
||||
#endif
|
||||
|
||||
#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
|
||||
#define CV__SIMD_NAMESPACE simd512
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD512_64F
|
||||
#define CV_SIMD_WIDTH 64
|
||||
// TODO typedef v_uint8 / v_int32 / etc types here
|
||||
} // namespace
|
||||
using namespace CV__SIMD_NAMESPACE;
|
||||
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
|
||||
#define CV__SIMD_NAMESPACE simd256
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD256_64F
|
||||
#define CV_SIMD_FP16 CV_SIMD256_FP16
|
||||
#define CV_SIMD_WIDTH 32
|
||||
typedef v_uint8x32 v_uint8;
|
||||
typedef v_int8x32 v_int8;
|
||||
typedef v_uint16x16 v_uint16;
|
||||
typedef v_int16x16 v_int16;
|
||||
typedef v_uint32x8 v_uint32;
|
||||
typedef v_int32x8 v_int32;
|
||||
typedef v_uint64x4 v_uint64;
|
||||
typedef v_int64x4 v_int64;
|
||||
typedef v_float32x8 v_float32;
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
|
||||
#if CV_SIMD256_64F
|
||||
typedef v_float64x4 v_float64;
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
|
||||
#endif
|
||||
inline void vx_cleanup() { v256_cleanup(); }
|
||||
} // namespace
|
||||
using namespace CV__SIMD_NAMESPACE;
|
||||
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
|
||||
#define CV__SIMD_NAMESPACE simd128
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
#define CV_SIMD CV_SIMD128
|
||||
#define CV_SIMD_64F CV_SIMD128_64F
|
||||
#define CV_SIMD_WIDTH 16
|
||||
typedef v_uint8x16 v_uint8;
|
||||
typedef v_int8x16 v_int8;
|
||||
typedef v_uint16x8 v_uint16;
|
||||
typedef v_int16x8 v_int16;
|
||||
typedef v_uint32x4 v_uint32;
|
||||
typedef v_int32x4 v_int32;
|
||||
typedef v_uint64x2 v_uint64;
|
||||
typedef v_int64x2 v_int64;
|
||||
typedef v_float32x4 v_float32;
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
|
||||
#if CV_SIMD128_64F
|
||||
typedef v_float64x2 v_float64;
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
|
||||
#endif
|
||||
inline void vx_cleanup() { v_cleanup(); }
|
||||
} // namespace
|
||||
using namespace CV__SIMD_NAMESPACE;
|
||||
#endif
|
||||
|
||||
inline unsigned int trailingZeros32(unsigned int value) {
|
||||
#if defined(_MSC_VER)
|
||||
#if (_MSC_VER < 1700) || defined(_M_ARM)
|
||||
unsigned long index = 0;
|
||||
_BitScanForward(&index, value);
|
||||
return (unsigned int)index;
|
||||
#elif defined(__clang__)
|
||||
// clang-cl doesn't export _tzcnt_u32 for non BMI systems
|
||||
return value ? __builtin_ctz(value) : 32;
|
||||
#else
|
||||
return _tzcnt_u32(value);
|
||||
#endif
|
||||
#elif defined(__GNUC__) || defined(__GNUG__)
|
||||
return __builtin_ctz(value);
|
||||
#elif defined(__ICC) || defined(__INTEL_COMPILER)
|
||||
return _bit_scan_forward(value);
|
||||
#elif defined(__clang__)
|
||||
return llvm.cttz.i32(value, true);
|
||||
#else
|
||||
static const int MultiplyDeBruijnBitPosition[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
|
||||
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD_64F
|
||||
#define CV_SIMD_64F 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD_FP16
|
||||
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef CV_SIMD
|
||||
#define CV_SIMD 0
|
||||
#endif
|
||||
|
||||
} // cv::
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif
|
||||
2772
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin_avx.hpp
vendored
Normal file
2772
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin_avx.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -108,8 +108,8 @@ block and to save contents of the register to memory block.
|
||||
These operations allow to reorder or recombine elements in one or multiple vectors.
|
||||
|
||||
- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
|
||||
- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
|
||||
- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
|
||||
- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high
|
||||
- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
|
||||
@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
|
||||
- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
|
||||
- Extract: @ref v_extract
|
||||
@@ -159,7 +159,7 @@ Most of these operations return only one value.
|
||||
### Other math
|
||||
|
||||
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
|
||||
- Absolute values: @ref v_abs, @ref v_absdiff
|
||||
- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
|
||||
|
||||
### Conversions
|
||||
|
||||
@@ -185,23 +185,29 @@ Regular integers:
|
||||
|load, store | x | x | x | x | x | x |
|
||||
|interleave | x | x | x | x | x | x |
|
||||
|expand | x | x | x | x | x | x |
|
||||
|expand_low | x | x | x | x | x | x |
|
||||
|expand_high | x | x | x | x | x | x |
|
||||
|expand_q | x | x | | | | |
|
||||
|add, sub | x | x | x | x | x | x |
|
||||
|add_wrap, sub_wrap | x | x | x | x | | |
|
||||
|mul | | | x | x | x | x |
|
||||
|mul_expand | | | x | x | x | |
|
||||
|mul_wrap | x | x | x | x | | |
|
||||
|mul | x | x | x | x | x | x |
|
||||
|mul_expand | x | x | x | x | x | |
|
||||
|compare | x | x | x | x | x | x |
|
||||
|shift | | | x | x | x | x |
|
||||
|dotprod | | | | x | | |
|
||||
|logical | x | x | x | x | x | x |
|
||||
|min, max | x | x | x | x | x | x |
|
||||
|absdiff | x | x | x | x | x | x |
|
||||
|absdiffs | | x | | x | | |
|
||||
|reduce | | | | | x | x |
|
||||
|mask | x | x | x | x | x | x |
|
||||
|pack | x | x | x | x | x | x |
|
||||
|pack_u | x | | x | | | |
|
||||
|pack_b | x | | | | | |
|
||||
|unpack | x | x | x | x | x | x |
|
||||
|extract | x | x | x | x | x | x |
|
||||
|rotate (lanes) | x | x | x | x | x | x |
|
||||
|cvt_flt32 | | | | | | x |
|
||||
|cvt_flt64 | | | | | | x |
|
||||
|transpose4x4 | | | | | x | x |
|
||||
@@ -215,6 +221,7 @@ Big integers:
|
||||
|shift | x | x |
|
||||
|logical | x | x |
|
||||
|extract | x | x |
|
||||
|rotate (lanes) | x | x |
|
||||
|
||||
Floating point:
|
||||
|
||||
@@ -236,7 +243,8 @@ Floating point:
|
||||
|sqrt, abs | x | x |
|
||||
|float math | x | x |
|
||||
|transpose4x4 | x | |
|
||||
|
||||
|extract | x | x |
|
||||
|rotate (lanes) | x | x |
|
||||
|
||||
@{ */
|
||||
|
||||
@@ -244,8 +252,6 @@ template<typename _Tp, int n> struct v_reg
|
||||
{
|
||||
//! @cond IGNORED
|
||||
typedef _Tp lane_type;
|
||||
typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_vec;
|
||||
typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> abs_vec;
|
||||
enum { nlanes = n };
|
||||
// !@endcond
|
||||
|
||||
@@ -677,9 +683,28 @@ OPENCV_HAL_IMPL_CMP_OP(==)
|
||||
For all types except 64-bit integer values. */
|
||||
OPENCV_HAL_IMPL_CMP_OP(!=)
|
||||
|
||||
template<int n>
|
||||
inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
|
||||
{
|
||||
typedef typename V_TypeTraits<float>::int_type itype;
|
||||
v_reg<float, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
|
||||
return c;
|
||||
}
|
||||
template<int n>
|
||||
inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
|
||||
{
|
||||
typedef typename V_TypeTraits<double>::int_type itype;
|
||||
v_reg<double, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
|
||||
return c;
|
||||
}
|
||||
|
||||
//! @brief Helper macro
|
||||
//! @ingroup core_hal_intrin_impl
|
||||
#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
|
||||
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
|
||||
template<typename _Tp, int n> \
|
||||
inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
|
||||
{ \
|
||||
@@ -693,12 +718,17 @@ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
|
||||
/** @brief Add values without saturation
|
||||
|
||||
For 8- and 16-bit integer values. */
|
||||
OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp)
|
||||
OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
|
||||
|
||||
/** @brief Subtract values without saturation
|
||||
|
||||
For 8- and 16-bit integer values. */
|
||||
OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp)
|
||||
OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
|
||||
|
||||
/** @brief Multiply values without saturation
|
||||
|
||||
For 8- and 16-bit integer values. */
|
||||
OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename T> inline T _absdiff(T a, T b)
|
||||
@@ -753,6 +783,19 @@ inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Saturating absolute difference
|
||||
|
||||
Returns \f$ saturate(|a - b|) \f$ .
|
||||
For 8-, 16-bit signed integer source types. */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
v_reg<_Tp, n> c;
|
||||
for( int i = 0; i < n; i++)
|
||||
c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Inversed square root
|
||||
|
||||
Returns \f$ 1/sqrt(a) \f$
|
||||
@@ -794,11 +837,11 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>
|
||||
|
||||
/** @brief Multiply and add
|
||||
|
||||
Returns \f$ a*b + c \f$
|
||||
For floating point types only. */
|
||||
Returns \f$ a*b + c \f$
|
||||
For floating point types and signed 32bit int only. */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
const v_reg<_Tp, n>& c)
|
||||
inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
const v_reg<_Tp, n>& c)
|
||||
{
|
||||
v_reg<_Tp, n> d;
|
||||
for( int i = 0; i < n; i++ )
|
||||
@@ -806,6 +849,14 @@ inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
return d;
|
||||
}
|
||||
|
||||
/** @brief A synonym for v_fma */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
const v_reg<_Tp, n>& c)
|
||||
{
|
||||
return v_fma(a, b, c);
|
||||
}
|
||||
|
||||
/** @brief Dot product of elements
|
||||
|
||||
Multiply values in two registers and sum adjacent result pairs.
|
||||
@@ -828,6 +879,29 @@ template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Dot product of elements
|
||||
|
||||
Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs.
|
||||
Scheme:
|
||||
@code
|
||||
{A1 A2 ...} // 16-bit
|
||||
x {B1 B2 ...} // 16-bit
|
||||
-------------
|
||||
{A1B1+A2B2+C1 ...} // 32-bit
|
||||
|
||||
@endcode
|
||||
Implemented only for 16-bit signed source type (v_int16x8).
|
||||
*/
|
||||
template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::w_type w_type;
|
||||
v_reg<w_type, n/2> s;
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
|
||||
return s;
|
||||
}
|
||||
|
||||
/** @brief Multiply and expand
|
||||
|
||||
Multiply values two registers and store results in two registers with wider pack type.
|
||||
@@ -859,6 +933,20 @@ template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, c
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Multiply and extract high part
|
||||
|
||||
Multiply values two registers and store high part of the results.
|
||||
Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$
|
||||
*/
|
||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::w_type w_type;
|
||||
v_reg<_Tp, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
|
||||
return c;
|
||||
}
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
|
||||
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
|
||||
@@ -975,6 +1063,21 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
|
||||
return r;
|
||||
}
|
||||
|
||||
/** @brief Sum absolute differences of values
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{ABS(A1-B1),abs(A2-B2),abs(A3-B3),...}
|
||||
@endcode
|
||||
For all types except 64-bit types.*/
|
||||
template<typename _Tp, int n> inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]);
|
||||
for (int i = 1; i < n; i++)
|
||||
c += _absdiff(a.s[i], b.s[i]);
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Get negative values mask
|
||||
|
||||
Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes.
|
||||
@@ -1016,13 +1119,16 @@ template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
|
||||
return false;
|
||||
}
|
||||
|
||||
/** @brief Bitwise select
|
||||
/** @brief Per-element select (blend operation)
|
||||
|
||||
Return value will be built by combining values a and b using the following scheme:
|
||||
If the i-th bit in _mask_ is 1
|
||||
select i-th bit from _a_
|
||||
else
|
||||
select i-th bit from _b_ */
|
||||
Return value will be built by combining values _a_ and _b_ using the following scheme:
|
||||
result[i] = mask[i] ? a[i] : b[i];
|
||||
|
||||
@note: _mask_ element values are restricted to these values:
|
||||
- 0: select element from _b_
|
||||
- 0xff/0xffff/etc: select element from _a_
|
||||
(fully compatible with bitwise-based operator)
|
||||
*/
|
||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
|
||||
const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
@@ -1032,8 +1138,8 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>&
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
int_type m = Traits::reinterpret_int(mask.s[i]);
|
||||
c.s[i] = Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m)
|
||||
| (Traits::reinterpret_int(b.s[i]) & ~m));
|
||||
CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
|
||||
c.s[i] = m ? a.s[i] : b.s[i];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
@@ -1057,6 +1163,44 @@ template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Expand lower values to the wider pack type
|
||||
|
||||
Same as cv::v_expand, but return lower half of the vector.
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
int32x4 int64x2
|
||||
{A B C D} ==> {A B}
|
||||
@endcode */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_expand_low(const v_reg<_Tp, n>& a)
|
||||
{
|
||||
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
b.s[i] = a.s[i];
|
||||
return b;
|
||||
}
|
||||
|
||||
/** @brief Expand higher values to the wider pack type
|
||||
|
||||
Same as cv::v_expand_low, but expand higher half of the vector instead.
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
int32x4 int64x2
|
||||
{A B C D} ==> {C D}
|
||||
@endcode */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_expand_high(const v_reg<_Tp, n>& a)
|
||||
{
|
||||
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
b.s[i] = a.s[i+(n/2)];
|
||||
return b;
|
||||
}
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
|
||||
v_reinterpret_as_int(const v_reg<_Tp, n>& a)
|
||||
@@ -1112,9 +1256,9 @@ template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const
|
||||
@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
|
||||
*/
|
||||
template<typename _Tp>
|
||||
inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr)
|
||||
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr)
|
||||
{
|
||||
return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
|
||||
return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
|
||||
}
|
||||
|
||||
/** @brief Load register contents from memory (aligned)
|
||||
@@ -1122,9 +1266,9 @@ inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr)
|
||||
similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
|
||||
*/
|
||||
template<typename _Tp>
|
||||
inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr)
|
||||
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr)
|
||||
{
|
||||
return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
|
||||
return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
|
||||
}
|
||||
|
||||
/** @brief Load 64-bits of data to lower part (high part is undefined).
|
||||
@@ -1137,9 +1281,9 @@ v_int32x4 r = v_load_low(lo);
|
||||
@endcode
|
||||
*/
|
||||
template<typename _Tp>
|
||||
inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_low(const _Tp* ptr)
|
||||
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
|
||||
{
|
||||
v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
|
||||
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
|
||||
for( int i = 0; i < c.nlanes/2; i++ )
|
||||
{
|
||||
c.s[i] = ptr[i];
|
||||
@@ -1158,9 +1302,9 @@ v_int32x4 r = v_load_halves(lo, hi);
|
||||
@endcode
|
||||
*/
|
||||
template<typename _Tp>
|
||||
inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
|
||||
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
|
||||
{
|
||||
v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
|
||||
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
|
||||
for( int i = 0; i < c.nlanes/2; i++ )
|
||||
{
|
||||
c.s[i] = loptr[i];
|
||||
@@ -1179,11 +1323,11 @@ v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
|
||||
@endcode
|
||||
For 8-, 16-, 32-bit integer source types. */
|
||||
template<typename _Tp>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
|
||||
v_load_expand(const _Tp* ptr)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::w_type w_type;
|
||||
v_reg<w_type, V_SIMD128Traits<w_type>::nlanes> c;
|
||||
v_reg<w_type, V_TypeTraits<w_type>::nlanes128> c;
|
||||
for( int i = 0; i < c.nlanes; i++ )
|
||||
{
|
||||
c.s[i] = ptr[i];
|
||||
@@ -1200,11 +1344,11 @@ v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32
|
||||
@endcode
|
||||
For 8-bit integer source types. */
|
||||
template<typename _Tp>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4>
|
||||
v_load_expand_q(const _Tp* ptr)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::q_type q_type;
|
||||
v_reg<q_type, V_SIMD128Traits<q_type>::nlanes> c;
|
||||
v_reg<q_type, V_TypeTraits<q_type>::nlanes128> c;
|
||||
for( int i = 0; i < c.nlanes; i++ )
|
||||
{
|
||||
c.s[i] = ptr[i];
|
||||
@@ -1284,7 +1428,8 @@ Scheme:
|
||||
For all types except 64-bit. */
|
||||
template<typename _Tp, int n>
|
||||
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
||||
const v_reg<_Tp, n>& b)
|
||||
const v_reg<_Tp, n>& b,
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
|
||||
{
|
||||
int i, i2;
|
||||
for( i = i2 = 0; i < n; i++, i2 += 2 )
|
||||
@@ -1304,7 +1449,8 @@ Scheme:
|
||||
For all types except 64-bit. */
|
||||
template<typename _Tp, int n>
|
||||
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
||||
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
|
||||
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
|
||||
{
|
||||
int i, i3;
|
||||
for( i = i3 = 0; i < n; i++, i3 += 3 )
|
||||
@@ -1325,7 +1471,8 @@ Scheme:
|
||||
For all types except 64-bit. */
|
||||
template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
||||
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
|
||||
const v_reg<_Tp, n>& d)
|
||||
const v_reg<_Tp, n>& d,
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
|
||||
{
|
||||
int i, i4;
|
||||
for( i = i4 = 0; i < n; i++, i4 += 4 )
|
||||
@@ -1395,6 +1542,20 @@ inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
|
||||
ptr[i] = a.s[i];
|
||||
}
|
||||
|
||||
template<typename _Tp, int n>
|
||||
inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
ptr[i] = a.s[i];
|
||||
}
|
||||
|
||||
template<typename _Tp, int n>
|
||||
inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
ptr[i] = a.s[i];
|
||||
}
|
||||
|
||||
/** @brief Combine vector from first elements of two vectors
|
||||
|
||||
Scheme:
|
||||
@@ -1476,7 +1637,7 @@ Usage:
|
||||
v_int32x4 a, b, c;
|
||||
c = v_extract<2>(a, b);
|
||||
@endcode
|
||||
For integer types only. */
|
||||
For all types. */
|
||||
template<int s, typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
@@ -1501,6 +1662,18 @@ template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @overload */
|
||||
template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
|
||||
{
|
||||
v_reg<int, n*2> c;
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
c.s[i] = cvRound(a.s[i]);
|
||||
c.s[i+n] = cvRound(b.s[i]);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Floor
|
||||
|
||||
Floor each value. Input type is float vector ==> output type is int vector.*/
|
||||
@@ -1593,6 +1766,17 @@ template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
|
||||
return c;
|
||||
}
|
||||
|
||||
template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a, const v_reg<double, n>& b)
|
||||
{
|
||||
v_reg<float, n*2> c;
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
c.s[i] = (float)a.s[i];
|
||||
c.s[i+n] = (float)b.s[i];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Convert to double
|
||||
|
||||
Supported input type is cv::v_int32x4. */
|
||||
@@ -1615,6 +1799,124 @@ template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<float, n*2>& a)
|
||||
return c;
|
||||
}
|
||||
|
||||
template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx)
|
||||
{
|
||||
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
|
||||
for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++)
|
||||
c.s[i] = tab[idx[i]];
|
||||
return c;
|
||||
}
|
||||
template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx)
|
||||
{
|
||||
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
|
||||
for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++)
|
||||
c.s[i] = tab[idx[i / 2] + i % 2];
|
||||
return c;
|
||||
}
|
||||
template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx)
|
||||
{
|
||||
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
|
||||
for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++)
|
||||
c.s[i] = tab[idx[i / 4] + i % 4];
|
||||
return c;
|
||||
}
|
||||
|
||||
template<int n> inline v_reg<int, n> v_lut(const int* tab, const v_reg<int, n>& idx)
|
||||
{
|
||||
v_reg<int, n> c;
|
||||
for( int i = 0; i < n; i++ )
|
||||
c.s[i] = tab[idx.s[i]];
|
||||
return c;
|
||||
}
|
||||
|
||||
template<int n> inline v_reg<unsigned, n> v_lut(const unsigned* tab, const v_reg<int, n>& idx)
|
||||
{
|
||||
v_reg<int, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = tab[idx.s[i]];
|
||||
return c;
|
||||
}
|
||||
|
||||
template<int n> inline v_reg<float, n> v_lut(const float* tab, const v_reg<int, n>& idx)
|
||||
{
|
||||
v_reg<float, n> c;
|
||||
for( int i = 0; i < n; i++ )
|
||||
c.s[i] = tab[idx.s[i]];
|
||||
return c;
|
||||
}
|
||||
|
||||
template<int n> inline v_reg<double, n> v_lut(const double* tab, const v_reg<int, n*2>& idx)
|
||||
{
|
||||
v_reg<double, n> c;
|
||||
for( int i = 0; i < n; i++ )
|
||||
c.s[i] = tab[idx.s[i]];
|
||||
return c;
|
||||
}
|
||||
|
||||
template<int n> inline void v_lut_deinterleave(const float* tab, const v_reg<int, n>& idx,
|
||||
v_reg<float, n>& x, v_reg<float, n>& y)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
int j = idx.s[i];
|
||||
x.s[i] = tab[j];
|
||||
y.s[i] = tab[j+1];
|
||||
}
|
||||
}
|
||||
|
||||
template<int n> inline void v_lut_deinterleave(const double* tab, const v_reg<int, n*2>& idx,
|
||||
v_reg<double, n>& x, v_reg<double, n>& y)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
int j = idx.s[i];
|
||||
x.s[i] = tab[j];
|
||||
y.s[i] = tab[j+1];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec)
|
||||
{
|
||||
v_reg<float, n> c;
|
||||
for (int i = 0; i < n/4; i++)
|
||||
{
|
||||
c.s[4*i ] = vec.s[4*i ];
|
||||
c.s[4*i+1] = vec.s[4*i+2];
|
||||
c.s[4*i+2] = vec.s[4*i+1];
|
||||
c.s[4*i+3] = vec.s[4*i+3];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec)
|
||||
{
|
||||
v_reg<float, n> c;
|
||||
for (int i = 0; i < n/8; i++)
|
||||
{
|
||||
c.s[8*i ] = vec.s[8*i ];
|
||||
c.s[8*i+1] = vec.s[8*i+4];
|
||||
c.s[8*i+2] = vec.s[8*i+1];
|
||||
c.s[8*i+3] = vec.s[8*i+5];
|
||||
c.s[8*i+4] = vec.s[8*i+2];
|
||||
c.s[8*i+5] = vec.s[8*i+6];
|
||||
c.s[8*i+6] = vec.s[8*i+3];
|
||||
c.s[8*i+7] = vec.s[8*i+7];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec)
|
||||
{
|
||||
v_reg<float, n> c;
|
||||
for (int i = 0; i < n/4; i++)
|
||||
{
|
||||
c.s[3*i ] = vec.s[4*i ];
|
||||
c.s[3*i+1] = vec.s[4*i+1];
|
||||
c.s[3*i+2] = vec.s[4*i+2];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Transpose 4x4 matrix
|
||||
|
||||
Scheme:
|
||||
@@ -1890,6 +2192,103 @@ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, s
|
||||
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
|
||||
//! @}
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename _Tpm, typename _Tp, int n>
|
||||
inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
mptr[i] = (_Tpm)a.s[i];
|
||||
mptr[i + n] = (_Tpm)b.s[i];
|
||||
}
|
||||
}
|
||||
//! @endcond
|
||||
|
||||
//! @name Pack boolean values
|
||||
//! @{
|
||||
//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector
|
||||
//!
|
||||
//! @note Must provide valid boolean values to guarantee same result for all architectures.
|
||||
|
||||
/** @brief
|
||||
//! For 16-bit boolean values
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
a {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
|
||||
b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
|
||||
===============
|
||||
{
|
||||
0xFF 0 0 0xFF 0 0xFF 0xFF 0
|
||||
0xFF 0 0xFF 0 0 0xFF 0 0xFF
|
||||
}
|
||||
@endcode */
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
v_uint8x16 mask;
|
||||
_pack_b(mask.s, a, b);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/** @overload
|
||||
For 32-bit boolean values
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
a {0xFFFF.. 0 0 0xFFFF..}
|
||||
b {0 0xFFFF.. 0xFFFF.. 0}
|
||||
c {0xFFFF.. 0 0xFFFF.. 0}
|
||||
d {0 0xFFFF.. 0 0xFFFF..}
|
||||
===============
|
||||
{
|
||||
0xFF 0 0 0xFF 0 0xFF 0xFF 0
|
||||
0xFF 0 0xFF 0 0 0xFF 0 0xFF
|
||||
}
|
||||
@endcode */
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
|
||||
const v_uint32x4& c, const v_uint32x4& d)
|
||||
{
|
||||
v_uint8x16 mask;
|
||||
_pack_b(mask.s, a, b);
|
||||
_pack_b(mask.s + 8, c, d);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/** @overload
|
||||
For 64-bit boolean values
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
a {0xFFFF.. 0}
|
||||
b {0 0xFFFF..}
|
||||
c {0xFFFF.. 0}
|
||||
d {0 0xFFFF..}
|
||||
|
||||
e {0xFFFF.. 0}
|
||||
f {0xFFFF.. 0}
|
||||
g {0 0xFFFF..}
|
||||
h {0 0xFFFF..}
|
||||
===============
|
||||
{
|
||||
0xFF 0 0 0xFF 0xFF 0 0 0xFF
|
||||
0xFF 0 0xFF 0 0 0xFF 0 0xFF
|
||||
}
|
||||
@endcode */
|
||||
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
|
||||
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
|
||||
const v_uint64x2& g, const v_uint64x2& h)
|
||||
{
|
||||
v_uint8x16 mask;
|
||||
_pack_b(mask.s, a, b);
|
||||
_pack_b(mask.s + 4, c, d);
|
||||
_pack_b(mask.s + 8, e, f);
|
||||
_pack_b(mask.s + 12, g, h);
|
||||
return mask;
|
||||
}
|
||||
//! @}
|
||||
|
||||
/** @brief Matrix multiplication
|
||||
|
||||
Scheme:
|
||||
@@ -1939,6 +2338,30 @@ inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
|
||||
v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]);
|
||||
}
|
||||
|
||||
////// FP16 support ///////
|
||||
|
||||
inline v_reg<float, V_TypeTraits<float>::nlanes128>
|
||||
v_load_expand(const float16_t* ptr)
|
||||
{
|
||||
v_reg<float, V_TypeTraits<float>::nlanes128> v;
|
||||
for( int i = 0; i < v.nlanes; i++ )
|
||||
{
|
||||
v.s[i] = ptr[i];
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
inline void
|
||||
v_pack_store(float16_t* ptr, v_reg<float, V_TypeTraits<float>::nlanes128>& v)
|
||||
{
|
||||
for( int i = 0; i < v.nlanes; i++ )
|
||||
{
|
||||
ptr[i] = float16_t(v.s[i]);
|
||||
}
|
||||
}
|
||||
|
||||
inline void v_cleanup() {}
|
||||
|
||||
//! @}
|
||||
|
||||
//! @name Check SIMD support
|
||||
158
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin_forward.hpp
vendored
Normal file
158
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin_forward.hpp
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef CV__SIMD_FORWARD
|
||||
#error "Need to pre-define forward width"
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
/** Types **/
|
||||
#if CV__SIMD_FORWARD == 512
|
||||
// [todo] 512
|
||||
#error "AVX512 Not implemented yet"
|
||||
#elif CV__SIMD_FORWARD == 256
|
||||
// 256
|
||||
#define __CV_VX(fun) v256_##fun
|
||||
#define __CV_V_UINT8 v_uint8x32
|
||||
#define __CV_V_INT8 v_int8x32
|
||||
#define __CV_V_UINT16 v_uint16x16
|
||||
#define __CV_V_INT16 v_int16x16
|
||||
#define __CV_V_UINT32 v_uint32x8
|
||||
#define __CV_V_INT32 v_int32x8
|
||||
#define __CV_V_UINT64 v_uint64x4
|
||||
#define __CV_V_INT64 v_int64x4
|
||||
#define __CV_V_FLOAT32 v_float32x8
|
||||
#define __CV_V_FLOAT64 v_float64x4
|
||||
struct v_uint8x32;
|
||||
struct v_int8x32;
|
||||
struct v_uint16x16;
|
||||
struct v_int16x16;
|
||||
struct v_uint32x8;
|
||||
struct v_int32x8;
|
||||
struct v_uint64x4;
|
||||
struct v_int64x4;
|
||||
struct v_float32x8;
|
||||
struct v_float64x4;
|
||||
#else
|
||||
// 128
|
||||
#define __CV_VX(fun) v_##fun
|
||||
#define __CV_V_UINT8 v_uint8x16
|
||||
#define __CV_V_INT8 v_int8x16
|
||||
#define __CV_V_UINT16 v_uint16x8
|
||||
#define __CV_V_INT16 v_int16x8
|
||||
#define __CV_V_UINT32 v_uint32x4
|
||||
#define __CV_V_INT32 v_int32x4
|
||||
#define __CV_V_UINT64 v_uint64x2
|
||||
#define __CV_V_INT64 v_int64x2
|
||||
#define __CV_V_FLOAT32 v_float32x4
|
||||
#define __CV_V_FLOAT64 v_float64x2
|
||||
struct v_uint8x16;
|
||||
struct v_int8x16;
|
||||
struct v_uint16x8;
|
||||
struct v_int16x8;
|
||||
struct v_uint32x4;
|
||||
struct v_int32x4;
|
||||
struct v_uint64x2;
|
||||
struct v_int64x2;
|
||||
struct v_float32x4;
|
||||
struct v_float64x2;
|
||||
#endif
|
||||
|
||||
/** Value reordering **/
|
||||
|
||||
// Expansion
|
||||
void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
||||
void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
||||
void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
||||
void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
||||
void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
||||
void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
||||
// Low Expansion
|
||||
__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
|
||||
__CV_V_INT16 v_expand_low(const __CV_V_INT8&);
|
||||
__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
|
||||
__CV_V_INT32 v_expand_low(const __CV_V_INT16&);
|
||||
__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
|
||||
__CV_V_INT64 v_expand_low(const __CV_V_INT32&);
|
||||
// High Expansion
|
||||
__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
|
||||
__CV_V_INT16 v_expand_high(const __CV_V_INT8&);
|
||||
__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
|
||||
__CV_V_INT32 v_expand_high(const __CV_V_INT16&);
|
||||
__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
|
||||
__CV_V_INT64 v_expand_high(const __CV_V_INT32&);
|
||||
// Load & Low Expansion
|
||||
__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
|
||||
__CV_V_INT16 __CV_VX(load_expand)(const schar*);
|
||||
__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
|
||||
__CV_V_INT32 __CV_VX(load_expand)(const short*);
|
||||
__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
|
||||
__CV_V_INT64 __CV_VX(load_expand)(const int*);
|
||||
// Load lower 8-bit and expand into 32-bit
|
||||
__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
|
||||
__CV_V_INT32 __CV_VX(load_expand_q)(const schar*);
|
||||
|
||||
// Saturating Pack
|
||||
__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
||||
__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||
__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
|
||||
__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
|
||||
// Non-saturating Pack
|
||||
__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
|
||||
__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
|
||||
// Pack signed integers with unsigned saturation
|
||||
__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||
__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
|
||||
|
||||
/** Arithmetic, bitwise and comparison operations **/
|
||||
|
||||
// Non-saturating multiply
|
||||
#if CV_VSX
|
||||
template<typename Tvec>
|
||||
Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
|
||||
#else
|
||||
__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
|
||||
__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
|
||||
__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
||||
__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||
#endif
|
||||
|
||||
// Multiply and expand
|
||||
#if CV_VSX
|
||||
template<typename Tvec, typename Twvec>
|
||||
void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
|
||||
#else
|
||||
void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
||||
void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
||||
void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
||||
void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
||||
void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
||||
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
||||
#endif
|
||||
|
||||
/** Cleanup **/
|
||||
#undef CV__SIMD_FORWARD
|
||||
#undef __CV_VX
|
||||
#undef __CV_V_UINT8
|
||||
#undef __CV_V_INT8
|
||||
#undef __CV_V_UINT16
|
||||
#undef __CV_V_INT16
|
||||
#undef __CV_V_UINT32
|
||||
#undef __CV_V_INT32
|
||||
#undef __CV_V_UINT64
|
||||
#undef __CV_V_INT64
|
||||
#undef __CV_V_FLOAT32
|
||||
#undef __CV_V_FLOAT64
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
} // cv::
|
||||
@@ -280,11 +280,10 @@ struct v_float64x2
|
||||
|
||||
#if CV_FP16
|
||||
// Workaround for old compilers
|
||||
template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
|
||||
{ return (int16x4_t)a; }
|
||||
template <typename T> static inline float16x4_t vreinterpret_f16_s16(T a)
|
||||
{ return (float16x4_t)a; }
|
||||
template <typename T> static inline float16x4_t cv_vld1_f16(const T* ptr)
|
||||
static inline int16x4_t vreinterpret_s16_f16(float16x4_t a) { return (int16x4_t)a; }
|
||||
static inline float16x4_t vreinterpret_f16_s16(int16x4_t a) { return (float16x4_t)a; }
|
||||
|
||||
static inline float16x4_t cv_vld1_f16(const void* ptr)
|
||||
{
|
||||
#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
|
||||
return vreinterpret_f16_s16(vld1_s16((const short*)ptr));
|
||||
@@ -292,7 +291,7 @@ template <typename T> static inline float16x4_t cv_vld1_f16(const T* ptr)
|
||||
return vld1_f16((const __fp16*)ptr);
|
||||
#endif
|
||||
}
|
||||
template <typename T> static inline void cv_vst1_f16(T* ptr, float16x4_t a)
|
||||
static inline void cv_vst1_f16(void* ptr, float16x4_t a)
|
||||
{
|
||||
#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
|
||||
vst1_s16((short*)ptr, vreinterpret_s16_f16(a));
|
||||
@@ -301,24 +300,24 @@ template <typename T> static inline void cv_vst1_f16(T* ptr, float16x4_t a)
|
||||
#endif
|
||||
}
|
||||
|
||||
struct v_float16x4
|
||||
{
|
||||
typedef short lane_type;
|
||||
enum { nlanes = 4 };
|
||||
#ifndef vdup_n_f16
|
||||
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
|
||||
#endif
|
||||
|
||||
v_float16x4() {}
|
||||
explicit v_float16x4(float16x4_t v) : val(v) {}
|
||||
v_float16x4(short v0, short v1, short v2, short v3)
|
||||
{
|
||||
short v[] = {v0, v1, v2, v3};
|
||||
val = cv_vld1_f16(v);
|
||||
}
|
||||
short get0() const
|
||||
{
|
||||
return vget_lane_s16(vreinterpret_s16_f16(val), 0);
|
||||
}
|
||||
float16x4_t val;
|
||||
};
|
||||
#endif // CV_FP16
|
||||
|
||||
#if CV_FP16
|
||||
inline v_float32x4 v128_load_fp16_f32(const short* ptr)
|
||||
{
|
||||
float16x4_t a = cv_vld1_f16((const __fp16*)ptr);
|
||||
return v_float32x4(vcvt_f32_f16(a));
|
||||
}
|
||||
|
||||
inline void v_store_fp16(short* ptr, const v_float32x4& a)
|
||||
{
|
||||
float16x4_t fp16 = vcvt_f16_f32(a.val);
|
||||
cv_vst1_f16((short*)ptr, fp16);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
|
||||
@@ -395,6 +394,35 @@ OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn
|
||||
OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
|
||||
OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32)
|
||||
|
||||
// pack boolean
|
||||
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
uint8x16_t ab = vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val));
|
||||
return v_uint8x16(ab);
|
||||
}
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
|
||||
const v_uint32x4& c, const v_uint32x4& d)
|
||||
{
|
||||
uint16x8_t nab = vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val));
|
||||
uint16x8_t ncd = vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val));
|
||||
return v_uint8x16(vcombine_u8(vmovn_u16(nab), vmovn_u16(ncd)));
|
||||
}
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
|
||||
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
|
||||
const v_uint64x2& g, const v_uint64x2& h)
|
||||
{
|
||||
uint32x4_t ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val));
|
||||
uint32x4_t cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val));
|
||||
uint32x4_t ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val));
|
||||
uint32x4_t gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val));
|
||||
|
||||
uint16x8_t abcd = vcombine_u16(vmovn_u32(ab), vmovn_u32(cd));
|
||||
uint16x8_t efgh = vcombine_u16(vmovn_u32(ef), vmovn_u32(gh));
|
||||
return v_uint8x16(vcombine_u8(vmovn_u16(abcd), vmovn_u16(efgh)));
|
||||
}
|
||||
|
||||
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
|
||||
const v_float32x4& m1, const v_float32x4& m2,
|
||||
const v_float32x4& m3)
|
||||
@@ -436,10 +464,8 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
|
||||
@@ -477,6 +503,37 @@ inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
|
||||
}
|
||||
#endif
|
||||
|
||||
// saturating multiply 8-bit, 16-bit
|
||||
#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec) \
|
||||
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
_Tpwvec c, d; \
|
||||
v_mul_expand(a, b, c, d); \
|
||||
return v_pack(c, d); \
|
||||
} \
|
||||
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
|
||||
{ a = a * b; return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int8x16, v_int16x8)
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint8x16, v_uint16x8)
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int16x8, v_int32x4)
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint16x8, v_uint32x4)
|
||||
|
||||
// Multiply and expand
|
||||
inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
|
||||
v_int16x8& c, v_int16x8& d)
|
||||
{
|
||||
c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
|
||||
d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val));
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
|
||||
v_uint16x8& c, v_uint16x8& d)
|
||||
{
|
||||
c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
|
||||
d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
|
||||
v_int32x4& c, v_int32x4& d)
|
||||
{
|
||||
@@ -498,6 +555,21 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
|
||||
d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
|
||||
}
|
||||
|
||||
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
return v_int16x8(vcombine_s16(
|
||||
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
|
||||
vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16)
|
||||
));
|
||||
}
|
||||
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
return v_uint16x8(vcombine_u16(
|
||||
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
|
||||
vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16)
|
||||
));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
|
||||
@@ -506,6 +578,12 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{
|
||||
v_int32x4 s = v_dotprod(a, b);
|
||||
return v_int32x4(vaddq_s32(s.val , c.val));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
|
||||
@@ -686,6 +764,13 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
|
||||
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
|
||||
#endif
|
||||
|
||||
inline v_float32x4 v_not_nan(const v_float32x4& a)
|
||||
{ return v_float32x4(vreinterpretq_f32_u32(vceqq_f32(a.val, a.val))); }
|
||||
#if CV_SIMD128_64F
|
||||
inline v_float64x2 v_not_nan(const v_float64x2& a)
|
||||
{ return v_float64x2(vreinterpretq_f64_u64(vceqq_f64(a.val, a.val))); }
|
||||
#endif
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
|
||||
@@ -694,8 +779,11 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_mul_wrap, vmulq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16)
|
||||
|
||||
// TODO: absdiff for signed integers
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
|
||||
@@ -704,6 +792,12 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
|
||||
#endif
|
||||
|
||||
/** Saturating absolute difference **/
|
||||
inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
|
||||
{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); }
|
||||
inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
|
||||
{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); }
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
|
||||
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
@@ -725,9 +819,30 @@ inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
|
||||
return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
|
||||
}
|
||||
|
||||
inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
|
||||
{
|
||||
#if CV_SIMD128_64F
|
||||
// ARMv8, which adds support for 64-bit floating-point (so CV_SIMD128_64F is defined),
|
||||
// also adds FMA support both for single- and double-precision floating-point vectors
|
||||
return v_float32x4(vfmaq_f32(c.val, a.val, b.val));
|
||||
#else
|
||||
return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{
|
||||
return v_int32x4(vmlaq_s32(c.val, a.val, b.val));
|
||||
}
|
||||
|
||||
inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
|
||||
{
|
||||
return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
|
||||
return v_fma(a, b, c);
|
||||
}
|
||||
|
||||
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{
|
||||
return v_fma(a, b, c);
|
||||
}
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
@@ -742,9 +857,14 @@ inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
|
||||
return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
|
||||
}
|
||||
|
||||
inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
|
||||
{
|
||||
return v_float64x2(vfmaq_f64(c.val, a.val, b.val));
|
||||
}
|
||||
|
||||
inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
|
||||
{
|
||||
return v_float64x2(vaddq_f64(c.val, vmulq_f64(a.val, b.val)));
|
||||
return v_fma(a, b, c);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -759,15 +879,7 @@ template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
|
||||
template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
|
||||
{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \
|
||||
template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
|
||||
{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
|
||||
{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
|
||||
{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); }
|
||||
{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); }
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8)
|
||||
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8)
|
||||
@@ -778,6 +890,33 @@ OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
|
||||
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64)
|
||||
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64)
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix) \
|
||||
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
|
||||
{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
|
||||
{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
|
||||
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
|
||||
{ return a; } \
|
||||
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \
|
||||
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ CV_UNUSED(b); return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint16x8, u16)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int16x8, s16)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint32x4, u32)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int32x4, s32)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float32x4, f32)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint64x2, u64)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
|
||||
#if CV_SIMD128_64F
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
|
||||
#endif
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
|
||||
inline _Tpvec v_load(const _Tp* ptr) \
|
||||
{ return _Tpvec(vld1q_##suffix(ptr)); } \
|
||||
@@ -791,6 +930,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vst1q_##suffix(ptr, a.val); } \
|
||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vst1q_##suffix(ptr, a.val); } \
|
||||
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vst1q_##suffix(ptr, a.val); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
|
||||
{ vst1q_##suffix(ptr, a.val); } \
|
||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
|
||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||
@@ -809,14 +952,6 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
|
||||
#endif
|
||||
|
||||
#if CV_FP16
|
||||
// Workaround for old comiplers
|
||||
inline v_float16x4 v_load_f16(const short* ptr)
|
||||
{ return v_float16x4(cv_vld1_f16(ptr)); }
|
||||
inline void v_store_f16(short* ptr, v_float16x4& a)
|
||||
{ cv_vst1_f16(ptr, a.val); }
|
||||
#endif
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
|
||||
inline scalartype v_reduce_##func(const _Tpvec& a) \
|
||||
{ \
|
||||
@@ -849,6 +984,13 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
|
||||
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
|
||||
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
inline double v_reduce_sum(const v_float64x2& a)
|
||||
{
|
||||
return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
|
||||
const v_float32x4& c, const v_float32x4& d)
|
||||
{
|
||||
@@ -864,6 +1006,49 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
|
||||
return v_float32x4(vaddq_f32(v0, v1));
|
||||
}
|
||||
|
||||
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
|
||||
{
|
||||
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val)));
|
||||
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
|
||||
return vget_lane_u32(vpadd_u32(t1, t1), 0);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
|
||||
{
|
||||
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val))));
|
||||
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
|
||||
return vget_lane_u32(vpadd_u32(t1, t1), 0);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val));
|
||||
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
|
||||
return vget_lane_u32(vpadd_u32(t1, t1), 0);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val)));
|
||||
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
|
||||
return vget_lane_u32(vpadd_u32(t1, t1), 0);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
|
||||
{
|
||||
uint32x4_t t0 = vabdq_u32(a.val, b.val);
|
||||
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
|
||||
return vget_lane_u32(vpadd_u32(t1, t1), 0);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
|
||||
{
|
||||
uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val));
|
||||
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
|
||||
return vget_lane_u32(vpadd_u32(t1, t1), 0);
|
||||
}
|
||||
inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
|
||||
{
|
||||
float32x4_t t0 = vabdq_f32(a.val, b.val);
|
||||
float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0));
|
||||
return vget_lane_f32(vpadd_f32(t1, t1), 0);
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_POPCOUNT(_Tpvec, cast) \
|
||||
inline v_uint32x4 v_popcount(const _Tpvec& a) \
|
||||
{ \
|
||||
@@ -995,6 +1180,14 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
|
||||
b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_low(const _Tpvec& a) \
|
||||
{ \
|
||||
return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_high(const _Tpvec& a) \
|
||||
{ \
|
||||
return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \
|
||||
} \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ \
|
||||
return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
|
||||
@@ -1009,14 +1202,16 @@ OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)
|
||||
|
||||
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
|
||||
{
|
||||
uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr);
|
||||
typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint;
|
||||
uint8x8_t v0 = vcreate_u8(*(unaligned_uint*)ptr);
|
||||
uint16x4_t v1 = vget_low_u16(vmovl_u8(v0));
|
||||
return v_uint32x4(vmovl_u16(v1));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_load_expand_q(const schar* ptr)
|
||||
{
|
||||
int8x8_t v0 = vcreate_s8(*(unsigned*)ptr);
|
||||
typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint;
|
||||
int8x8_t v0 = vcreate_s8(*(unaligned_uint*)ptr);
|
||||
int16x4_t v1 = vget_low_s16(vmovl_s8(v0));
|
||||
return v_int32x4(vmovl_s16(v1));
|
||||
}
|
||||
@@ -1095,6 +1290,18 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
|
||||
#endif
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
float32x4_t a_ = a.val;
|
||||
int32x4_t result;
|
||||
__asm__ ("fcvtns %0.4s, %1.4s"
|
||||
: "=w"(result)
|
||||
: "w"(a_)
|
||||
: /* No clobbers */);
|
||||
return v_int32x4(result);
|
||||
}
|
||||
#else
|
||||
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||
@@ -1103,7 +1310,7 @@ inline v_int32x4 v_round(const v_float32x4& a)
|
||||
int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
|
||||
return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
|
||||
}
|
||||
|
||||
#endif
|
||||
inline v_int32x4 v_floor(const v_float32x4& a)
|
||||
{
|
||||
int32x4_t a1 = vcvtq_s32_f32(a.val);
|
||||
@@ -1128,6 +1335,11 @@ inline v_int32x4 v_round(const v_float64x2& a)
|
||||
return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
|
||||
{
|
||||
return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val))));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_floor(const v_float64x2& a)
|
||||
{
|
||||
static const int32x2_t zero = vdup_n_s32(0);
|
||||
@@ -1202,14 +1414,16 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
|
||||
c.val = v.val[2]; \
|
||||
d.val = v.val[3]; \
|
||||
} \
|
||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
|
||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ \
|
||||
_Tpvec##x2_t v; \
|
||||
v.val[0] = a.val; \
|
||||
v.val[1] = b.val; \
|
||||
vst2q_##suffix(ptr, v); \
|
||||
} \
|
||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
|
||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
||||
const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ \
|
||||
_Tpvec##x3_t v; \
|
||||
v.val[0] = a.val; \
|
||||
@@ -1218,7 +1432,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
|
||||
vst3q_##suffix(ptr, v); \
|
||||
} \
|
||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
||||
const v_##_Tpvec& c, const v_##_Tpvec& d) \
|
||||
const v_##_Tpvec& c, const v_##_Tpvec& d, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
|
||||
{ \
|
||||
_Tpvec##x4_t v; \
|
||||
v.val[0] = a.val; \
|
||||
@@ -1228,6 +1443,83 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
|
||||
vst4q_##suffix(ptr, v); \
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \
|
||||
inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \
|
||||
{ \
|
||||
tp##x1_t a0 = vld1_##suffix(ptr); \
|
||||
tp##x1_t b0 = vld1_##suffix(ptr + 1); \
|
||||
tp##x1_t a1 = vld1_##suffix(ptr + 2); \
|
||||
tp##x1_t b1 = vld1_##suffix(ptr + 3); \
|
||||
a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
|
||||
b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
|
||||
} \
|
||||
\
|
||||
inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \
|
||||
v_##tp##x2& b, v_##tp##x2& c ) \
|
||||
{ \
|
||||
tp##x1_t a0 = vld1_##suffix(ptr); \
|
||||
tp##x1_t b0 = vld1_##suffix(ptr + 1); \
|
||||
tp##x1_t c0 = vld1_##suffix(ptr + 2); \
|
||||
tp##x1_t a1 = vld1_##suffix(ptr + 3); \
|
||||
tp##x1_t b1 = vld1_##suffix(ptr + 4); \
|
||||
tp##x1_t c1 = vld1_##suffix(ptr + 5); \
|
||||
a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
|
||||
b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
|
||||
c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
|
||||
} \
|
||||
\
|
||||
inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \
|
||||
v_##tp##x2& c, v_##tp##x2& d ) \
|
||||
{ \
|
||||
tp##x1_t a0 = vld1_##suffix(ptr); \
|
||||
tp##x1_t b0 = vld1_##suffix(ptr + 1); \
|
||||
tp##x1_t c0 = vld1_##suffix(ptr + 2); \
|
||||
tp##x1_t d0 = vld1_##suffix(ptr + 3); \
|
||||
tp##x1_t a1 = vld1_##suffix(ptr + 4); \
|
||||
tp##x1_t b1 = vld1_##suffix(ptr + 5); \
|
||||
tp##x1_t c1 = vld1_##suffix(ptr + 6); \
|
||||
tp##x1_t d1 = vld1_##suffix(ptr + 7); \
|
||||
a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
|
||||
b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
|
||||
c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
|
||||
d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
|
||||
} \
|
||||
\
|
||||
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ \
|
||||
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
||||
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
||||
vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \
|
||||
vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \
|
||||
} \
|
||||
\
|
||||
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
|
||||
const v_##tp##x2& b, const v_##tp##x2& c, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ \
|
||||
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
||||
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
||||
vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
|
||||
vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \
|
||||
vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \
|
||||
vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \
|
||||
} \
|
||||
\
|
||||
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
|
||||
const v_##tp##x2& c, const v_##tp##x2& d, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ \
|
||||
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
||||
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
||||
vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
|
||||
vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \
|
||||
vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \
|
||||
vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \
|
||||
vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \
|
||||
vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \
|
||||
}
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
|
||||
OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
|
||||
OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
|
||||
@@ -1239,6 +1531,9 @@ OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
|
||||
OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
|
||||
#endif
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64)
|
||||
OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64)
|
||||
|
||||
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
|
||||
{
|
||||
return v_float32x4(vcvtq_f32_s32(a.val));
|
||||
@@ -1251,6 +1546,11 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a)
|
||||
return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero));
|
||||
}
|
||||
|
||||
inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
|
||||
{
|
||||
return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), vcvt_f32_f64(b.val)));
|
||||
}
|
||||
|
||||
inline v_float64x2 v_cvt_f64(const v_int32x4& a)
|
||||
{
|
||||
return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val))));
|
||||
@@ -1272,18 +1572,358 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CV_FP16
|
||||
inline v_float32x4 v_cvt_f32(const v_float16x4& a)
|
||||
////////////// Lookup table access ////////////////////
|
||||
|
||||
inline v_int8x16 v_lut(const schar* tab, const int* idx)
|
||||
{
|
||||
return v_float32x4(vcvt_f32_f16(a.val));
|
||||
schar CV_DECL_ALIGNED(32) elems[16] =
|
||||
{
|
||||
tab[idx[ 0]],
|
||||
tab[idx[ 1]],
|
||||
tab[idx[ 2]],
|
||||
tab[idx[ 3]],
|
||||
tab[idx[ 4]],
|
||||
tab[idx[ 5]],
|
||||
tab[idx[ 6]],
|
||||
tab[idx[ 7]],
|
||||
tab[idx[ 8]],
|
||||
tab[idx[ 9]],
|
||||
tab[idx[10]],
|
||||
tab[idx[11]],
|
||||
tab[idx[12]],
|
||||
tab[idx[13]],
|
||||
tab[idx[14]],
|
||||
tab[idx[15]]
|
||||
};
|
||||
return v_int8x16(vld1q_s8(elems));
|
||||
}
|
||||
inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
|
||||
{
|
||||
schar CV_DECL_ALIGNED(32) elems[16] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[0] + 1],
|
||||
tab[idx[1]],
|
||||
tab[idx[1] + 1],
|
||||
tab[idx[2]],
|
||||
tab[idx[2] + 1],
|
||||
tab[idx[3]],
|
||||
tab[idx[3] + 1],
|
||||
tab[idx[4]],
|
||||
tab[idx[4] + 1],
|
||||
tab[idx[5]],
|
||||
tab[idx[5] + 1],
|
||||
tab[idx[6]],
|
||||
tab[idx[6] + 1],
|
||||
tab[idx[7]],
|
||||
tab[idx[7] + 1]
|
||||
};
|
||||
return v_int8x16(vld1q_s8(elems));
|
||||
}
|
||||
inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
|
||||
{
|
||||
schar CV_DECL_ALIGNED(32) elems[16] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[0] + 1],
|
||||
tab[idx[0] + 2],
|
||||
tab[idx[0] + 3],
|
||||
tab[idx[1]],
|
||||
tab[idx[1] + 1],
|
||||
tab[idx[1] + 2],
|
||||
tab[idx[1] + 3],
|
||||
tab[idx[2]],
|
||||
tab[idx[2] + 1],
|
||||
tab[idx[2] + 2],
|
||||
tab[idx[2] + 3],
|
||||
tab[idx[3]],
|
||||
tab[idx[3] + 1],
|
||||
tab[idx[3] + 2],
|
||||
tab[idx[3] + 3]
|
||||
};
|
||||
return v_int8x16(vld1q_s8(elems));
|
||||
}
|
||||
inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); }
|
||||
inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); }
|
||||
inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); }
|
||||
|
||||
inline v_int16x8 v_lut(const short* tab, const int* idx)
|
||||
{
|
||||
short CV_DECL_ALIGNED(32) elems[8] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[1]],
|
||||
tab[idx[2]],
|
||||
tab[idx[3]],
|
||||
tab[idx[4]],
|
||||
tab[idx[5]],
|
||||
tab[idx[6]],
|
||||
tab[idx[7]]
|
||||
};
|
||||
return v_int16x8(vld1q_s16(elems));
|
||||
}
|
||||
inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
|
||||
{
|
||||
short CV_DECL_ALIGNED(32) elems[8] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[0] + 1],
|
||||
tab[idx[1]],
|
||||
tab[idx[1] + 1],
|
||||
tab[idx[2]],
|
||||
tab[idx[2] + 1],
|
||||
tab[idx[3]],
|
||||
tab[idx[3] + 1]
|
||||
};
|
||||
return v_int16x8(vld1q_s16(elems));
|
||||
}
|
||||
inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
|
||||
{
|
||||
return v_int16x8(vcombine_s16(vld1_s16(tab + idx[0]), vld1_s16(tab + idx[1])));
|
||||
}
|
||||
inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); }
|
||||
inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); }
|
||||
inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); }
|
||||
|
||||
inline v_int32x4 v_lut(const int* tab, const int* idx)
|
||||
{
|
||||
int CV_DECL_ALIGNED(32) elems[4] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[1]],
|
||||
tab[idx[2]],
|
||||
tab[idx[3]]
|
||||
};
|
||||
return v_int32x4(vld1q_s32(elems));
|
||||
}
|
||||
inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
|
||||
{
|
||||
return v_int32x4(vcombine_s32(vld1_s32(tab + idx[0]), vld1_s32(tab + idx[1])));
|
||||
}
|
||||
inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
|
||||
{
|
||||
return v_int32x4(vld1q_s32(tab + idx[0]));
|
||||
}
|
||||
inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); }
|
||||
inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); }
|
||||
inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); }
|
||||
|
||||
inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
|
||||
{
|
||||
return v_int64x2(vcombine_s64(vcreate_s64(tab[idx[0]]), vcreate_s64(tab[idx[1]])));
|
||||
}
|
||||
inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
|
||||
{
|
||||
return v_int64x2(vld1q_s64(tab + idx[0]));
|
||||
}
|
||||
inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
|
||||
inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
|
||||
|
||||
inline v_float32x4 v_lut(const float* tab, const int* idx)
|
||||
{
|
||||
float CV_DECL_ALIGNED(32) elems[4] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[1]],
|
||||
tab[idx[2]],
|
||||
tab[idx[3]]
|
||||
};
|
||||
return v_float32x4(vld1q_f32(elems));
|
||||
}
|
||||
inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
|
||||
{
|
||||
uint64 CV_DECL_ALIGNED(32) elems[2] =
|
||||
{
|
||||
*(uint64*)(tab + idx[0]),
|
||||
*(uint64*)(tab + idx[1])
|
||||
};
|
||||
return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems)));
|
||||
}
|
||||
inline v_float32x4 v_lut_quads(const float* tab, const int* idx)
|
||||
{
|
||||
return v_float32x4(vld1q_f32(tab + idx[0]));
|
||||
}
|
||||
|
||||
inline v_float16x4 v_cvt_f16(const v_float32x4& a)
|
||||
inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
return v_float16x4(vcvt_f16_f32(a.val));
|
||||
int CV_DECL_ALIGNED(32) elems[4] =
|
||||
{
|
||||
tab[vgetq_lane_s32(idxvec.val, 0)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 1)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 2)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 3)]
|
||||
};
|
||||
return v_int32x4(vld1q_s32(elems));
|
||||
}
|
||||
|
||||
inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
unsigned CV_DECL_ALIGNED(32) elems[4] =
|
||||
{
|
||||
tab[vgetq_lane_s32(idxvec.val, 0)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 1)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 2)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 3)]
|
||||
};
|
||||
return v_uint32x4(vld1q_u32(elems));
|
||||
}
|
||||
|
||||
inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
float CV_DECL_ALIGNED(32) elems[4] =
|
||||
{
|
||||
tab[vgetq_lane_s32(idxvec.val, 0)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 1)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 2)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 3)]
|
||||
};
|
||||
return v_float32x4(vld1q_f32(elems));
|
||||
}
|
||||
|
||||
inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
|
||||
{
|
||||
/*int CV_DECL_ALIGNED(32) idx[4];
|
||||
v_store(idx, idxvec);
|
||||
|
||||
float32x4_t xy02 = vcombine_f32(vld1_f32(tab + idx[0]), vld1_f32(tab + idx[2]));
|
||||
float32x4_t xy13 = vcombine_f32(vld1_f32(tab + idx[1]), vld1_f32(tab + idx[3]));
|
||||
|
||||
float32x4x2_t xxyy = vuzpq_f32(xy02, xy13);
|
||||
x = v_float32x4(xxyy.val[0]);
|
||||
y = v_float32x4(xxyy.val[1]);*/
|
||||
int CV_DECL_ALIGNED(32) idx[4];
|
||||
v_store_aligned(idx, idxvec);
|
||||
|
||||
x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
|
||||
y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]);
|
||||
}
|
||||
|
||||
inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
|
||||
{
|
||||
return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0705060403010200)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0705060403010200))));
|
||||
}
|
||||
inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
|
||||
inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
|
||||
{
|
||||
return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0703060205010400)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0703060205010400))));
|
||||
}
|
||||
inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
|
||||
{
|
||||
return v_int16x8(vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)), vtbl1_s8(vget_high_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)))));
|
||||
}
|
||||
inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
|
||||
inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
|
||||
{
|
||||
int16x4x2_t res = vzip_s16(vget_low_s16(vec.val), vget_high_s16(vec.val));
|
||||
return v_int16x8(vcombine_s16(res.val[0], res.val[1]));
|
||||
}
|
||||
inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
|
||||
{
|
||||
int32x2x2_t res = vzip_s32(vget_low_s32(vec.val), vget_high_s32(vec.val));
|
||||
return v_int32x4(vcombine_s32(res.val[0], res.val[1]));
|
||||
}
|
||||
inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
|
||||
inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
|
||||
|
||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
||||
{
|
||||
return v_int8x16(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0605040201000000)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0807060504020100))), vdupq_n_s8(0), 2));
|
||||
}
|
||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
||||
{
|
||||
return v_int16x8(vreinterpretq_s16_s8(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0504030201000000)), vget_high_s8(vreinterpretq_s8_s16(vec.val))), vdupq_n_s8(0), 2)));
|
||||
}
|
||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
|
||||
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
|
||||
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
inline v_float64x2 v_lut(const double* tab, const int* idx)
|
||||
{
|
||||
double CV_DECL_ALIGNED(32) elems[2] =
|
||||
{
|
||||
tab[idx[0]],
|
||||
tab[idx[1]]
|
||||
};
|
||||
return v_float64x2(vld1q_f64(elems));
|
||||
}
|
||||
|
||||
inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
|
||||
{
|
||||
return v_float64x2(vld1q_f64(tab + idx[0]));
|
||||
}
|
||||
|
||||
inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
double CV_DECL_ALIGNED(32) elems[2] =
|
||||
{
|
||||
tab[vgetq_lane_s32(idxvec.val, 0)],
|
||||
tab[vgetq_lane_s32(idxvec.val, 1)],
|
||||
};
|
||||
return v_float64x2(vld1q_f64(elems));
|
||||
}
|
||||
|
||||
inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
|
||||
{
|
||||
int CV_DECL_ALIGNED(32) idx[4];
|
||||
v_store_aligned(idx, idxvec);
|
||||
|
||||
x = v_float64x2(tab[idx[0]], tab[idx[1]]);
|
||||
y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]);
|
||||
}
|
||||
#endif
|
||||
|
||||
////// FP16 support ///////
|
||||
#if CV_FP16
|
||||
inline v_float32x4 v_load_expand(const float16_t* ptr)
|
||||
{
|
||||
float16x4_t v =
|
||||
#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
|
||||
(float16x4_t)vld1_s16((const short*)ptr);
|
||||
#else
|
||||
vld1_f16((const __fp16*)ptr);
|
||||
#endif
|
||||
return v_float32x4(vcvt_f32_f16(v));
|
||||
}
|
||||
|
||||
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
|
||||
{
|
||||
float16x4_t hv = vcvt_f16_f32(v.val);
|
||||
|
||||
#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
|
||||
vst1_s16((short*)ptr, (int16x4_t)hv);
|
||||
#else
|
||||
vst1_f16((__fp16*)ptr, hv);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
inline v_float32x4 v_load_expand(const float16_t* ptr)
|
||||
{
|
||||
const int N = 4;
|
||||
float buf[N];
|
||||
for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i];
|
||||
return v_load(buf);
|
||||
}
|
||||
|
||||
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
|
||||
{
|
||||
const int N = 4;
|
||||
float buf[N];
|
||||
v_store(buf, v);
|
||||
for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
inline void v_cleanup() {}
|
||||
|
||||
//! @name Check SIMD support
|
||||
//! @{
|
||||
//! @brief Check CPU capability of SIMD operation
|
||||
File diff suppressed because it is too large
Load Diff
167
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin_sse_em.hpp
vendored
Normal file
167
lib/3rdParty/OpenCV/include/opencv2/core/hal/intrin_sse_em.hpp
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||
#define OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
|
||||
inline tp _v128_##fun(const tp& a) \
|
||||
{ return _mm_##fun(a); }
|
||||
|
||||
#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
|
||||
inline tp _v128_##fun(const tp& a, const tp& b) \
|
||||
{ return _mm_##fun(a, b); }
|
||||
|
||||
#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
|
||||
inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
|
||||
{ return _mm_##fun(a, b, c); }
|
||||
|
||||
///////////////////////////// XOP /////////////////////////////
|
||||
|
||||
// [todo] define CV_XOP
|
||||
#if 1 // CV_XOP
|
||||
inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
|
||||
{
|
||||
const __m128i delta = _mm_set1_epi32((int)0x80000000);
|
||||
return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
|
||||
}
|
||||
// wrapping XOP
|
||||
#else
|
||||
OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i)
|
||||
#endif // !CV_XOP
|
||||
|
||||
///////////////////////////// SSE4.1 /////////////////////////////
|
||||
|
||||
#if !CV_SSE4_1
|
||||
|
||||
/** Swizzle **/
|
||||
inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
|
||||
{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); }
|
||||
|
||||
/** Convert **/
|
||||
// 8 >> 16
|
||||
inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi8(a, z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
|
||||
{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); }
|
||||
// 8 >> 32
|
||||
inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
|
||||
{
|
||||
__m128i r = _mm_unpacklo_epi8(a, a);
|
||||
r = _mm_unpacklo_epi8(r, r);
|
||||
return _mm_srai_epi32(r, 24);
|
||||
}
|
||||
// 16 >> 32
|
||||
inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi16(a, z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
|
||||
{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); }
|
||||
// 32 >> 64
|
||||
inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi32(a, z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
|
||||
{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); }
|
||||
|
||||
/** Arithmetic **/
|
||||
inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b)
|
||||
{
|
||||
__m128i c0 = _mm_mul_epu32(a, b);
|
||||
__m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32));
|
||||
__m128i d0 = _mm_unpacklo_epi32(c0, c1);
|
||||
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
|
||||
return _mm_unpacklo_epi64(d0, d1);
|
||||
}
|
||||
|
||||
/** Math **/
|
||||
inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
|
||||
{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); }
|
||||
|
||||
// wrapping SSE4.1
|
||||
#else
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
|
||||
#endif // !CV_SSE4_1
|
||||
|
||||
///////////////////////////// Revolutionary /////////////////////////////
|
||||
|
||||
/** Convert **/
|
||||
// 16 << 8
|
||||
inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpackhi_epi8(a, z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
|
||||
{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); }
|
||||
// 32 << 16
|
||||
inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpackhi_epi16(a, z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
|
||||
{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); }
|
||||
// 64 << 32
|
||||
inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpackhi_epi32(a, z);
|
||||
}
|
||||
inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
|
||||
{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); }
|
||||
|
||||
/** Miscellaneous **/
|
||||
inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
|
||||
{
|
||||
const __m128i m = _mm_set1_epi32(65535);
|
||||
__m128i am = _v128_min_epu32(a, m);
|
||||
__m128i bm = _v128_min_epu32(b, m);
|
||||
#if CV_SSE4_1
|
||||
return _mm_packus_epi32(am, bm);
|
||||
#else
|
||||
const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
|
||||
am = _mm_sub_epi32(am, d);
|
||||
bm = _mm_sub_epi32(bm, d);
|
||||
am = _mm_packs_epi32(am, bm);
|
||||
return _mm_sub_epi16(am, nd);
|
||||
#endif
|
||||
}
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
} // cv::
|
||||
|
||||
#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||
@@ -1,46 +1,6 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_VSX_HPP
|
||||
#define OPENCV_HAL_VSX_HPP
|
||||
@@ -51,11 +11,6 @@
|
||||
#define CV_SIMD128 1
|
||||
#define CV_SIMD128_64F 1
|
||||
|
||||
/**
|
||||
* todo: supporting half precision for power9
|
||||
* convert instractions xvcvhpsp, xvcvsphp
|
||||
**/
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
@@ -276,34 +231,50 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \
|
||||
inline _Tpvec v_load(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
{ return _Tpvec(ld(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \
|
||||
{ return _Tpvec(ld_a(0, ptr)); } \
|
||||
inline _Tpvec v_load_low(const _Tp* ptr) \
|
||||
{ return _Tpvec(vec_ld_l8(ptr)); } \
|
||||
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
|
||||
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
{ st(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
||||
{ st_a(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
||||
{ st_a(a.val, 0, ptr); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
|
||||
{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \
|
||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_l8(a.val, ptr); } \
|
||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_h8(a.val, ptr); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
|
||||
// working around gcc bug for aligned ld/st
|
||||
// if runtime check for vec_ld/st fail we failback to unaligned ld/st
|
||||
// https://github.com/opencv/opencv/issues/13211
|
||||
#ifdef CV_COMPILER_VSX_BROKEN_ALIGNED
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vsx_ld, vsx_st, vsx_st)
|
||||
#else
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
|
||||
#endif
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
|
||||
|
||||
//////////////// Value reordering ///////////////
|
||||
|
||||
@@ -317,13 +288,16 @@ inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \
|
||||
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \
|
||||
_Tpvec& c, _Tpvec& d) \
|
||||
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ vec_st_interleave(a.val, b.val, ptr); } \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \
|
||||
const _Tpvec& b, const _Tpvec& c) \
|
||||
const _Tpvec& b, const _Tpvec& c, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ vec_st_interleave(a.val, b.val, c.val, ptr); } \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
|
||||
const _Tpvec& c, const _Tpvec& d) \
|
||||
const _Tpvec& c, const _Tpvec& d, \
|
||||
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
|
||||
@@ -334,6 +308,8 @@ OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2)
|
||||
|
||||
/* Expand */
|
||||
#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \
|
||||
@@ -342,8 +318,12 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
b0.val = fh(a.val); \
|
||||
b1.val = fl(a.val); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_low(const _Tpvec& a) \
|
||||
{ return _Tpwvec(fh(a.val)); } \
|
||||
inline _Tpwvec v_expand_high(const _Tpvec& a) \
|
||||
{ return _Tpwvec(fl(a.val)); } \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
|
||||
{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
|
||||
@@ -353,10 +333,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpac
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
|
||||
|
||||
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
|
||||
{ return v_uint32x4(vec_ld_buw(ptr)); }
|
||||
{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
|
||||
|
||||
inline v_int32x4 v_load_expand_q(const schar* ptr)
|
||||
{ return v_int32x4(vec_ld_bsw(ptr)); }
|
||||
{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
|
||||
|
||||
/* pack */
|
||||
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
|
||||
@@ -406,6 +386,35 @@ OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
|
||||
//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
|
||||
// vec_sra, vec_packsu, vec_add, pack_u)
|
||||
|
||||
// pack boolean
|
||||
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
vec_uchar16 ab = vec_pack(a.val, b.val);
|
||||
return v_uint8x16(ab);
|
||||
}
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
|
||||
const v_uint32x4& c, const v_uint32x4& d)
|
||||
{
|
||||
vec_ushort8 ab = vec_pack(a.val, b.val);
|
||||
vec_ushort8 cd = vec_pack(c.val, d.val);
|
||||
return v_uint8x16(vec_pack(ab, cd));
|
||||
}
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
|
||||
const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
|
||||
const v_uint64x2& g, const v_uint64x2& h)
|
||||
{
|
||||
vec_uint4 ab = vec_pack(a.val, b.val);
|
||||
vec_uint4 cd = vec_pack(c.val, d.val);
|
||||
vec_uint4 ef = vec_pack(e.val, f.val);
|
||||
vec_uint4 gh = vec_pack(g.val, h.val);
|
||||
|
||||
vec_ushort8 abcd = vec_pack(ab, cd);
|
||||
vec_ushort8 efgh = vec_pack(ef, gh);
|
||||
return v_uint8x16(vec_pack(abcd, efgh));
|
||||
}
|
||||
|
||||
/* Recombine */
|
||||
template <typename _Tpvec>
|
||||
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
|
||||
@@ -429,36 +438,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
|
||||
d.val = vec_mergesql(a.val, b.val);
|
||||
}
|
||||
|
||||
/* Extract */
|
||||
template<int s, typename _Tpvec>
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||
{
|
||||
const int w = sizeof(typename _Tpvec::lane_type);
|
||||
const int n = _Tpvec::nlanes;
|
||||
const unsigned int sf = ((w * n) - (s * w));
|
||||
if (s == 0)
|
||||
return _Tpvec(a.val);
|
||||
else if (sf > 15)
|
||||
return _Tpvec();
|
||||
// bitwise it just to make xlc happy
|
||||
return _Tpvec(vec_sld(b.val, a.val, sf & 15));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
|
||||
template<int s> \
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
switch(s) { \
|
||||
case 0: return _Tpvec(a.val); \
|
||||
case 2: return _Tpvec(b.val); \
|
||||
case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
|
||||
default: return _Tpvec(); \
|
||||
} \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
|
||||
|
||||
|
||||
////////// Arithmetic, bitwise and comparison operations /////////
|
||||
|
||||
/* Element-wise binary and unary operations */
|
||||
@@ -475,10 +454,8 @@ OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint16x8, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int16x8, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
|
||||
@@ -498,22 +475,51 @@ OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d)
|
||||
// saturating multiply
|
||||
#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec) \
|
||||
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
_Tpwvec c, d; \
|
||||
v_mul_expand(a, b, c, d); \
|
||||
return v_pack(c, d); \
|
||||
} \
|
||||
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
|
||||
{ a = a * b; return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int8x16, v_int16x8)
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint8x16, v_uint16x8)
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int16x8, v_int32x4)
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint16x8, v_uint32x4)
|
||||
|
||||
template<typename Tvec, typename Twvec>
|
||||
inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackh(a.val), vec_unpackh(b.val));
|
||||
d.val = vec_mul(vec_unpackl(a.val), vec_unpackl(b.val));
|
||||
}
|
||||
inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, v_uint32x4& c, v_uint32x4& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
|
||||
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
|
||||
Twvec p0 = Twvec(vec_mule(a.val, b.val));
|
||||
Twvec p1 = Twvec(vec_mulo(a.val, b.val));
|
||||
v_zip(p0, p1, c, d);
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
|
||||
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
|
||||
}
|
||||
|
||||
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
vec_int4 p0 = vec_mule(a.val, b.val);
|
||||
vec_int4 p1 = vec_mulo(a.val, b.val);
|
||||
static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
|
||||
return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm));
|
||||
}
|
||||
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
vec_uint4 p0 = vec_mule(a.val, b.val);
|
||||
vec_uint4 p1 = vec_mulo(a.val, b.val);
|
||||
static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
|
||||
return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm));
|
||||
}
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \
|
||||
template<typename _Tpvec> \
|
||||
@@ -522,6 +528,7 @@ inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul)
|
||||
|
||||
/** Bitwise shifts **/
|
||||
#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \
|
||||
@@ -603,6 +610,11 @@ OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
|
||||
|
||||
inline v_float32x4 v_not_nan(const v_float32x4& a)
|
||||
{ return v_float32x4(vec_cmpeq(a.val, a.val)); }
|
||||
inline v_float64x2 v_not_nan(const v_float64x2& a)
|
||||
{ return v_float64x2(vec_cmpeq(a.val, a.val)); }
|
||||
|
||||
/** min/max **/
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
|
||||
@@ -628,9 +640,10 @@ OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_float32x4, vec_float4)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2)
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_float64x2, vec_double2)
|
||||
|
||||
template<int imm, typename _Tpvec>
|
||||
inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)
|
||||
@@ -654,7 +667,7 @@ inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
|
||||
return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
|
||||
}
|
||||
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64(_Tpvec, suffix, rg1, rg2) \
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2) \
|
||||
template<int imm> \
|
||||
inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
@@ -663,11 +676,18 @@ inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
|
||||
return imm ? b : a; \
|
||||
}
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, right, a, b)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec) \
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left, b, a) \
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b)
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
|
||||
|
||||
/* Extract */
|
||||
template<int s, typename _Tpvec>
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||
{ return v_rotate_right<s>(a, b); }
|
||||
|
||||
////////// Reduce and mask /////////
|
||||
|
||||
@@ -699,6 +719,11 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
|
||||
|
||||
inline double v_reduce_sum(const v_float64x2& a)
|
||||
{
|
||||
return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0);
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
|
||||
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
|
||||
{ \
|
||||
@@ -722,6 +747,50 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
|
||||
return v_float32x4(vec_mergeh(ac, bd));
|
||||
}
|
||||
|
||||
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
|
||||
{
|
||||
const vec_uint4 zero4 = vec_uint4_z;
|
||||
vec_uint4 sum4 = vec_sum4s(vec_absd(a.val, b.val), zero4);
|
||||
return (unsigned)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
|
||||
{
|
||||
const vec_int4 zero4 = vec_int4_z;
|
||||
vec_char16 ad = vec_abss(vec_subs(a.val, b.val));
|
||||
vec_int4 sum4 = vec_sum4s(ad, zero4);
|
||||
return (unsigned)vec_extract(vec_sums(sum4, zero4), 3);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
vec_ushort8 ad = vec_absd(a.val, b.val);
|
||||
VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)), vec_int4_c(vec_unpacklu(ad)));
|
||||
return (unsigned)vec_extract(sum, 3);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
const vec_int4 zero4 = vec_int4_z;
|
||||
vec_short8 ad = vec_abss(vec_subs(a.val, b.val));
|
||||
vec_int4 sum4 = vec_sum4s(ad, zero4);
|
||||
return (unsigned)vec_extract(vec_sums(sum4, zero4), 3);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
|
||||
{
|
||||
const vec_uint4 ad = vec_absd(a.val, b.val);
|
||||
const vec_uint4 rd = vec_add(ad, vec_sld(ad, ad, 8));
|
||||
return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0);
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
|
||||
{
|
||||
vec_int4 ad = vec_abss(vec_sub(a.val, b.val));
|
||||
return (unsigned)vec_extract(vec_sums(ad, vec_int4_z), 3);
|
||||
}
|
||||
inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
|
||||
{
|
||||
const vec_float4 ad = vec_abs(vec_sub(a.val, b.val));
|
||||
const vec_float4 rd = vec_add(ad, vec_sld(ad, ad, 8));
|
||||
return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0);
|
||||
}
|
||||
|
||||
/** Popcount **/
|
||||
template<typename _Tpvec>
|
||||
inline v_uint32x4 v_popcount(const _Tpvec& a)
|
||||
@@ -776,26 +845,33 @@ inline int v_signmask(const v_uint64x2& a)
|
||||
inline int v_signmask(const v_float64x2& a)
|
||||
{ return v_signmask(v_reinterpret_as_s64(a)); }
|
||||
|
||||
|
||||
template<typename _Tpvec>
|
||||
inline bool v_check_all(const _Tpvec& a)
|
||||
{ return vec_all_lt(a.val, _Tpvec().val);}
|
||||
inline bool v_check_all(const v_uint8x16 &a)
|
||||
{ return vec_all_lt(a.val, _Tpvec().val); }
|
||||
inline bool v_check_all(const v_uint8x16& a)
|
||||
{ return v_check_all(v_reinterpret_as_s8(a)); }
|
||||
inline bool v_check_all(const v_uint16x8 &a)
|
||||
inline bool v_check_all(const v_uint16x8& a)
|
||||
{ return v_check_all(v_reinterpret_as_s16(a)); }
|
||||
inline bool v_check_all(const v_uint32x4 &a)
|
||||
inline bool v_check_all(const v_uint32x4& a)
|
||||
{ return v_check_all(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_all(const v_float32x4& a)
|
||||
{ return v_check_all(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_all(const v_float64x2& a)
|
||||
{ return v_check_all(v_reinterpret_as_s64(a)); }
|
||||
|
||||
template<typename _Tpvec>
|
||||
inline bool v_check_any(const _Tpvec& a)
|
||||
{ return vec_any_lt(a.val, _Tpvec().val);}
|
||||
inline bool v_check_any(const v_uint8x16 &a)
|
||||
{ return vec_any_lt(a.val, _Tpvec().val); }
|
||||
inline bool v_check_any(const v_uint8x16& a)
|
||||
{ return v_check_any(v_reinterpret_as_s8(a)); }
|
||||
inline bool v_check_any(const v_uint16x8 &a)
|
||||
inline bool v_check_any(const v_uint16x8& a)
|
||||
{ return v_check_any(v_reinterpret_as_s16(a)); }
|
||||
inline bool v_check_any(const v_uint32x4 &a)
|
||||
inline bool v_check_any(const v_uint32x4& a)
|
||||
{ return v_check_any(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_any(const v_float32x4& a)
|
||||
{ return v_check_any(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_any(const v_float64x2& a)
|
||||
{ return v_check_any(v_reinterpret_as_s64(a)); }
|
||||
|
||||
////////// Other math /////////
|
||||
|
||||
@@ -815,12 +891,17 @@ inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
|
||||
inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); } \
|
||||
inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
|
||||
{ return _Tpvec(vec_madd(a.val, b.val, c.val)); } \
|
||||
inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
|
||||
{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
|
||||
|
||||
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{ return a * b + c; }
|
||||
|
||||
// TODO: exp, log, sin, cos
|
||||
|
||||
/** Absolute values **/
|
||||
@@ -839,25 +920,39 @@ inline v_float32x4 v_abs(const v_float32x4& x)
|
||||
inline v_float64x2 v_abs(const v_float64x2& x)
|
||||
{ return v_float64x2(vec_abs(x.val)); }
|
||||
|
||||
/** Absolute difference **/
|
||||
// unsigned
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
|
||||
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec2(cast(intrin(a.val, b.val))); }
|
||||
inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
|
||||
{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); }
|
||||
inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
|
||||
{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); }
|
||||
inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
|
||||
{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
|
||||
inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
|
||||
{ return v_abs(a - b); }
|
||||
inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
|
||||
{ return v_abs(a - b); }
|
||||
|
||||
/** Absolute difference for signed integers **/
|
||||
inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
|
||||
{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); }
|
||||
inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
|
||||
{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); }
|
||||
|
||||
////////// Conversions /////////
|
||||
|
||||
/** Rounding **/
|
||||
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{ return v_int32x4(vec_cts(vec_round(a.val))); }
|
||||
{ return v_int32x4(vec_cts(vec_rint(a.val))); }
|
||||
|
||||
inline v_int32x4 v_round(const v_float64x2& a)
|
||||
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_round(a.val)), vec_int4_z)); }
|
||||
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); }
|
||||
|
||||
inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
|
||||
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); }
|
||||
|
||||
inline v_int32x4 v_floor(const v_float32x4& a)
|
||||
{ return v_int32x4(vec_cts(vec_floor(a.val))); }
|
||||
@@ -884,6 +979,9 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a)
|
||||
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
|
||||
{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }
|
||||
|
||||
inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
|
||||
{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_cvfo(b.val))); }
|
||||
|
||||
inline v_float64x2 v_cvt_f64(const v_int32x4& a)
|
||||
{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }
|
||||
|
||||
@@ -896,6 +994,271 @@ inline v_float64x2 v_cvt_f64(const v_float32x4& a)
|
||||
inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
|
||||
{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); }
|
||||
|
||||
////////////// Lookup table access ////////////////////
|
||||
|
||||
inline v_int8x16 v_lut(const schar* tab, const int* idx)
|
||||
{
|
||||
return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]],
|
||||
tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]);
|
||||
}
|
||||
inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
|
||||
{
|
||||
return v_reinterpret_as_s8(v_int16x8(*(const short*)(tab+idx[0]), *(const short*)(tab+idx[1]), *(const short*)(tab+idx[2]), *(const short*)(tab+idx[3]),
|
||||
*(const short*)(tab+idx[4]), *(const short*)(tab+idx[5]), *(const short*)(tab+idx[6]), *(const short*)(tab+idx[7])));
|
||||
}
|
||||
inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
|
||||
{
|
||||
return v_reinterpret_as_s8(v_int32x4(*(const int*)(tab+idx[0]), *(const int*)(tab+idx[1]), *(const int*)(tab+idx[2]), *(const int*)(tab+idx[3])));
|
||||
}
|
||||
inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); }
|
||||
inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); }
|
||||
inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); }
|
||||
|
||||
inline v_int16x8 v_lut(const short* tab, const int* idx)
|
||||
{
|
||||
return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]);
|
||||
}
|
||||
inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
|
||||
{
|
||||
return v_reinterpret_as_s16(v_int32x4(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])));
|
||||
}
|
||||
inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
|
||||
{
|
||||
return v_reinterpret_as_s16(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1])));
|
||||
}
|
||||
inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); }
|
||||
inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); }
|
||||
inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); }
|
||||
|
||||
inline v_int32x4 v_lut(const int* tab, const int* idx)
|
||||
{
|
||||
return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
|
||||
}
|
||||
inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
|
||||
{
|
||||
return v_reinterpret_as_s32(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1])));
|
||||
}
|
||||
inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
|
||||
{
|
||||
return v_int32x4(vsx_ld(0, tab + idx[0]));
|
||||
}
|
||||
inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); }
|
||||
inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); }
|
||||
inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); }
|
||||
|
||||
inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
|
||||
{
|
||||
return v_int64x2(tab[idx[0]], tab[idx[1]]);
|
||||
}
|
||||
inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
|
||||
{
|
||||
return v_int64x2(vsx_ld2(0, tab + idx[0]));
|
||||
}
|
||||
inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
|
||||
inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
|
||||
|
||||
inline v_float32x4 v_lut(const float* tab, const int* idx)
|
||||
{
|
||||
return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
|
||||
}
|
||||
inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int*)tab, idx)); }
|
||||
inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_load(tab + *idx); }
|
||||
|
||||
inline v_float64x2 v_lut(const double* tab, const int* idx)
|
||||
{
|
||||
return v_float64x2(tab[idx[0]], tab[idx[1]]);
|
||||
}
|
||||
inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_load(tab + *idx); }
|
||||
|
||||
inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
const int idx[4] = {
|
||||
vec_extract(idxvec.val, 0),
|
||||
vec_extract(idxvec.val, 1),
|
||||
vec_extract(idxvec.val, 2),
|
||||
vec_extract(idxvec.val, 3)
|
||||
};
|
||||
return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
|
||||
}
|
||||
|
||||
inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
const int idx[4] = {
|
||||
vec_extract(idxvec.val, 0),
|
||||
vec_extract(idxvec.val, 1),
|
||||
vec_extract(idxvec.val, 2),
|
||||
vec_extract(idxvec.val, 3)
|
||||
};
|
||||
return v_uint32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
|
||||
}
|
||||
|
||||
inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
const int idx[4] = {
|
||||
vec_extract(idxvec.val, 0),
|
||||
vec_extract(idxvec.val, 1),
|
||||
vec_extract(idxvec.val, 2),
|
||||
vec_extract(idxvec.val, 3)
|
||||
};
|
||||
return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
|
||||
}
|
||||
|
||||
inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
|
||||
{
|
||||
const int idx[2] = {
|
||||
vec_extract(idxvec.val, 0),
|
||||
vec_extract(idxvec.val, 1)
|
||||
};
|
||||
return v_float64x2(tab[idx[0]], tab[idx[1]]);
|
||||
}
|
||||
|
||||
inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
|
||||
{
|
||||
vec_float4 xy0 = vec_ld_l8(tab + vec_extract(idxvec.val, 0));
|
||||
vec_float4 xy1 = vec_ld_l8(tab + vec_extract(idxvec.val, 1));
|
||||
vec_float4 xy2 = vec_ld_l8(tab + vec_extract(idxvec.val, 2));
|
||||
vec_float4 xy3 = vec_ld_l8(tab + vec_extract(idxvec.val, 3));
|
||||
vec_float4 xy02 = vec_mergeh(xy0, xy2); // x0, x2, y0, y2
|
||||
vec_float4 xy13 = vec_mergeh(xy1, xy3); // x1, x3, y1, y3
|
||||
x.val = vec_mergeh(xy02, xy13);
|
||||
y.val = vec_mergel(xy02, xy13);
|
||||
}
|
||||
inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
|
||||
{
|
||||
vec_double2 xy0 = vsx_ld(vec_extract(idxvec.val, 0), tab);
|
||||
vec_double2 xy1 = vsx_ld(vec_extract(idxvec.val, 1), tab);
|
||||
x.val = vec_mergeh(xy0, xy1);
|
||||
y.val = vec_mergel(xy0, xy1);
|
||||
}
|
||||
|
||||
inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15};
|
||||
return v_int8x16(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec)
|
||||
{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15};
|
||||
return v_int8x16(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec)
|
||||
{ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0,1, 4,5, 2,3, 6,7, 8,9, 12,13, 10,11, 14,15};
|
||||
return v_int16x8(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec)
|
||||
{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15};
|
||||
return v_int16x8(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec)
|
||||
{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15};
|
||||
return v_int32x4(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec)
|
||||
{ return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
|
||||
inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
|
||||
{ return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
|
||||
|
||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 15, 15, 15};
|
||||
return v_int8x16(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
||||
{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
||||
{
|
||||
static const vec_uchar16 perm = {0,1, 2,3, 4,5, 8,9, 10,11, 12,13, 14,15, 14,15};
|
||||
return v_int16x8(vec_perm(vec.val, vec.val, perm));
|
||||
}
|
||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
|
||||
{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
inline v_int32x4 v_pack_triplets(const v_int32x4& vec)
|
||||
{ return vec; }
|
||||
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec)
|
||||
{ return vec; }
|
||||
inline v_float32x4 v_pack_triplets(const v_float32x4& vec)
|
||||
{ return vec; }
|
||||
|
||||
/////// FP16 support ////////
|
||||
|
||||
inline v_float32x4 v_load_expand(const float16_t* ptr)
|
||||
{
|
||||
vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr);
|
||||
#if CV_VSX3 && defined(vec_extract_fp_from_shorth)
|
||||
return v_float32x4(vec_extract_fp_from_shorth(vf16));
|
||||
#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
|
||||
vec_float4 vf32;
|
||||
__asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wf" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
|
||||
return v_float32x4(vf32);
|
||||
#else
|
||||
const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
|
||||
const vec_int4 signmask = vec_int4_sp(0x80000000);
|
||||
const vec_int4 maxexp = vec_int4_sp(0x7c000000);
|
||||
const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000));
|
||||
|
||||
vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16)));
|
||||
vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask);
|
||||
vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta
|
||||
vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf));
|
||||
|
||||
t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e)));
|
||||
vec_bint4 zmask = vec_cmpeq(e, z);
|
||||
vec_int4 ft = vec_sel(t, zt, zmask);
|
||||
return v_float32x4(vec_float4_c(vec_or(ft, sign)));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
|
||||
{
|
||||
// fixme: Is there any buitin op or intrinsic that cover "xvcvsphp"?
|
||||
#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
|
||||
vec_ushort8 vf16;
|
||||
__asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wf" (v.val));
|
||||
vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
|
||||
#else
|
||||
const vec_int4 signmask = vec_int4_sp(0x80000000);
|
||||
const vec_int4 rval = vec_int4_sp(0x3f000000);
|
||||
|
||||
vec_int4 t = vec_int4_c(v.val);
|
||||
vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16));
|
||||
t = vec_and(vec_nor(signmask, signmask), t);
|
||||
|
||||
vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t);
|
||||
vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000));
|
||||
vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan);
|
||||
vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t);
|
||||
vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval)));
|
||||
tt = vec_sub(tt, rval);
|
||||
vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1));
|
||||
vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff));
|
||||
nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13));
|
||||
t = vec_sel(nt, tt, tinymask);
|
||||
t = vec_sel(naninf, t, finitemask);
|
||||
t = vec_or(t, sign);
|
||||
vec_st_l8(vec_packs(t, t), ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void v_cleanup() {}
|
||||
|
||||
|
||||
/** Reinterpret **/
|
||||
/** its up there with load and store operations **/
|
||||
|
||||
@@ -904,6 +1267,9 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
|
||||
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
|
||||
|
||||
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
|
||||
const v_float32x4& m1, const v_float32x4& m2,
|
||||
const v_float32x4& m3)
|
||||
@@ -53,9 +53,7 @@
|
||||
|
||||
#include "opencv2/core/bufferpool.hpp"
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <type_traits>
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@@ -63,8 +61,10 @@ namespace cv
|
||||
//! @addtogroup core_basic
|
||||
//! @{
|
||||
|
||||
enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
|
||||
enum AccessFlag { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
|
||||
ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
|
||||
CV_ENUM_FLAGS(AccessFlag)
|
||||
__CV_ENUM_FLAGS_BITWISE_AND(AccessFlag, int, AccessFlag)
|
||||
|
||||
CV__DEBUG_NS_BEGIN
|
||||
|
||||
@@ -148,11 +148,17 @@ synonym is needed to generate Python/Java etc. wrappers properly. At the functio
|
||||
level their use is similar, but _InputArray::getMat(idx) should be used to get header for the
|
||||
idx-th component of the outer vector and _InputArray::size().area() should be used to find the
|
||||
number of components (vectors/matrices) of the outer vector.
|
||||
|
||||
In general, type support is limited to cv::Mat types. Other types are forbidden.
|
||||
But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc.
|
||||
This data is not intented to be interpreted as an image data, or processed somehow like regular cv::Mat.
|
||||
To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers.
|
||||
Custom type is wrapped as Mat-compatible `CV_8UC<N>` values (N = sizeof(T), N <= CV_CN_MAX).
|
||||
*/
|
||||
class CV_EXPORTS _InputArray
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
enum KindFlag {
|
||||
KIND_SHIFT = 16,
|
||||
FIXED_TYPE = 0x8000 << KIND_SHIFT,
|
||||
FIXED_SIZE = 0x4000 << KIND_SHIFT,
|
||||
@@ -185,7 +191,7 @@ public:
|
||||
template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
|
||||
_InputArray(const std::vector<bool>& vec);
|
||||
template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
|
||||
_InputArray(const std::vector<std::vector<bool> >&);
|
||||
_InputArray(const std::vector<std::vector<bool> >&) = delete; // not supported
|
||||
template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
|
||||
template<typename _Tp> _InputArray(const _Tp* vec, int n);
|
||||
template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
|
||||
@@ -198,10 +204,11 @@ public:
|
||||
_InputArray(const UMat& um);
|
||||
_InputArray(const std::vector<UMat>& umv);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> _InputArray(const std::array<_Tp, _Nm>& arr);
|
||||
template<std::size_t _Nm> _InputArray(const std::array<Mat, _Nm>& arr);
|
||||
#endif
|
||||
|
||||
template<typename _Tp> static _InputArray rawIn(const std::vector<_Tp>& vec);
|
||||
template<typename _Tp, std::size_t _Nm> static _InputArray rawIn(const std::array<_Tp, _Nm>& arr);
|
||||
|
||||
Mat getMat(int idx=-1) const;
|
||||
Mat getMat_(int idx=-1) const;
|
||||
@@ -216,7 +223,7 @@ public:
|
||||
void* getObj() const;
|
||||
Size getSz() const;
|
||||
|
||||
int kind() const;
|
||||
_InputArray::KindFlag kind() const;
|
||||
int dims(int i=-1) const;
|
||||
int cols(int i=-1) const;
|
||||
int rows(int i=-1) const;
|
||||
@@ -240,6 +247,7 @@ public:
|
||||
bool isUMatVector() const;
|
||||
bool isMatx() const;
|
||||
bool isVector() const;
|
||||
bool isGpuMat() const;
|
||||
bool isGpuMatVector() const;
|
||||
~_InputArray();
|
||||
|
||||
@@ -251,7 +259,8 @@ protected:
|
||||
void init(int _flags, const void* _obj);
|
||||
void init(int _flags, const void* _obj, Size _sz);
|
||||
};
|
||||
|
||||
CV_ENUM_FLAGS(_InputArray::KindFlag)
|
||||
__CV_ENUM_FLAGS_BITWISE_AND(_InputArray::KindFlag, int, _InputArray::KindFlag)
|
||||
|
||||
/** @brief This type is very similar to InputArray except that it is used for input/output and output function
|
||||
parameters.
|
||||
@@ -281,7 +290,7 @@ generators:
|
||||
class CV_EXPORTS _OutputArray : public _InputArray
|
||||
{
|
||||
public:
|
||||
enum
|
||||
enum DepthMask
|
||||
{
|
||||
DEPTH_MASK_8U = 1 << CV_8U,
|
||||
DEPTH_MASK_8S = 1 << CV_8S,
|
||||
@@ -290,8 +299,10 @@ public:
|
||||
DEPTH_MASK_32S = 1 << CV_32S,
|
||||
DEPTH_MASK_32F = 1 << CV_32F,
|
||||
DEPTH_MASK_64F = 1 << CV_64F,
|
||||
DEPTH_MASK_16F = 1 << CV_16F,
|
||||
DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
|
||||
DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
|
||||
DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1,
|
||||
DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
|
||||
};
|
||||
|
||||
@@ -305,9 +316,9 @@ public:
|
||||
_OutputArray(cuda::HostMem& cuda_mem);
|
||||
template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
|
||||
template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
|
||||
_OutputArray(std::vector<bool>& vec);
|
||||
_OutputArray(std::vector<bool>& vec) = delete; // not supported
|
||||
template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
|
||||
_OutputArray(std::vector<std::vector<bool> >&);
|
||||
_OutputArray(std::vector<std::vector<bool> >&) = delete; // not supported
|
||||
template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
|
||||
template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
|
||||
template<typename _Tp> _OutputArray(_Tp* vec, int n);
|
||||
@@ -331,12 +342,13 @@ public:
|
||||
_OutputArray(const UMat& m);
|
||||
_OutputArray(const std::vector<UMat>& vec);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> _OutputArray(std::array<_Tp, _Nm>& arr);
|
||||
template<typename _Tp, std::size_t _Nm> _OutputArray(const std::array<_Tp, _Nm>& arr);
|
||||
template<std::size_t _Nm> _OutputArray(std::array<Mat, _Nm>& arr);
|
||||
template<std::size_t _Nm> _OutputArray(const std::array<Mat, _Nm>& arr);
|
||||
#endif
|
||||
|
||||
template<typename _Tp> static _OutputArray rawOut(std::vector<_Tp>& vec);
|
||||
template<typename _Tp, std::size_t _Nm> static _OutputArray rawOut(std::array<_Tp, _Nm>& arr);
|
||||
|
||||
bool fixedSize() const;
|
||||
bool fixedType() const;
|
||||
@@ -347,9 +359,9 @@ public:
|
||||
std::vector<cuda::GpuMat>& getGpuMatVecRef() const;
|
||||
ogl::Buffer& getOGlBufferRef() const;
|
||||
cuda::HostMem& getHostMemRef() const;
|
||||
void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
|
||||
void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
|
||||
void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
|
||||
void create(Size sz, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const;
|
||||
void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const;
|
||||
void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const;
|
||||
void createSameSize(const _InputArray& arr, int mtype) const;
|
||||
void release() const;
|
||||
void clear() const;
|
||||
@@ -375,7 +387,7 @@ public:
|
||||
_InputOutputArray(cuda::HostMem& cuda_mem);
|
||||
template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
|
||||
template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
|
||||
_InputOutputArray(std::vector<bool>& vec);
|
||||
_InputOutputArray(std::vector<bool>& vec) = delete; // not supported
|
||||
template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
|
||||
template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
|
||||
template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
|
||||
@@ -400,15 +412,23 @@ public:
|
||||
_InputOutputArray(const UMat& m);
|
||||
_InputOutputArray(const std::vector<UMat>& vec);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> _InputOutputArray(std::array<_Tp, _Nm>& arr);
|
||||
template<typename _Tp, std::size_t _Nm> _InputOutputArray(const std::array<_Tp, _Nm>& arr);
|
||||
template<std::size_t _Nm> _InputOutputArray(std::array<Mat, _Nm>& arr);
|
||||
template<std::size_t _Nm> _InputOutputArray(const std::array<Mat, _Nm>& arr);
|
||||
#endif
|
||||
|
||||
template<typename _Tp> static _InputOutputArray rawInOut(std::vector<_Tp>& vec);
|
||||
template<typename _Tp, std::size_t _Nm> _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr);
|
||||
|
||||
};
|
||||
|
||||
/** Helper to wrap custom types. @see InputArray */
|
||||
template<typename _Tp> static inline _InputArray rawIn(_Tp& v);
|
||||
/** Helper to wrap custom types. @see InputArray */
|
||||
template<typename _Tp> static inline _OutputArray rawOut(_Tp& v);
|
||||
/** Helper to wrap custom types. @see InputArray */
|
||||
template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v);
|
||||
|
||||
CV__DEBUG_NS_END
|
||||
|
||||
typedef const _InputArray& InputArray;
|
||||
@@ -450,10 +470,10 @@ public:
|
||||
// uchar*& datastart, uchar*& data, size_t* step) = 0;
|
||||
//virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
|
||||
virtual UMatData* allocate(int dims, const int* sizes, int type,
|
||||
void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const = 0;
|
||||
virtual bool allocate(UMatData* data, int accessflags, UMatUsageFlags usageFlags) const = 0;
|
||||
void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const = 0;
|
||||
virtual bool allocate(UMatData* data, AccessFlag accessflags, UMatUsageFlags usageFlags) const = 0;
|
||||
virtual void deallocate(UMatData* data) const = 0;
|
||||
virtual void map(UMatData* data, int accessflags) const;
|
||||
virtual void map(UMatData* data, AccessFlag accessflags) const;
|
||||
virtual void unmap(UMatData* data) const;
|
||||
virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
|
||||
const size_t srcofs[], const size_t srcstep[],
|
||||
@@ -506,7 +526,7 @@ protected:
|
||||
// it should be explicitly initialized using init().
|
||||
struct CV_EXPORTS UMatData
|
||||
{
|
||||
enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
|
||||
enum MemoryFlag { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
|
||||
DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
|
||||
USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64,
|
||||
ASYNC_CLEANUP=128
|
||||
@@ -536,30 +556,24 @@ struct CV_EXPORTS UMatData
|
||||
uchar* origdata;
|
||||
size_t size;
|
||||
|
||||
int flags;
|
||||
UMatData::MemoryFlag flags;
|
||||
void* handle;
|
||||
void* userdata;
|
||||
int allocatorFlags_;
|
||||
int mapcount;
|
||||
UMatData* originalUMatData;
|
||||
};
|
||||
|
||||
|
||||
struct CV_EXPORTS UMatDataAutoLock
|
||||
{
|
||||
explicit UMatDataAutoLock(UMatData* u);
|
||||
~UMatDataAutoLock();
|
||||
UMatData* u;
|
||||
};
|
||||
CV_ENUM_FLAGS(UMatData::MemoryFlag)
|
||||
|
||||
|
||||
struct CV_EXPORTS MatSize
|
||||
{
|
||||
explicit MatSize(int* _p);
|
||||
int dims() const;
|
||||
Size operator()() const;
|
||||
const int& operator[](int i) const;
|
||||
int& operator[](int i);
|
||||
operator const int*() const;
|
||||
operator const int*() const; // TODO OpenCV 4.0: drop this
|
||||
bool operator == (const MatSize& sz) const;
|
||||
bool operator != (const MatSize& sz) const;
|
||||
|
||||
@@ -581,7 +595,7 @@ protected:
|
||||
MatStep& operator = (const MatStep&);
|
||||
};
|
||||
|
||||
/** @example cout_mat.cpp
|
||||
/** @example samples/cpp/cout_mat.cpp
|
||||
An example demonstrating the serial out capabilities of cv::Mat
|
||||
*/
|
||||
|
||||
@@ -600,12 +614,11 @@ Note that `M.step[i] >= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[
|
||||
that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane,
|
||||
and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() .
|
||||
|
||||
So, the data layout in Mat is fully compatible with CvMat, IplImage, and CvMatND types from OpenCV
|
||||
1.x. It is also compatible with the majority of dense array types from the standard toolkits and
|
||||
SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, that is, with any
|
||||
array that uses *steps* (or *strides*) to compute the position of a pixel. Due to this
|
||||
compatibility, it is possible to make a Mat header for user-allocated data and process it in-place
|
||||
using OpenCV functions.
|
||||
So, the data layout in Mat is compatible with the majority of dense array types from the standard
|
||||
toolkits and SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others,
|
||||
that is, with any array that uses *steps* (or *strides*) to compute the position of a pixel.
|
||||
Due to this compatibility, it is possible to make a Mat header for user-allocated data and process
|
||||
it in-place using OpenCV functions.
|
||||
|
||||
There are many different ways to create a Mat object. The most popular options are listed below:
|
||||
|
||||
@@ -690,10 +703,6 @@ sub-matrices.
|
||||
Mat M = Mat(3, 3, CV_64F, m).inv();
|
||||
@endcode
|
||||
.
|
||||
Partial yet very common cases of this *user-allocated data* case are conversions from CvMat and
|
||||
IplImage to Mat. For this purpose, there is function cv::cvarrToMat taking pointers to CvMat or
|
||||
IplImage and the optional flag indicating whether to copy the data or not.
|
||||
@snippet samples/cpp/image.cpp iplimage
|
||||
|
||||
- Use MATLAB-style array initializers, zeros(), ones(), eye(), for example:
|
||||
@code
|
||||
@@ -992,18 +1001,18 @@ public:
|
||||
*/
|
||||
template<typename _Tp> explicit Mat(const std::vector<_Tp>& vec, bool copyData=false);
|
||||
|
||||
#ifdef CV_CXX11
|
||||
/** @overload
|
||||
*/
|
||||
template<typename _Tp, typename = typename std::enable_if<std::is_arithmetic<_Tp>::value>::type>
|
||||
explicit Mat(const std::initializer_list<_Tp> list);
|
||||
#endif
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
/** @overload
|
||||
*/
|
||||
template<typename _Tp> explicit Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list);
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
template<typename _Tp, size_t _Nm> explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false);
|
||||
#endif
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
@@ -1051,7 +1060,7 @@ public:
|
||||
Mat& operator = (const MatExpr& expr);
|
||||
|
||||
//! retrieve UMat from Mat
|
||||
UMat getUMat(int accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const;
|
||||
UMat getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const;
|
||||
|
||||
/** @brief Creates a matrix header for the specified matrix row.
|
||||
|
||||
@@ -1173,7 +1182,7 @@ public:
|
||||
The method creates a full copy of the array. The original step[] is not taken into account. So, the
|
||||
array copy is a continuous array occupying total()*elemSize() bytes.
|
||||
*/
|
||||
Mat clone() const;
|
||||
Mat clone() const CV_NODISCARD;
|
||||
|
||||
/** @brief Copies the matrix to another one.
|
||||
|
||||
@@ -1326,7 +1335,7 @@ public:
|
||||
/** @brief Returns a zero array of the specified size and type.
|
||||
|
||||
The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant
|
||||
array as a function parameter, part of a matrix expression, or as a matrix initializer. :
|
||||
array as a function parameter, part of a matrix expression, or as a matrix initializer:
|
||||
@code
|
||||
Mat A;
|
||||
A = Mat::zeros(3, 3, CV_32F);
|
||||
@@ -1362,6 +1371,8 @@ public:
|
||||
The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it
|
||||
just remembers the scale factor (3 in this case) and use it when actually invoking the matrix
|
||||
initializer.
|
||||
@note In case of multi-channels type, only the first channel will be initialized with 1's, the
|
||||
others will be set to 0's.
|
||||
@param rows Number of rows.
|
||||
@param cols Number of columns.
|
||||
@param type Created matrix type.
|
||||
@@ -1389,6 +1400,8 @@ public:
|
||||
// make a 4x4 diagonal matrix with 0.1's on the diagonal.
|
||||
Mat A = Mat::eye(4, 4, CV_32F)*0.1;
|
||||
@endcode
|
||||
@note In case of multi-channels type, identity matrix will be initialized only for the first channel,
|
||||
the others will be set to 0's
|
||||
@param rows Number of rows.
|
||||
@param cols Number of columns.
|
||||
@param type Created matrix type.
|
||||
@@ -1623,20 +1636,11 @@ public:
|
||||
*/
|
||||
Mat operator()(const std::vector<Range>& ranges) const;
|
||||
|
||||
// //! converts header to CvMat; no data is copied
|
||||
// operator CvMat() const;
|
||||
// //! converts header to CvMatND; no data is copied
|
||||
// operator CvMatND() const;
|
||||
// //! converts header to IplImage; no data is copied
|
||||
// operator IplImage() const;
|
||||
|
||||
template<typename _Tp> operator std::vector<_Tp>() const;
|
||||
template<typename _Tp, int n> operator Vec<_Tp, n>() const;
|
||||
template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
|
||||
#endif
|
||||
|
||||
/** @brief Reports whether the matrix is continuous or not.
|
||||
|
||||
@@ -1786,7 +1790,27 @@ public:
|
||||
*/
|
||||
size_t total(int startDim, int endDim=INT_MAX) const;
|
||||
|
||||
//! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
|
||||
/**
|
||||
* @param elemChannels Number of channels or number of columns the matrix should have.
|
||||
* For a 2-D matrix, when the matrix has only 1 column, then it should have
|
||||
* elemChannels channels; When the matrix has only 1 channel,
|
||||
* then it should have elemChannels columns.
|
||||
* For a 3-D matrix, it should have only one channel. Furthermore,
|
||||
* if the number of planes is not one, then the number of rows
|
||||
* within every plane has to be 1; if the number of rows within
|
||||
* every plane is not 1, then the number of planes has to be 1.
|
||||
* @param depth The depth the matrix should have. Set it to -1 when any depth is fine.
|
||||
* @param requireContinuous Set it to true to require the matrix to be continuous
|
||||
* @return -1 if the requirement is not satisfied.
|
||||
* Otherwise, it returns the number of elements in the matrix. Note
|
||||
* that an element may have multiple channels.
|
||||
*
|
||||
* The following code demonstrates its usage for a 2-d matrix:
|
||||
* @snippet snippets/core_mat_checkVector.cpp example-2d
|
||||
*
|
||||
* The following code demonstrates its usage for a 3-d matrix:
|
||||
* @snippet snippets/core_mat_checkVector.cpp example-3d
|
||||
*/
|
||||
int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
|
||||
|
||||
/** @brief Returns a pointer to the specified matrix row.
|
||||
@@ -2040,10 +2064,8 @@ public:
|
||||
/** @overload */
|
||||
template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
|
||||
|
||||
#ifdef CV_CXX_MOVE_SEMANTICS
|
||||
Mat(Mat&& m);
|
||||
Mat& operator = (Mat&& m);
|
||||
#endif
|
||||
|
||||
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
|
||||
enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
|
||||
@@ -2074,6 +2096,9 @@ public:
|
||||
static MatAllocator* getDefaultAllocator();
|
||||
static void setDefaultAllocator(MatAllocator* allocator);
|
||||
|
||||
//! internal use method: updates the continuity flag
|
||||
void updateContinuityFlag();
|
||||
|
||||
//! interaction with UMat
|
||||
UMatData* u;
|
||||
|
||||
@@ -2168,7 +2193,7 @@ public:
|
||||
Mat_(int _ndims, const int* _sizes);
|
||||
//! n-dim array constructor that sets each matrix element to specified value
|
||||
Mat_(int _ndims, const int* _sizes, const _Tp& value);
|
||||
//! copy/conversion contructor. If m is of different type, it's converted
|
||||
//! copy/conversion constructor. If m is of different type, it's converted
|
||||
Mat_(const Mat& m);
|
||||
//! copy constructor
|
||||
Mat_(const Mat_& m);
|
||||
@@ -2194,13 +2219,10 @@ public:
|
||||
explicit Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
|
||||
explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer);
|
||||
|
||||
#ifdef CV_CXX11
|
||||
Mat_(std::initializer_list<_Tp> values);
|
||||
#endif
|
||||
explicit Mat_(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> values);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template <std::size_t _Nm> explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false);
|
||||
#endif
|
||||
|
||||
Mat_& operator = (const Mat& m);
|
||||
Mat_& operator = (const Mat_& m);
|
||||
@@ -2236,7 +2258,7 @@ public:
|
||||
Mat_ row(int y) const;
|
||||
Mat_ col(int x) const;
|
||||
Mat_ diag(int d=0) const;
|
||||
Mat_ clone() const;
|
||||
Mat_ clone() const CV_NODISCARD;
|
||||
|
||||
//! overridden forms of Mat::elemSize() etc.
|
||||
size_t elemSize() const;
|
||||
@@ -2258,7 +2280,7 @@ public:
|
||||
static MatExpr eye(int rows, int cols);
|
||||
static MatExpr eye(Size size);
|
||||
|
||||
//! some more overriden methods
|
||||
//! some more overridden methods
|
||||
Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright );
|
||||
Mat_ operator()( const Range& rowRange, const Range& colRange ) const;
|
||||
Mat_ operator()( const Rect& roi ) const;
|
||||
@@ -2298,17 +2320,14 @@ public:
|
||||
//! conversion to vector.
|
||||
operator std::vector<_Tp>() const;
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
//! conversion to array.
|
||||
template<std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
|
||||
#endif
|
||||
|
||||
//! conversion to Vec
|
||||
template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
|
||||
//! conversion to Matx
|
||||
template<int m, int n> operator Matx<typename DataType<_Tp>::channel_type, m, n>() const;
|
||||
|
||||
#ifdef CV_CXX_MOVE_SEMANTICS
|
||||
Mat_(Mat_&& m);
|
||||
Mat_& operator = (Mat_&& m);
|
||||
|
||||
@@ -2316,7 +2335,6 @@ public:
|
||||
Mat_& operator = (Mat&& m);
|
||||
|
||||
Mat_(MatExpr&& e);
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef Mat_<uchar> Mat1b;
|
||||
@@ -2394,7 +2412,7 @@ public:
|
||||
//! assignment operators
|
||||
UMat& operator = (const UMat& m);
|
||||
|
||||
Mat getMat(int flags) const;
|
||||
Mat getMat(AccessFlag flags) const;
|
||||
|
||||
//! returns a new matrix header for the specified row
|
||||
UMat row(int y) const;
|
||||
@@ -2415,7 +2433,7 @@ public:
|
||||
static UMat diag(const UMat& d);
|
||||
|
||||
//! returns deep copy of the matrix, i.e. the data is copied
|
||||
UMat clone() const;
|
||||
UMat clone() const CV_NODISCARD;
|
||||
//! copies the matrix content to "m".
|
||||
// It calls m.create(this->size(), this->type()).
|
||||
void copyTo( OutputArray m ) const;
|
||||
@@ -2513,16 +2531,14 @@ public:
|
||||
//! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
|
||||
int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
|
||||
|
||||
#ifdef CV_CXX_MOVE_SEMANTICS
|
||||
UMat(UMat&& m);
|
||||
UMat& operator = (UMat&& m);
|
||||
#endif
|
||||
|
||||
/*! Returns the OpenCL buffer handle on which UMat operates on.
|
||||
The UMat instance should be kept alive during the use of the handle to prevent the buffer to be
|
||||
returned to the OpenCV buffer pool.
|
||||
*/
|
||||
void* handle(int accessFlags) const;
|
||||
void* handle(AccessFlag accessFlags) const;
|
||||
void ndoffset(size_t* ofs) const;
|
||||
|
||||
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
|
||||
@@ -2546,6 +2562,9 @@ public:
|
||||
//! and the standard allocator
|
||||
static MatAllocator* getStdAllocator();
|
||||
|
||||
//! internal use method: updates the continuity flag
|
||||
void updateContinuityFlag();
|
||||
|
||||
// black-box container of UMat data
|
||||
UMatData* u;
|
||||
|
||||
@@ -2707,7 +2726,7 @@ public:
|
||||
SparseMat& operator = (const Mat& m);
|
||||
|
||||
//! creates full copy of the matrix
|
||||
SparseMat clone() const;
|
||||
SparseMat clone() const CV_NODISCARD;
|
||||
|
||||
//! copies all the data to the destination matrix. All the previous content of m is erased
|
||||
void copyTo( SparseMat& m ) const;
|
||||
@@ -2926,7 +2945,7 @@ public:
|
||||
|
||||
//! the default constructor
|
||||
SparseMat_();
|
||||
//! the full constructor equivelent to SparseMat(dims, _sizes, DataType<_Tp>::type)
|
||||
//! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type)
|
||||
SparseMat_(int dims, const int* _sizes);
|
||||
//! the copy constructor. If DataType<_Tp>.type != m.type(), the m elements are converted
|
||||
SparseMat_(const SparseMat& m);
|
||||
@@ -2944,7 +2963,7 @@ public:
|
||||
SparseMat_& operator = (const Mat& m);
|
||||
|
||||
//! makes full copy of the matrix. All the elements are duplicated
|
||||
SparseMat_ clone() const;
|
||||
SparseMat_ clone() const CV_NODISCARD;
|
||||
//! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type)
|
||||
void create(int dims, const int* _sizes);
|
||||
//! converts sparse matrix to the old-style CvSparseMat. All the elements are copied
|
||||
@@ -3540,6 +3559,10 @@ CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator + (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator + (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s);
|
||||
@@ -3549,6 +3572,10 @@ CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator - (const Mat& a, const Matx<_Tp, m, n>& b) { return a - Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator - (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) - b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator - (const Mat& m);
|
||||
CV_EXPORTS MatExpr operator - (const MatExpr& e);
|
||||
@@ -3561,6 +3588,10 @@ CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator * (const MatExpr& e, double s);
|
||||
CV_EXPORTS MatExpr operator * (double s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator * (const Mat& a, const Matx<_Tp, m, n>& b) { return a * Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator * (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) * b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator / (const Mat& a, double s);
|
||||
@@ -3570,52 +3601,100 @@ CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator / (const MatExpr& e, double s);
|
||||
CV_EXPORTS MatExpr operator / (double s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator / (const Mat& a, const Matx<_Tp, m, n>& b) { return a / Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator / (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) / b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator < (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator < (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator < (const Mat& a, const Matx<_Tp, m, n>& b) { return a < Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator < (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) < b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator <= (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator <= (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator <= (const Mat& a, const Matx<_Tp, m, n>& b) { return a <= Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator <= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) <= b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator == (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator == (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator == (const Mat& a, const Matx<_Tp, m, n>& b) { return a == Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator == (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) == b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator != (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator != (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator != (const Mat& a, const Matx<_Tp, m, n>& b) { return a != Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator != (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) != b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator >= (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator >= (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator >= (const Mat& a, const Matx<_Tp, m, n>& b) { return a >= Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator >= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) >= b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator > (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator > (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator > (const Mat& a, const Matx<_Tp, m, n>& b) { return a > Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator > (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) > b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator & (const Mat& a, const Matx<_Tp, m, n>& b) { return a & Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator & (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) & b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator | (const Mat& a, const Matx<_Tp, m, n>& b) { return a | Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator | (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) | b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator ^ (const Mat& a, const Matx<_Tp, m, n>& b) { return a ^ Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator ^ (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) ^ b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator ~(const Mat& m);
|
||||
|
||||
CV_EXPORTS MatExpr min(const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr min(const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr min(double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr min (const Mat& a, const Matx<_Tp, m, n>& b) { return min(a, Mat(b)); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr min (const Matx<_Tp, m, n>& a, const Mat& b) { return min(Mat(a), b); }
|
||||
|
||||
CV_EXPORTS MatExpr max(const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr max(const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr max(double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr max (const Mat& a, const Matx<_Tp, m, n>& b) { return max(a, Mat(b)); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr max (const Matx<_Tp, m, n>& a, const Mat& b) { return max(Mat(a), b); }
|
||||
|
||||
/** @brief Calculates an absolute value of each matrix element.
|
||||
|
||||
@@ -61,6 +61,16 @@ CV__DEBUG_NS_BEGIN
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
////////////////////////// Custom (raw) type wrapper //////////////////////////
|
||||
|
||||
template<typename _Tp> static inline
|
||||
int rawType()
|
||||
{
|
||||
CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large");
|
||||
const int elemSize = sizeof(_Tp);
|
||||
return (int)CV_MAKETYPE(CV_8U, elemSize);
|
||||
}
|
||||
|
||||
//////////////////////// Input/Output Arrays ////////////////////////
|
||||
|
||||
inline void _InputArray::init(int _flags, const void* _obj)
|
||||
@@ -73,7 +83,7 @@ inline void* _InputArray::getObj() const { return obj; }
|
||||
inline int _InputArray::getFlags() const { return flags; }
|
||||
inline Size _InputArray::getSz() const { return sz; }
|
||||
|
||||
inline _InputArray::_InputArray() { init(NONE, 0); }
|
||||
inline _InputArray::_InputArray() { init(0 + NONE, 0); }
|
||||
inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); }
|
||||
inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); }
|
||||
inline _InputArray::_InputArray(const std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); }
|
||||
@@ -84,7 +94,6 @@ template<typename _Tp> inline
|
||||
_InputArray::_InputArray(const std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); }
|
||||
@@ -92,7 +101,6 @@ _InputArray::_InputArray(const std::array<_Tp, _Nm>& arr)
|
||||
template<std::size_t _Nm> inline
|
||||
_InputArray::_InputArray(const std::array<Mat, _Nm>& arr)
|
||||
{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); }
|
||||
#endif
|
||||
|
||||
inline
|
||||
_InputArray::_InputArray(const std::vector<bool>& vec)
|
||||
@@ -102,10 +110,6 @@ template<typename _Tp> inline
|
||||
_InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
|
||||
|
||||
inline
|
||||
_InputArray::_InputArray(const std::vector<std::vector<bool> >&)
|
||||
{ CV_Error(Error::StsUnsupportedFormat, "std::vector<std::vector<bool> > is not supported!\n"); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputArray::_InputArray(const std::vector<Mat_<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
|
||||
@@ -140,6 +144,25 @@ inline _InputArray::_InputArray(const ogl::Buffer& buf)
|
||||
inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem)
|
||||
{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec)
|
||||
{
|
||||
_InputArray v;
|
||||
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ;
|
||||
v.obj = (void*)&vec;
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr)
|
||||
{
|
||||
_InputArray v;
|
||||
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ;
|
||||
v.obj = (void*)arr.data();
|
||||
v.sz = Size(1, _Nm);
|
||||
return v;
|
||||
}
|
||||
|
||||
inline _InputArray::~_InputArray() {}
|
||||
|
||||
inline Mat _InputArray::getMat(int i) const
|
||||
@@ -157,22 +180,22 @@ inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
|
||||
inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR ||
|
||||
kind() == _InputArray::STD_BOOL_VECTOR ||
|
||||
kind() == _InputArray::STD_ARRAY; }
|
||||
inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; }
|
||||
inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
|
||||
inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); }
|
||||
inline _OutputArray::_OutputArray() { init(NONE + ACCESS_WRITE, 0); }
|
||||
inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags + ACCESS_WRITE, _obj); }
|
||||
inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); }
|
||||
inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); }
|
||||
inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); }
|
||||
inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); }
|
||||
inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT + ACCESS_WRITE, &vec); }
|
||||
inline _OutputArray::_OutputArray(UMat& m) { init(UMAT + ACCESS_WRITE, &m); }
|
||||
inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT + ACCESS_WRITE, &vec); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
|
||||
@@ -180,20 +203,11 @@ _OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr)
|
||||
template<std::size_t _Nm> inline
|
||||
_OutputArray::_OutputArray(std::array<Mat, _Nm>& arr)
|
||||
{ init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
|
||||
#endif
|
||||
|
||||
inline
|
||||
_OutputArray::_OutputArray(std::vector<bool>&)
|
||||
{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an output array\n"); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
|
||||
|
||||
inline
|
||||
_OutputArray::_OutputArray(std::vector<std::vector<bool> >&)
|
||||
{ CV_Error(Error::StsUnsupportedFormat, "std::vector<std::vector<bool> > cannot be an output array\n"); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(std::vector<Mat_<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
|
||||
@@ -214,7 +228,6 @@ template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(const std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
|
||||
@@ -222,7 +235,6 @@ _OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr)
|
||||
template<std::size_t _Nm> inline
|
||||
_OutputArray::_OutputArray(const std::array<Mat, _Nm>& arr)
|
||||
{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
|
||||
#endif
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(const std::vector<std::vector<_Tp> >& vec)
|
||||
@@ -278,10 +290,29 @@ inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
|
||||
inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec)
|
||||
{
|
||||
_OutputArray v;
|
||||
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE;
|
||||
v.obj = (void*)&vec;
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr)
|
||||
{
|
||||
_OutputArray v;
|
||||
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE;
|
||||
v.obj = (void*)arr.data();
|
||||
v.sz = Size(1, _Nm);
|
||||
return v;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
|
||||
inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); }
|
||||
inline _InputOutputArray::_InputOutputArray() { init(0+ACCESS_RW, 0); }
|
||||
inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags+ACCESS_RW, _obj); }
|
||||
inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); }
|
||||
inline _InputOutputArray::_InputOutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); }
|
||||
inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); }
|
||||
@@ -291,7 +322,6 @@ template<typename _Tp> inline
|
||||
_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
|
||||
@@ -299,10 +329,6 @@ _InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr)
|
||||
template<std::size_t _Nm> inline
|
||||
_InputOutputArray::_InputOutputArray(std::array<Mat, _Nm>& arr)
|
||||
{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); }
|
||||
#endif
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray(std::vector<bool>&)
|
||||
{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an input/output array\n"); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
|
||||
@@ -328,7 +354,6 @@ template<typename _Tp> inline
|
||||
_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
|
||||
@@ -336,7 +361,6 @@ _InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr)
|
||||
template<std::size_t _Nm> inline
|
||||
_InputOutputArray::_InputOutputArray(const std::array<Mat, _Nm>& arr)
|
||||
{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); }
|
||||
#endif
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputOutputArray::_InputOutputArray(const std::vector<std::vector<_Tp> >& vec)
|
||||
@@ -394,6 +418,30 @@ inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
|
||||
inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec)
|
||||
{
|
||||
_InputOutputArray v;
|
||||
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW;
|
||||
v.obj = (void*)&vec;
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr)
|
||||
{
|
||||
_InputOutputArray v;
|
||||
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW;
|
||||
v.obj = (void*)arr.data();
|
||||
v.sz = Size(1, _Nm);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
template<typename _Tp> static inline _InputArray rawIn(_Tp& v) { return _InputArray::rawIn(v); }
|
||||
template<typename _Tp> static inline _OutputArray rawOut(_Tp& v) { return _OutputArray::rawOut(v); }
|
||||
template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v) { return _InputOutputArray::rawInOut(v); }
|
||||
|
||||
CV__DEBUG_NS_END
|
||||
|
||||
//////////////////////////////////////////// Mat //////////////////////////////////////////
|
||||
@@ -504,24 +552,20 @@ Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
|
||||
if( _step == AUTO_STEP )
|
||||
{
|
||||
_step = minstep;
|
||||
flags |= CONTINUOUS_FLAG;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_DbgAssert( _step >= minstep );
|
||||
|
||||
if (_step % esz1 != 0)
|
||||
{
|
||||
CV_Error(Error::BadStep, "Step must be a multiple of esz1");
|
||||
}
|
||||
|
||||
if (_step == minstep || rows == 1)
|
||||
flags |= CONTINUOUS_FLAG;
|
||||
}
|
||||
step[0] = _step;
|
||||
step[1] = esz;
|
||||
datalimit = datastart + _step * rows;
|
||||
dataend = datalimit - _step + minstep;
|
||||
updateContinuityFlag();
|
||||
}
|
||||
|
||||
inline
|
||||
@@ -537,7 +581,6 @@ Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
|
||||
if( _step == AUTO_STEP )
|
||||
{
|
||||
_step = minstep;
|
||||
flags |= CONTINUOUS_FLAG;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -547,19 +590,17 @@ Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
|
||||
{
|
||||
CV_Error(Error::BadStep, "Step must be a multiple of esz1");
|
||||
}
|
||||
|
||||
if (_step == minstep || rows == 1)
|
||||
flags |= CONTINUOUS_FLAG;
|
||||
}
|
||||
step[0] = _step;
|
||||
step[1] = esz;
|
||||
datalimit = datastart + _step*rows;
|
||||
dataend = datalimit - _step + minstep;
|
||||
updateContinuityFlag();
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
|
||||
cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
{
|
||||
if(vec.empty())
|
||||
@@ -574,22 +615,29 @@ Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
|
||||
Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this);
|
||||
}
|
||||
|
||||
#ifdef CV_CXX11
|
||||
template<typename _Tp, typename> inline
|
||||
Mat::Mat(const std::initializer_list<_Tp> list)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)list.size()),
|
||||
cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
: Mat()
|
||||
{
|
||||
if(list.size() == 0)
|
||||
return;
|
||||
CV_Assert(list.size() != 0);
|
||||
Mat((int)list.size(), 1, traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp> inline
|
||||
Mat::Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list)
|
||||
: Mat()
|
||||
{
|
||||
size_t size_total = 1;
|
||||
for(auto s : sizes)
|
||||
size_total *= s;
|
||||
CV_Assert(list.size() != 0);
|
||||
CV_Assert(size_total == list.size());
|
||||
Mat((int)sizes.size(), (int*)sizes.begin(), traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this);
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()),
|
||||
cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
{
|
||||
if(arr.empty())
|
||||
@@ -603,11 +651,10 @@ Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData)
|
||||
else
|
||||
Mat((int)arr.size(), 1, traits::Type<_Tp>::value, (uchar*)arr.data()).copyTo(*this);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _Tp, int n> inline
|
||||
Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
|
||||
datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
{
|
||||
if( !copyData )
|
||||
@@ -623,7 +670,7 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
|
||||
datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
{
|
||||
if( !copyData )
|
||||
@@ -639,7 +686,7 @@ Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat::Mat(const Point_<_Tp>& pt, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
|
||||
datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
{
|
||||
if( !copyData )
|
||||
@@ -658,7 +705,7 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData)
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
|
||||
datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
|
||||
{
|
||||
if( !copyData )
|
||||
@@ -678,7 +725,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
|
||||
datastart(0), dataend(0), allocator(0), u(0), size(&rows)
|
||||
{
|
||||
*this = commaInitializer.operator Mat_<_Tp>();
|
||||
@@ -857,7 +904,9 @@ bool Mat::isSubmatrix() const
|
||||
inline
|
||||
size_t Mat::elemSize() const
|
||||
{
|
||||
return dims > 0 ? step.p[dims - 1] : 0;
|
||||
size_t res = dims > 0 ? step.p[dims - 1] : 0;
|
||||
CV_DbgAssert(res != 0);
|
||||
return res;
|
||||
}
|
||||
|
||||
inline
|
||||
@@ -942,7 +991,7 @@ _Tp* Mat::ptr(int y)
|
||||
template<typename _Tp> inline
|
||||
const _Tp* Mat::ptr(int y) const
|
||||
{
|
||||
CV_DbgAssert( y == 0 || (data && dims >= 1 && data && (unsigned)y < (unsigned)size.p[0]) );
|
||||
CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
|
||||
return (const _Tp*)(data + step.p[0] * y);
|
||||
}
|
||||
|
||||
@@ -1253,7 +1302,6 @@ Mat::operator std::vector<_Tp>() const
|
||||
return v;
|
||||
}
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
Mat::operator std::array<_Tp, _Nm>() const
|
||||
{
|
||||
@@ -1261,7 +1309,6 @@ Mat::operator std::array<_Tp, _Nm>() const
|
||||
copyTo(v);
|
||||
return v;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _Tp, int n> inline
|
||||
Mat::operator Vec<_Tp, n>() const
|
||||
@@ -1329,8 +1376,6 @@ void Mat::push_back(const std::vector<_Tp>& v)
|
||||
push_back(Mat(v));
|
||||
}
|
||||
|
||||
#ifdef CV_CXX_MOVE_SEMANTICS
|
||||
|
||||
inline
|
||||
Mat::Mat(Mat&& m)
|
||||
: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
|
||||
@@ -1392,8 +1437,6 @@ Mat& Mat::operator = (Mat&& m)
|
||||
return *this;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
///////////////////////////// MatSize ////////////////////////////
|
||||
|
||||
@@ -1401,22 +1444,36 @@ inline
|
||||
MatSize::MatSize(int* _p)
|
||||
: p(_p) {}
|
||||
|
||||
inline
|
||||
int MatSize::dims() const
|
||||
{
|
||||
return (p - 1)[0];
|
||||
}
|
||||
|
||||
inline
|
||||
Size MatSize::operator()() const
|
||||
{
|
||||
CV_DbgAssert(p[-1] <= 2);
|
||||
CV_DbgAssert(dims() <= 2);
|
||||
return Size(p[1], p[0]);
|
||||
}
|
||||
|
||||
inline
|
||||
const int& MatSize::operator[](int i) const
|
||||
{
|
||||
CV_DbgAssert(i < dims());
|
||||
#ifdef __OPENCV_BUILD
|
||||
CV_DbgAssert(i >= 0);
|
||||
#endif
|
||||
return p[i];
|
||||
}
|
||||
|
||||
inline
|
||||
int& MatSize::operator[](int i)
|
||||
{
|
||||
CV_DbgAssert(i < dims());
|
||||
#ifdef __OPENCV_BUILD
|
||||
CV_DbgAssert(i >= 0);
|
||||
#endif
|
||||
return p[i];
|
||||
}
|
||||
|
||||
@@ -1429,8 +1486,8 @@ MatSize::operator const int*() const
|
||||
inline
|
||||
bool MatSize::operator == (const MatSize& sz) const
|
||||
{
|
||||
int d = p[-1];
|
||||
int dsz = sz.p[-1];
|
||||
int d = dims();
|
||||
int dsz = sz.dims();
|
||||
if( d != dsz )
|
||||
return false;
|
||||
if( d == 2 )
|
||||
@@ -1497,7 +1554,7 @@ template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_()
|
||||
: Mat()
|
||||
{
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
@@ -1554,7 +1611,7 @@ template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_(const Mat& m)
|
||||
: Mat()
|
||||
{
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
|
||||
*this = m;
|
||||
}
|
||||
|
||||
@@ -1624,19 +1681,20 @@ Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData)
|
||||
: Mat(vec, copyData)
|
||||
{}
|
||||
|
||||
#ifdef CV_CXX11
|
||||
template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_(std::initializer_list<_Tp> list)
|
||||
: Mat(list)
|
||||
{}
|
||||
#endif
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_(const std::initializer_list<int> sizes, std::initializer_list<_Tp> list)
|
||||
: Mat(sizes, list)
|
||||
{}
|
||||
|
||||
template<typename _Tp> template<std::size_t _Nm> inline
|
||||
Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData)
|
||||
: Mat(arr, copyData)
|
||||
{}
|
||||
#endif
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
|
||||
@@ -1650,7 +1708,7 @@ Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
|
||||
{
|
||||
return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0));
|
||||
}
|
||||
CV_DbgAssert(DataType<_Tp>::channels == m.channels());
|
||||
CV_Assert(DataType<_Tp>::channels == m.channels() || m.empty());
|
||||
m.convertTo(*this, type());
|
||||
return *this;
|
||||
}
|
||||
@@ -1693,7 +1751,7 @@ void Mat_<_Tp>::release()
|
||||
{
|
||||
Mat::release();
|
||||
#ifdef _DEBUG
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1924,7 +1982,6 @@ Mat_<_Tp>::operator std::vector<_Tp>() const
|
||||
return v;
|
||||
}
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp> template<std::size_t _Nm> inline
|
||||
Mat_<_Tp>::operator std::array<_Tp, _Nm>() const
|
||||
{
|
||||
@@ -1932,7 +1989,6 @@ Mat_<_Tp>::operator std::array<_Tp, _Nm>() const
|
||||
copyTo(a);
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _Tp> template<int n> inline
|
||||
Mat_<_Tp>::operator Vec<typename DataType<_Tp>::channel_type, n>() const
|
||||
@@ -1996,8 +2052,6 @@ void Mat_<_Tp>::forEach(const Functor& operation) const {
|
||||
Mat::forEach<_Tp, Functor>(operation);
|
||||
}
|
||||
|
||||
#ifdef CV_CXX_MOVE_SEMANTICS
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_(Mat_&& m)
|
||||
: Mat(m)
|
||||
@@ -2015,7 +2069,7 @@ template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_(Mat&& m)
|
||||
: Mat()
|
||||
{
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
|
||||
*this = m;
|
||||
}
|
||||
|
||||
@@ -2041,11 +2095,10 @@ template<typename _Tp> inline
|
||||
Mat_<_Tp>::Mat_(MatExpr&& e)
|
||||
: Mat()
|
||||
{
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
|
||||
flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
|
||||
*this = Mat(e);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////// SparseMat /////////////////////////////
|
||||
|
||||
@@ -2378,7 +2431,7 @@ SparseMatConstIterator_<_Tp> SparseMat::end() const
|
||||
template<typename _Tp> inline
|
||||
SparseMat_<_Tp>::SparseMat_()
|
||||
{
|
||||
flags = MAGIC_VAL | traits::Type<_Tp>::value;
|
||||
flags = MAGIC_VAL + traits::Type<_Tp>::value;
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
@@ -3601,7 +3654,7 @@ UMat::UMat(const UMat& m)
|
||||
|
||||
template<typename _Tp> inline
|
||||
UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
|
||||
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
|
||||
: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
|
||||
cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
|
||||
{
|
||||
if(vec.empty())
|
||||
@@ -3766,7 +3819,9 @@ bool UMat::isSubmatrix() const
|
||||
inline
|
||||
size_t UMat::elemSize() const
|
||||
{
|
||||
return dims > 0 ? step.p[dims - 1] : 0;
|
||||
size_t res = dims > 0 ? step.p[dims - 1] : 0;
|
||||
CV_DbgAssert(res != 0);
|
||||
return res;
|
||||
}
|
||||
|
||||
inline
|
||||
@@ -3816,8 +3871,6 @@ size_t UMat::total() const
|
||||
return p;
|
||||
}
|
||||
|
||||
#ifdef CV_CXX_MOVE_SEMANTICS
|
||||
|
||||
inline
|
||||
UMat::UMat(UMat&& m)
|
||||
: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
|
||||
@@ -3878,8 +3931,6 @@ UMat& UMat::operator = (UMat&& m)
|
||||
return *this;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; }
|
||||
inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; }
|
||||
@@ -3911,9 +3962,6 @@ inline void UMatData::markDeviceCopyObsolete(bool flag)
|
||||
flags &= ~DEVICE_COPY_OBSOLETE;
|
||||
}
|
||||
|
||||
inline UMatDataAutoLock::UMatDataAutoLock(UMatData* _u) : u(_u) { u->lock(); }
|
||||
inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); }
|
||||
|
||||
//! @endcond
|
||||
|
||||
} //cv
|
||||
@@ -53,9 +53,7 @@
|
||||
#include "opencv2/core/traits.hpp"
|
||||
#include "opencv2/core/saturate.hpp"
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <initializer_list>
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@@ -66,13 +64,14 @@ namespace cv
|
||||
////////////////////////////// Small Matrix ///////////////////////////
|
||||
|
||||
//! @cond IGNORED
|
||||
struct CV_EXPORTS Matx_AddOp {};
|
||||
struct CV_EXPORTS Matx_SubOp {};
|
||||
struct CV_EXPORTS Matx_ScaleOp {};
|
||||
struct CV_EXPORTS Matx_MulOp {};
|
||||
struct CV_EXPORTS Matx_DivOp {};
|
||||
struct CV_EXPORTS Matx_MatMulOp {};
|
||||
struct CV_EXPORTS Matx_TOp {};
|
||||
// FIXIT Remove this (especially CV_EXPORTS modifier)
|
||||
struct CV_EXPORTS Matx_AddOp { Matx_AddOp() {} Matx_AddOp(const Matx_AddOp&) {} };
|
||||
struct CV_EXPORTS Matx_SubOp { Matx_SubOp() {} Matx_SubOp(const Matx_SubOp&) {} };
|
||||
struct CV_EXPORTS Matx_ScaleOp { Matx_ScaleOp() {} Matx_ScaleOp(const Matx_ScaleOp&) {} };
|
||||
struct CV_EXPORTS Matx_MulOp { Matx_MulOp() {} Matx_MulOp(const Matx_MulOp&) {} };
|
||||
struct CV_EXPORTS Matx_DivOp { Matx_DivOp() {} Matx_DivOp(const Matx_DivOp&) {} };
|
||||
struct CV_EXPORTS Matx_MatMulOp { Matx_MatMulOp() {} Matx_MatMulOp(const Matx_MatMulOp&) {} };
|
||||
struct CV_EXPORTS Matx_TOp { Matx_TOp() {} Matx_TOp(const Matx_TOp&) {} };
|
||||
//! @endcond
|
||||
|
||||
/** @brief Template class for small matrices whose type and size are known at compilation time
|
||||
@@ -92,7 +91,7 @@ Except of the plain constructor which takes a list of elements, Matx can be init
|
||||
float values[] = { 1, 2, 3};
|
||||
Matx31f m(values);
|
||||
@endcode
|
||||
In case if C++11 features are avaliable, std::initializer_list can be also used to initizlize Matx:
|
||||
In case if C++11 features are available, std::initializer_list can be also used to initialize Matx:
|
||||
@code{.cpp}
|
||||
Matx31f m = { 1, 2, 3};
|
||||
@endcode
|
||||
@@ -118,7 +117,7 @@ public:
|
||||
//! default constructor
|
||||
Matx();
|
||||
|
||||
Matx(_Tp v0); //!< 1x1 matrix
|
||||
explicit Matx(_Tp v0); //!< 1x1 matrix
|
||||
Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix
|
||||
Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix
|
||||
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix
|
||||
@@ -141,9 +140,7 @@ public:
|
||||
_Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix
|
||||
explicit Matx(const _Tp* vals); //!< initialize from a plain array
|
||||
|
||||
#ifdef CV_CXX11
|
||||
Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list
|
||||
#endif
|
||||
|
||||
static Matx all(_Tp alpha);
|
||||
static Matx zeros();
|
||||
@@ -166,7 +163,7 @@ public:
|
||||
template<int m1, int n1> Matx<_Tp, m1, n1> reshape() const;
|
||||
|
||||
//! extract part of the matrix
|
||||
template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int i, int j) const;
|
||||
template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int base_row, int base_col) const;
|
||||
|
||||
//! extract the matrix row
|
||||
Matx<_Tp, 1, n> row(int i) const;
|
||||
@@ -194,8 +191,8 @@ public:
|
||||
Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const;
|
||||
|
||||
//! element access
|
||||
const _Tp& operator ()(int i, int j) const;
|
||||
_Tp& operator ()(int i, int j);
|
||||
const _Tp& operator ()(int row, int col) const;
|
||||
_Tp& operator ()(int row, int col);
|
||||
|
||||
//! 1D element access
|
||||
const _Tp& operator ()(int i) const;
|
||||
@@ -361,9 +358,7 @@ public:
|
||||
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor
|
||||
explicit Vec(const _Tp* values);
|
||||
|
||||
#ifdef CV_CXX11
|
||||
Vec(std::initializer_list<_Tp>);
|
||||
#endif
|
||||
|
||||
Vec(const Vec<_Tp, cn>& v);
|
||||
|
||||
@@ -665,7 +660,6 @@ Matx<_Tp, m, n>::Matx(const _Tp* values)
|
||||
for( int i = 0; i < channels; i++ ) val[i] = values[i];
|
||||
}
|
||||
|
||||
#ifdef CV_CXX11
|
||||
template<typename _Tp, int m, int n> inline
|
||||
Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list)
|
||||
{
|
||||
@@ -676,7 +670,6 @@ Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list)
|
||||
val[i++] = elem;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
Matx<_Tp, m, n> Matx<_Tp, m, n>::all(_Tp alpha)
|
||||
@@ -749,13 +742,13 @@ Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const
|
||||
|
||||
template<typename _Tp, int m, int n>
|
||||
template<int m1, int n1> inline
|
||||
Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int i, int j) const
|
||||
Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int base_row, int base_col) const
|
||||
{
|
||||
CV_DbgAssert(0 <= i && i+m1 <= m && 0 <= j && j+n1 <= n);
|
||||
CV_DbgAssert(0 <= base_row && base_row+m1 <= m && 0 <= base_col && base_col+n1 <= n);
|
||||
Matx<_Tp, m1, n1> s;
|
||||
for( int di = 0; di < m1; di++ )
|
||||
for( int dj = 0; dj < n1; dj++ )
|
||||
s(di, dj) = (*this)(i+di, j+dj);
|
||||
s(di, dj) = (*this)(base_row+di, base_col+dj);
|
||||
return s;
|
||||
}
|
||||
|
||||
@@ -786,17 +779,17 @@ typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
const _Tp& Matx<_Tp, m, n>::operator()(int i, int j) const
|
||||
const _Tp& Matx<_Tp, m, n>::operator()(int row_idx, int col_idx) const
|
||||
{
|
||||
CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
|
||||
return this->val[i*n + j];
|
||||
CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n );
|
||||
return this->val[row_idx*n + col_idx];
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
_Tp& Matx<_Tp, m, n>::operator ()(int i, int j)
|
||||
_Tp& Matx<_Tp, m, n>::operator ()(int row_idx, int col_idx)
|
||||
{
|
||||
CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
|
||||
return val[i*n + j];
|
||||
CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n );
|
||||
return val[row_idx*n + col_idx];
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
@@ -1019,11 +1012,9 @@ template<typename _Tp, int cn> inline
|
||||
Vec<_Tp, cn>::Vec(const _Tp* values)
|
||||
: Matx<_Tp, cn, 1>(values) {}
|
||||
|
||||
#ifdef CV_CXX11
|
||||
template<typename _Tp, int cn> inline
|
||||
Vec<_Tp, cn>::Vec(std::initializer_list<_Tp> list)
|
||||
: Matx<_Tp, cn, 1>(list) {}
|
||||
#endif
|
||||
|
||||
template<typename _Tp, int cn> inline
|
||||
Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user