diff --git a/.travis.yml b/.travis.yml
index e169a021..200c5e90 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,6 @@ branches:
   only:
   - master
   - develop
-  - feature/code_cleanup
 
 compiler:
   - gcc
@@ -46,7 +45,7 @@ before_install:
   - if [ "$CXX" = "g++" ]; then
       if [ ${TRAVIS_OS_NAME} = linux ]; then
         $CXX --version;
-        sudo cmake -D CMAKE_BUILD_TYPE=RELEASE -D WITH_V4L=ON -D WITH_OPENCL=OFF -D INSTALL_C_EXAMPLES=OFF -D BUILD_EXAMPLES=OFF -D BUILD_TESTS=OFF -D BUILD_PERF_TESTS=OFF -D BUILD_EXAMPLES=OFF -D INSTALL_PYTHON_EXAMPLES=OFF ..;
+        sudo cmake -D CMAKE_BUILD_TYPE=RELEASE -D WITH_V4L=ON -D WITH_OPENCL=OFF -D INSTALL_C_EXAMPLES=OFF -D WITH_TBB=ON -D BUILD_EXAMPLES=OFF -D BUILD_TESTS=OFF -D BUILD_PERF_TESTS=OFF -D BUILD_EXAMPLES=OFF -D INSTALL_PYTHON_EXAMPLES=OFF ..;
         sudo make -j4;
         sudo make install;
         cd ../..;
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 734665d6..d965b766 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,25 +48,6 @@ else()
 	MESSAGE(FATAL_ERROR "Boost not found in the system.")
 endif()
 
-# Try finding TBB in default location
-find_package( TBB CONFIG )
-
-# If not found, use FindTBB.cmake
-if ("${TBB_LIBRARIES}" STREQUAL "")
-	MESSAGE("TBB not found in CONFIG, searching with FindTBB.cmake.")
-	find_package( TBB REQUIRED )
-	if ("${TBB_LIBRARIES}" STREQUAL "")
-		MESSAGE(FATAL_ERROR "TBB not found")
-	endif()
-else()
-	MESSAGE("TBB found in CONFIG: ${TBB_LIBRARIES}")
-endif()
-
-MESSAGE("TBB information:")
-MESSAGE("  TBB_VERSION: ${TBB_VERSION}")
-MESSAGE("  TBB_INCLUDE_DIRS: ${TBB_INCLUDE_DIRS}")
-MESSAGE("  TBB_LIBRARIES: ${TBB_LIBRARIES}")
-
 # Move LandmarkDetector model
 file(GLOB files "lib/local/LandmarkDetector/model/*.txt")
 foreach(file ${files})
diff --git a/README.md b/README.md
index 659b68ab..c900028c 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# OpenFace 2.0.6: an open source facial behavior analysis toolkit
+# OpenFace 2.1.0: an open source facial behavior analysis toolkit
 
 [![Build Status](https://travis-ci.org/TadasBaltrusaitis/OpenFace.svg?branch=master)](https://travis-ci.org/TadasBaltrusaitis/OpenFace)
 [![Build status](https://ci.appveyor.com/api/projects/status/8msiklxfbhlnsmxp/branch/master?svg=true)](https://ci.appveyor.com/project/TadasBaltrusaitis/openface/branch/master)
@@ -100,6 +100,6 @@ I did my best to make sure that the code runs out of the box but there are always
 
 Copyright can be found in the Copyright.txt
 
-You have to respect boost, TBB, dlib, OpenBLAS, and OpenCV licenses.
+You have to respect boost, dlib, OpenBLAS, and OpenCV licenses.
 
 Furthermore you have to respect the licenses of the datasets used for model training - https://github.com/TadasBaltrusaitis/OpenFace/wiki/Datasets
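With the project's own TBB detection gone from CMakeLists.txt, the Travis build instead compiles OpenCV with -D WITH_TBB=ON, so the parallelism OpenFace relies on now comes from OpenCV's internal dispatcher rather than from direct TBB calls. A minimal sketch of that pattern (the class name and loop body are illustrative, not taken from the OpenFace sources):

#include <opencv2/core.hpp>

// cv::parallel_for_ dispatches the range across whatever backend
// OpenCV was built with (TBB here, given -D WITH_TBB=ON).
class ScaleRows : public cv::ParallelLoopBody
{
public:
    ScaleRows(cv::Mat& image, double scale) : image_(image), scale_(scale) {}
    void operator()(const cv::Range& range) const override
    {
        // Each stripe of rows is processed by one worker.
        for (int r = range.start; r < range.end; ++r)
            image_.row(r) *= scale_;
    }
private:
    cv::Mat& image_;
    double scale_;
};

int main()
{
    cv::Mat image = cv::Mat::ones(480, 640, CV_64F);
    cv::parallel_for_(cv::Range(0, image.rows), ScaleRows(image, 0.5));
    return 0;
}

Callers of cv::parallel_for_ need no TBB headers or link-time dependency of their own, which is why the project files further down can drop their TBB property-sheet imports.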
diff --git a/appveyor.yml b/appveyor.yml
index df31985e..539e3164 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -3,7 +3,6 @@ branches:
   only:
   - develop
   - master
-  - feature/code_cleanup
 max_jobs: 4
 configuration:
   - Release
diff --git a/download_models.ps1 b/download_models.ps1
index f7463f28..f818337f 100644
--- a/download_models.ps1
+++ b/download_models.ps1
@@ -1,30 +1,42 @@
-# Download the models from the cloud (stored in Dropbox, OneDrive, and Google Drive
+# Download the models from the cloud (stored in Dropbox, OneDrive, and Google Drive)
+
+# Determine correct path to the model files
+if([System.IO.Directory]::Exists( (Join-Path (Get-Location) 'lib') ))
+{
+    # If the lib folder exists, code is compiled from source
+    $modelPath = "lib/local/LandmarkDetector/"
+}
+else
+{
+    # Otherwise, binaries are used
+    $modelPath = ""
+}
 
 # Start with 0.25 scale models
-$destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.25_of.dat"
+$destination = $modelPath + "model/patch_experts/cen_patches_0.25_of.dat"
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://www.dropbox.com/s/7na5qsjzz8yfoer/cen_patches_0.25_of.dat?dl=1"
     Invoke-WebRequest $source -OutFile $destination
 }
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153072&authkey=AKqoZtcN0PSIZH4"
     Invoke-WebRequest $source -OutFile $destination
 }
 
 # 0.35 scale models
-$destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.35_of.dat"
+$destination = $modelPath + "model/patch_experts/cen_patches_0.35_of.dat"
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://www.dropbox.com/s/k7bj804cyiu474t/cen_patches_0.35_of.dat?dl=1"
     Invoke-WebRequest $source -OutFile $destination
 }
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153079&authkey=ANpDR1n3ckL_0gs"
     Invoke-WebRequest $source -OutFile $destination
 }
@@ -32,30 +44,30 @@ if(!([System.IO.File]::Exists($destination)))
 
 # 0.5 scale models
-$destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_0.50_of.dat"
+$destination = $modelPath + "model/patch_experts/cen_patches_0.50_of.dat"
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://www.dropbox.com/s/ixt4vkbmxgab1iu/cen_patches_0.50_of.dat?dl=1"
     Invoke-WebRequest $source -OutFile $destination
 }
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153074&authkey=AGi-e30AfRc_zvs"
     Invoke-WebRequest $source -OutFile $destination
 }
 
 # 1.0 scale models
-$destination = "lib/local/LandmarkDetector/model/patch_experts/cen_patches_1.00_of.dat"
+$destination = $modelPath + "model/patch_experts/cen_patches_1.00_of.dat"
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://www.dropbox.com/s/2t5t1sdpshzfhpj/cen_patches_1.00_of.dat?dl=1"
     Invoke-WebRequest $source -OutFile $destination
 }
 
-if(!([System.IO.File]::Exists($destination)))
+if(!([System.IO.File]::Exists( (Join-Path (Get-Location) $destination) )))
 {
     $source = "https://onedrive.live.com/download?cid=2E2ADA578BFF6E6E&resid=2E2ADA578BFF6E6E%2153070&authkey=AD6KjtYipphwBPc"
     Invoke-WebRequest $source -OutFile $destination
 }
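The rewritten script resolves model paths for both layouts: a source checkout (a lib folder exists, so models live under lib/local/LandmarkDetector/) and a binary release (models sit in model/ next to the executables). The same probe expressed in C++17 std::filesystem terms, purely as an illustrative sketch:

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

int main()
{
    // Mirror of the script's check: a "lib" folder means a source
    // checkout; otherwise the models live next to the binaries.
    const fs::path base = fs::current_path();
    const std::string model_path =
        fs::exists(base / "lib") ? "lib/local/LandmarkDetector/" : "";

    const fs::path destination =
        base / (model_path + "model/patch_experts/cen_patches_0.25_of.dat");

    if (!fs::exists(destination))
        std::cout << "model missing, needs downloading: " << destination << '\n';
    return 0;
}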
diff --git a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp
index 10b1eb06..58c0faec 100644
--- a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp
+++ b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp
@@ -38,8 +38,6 @@
 
 #include "LandmarkCoreIncludes.h"
 
-#include <tbb/tbb.h>
-
 #include <...>
 #include <...>
diff --git a/exe/FaceLandmarkImg/FaceLandmarkImg.vcxproj b/exe/FaceLandmarkImg/FaceLandmarkImg.vcxproj
index e59e5312..b50d2b06 100644
--- a/exe/FaceLandmarkImg/FaceLandmarkImg.vcxproj
+++ b/exe/FaceLandmarkImg/FaceLandmarkImg.vcxproj
@@ -59,33 +59,29 @@
[<Import> elements lost in extraction: each of the four build configurations replaces its two property-sheet imports with a single platform-specific OpenBLAS import]
diff --git a/exe/FaceLandmarkVid/FaceLandmarkVid.vcxproj b/exe/FaceLandmarkVid/FaceLandmarkVid.vcxproj
index 33fd71d9..76102fc9 100644
--- a/exe/FaceLandmarkVid/FaceLandmarkVid.vcxproj
+++ b/exe/FaceLandmarkVid/FaceLandmarkVid.vcxproj
@@ -59,33 +59,29 @@
[<Import> elements lost in extraction: same two-imports-to-one OpenBLAS substitution in all four configurations]
diff --git a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp
index 423d368a..4333b913 100644
--- a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp
+++ b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp
@@ -263,13 +263,11 @@ int main(int argc, char **argv)
 			}
 
-			// Keep only non overlapping detections (also convert to a concurrent vector
+			// Keep only non overlapping detections (so as not to start tracking where the face is already tracked)
 			NonOverlapingDetections(face_models, face_detections);
-
-			vector < tbb::atomic<bool> > face_detections_used(face_detections.size());
+			std::vector<bool> face_detections_used(face_detections.size(), false);
 
 			// Go through every model and update the tracking
-			//tbb::parallel_for(0, (int)face_models.size(), [&](int model) {
 			for (unsigned int model = 0; model < face_models.size(); ++model)
 			{
@@ -288,9 +286,10 @@
 				for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind)
 				{
-					// if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe)
-					if (face_detections_used[detection_ind].compare_and_swap(true, false) == false)
+					// if it was not taken by another tracker take it
+					if (!face_detections_used[detection_ind])
 					{
+						face_detections_used[detection_ind] = true;
 						// Reinitialise the model
 						face_models[model].Reset();
@@ -314,7 +313,6 @@
 						detection_success = LandmarkDetector::DetectLandmarksInVideo(rgb_image, face_models[model], det_parameters[model], grayscale_image);
 					}
 				}
-				//});
 
 				// Keeping track of FPS
 				fps_tracker.AddFrame();
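With tbb::parallel_for gone, the per-model loop above is serial, so a plain std::vector<bool> claim flag replaces the tbb::atomic compare_and_swap. If the loop were ever parallelised again, the standard-library equivalent of that claim-a-detection idiom would look roughly like this sketch (an assumption, not part of this PR; std::deque is used because std::atomic<bool> is neither copyable nor movable, which rules out a resizable std::vector of them):

#include <atomic>
#include <cstdio>
#include <deque>

int main()
{
    // One claim flag per detection, initialised to "not taken".
    std::deque<std::atomic<bool>> face_detections_used(4);
    for (auto& used : face_detections_used)
        used.store(false);

    for (std::size_t ind = 0; ind < face_detections_used.size(); ++ind)
    {
        // Equivalent of the removed compare_and_swap(true, false):
        // atomically claim the detection iff no other tracker has taken it.
        bool expected = false;
        if (face_detections_used[ind].compare_exchange_strong(expected, true))
            std::printf("detection %zu claimed\n", ind);
    }
    return 0;
}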
diff --git a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj
index 3d68ce0f..6be02c90 100644
--- a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj
+++ b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.vcxproj
@@ -58,33 +58,29 @@
[<Import> elements lost in extraction: same two-imports-to-one OpenBLAS substitution in all four configurations]
diff --git a/exe/FeatureExtraction/FeatureExtraction.vcxproj b/exe/FeatureExtraction/FeatureExtraction.vcxproj
index 992ac525..e2e53ee6 100644
--- a/exe/FeatureExtraction/FeatureExtraction.vcxproj
+++ b/exe/FeatureExtraction/FeatureExtraction.vcxproj
@@ -58,33 +58,29 @@
[<Import> elements lost in extraction: same two-imports-to-one OpenBLAS substitution in all four configurations]
diff --git a/exe/releases/package_windows_executables.m b/exe/releases/package_windows_executables.m
index 1af9795a..b362b124 100644
--- a/exe/releases/package_windows_executables.m
+++ b/exe/releases/package_windows_executables.m
@@ -1,5 +1,5 @@
 clear;
-version = '2.0.6';
+version = '2.1.0';
 
 out_x86 = sprintf('OpenFace_%s_win_x86', version);
 out_x64 = sprintf('OpenFace_%s_win_x64', version);
diff --git a/lib/3rdParty/OpenBLAS/OpenBLAS_64.props b/lib/3rdParty/OpenBLAS/OpenBLAS_64.props
new file mode 100644
index 00000000..4b9bb69f
--- /dev/null
+++ b/lib/3rdParty/OpenBLAS/OpenBLAS_64.props
@@ -0,0 +1,20 @@
[MSBuild XML markup lost in extraction: the new x64 property sheet adds $(SolutionDir)lib\3rdParty\OpenBLAS\include to the include path, links openblas.lib from lib\3rdParty\OpenBLAS\lib\$(PlatformShortName), and post-build runs xcopy /I /E /Y /D /C "$(SolutionDir)lib\3rdParty\OpenBlas\bin\$(PlatformShortName)" "$(OutDir)"]
\ No newline at end of file
diff --git a/lib/3rdParty/OpenBLAS/OpenBLAS.props b/lib/3rdParty/OpenBLAS/OpenBLAS_x86.props
similarity index 100%
rename from lib/3rdParty/OpenBLAS/OpenBLAS.props
rename to lib/3rdParty/OpenBLAS/OpenBLAS_x86.props
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/flang.dll b/lib/3rdParty/OpenBLAS/bin/x64/flang.dll
new file mode 100644
index 00000000..31456a91
Binary files /dev/null and b/lib/3rdParty/OpenBLAS/bin/x64/flang.dll differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/flangrti.dll b/lib/3rdParty/OpenBLAS/bin/x64/flangrti.dll
new file mode 100644
index 00000000..5f2103f7
Binary files /dev/null and b/lib/3rdParty/OpenBLAS/bin/x64/flangrti.dll differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/libgcc_s_seh-1.dll b/lib/3rdParty/OpenBLAS/bin/x64/libgcc_s_seh-1.dll
deleted file mode 100644
index 500f7a5c..00000000
Binary files a/lib/3rdParty/OpenBLAS/bin/x64/libgcc_s_seh-1.dll and /dev/null differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/libgfortran-3.dll b/lib/3rdParty/OpenBLAS/bin/x64/libgfortran-3.dll
deleted file mode 100644
index 23136a90..00000000
Binary files a/lib/3rdParty/OpenBLAS/bin/x64/libgfortran-3.dll and /dev/null differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/libomp.dll b/lib/3rdParty/OpenBLAS/bin/x64/libomp.dll
new file mode 100644
index 00000000..38a1658e
Binary files /dev/null and b/lib/3rdParty/OpenBLAS/bin/x64/libomp.dll differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/libopenblas.dll b/lib/3rdParty/OpenBLAS/bin/x64/libopenblas.dll
deleted file mode 100644
index 6bc5c2d4..00000000
Binary files a/lib/3rdParty/OpenBLAS/bin/x64/libopenblas.dll and /dev/null differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/libquadmath-0.dll b/lib/3rdParty/OpenBLAS/bin/x64/libquadmath-0.dll
deleted file mode 100644
index 028a3c34..00000000
Binary files a/lib/3rdParty/OpenBLAS/bin/x64/libquadmath-0.dll and /dev/null differ
diff --git a/lib/3rdParty/OpenBLAS/bin/x64/openblas.dll b/lib/3rdParty/OpenBLAS/bin/x64/openblas.dll
new file mode 100644
index 00000000..09a28aa8
Binary files /dev/null and b/lib/3rdParty/OpenBLAS/bin/x64/openblas.dll differ
diff --git a/lib/3rdParty/OpenBLAS/lib/x64/libopenblas.dll.a b/lib/3rdParty/OpenBLAS/lib/x64/libopenblas.dll.a
deleted file mode 100644
index 19e9da2f..00000000
Binary files a/lib/3rdParty/OpenBLAS/lib/x64/libopenblas.dll.a and /dev/null differ
diff --git a/lib/3rdParty/OpenBLAS/lib/x64/openblas.lib b/lib/3rdParty/OpenBLAS/lib/x64/openblas.lib
new file mode 100644
index 00000000..c477c5f8
Binary files /dev/null and b/lib/3rdParty/OpenBLAS/lib/x64/openblas.lib differ
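The rebuilt OpenBLAS 0.3.3 package swaps both the import library (openblas.lib replaces libopenblas.dll.a) and the runtime DLL set (flang, flangrti, and libomp instead of the MinGW libgfortran chain); the xcopy post-build step in the new property sheet stages those DLLs next to the executables. A one-call CBLAS smoke test such as this hypothetical snippet is enough to confirm the new binaries link and load (cblas_dgemm is part of OpenBLAS's standard CBLAS interface):

#include <cblas.h>
#include <cstdio>

int main()
{
    // Row-major 2x2 GEMM: C = 1.0 * A * B + 0.0 * C
    const double A[4] = { 1.0, 2.0, 3.0, 4.0 };
    const double B[4] = { 5.0, 6.0, 7.0, 8.0 };
    double C[4] = { 0.0, 0.0, 0.0, 0.0 };

    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                2, 2, 2, 1.0, A, 2, B, 2, 0.0, C, 2);

    // Expect 19 22 / 43 50 if openblas.lib linked and openblas.dll loads.
    std::printf("%g %g\n%g %g\n", C[0], C[1], C[2], C[3]);
    return 0;
}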
diff --git a/lib/3rdParty/OpenBLAS/readme_2.txt b/lib/3rdParty/OpenBLAS/readme_2.txt
index dde99583..a05b7a79 100644
--- a/lib/3rdParty/OpenBLAS/readme_2.txt
+++ b/lib/3rdParty/OpenBLAS/readme_2.txt
@@ -1,8 +1,8 @@
 Using the following OpenBLAS implementation - https://github.com/xianyi/OpenBLAS
 
 The pre-compiled library was acquired from (64 bit version):
-https://sourceforge.net/projects/openblas/files/v0.2.19/
-File - OpenBLAS-v0.2.19-Win64-int32.zip
+Manually building OpenBLAS 0.3.3 from code using Miniconda - https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio
+version - https://github.com/xianyi/OpenBLAS/archive/v0.3.3.zip
 
 The pre-compiled library was acquired from (32 bit version):
 https://sourceforge.net/projects/openblas/files/v0.2.15/
diff --git a/lib/3rdParty/tbb/CHANGES b/lib/3rdParty/tbb/CHANGES
deleted file mode 100644
index ab1d1a13..00000000
--- a/lib/3rdParty/tbb/CHANGES
+++ /dev/null
@@ -1,2359 +0,0 @@
[2,359 deleted lines of the bundled Intel TBB changelog omitted]
- -Changes affecting backward compatibility: - -- Minimal compiler versions required for support of C++11 move semantics - raised to GCC 4.5, VS 2012, and Intel(R) C++ Compiler 14.0. - -Bugs fixed: - -- The workaround for crashes in the library compiled with GCC 6 - (-flifetime-dse=1) was extended to Windows*. - ------------------------------------------------------------------------- -Intel TBB 2017 Update 3 -TBB_INTERFACE_VERSION == 9103 - -Changes (w.r.t. Intel TBB 2017 Update 2): - -- Added support for Android* 7.0 and Android* NDK r13, r13b. - -Preview Features: - -- Added template class gfx_factory to the flow graph API. It implements - the Factory concept for streaming_node to offload computations to - Intel(R) processor graphics. - -Bugs fixed: - -- Fixed a possible deadlock caused by missed wakeup signals in - task_arena::execute(). - -Open-source contributions integrated: - -- A build fix for Linux* s390x platform by Jerry J. - ------------------------------------------------------------------------- -Intel TBB 2017 Update 2 -TBB_INTERFACE_VERSION == 9102 - -Changes (w.r.t. Intel TBB 2017 Update 1): - -- Removed the long-outdated support for Xbox* consoles. - -Bugs fixed: - -- Fixed the issue with task_arena::execute() not being processed when - the calling thread cannot join the arena. -- Fixed dynamic memory allocation replacement failure on macOS* 10.12. - ------------------------------------------------------------------------- -Intel TBB 2017 Update 1 -TBB_INTERFACE_VERSION == 9101 - -Changes (w.r.t. Intel TBB 2017): - -Bugs fixed: - -- Fixed dynamic memory allocation replacement failures on Windows* 10 - Anniversary Update. -- Fixed emplace() method of concurrent unordered containers to not - require a copy constructor. - ------------------------------------------------------------------------- -Intel TBB 2017 -TBB_INTERFACE_VERSION == 9100 - -Changes (w.r.t. Intel TBB 4.4 Update 5): - -- static_partitioner class is now a fully supported feature. -- async_node class is now a fully supported feature. -- Improved dynamic memory allocation replacement on Windows* OS to skip - DLLs for which replacement cannot be done, instead of aborting. -- Intel TBB no longer performs dynamic memory allocation replacement - for Microsoft* Visual Studio* 2008. -- For 64-bit platforms, quadrupled the worst-case limit on the amount - of memory the Intel TBB allocator can handle. -- Added TBB_USE_GLIBCXX_VERSION macro to specify the version of GNU - libstdc++ when it cannot be properly recognized, e.g. when used - with Clang on Linux* OS. Inspired by a contribution from David A. -- Added graph/stereo example to demostrate tbb::flow::async_msg. -- Removed a few cases of excessive user data copying in the flow graph. -- Reworked split_node to eliminate unnecessary overheads. -- Added support for C++11 move semantics to the argument of - tbb::parallel_do_feeder::add() method. -- Added C++11 move constructor and assignment operator to - tbb::combinable template class. -- Added tbb::this_task_arena::max_concurrency() function and - max_concurrency() method of class task_arena returning the maximal - number of threads that can work inside an arena. -- Deprecated tbb::task_arena::current_thread_index() static method; - use tbb::this_task_arena::current_thread_index() function instead. -- All examples for commercial version of library moved online: - https://software.intel.com/en-us/product-code-samples. 
Examples are - available as a standalone package or as a part of Intel(R) Parallel - Studio XE or Intel(R) System Studio Online Samples packages. - -Changes affecting backward compatibility: - -- Renamed following methods and types in async_node class: - Old New - async_gateway_type => gateway_type - async_gateway() => gateway() - async_try_put() => try_put() - async_reserve() => reserve_wait() - async_commit() => release_wait() -- Internal layout of some flow graph nodes has changed; recompilation - is recommended for all binaries that use the flow graph. - -Preview Features: - -- Added template class streaming_node to the flow graph API. It allows - a flow graph to offload computations to other devices through - streaming or offloading APIs. -- Template class opencl_node reimplemented as a specialization of - streaming_node that works with OpenCL*. -- Added tbb::this_task_arena::isolate() function to isolate execution - of a group of tasks or an algorithm from other tasks submitted - to the scheduler. - -Bugs fixed: - -- Added a workaround for GCC bug #62258 in std::rethrow_exception() - to prevent possible problems in case of exception propagation. -- Fixed parallel_scan to provide correct result if the initial value - of an accumulator is not the operation identity value. -- Fixed a memory corruption in the memory allocator when it meets - internal limits. -- Fixed the memory allocator on 64-bit platforms to align memory - to 16 bytes by default for all allocations bigger than 8 bytes. -- As a workaround for crashes in the Intel TBB library compiled with - GCC 6, added -flifetime-dse=1 to compilation options on Linux* OS. -- Fixed a race in the flow graph implementation. - -Open-source contributions integrated: - -- Enabling use of C++11 'override' keyword by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.4 Update 6 -TBB_INTERFACE_VERSION == 9006 - -Changes (w.r.t. Intel TBB 4.4 Update 5): - -- For 64-bit platforms, quadrupled the worst-case limit on the amount - of memory the Intel TBB allocator can handle. - -Bugs fixed: - -- Fixed a memory corruption in the memory allocator when it meets - internal limits. -- Fixed the memory allocator on 64-bit platforms to align memory - to 16 bytes by default for all allocations bigger than 8 bytes. -- Fixed parallel_scan to provide correct result if the initial value - of an accumulator is not the operation identity value. -- As a workaround for crashes in the Intel TBB library compiled with - GCC 6, added -flifetime-dse=1 to compilation options on Linux* OS. - ------------------------------------------------------------------------- -Intel TBB 4.4 Update 5 -TBB_INTERFACE_VERSION == 9005 - -Changes (w.r.t. Intel TBB 4.4 Update 4): - -- Modified graph/fgbzip2 example to remove unnecessary data queuing. - -Preview Features: - -- Added a Python* module which is able to replace Python's thread pool - class with the implementation based on Intel TBB task scheduler. - -Bugs fixed: - -- Fixed the implementation of 64-bit tbb::atomic for IA-32 architecture - to work correctly with GCC 5.2 in C++11/14 mode. -- Fixed a possible crash when tasks with affinity (e.g. specified via - affinity_partitioner) are used simultaneously with task priority - changes. - ------------------------------------------------------------------------- -Intel TBB 4.4 Update 4 -TBB_INTERFACE_VERSION == 9004 - -Changes (w.r.t. 
Intel TBB 4.4 Update 3): - -- Removed a few cases of excessive user data copying in the flow graph. -- Improved robustness of concurrent_bounded_queue::abort() in case of - simultaneous push and pop operations. - -Preview Features: - -- Added tbb::flow::async_msg, a special message type to support - communications between the flow graph and external asynchronous - activities. -- async_node modified to support use with C++03 compilers. - -Bugs fixed: - -- Fixed a bug in dynamic memory allocation replacement for Windows* OS. -- Fixed excessive memory consumption on Linux* OS caused by enabling - zero-copy realloc. -- Fixed performance regression on Intel(R) Xeon Phi(tm) coprocessor with - auto_partitioner. - ------------------------------------------------------------------------- -Intel TBB 4.4 Update 3 -TBB_INTERFACE_VERSION == 9003 - -Changes (w.r.t. Intel TBB 4.4 Update 2): - -- Modified parallel_sort to not require a default constructor for values - and to use iter_swap() for value swapping. -- Added support for creating or initializing a task_arena instance that - is connected to the arena currently used by the thread. -- graph/binpack example modified to use multifunction_node. -- For performance analysis, use Intel(R) VTune(TM) Amplifier XE 2015 - and higher; older versions are no longer supported. -- Improved support for compilation with disabled RTTI, by omitting its use - in auxiliary code, such as assertions. However some functionality, - particularly the flow graph, does not work if RTTI is disabled. -- The tachyon example for Android* can be built using Android Studio 1.5 - and higher with experimental Gradle plugin 0.4.0. - -Preview Features: - -- Added class opencl_subbufer that allows using OpenCL* sub-buffer - objects with opencl_node. -- Class global_control supports the value of 1 for - max_allowed_parallelism. - -Bugs fixed: - -- Fixed a race causing "TBB Warning: setaffinity syscall failed" message. -- Fixed a compilation issue on OS X* with Intel(R) C++ Compiler 15.0. -- Fixed a bug in queuing_rw_mutex::downgrade() that could temporarily - block new readers. -- Fixed speculative_spin_rw_mutex to stop using the lazy subscription - technique due to its known flaws. -- Fixed memory leaks in the tool support code. - ------------------------------------------------------------------------- -Intel TBB 4.4 Update 2 -TBB_INTERFACE_VERSION == 9002 - -Changes (w.r.t. Intel TBB 4.4 Update 1): - -- Improved interoperability with Intel(R) OpenMP RTL (libiomp) on Linux: - OpenMP affinity settings do not affect the default number of threads - used in the task scheduler. Intel(R) C++ Compiler 16.0 Update 1 - or later is required. -- Added a new flow graph example with different implementations of the - Cholesky Factorization algorithm. - -Preview Features: - -- Added template class opencl_node to the flow graph API. It allows a - flow graph to offload computations to OpenCL* devices. -- Extended join_node to use type-specified message keys. It simplifies - the API of the node by obtaining message keys via functions - associated with the message type (instead of node ports). -- Added static_partitioner that minimizes overhead of parallel_for and - parallel_reduce for well-balanced workloads. -- Improved template class async_node in the flow graph API to support - user settable concurrency limits. - -Bugs fixed: - -- Fixed a possible crash in the GUI layer for library examples on Linux. 
- ------------------------------------------------------------------------- -Intel TBB 4.4 Update 1 -TBB_INTERFACE_VERSION == 9001 - -Changes (w.r.t. Intel TBB 4.4): - -- Added support for Microsoft* Visual Studio* 2015. -- Intel TBB no longer performs dynamic replacement of memory allocation - functions for Microsoft Visual Studio 2005 and earlier versions. -- For GCC 4.7 and higher, the intrinsics-based platform isolation layer - uses __atomic_* built-ins instead of the legacy __sync_* ones. - This change is inspired by a contribution from Mathieu Malaterre. -- Improvements in task_arena: - Several application threads may join a task_arena and execute tasks - simultaneously. The amount of concurrency reserved for application - threads at task_arena construction can be set to any value between - 0 and the arena concurrency limit. -- The fractal example was modified to demonstrate class task_arena - and moved to examples/task_arena/fractal. - -Bugs fixed: - -- Fixed a deadlock during destruction of task_scheduler_init objects - when one of destructors is set to wait for worker threads. -- Added a workaround for a possible crash on OS X* when dynamic memory - allocator replacement (libtbbmalloc_proxy) is used and memory is - released during application startup. -- Usage of mutable functors with task_group::run_and_wait() and - task_arena::enqueue() is disabled. An attempt to pass a functor - which operator()() is not const will produce compilation errors. -- Makefiles and environment scripts now properly recognize GCC 5.0 and - higher. - -Open-source contributions integrated: - -- Improved performance of parallel_for_each for inputs allowing random - access, by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.4 -TBB_INTERFACE_VERSION == 9000 - -Changes (w.r.t. Intel TBB 4.3 Update 6): - -- The following features are now fully supported: - tbb::flow::composite_node; - additional policies of tbb::flow::graph_node::reset(). -- Platform abstraction layer for Windows* OS updated to use compiler - intrinsics for most atomic operations. -- The tbb/compat/thread header updated to automatically include - C++11 where available. -- Fixes and refactoring in the task scheduler and class task_arena. -- Added key_matching policy to tbb::flow::join_node, which removes - the restriction on the type that can be compared-against. -- For tag_matching join_node, tag_value is redefined to be 64 bits - wide on all architectures. -- Expanded the documentation for the flow graph with details about - node semantics and behavior. -- Added dynamic replacement of C11 standard function aligned_alloc() - under Linux* OS. -- Added C++11 move constructors and assignment operators to - tbb::enumerable_thread_specific container. -- Added hashing support for tbb::tbb_thread::id. -- On OS X*, binaries that depend on libstdc++ are not provided anymore. - In the makefiles, libc++ is now used by default; for building with - libstdc++, specify stdlib=libstdc++ in the make command line. - -Preview Features: - -- Added a new example, graph/fgbzip2, that shows usage of - tbb::flow::async_node. -- Modification to the low-level API for memory pools: - added a function for finding a memory pool by an object allocated - from that pool. -- tbb::memory_pool now does not request memory till the first allocation - from the pool. 
- -Changes affecting backward compatibility: - -- Internal layout of flow graph nodes has changed; recompilation is - recommended for all binaries that use the flow graph. -- Resetting a tbb::flow::source_node will immediately activate it, - unless it was created in inactive state. - -Bugs fixed: - -- Failure at creation of a memory pool will not cause process - termination anymore. - -Open-source contributions integrated: - -- Supported building TBB with Clang on AArch64 with use of built-in - intrinsics by David A. - ------------------------------------------------------------------------- -Intel TBB 4.3 Update 6 -TBB_INTERFACE_VERSION == 8006 - -Changes (w.r.t. Intel TBB 4.3 Update 5): - -- Supported zero-copy realloc for objects >1MB under Linux* via - mremap system call. -- C++11 move-aware insert and emplace methods have been added to - concurrent_hash_map container. -- install_name is set to @rpath/ on OS X*. - -Preview Features: - -- Added template class async_node to the flow graph API. It allows a - flow graph to communicate with an external activity managed by - the user or another runtime. -- Improved speed of flow::graph::reset() clearing graph edges. - rf_extract flag has been renamed rf_clear_edges. -- extract() method of graph nodes now takes no arguments. - -Bugs fixed: - -- concurrent_unordered_{set,map} behaves correctly for degenerate - hashes. -- Fixed a race condition in the memory allocator that may lead to - excessive memory consumption under high multithreading load. - ------------------------------------------------------------------------- -Intel TBB 4.3 Update 5 -TBB_INTERFACE_VERSION == 8005 - -Changes (w.r.t. Intel TBB 4.3 Update 4): - -- Added add_ref_count() method of class tbb::task. - -Preview Features: - -- Added class global_control for application-wide control of allowed - parallelism and thread stack size. -- memory_pool_allocator now throws the std::bad_alloc exception on - allocation failure. -- Exceptions thrown for by memory pool constructors changed from - std::bad_alloc to std::invalid_argument and std::runtime_error. - -Bugs fixed: - -- scalable_allocator now throws the std::bad_alloc exception on - allocation failure. -- Fixed a race condition in the memory allocator that may lead to - excessive memory consumption under high multithreading load. -- A new scheduler created right after destruction of the previous one - might be unable to modify the number of worker threads. - -Open-source contributions integrated: - -- (Added but not enabled) push_front() method of class tbb::task_list - by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.3 Update 4 -TBB_INTERFACE_VERSION == 8004 - -Changes (w.r.t. Intel TBB 4.3 Update 3): - -- Added a C++11 variadic constructor for enumerable_thread_specific. - The arguments from this constructor are used to construct - thread-local values. -- Improved exception safety for enumerable_thread_specific. -- Added documentation for tbb::flow::tagged_msg class and - tbb::flow::output_port function. -- Fixed build errors for systems that do not support dynamic linking. -- C++11 move-aware insert and emplace methods have been added to - concurrent unordered containers. - -Preview Features: - -- Interface-breaking change: typedefs changed for node predecessor and - successor lists, affecting copy_predecessors and copy_successors - methods. -- Added template class composite_node to the flow graph API. 
It packages - a subgraph to represent it as a first-class flow graph node. -- make_edge and remove_edge now accept multiport nodes as arguments, - automatically using the node port with index 0 for an edge. - -Open-source contributions integrated: - -- Draft code for enumerable_thread_specific constructor with multiple - arguments (see above) by Adrien Guinet. -- Fix for GCC invocation on IBM* Blue Gene* - by Jeff Hammond and Raf Schietekat. -- Extended testing with smart pointers for Clang & libc++ - by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.3 Update 3 -TBB_INTERFACE_VERSION == 8003 - -Changes (w.r.t. Intel TBB 4.3 Update 2): - -- Move constructor and assignment operator were added to unique_lock. - -Preview Features: - -- Time overhead for memory pool destruction was reduced. - -Open-source contributions integrated: - -- Build error fix for iOS* by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.3 Update 2 -TBB_INTERFACE_VERSION == 8002 - -Changes (w.r.t. Intel TBB 4.3 Update 1): - -- Binary files for 64-bit Android* applications were added as part of the - Linux* OS package. -- Exact exception propagation is enabled for Intel C++ Compiler on OS X*. -- concurrent_vector::shrink_to_fit was optimized for types that support - C++11 move semantics. - -Bugs fixed: - -- Fixed concurrent unordered containers to insert elements much faster - in debug mode. -- Fixed concurrent priority queue to support types that do not have - copy constructors. -- Fixed enumerable_thread_specific to forbid copying from an instance - with a different value type. - -Open-source contributions integrated: - -- Support for PathScale* EKOPath* Compiler by Erik Lindahl. - ------------------------------------------------------------------------- -Intel TBB 4.3 Update 1 -TBB_INTERFACE_VERSION == 8001 - -Changes (w.r.t. Intel TBB 4.3): - -- The ability to split blocked_ranges in a proportion, used by - affinity_partitioner since version 4.2 Update 4, became a formal - extension of the Range concept. -- More checks for an incorrect address to release added to the debug - version of the memory allocator. -- Different kind of solutions for each TBB example were merged. - -Preview Features: - -- Task priorities are re-enabled in preview binaries. - -Bugs fixed: - -- Fixed a duplicate symbol when TBB_PREVIEW_VARIADIC_PARALLEL_INVOKE is - used in multiple compilation units. -- Fixed a crash in __itt_fini_ittlib seen on Ubuntu 14.04. -- Fixed a crash in memory release after dynamic replacement of the - OS X* memory allocator. -- Fixed incorrect indexing of arrays in seismic example. -- Fixed a data race in lazy initialization of task_arena. - -Open-source contributions integrated: - -- Fix for dumping information about gcc and clang compiler versions - by Misty De Meo. - ------------------------------------------------------------------------- -Intel TBB 4.3 -TBB_INTERFACE_VERSION == 8000 - -Changes (w.r.t. Intel TBB 4.2 Update 5): - -- The following features are now fully supported: flow::indexer_node, - task_arena, speculative_spin_rw_mutex. -- Compatibility with C++11 standard improved for tbb/compat/thread - and tbb::mutex. -- C++11 move constructors have been added to concurrent_queue and - concurrent_bounded_queue. 
-- C++11 move constructors and assignment operators have been added to - concurrent_vector, concurrent_hash_map, concurrent_priority_queue, - concurrent_unordered_{set,multiset,map,multimap}. -- C++11 move-aware emplace/push/pop methods have been added to - concurrent_vector, concurrent_queue, concurrent_bounded_queue, - concurrent_priority_queue. -- Methods to insert a C++11 initializer list have been added: - concurrent_vector::grow_by(), concurrent_hash_map::insert(), - concurrent_unordered_{set,multiset,map,multimap}::insert(). -- Testing for compatibility of containers with some C++11 standard - library types has been added. -- Dynamic replacement of standard memory allocation routines has been - added for OS X*. -- Microsoft* Visual Studio* projects for Intel TBB examples updated - to VS 2010. -- For open-source packages, debugging information (line numbers) in - precompiled binaries now matches the source code. -- Debug information was added to release builds for OS X*, Solaris*, - FreeBSD* operating systems and MinGW*. -- Various improvements in documentation, debug diagnostics and examples. - -Preview Features: - -- Additional actions on reset of graphs, and extraction of individual - nodes from a graph (TBB_PREVIEW_FLOW_GRAPH_FEATURES). -- Support for an arbitrary number of arguments in parallel_invoke - (TBB_PREVIEW_VARIADIC_PARALLEL_INVOKE). - -Changes affecting backward compatibility: - -- For compatibility with C++11 standard, copy and move constructors and - assignment operators are disabled for all mutex classes. To allow - the old behavior, use TBB_DEPRECATED_MUTEX_COPYING macro. -- flow::sequencer_node rejects messages with repeating sequence numbers. -- Changed internal interface between tbbmalloc and tbbmalloc_proxy. -- Following deprecated functionality has been removed: - old debugging macros TBB_DO_ASSERT & TBB_DO_THREADING_TOOLS; - no-op depth-related methods in class task; - tbb::deprecated::concurrent_queue; - deprecated variants of concurrent_vector methods. -- register_successor() and remove_successor() are deprecated as methods - to add and remove edges in flow::graph; use make_edge() and - remove_edge() instead. - -Bugs fixed: - -- Fixed incorrect scalable_msize() implementation for aligned objects. -- Flow graph buffering nodes now destroy their copy of forwarded items. -- Multiple fixes in task_arena implementation, including for: - inconsistent task scheduler state inside executed functions; - incorrect floating-point settings and exception propagation; - possible stalls in concurrent invocations of execute(). -- Fixed floating-point settings propagation when the same instance of - task_group_context is used in different arenas. -- Fixed compilation error in pipeline.h with Intel Compiler on OS X*. -- Added missed headers for individual components to tbb.h. - -Open-source contributions integrated: - -- Range interface addition to parallel_do, parallel_for_each and - parallel_sort by Stephan Dollberg. -- Variadic template implementation of parallel_invoke - by Kizza George Mbidde (see Preview Features). -- Improvement in Seismic example for MacBook Pro* with Retina* display - by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.2 Update 5 -TBB_INTERFACE_VERSION == 7005 - -Changes (w.r.t. Intel TBB 4.2 Update 4): - -- The second template argument of class aligned_space now is set - to 1 by default. 
- -Preview Features: - -- Better support for exception safety, task priorities and floating - point settings in class task_arena. -- task_arena::current_slot() has been renamed to - task_arena::current_thread_index(). - -Bugs fixed: - -- Task priority change possibly ignored by a worker thread entering - a nested parallel construct. -- Memory leaks inside the task scheduler when running on - Intel(R) Xeon Phi(tm) coprocessor. - -Open-source contributions integrated: - -- Improved detection of X Window support for Intel TBB examples - and other feedback by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.2 Update 4 -TBB_INTERFACE_VERSION == 7004 - -Changes (w.r.t. Intel TBB 4.2 Update 3): - -- Added possibility to specify floating-point settings at invocation - of most parallel algorithms (including flow::graph) via - task_group_context. -- Added dynamic replacement of malloc_usable_size() under - Linux*/Android* and dlmalloc_usable_size() under Android*. -- Added new methods to concurrent_vector: - grow_by() that appends a sequence between two given iterators; - grow_to_at_least() that initializes new elements with a given value. -- Improved affinity_partitioner for better performance on balanced - workloads. -- Improvements in the task scheduler, including better scalability - when threads search for a task arena, and better diagnostics. -- Improved allocation performance for workloads that do intensive - allocation/releasing of same-size objects larger than ~8KB from - multiple threads. -- Exception support is enabled by default for 32-bit MinGW compilers. -- The tachyon example for Android* can be built for all targets - supported by the installed NDK. -- Added Windows Store* version of the tachyon example. -- GettingStarted/sub_string_finder example ported to offload execution - on Windows* for Intel(R) Many Integrated Core Architecture. - -Preview Features: - -- Removed task_scheduler_observer::on_scheduler_leaving() callback. -- Added task_scheduler_observer::may_sleep() callback. -- The CPF or_node has been renamed indexer_node. The input to - indexer_node is now a list of types. The output of indexer_node is - a tagged_msg type composed of a tag and a value. For indexer_node, - the tag is a size_t. - -Bugs fixed: - -- Fixed data races in preview extensions of task_scheduler_observer. -- Added noexcept(false) for destructor of task_group_base to avoid - crash on cancellation of structured task group in C++11. - -Open-source contributions integrated: - -- Improved concurrency detection for BG/Q, and other improvements - by Raf Schietekat. -- Fix for crashes in enumerable_thread_specific in case if a contained - object is too big to be constructed on the stack by Adrien Guinet. - ------------------------------------------------------------------------- -Intel TBB 4.2 Update 3 -TBB_INTERFACE_VERSION == 7003 - -Changes (w.r.t. Intel TBB 4.2 Update 2): - -- Added support for Microsoft* Visual Studio* 2013. -- Improved Microsoft* PPL-compatible form of parallel_for for better - support of auto-vectorization. -- Added a new example for cancellation and reset in the flow graph: - Kohonen self-organizing map (examples/graph/som). -- Various improvements in source code, tests, and makefiles. - -Bugs fixed: - -- Added dynamic replacement of _aligned_msize() previously missed. -- Fixed task_group::run_and_wait() to throw invalid_multiple_scheduling - exception if the specified task handle is already scheduled. 
- -Open-source contributions integrated: - -- A fix for ARM* processors by Steve Capper. -- Improvements in std::swap calls by Robert Maynard. - ------------------------------------------------------------------------- -Intel TBB 4.2 Update 2 -TBB_INTERFACE_VERSION == 7002 - -Changes (w.r.t. Intel TBB 4.2 Update 1): - -- Enable C++11 features for Microsoft* Visual Studio* 2013 Preview. -- Added a test for compatibility of TBB containers with C++11 - range-based for loop. - -Changes affecting backward compatibility: - -- Internal layout changed for class tbb::flow::limiter_node. - -Preview Features: - -- Added speculative_spin_rw_mutex, a read-write lock class which uses - Intel(R) Transactional Synchronization Extensions. - -Bugs fixed: - -- When building for Intel(R) Xeon Phi(tm) coprocessor, TBB programs - no longer require explicit linking with librt and libpthread. - -Open-source contributions integrated: - -- Fixes for ARM* processors by Steve Capper, Leif Lindholm - and Steven Noonan. -- Support for Clang on Linux by Raf Schietekat. -- Typo correction in scheduler.cpp by Julien Schueller. - ------------------------------------------------------------------------- -Intel TBB 4.2 Update 1 -TBB_INTERFACE_VERSION == 7001 - -Changes (w.r.t. Intel TBB 4.2): - -- Added project files for Microsoft* Visual Studio* 2010. -- Initial support of Microsoft* Visual Studio* 2013 Preview. -- Enable C++11 features available in Intel(R) C++ Compiler 14.0. -- scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, ) can be - used to urge releasing memory from tbbmalloc internal buffers when - the given limit is exceeded. - -Preview Features: - -- Class task_arena no longer requires linking with a preview library, - though still remains a community preview feature. -- The method task_arena::wait_until_empty() is removed. -- The method task_arena::current_slot() now returns -1 if - the task scheduler is not initialized in the thread. - -Changes affecting backward compatibility: - -- Because of changes in internal layout of graph nodes, the namespace - interface number of flow::graph has been incremented from 6 to 7. - -Bugs fixed: - -- Fixed a race in lazy initialization of task_arena. -- Fixed flow::graph::reset() to prevent situations where tasks would be - spawned in the process of resetting the graph to its initial state. -- Fixed decrement bug in limiter_node. -- Fixed a race in arc deletion in the flow graph. - -Open-source contributions integrated: - -- Improved support for IBM* Blue Gene* by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.2 -TBB_INTERFACE_VERSION == 7000 - -Changes (w.r.t. Intel TBB 4.1 Update 4): - -- Added speculative_spin_mutex, which uses Intel(R) Transactional - Synchronization Extensions when they are supported by hardware. -- Binary files linked with libc++ (the C++ standard library in Clang) - were added on OS X*. -- For OS X* exact exception propagation is supported with Clang; - it requires use of libc++ and corresponding Intel TBB binaries. -- Support for C++11 initializer lists in constructor and assigment - has been added to concurrent_hash_map, concurrent_unordered_set, - concurrent_unordered_multiset, concurrent_unordered_map, - concurrent_unordered_multimap. -- The memory allocator may now clean its per-thread memory caches - when it cannot get more memory. -- Added the scalable_allocation_command() function for on-demand - cleaning of internal memory caches. 
-- Reduced the time overhead for freeing memory objects smaller than ~8K. -- Simplified linking with the debug library for applications that use - Intel TBB in code offloaded to Intel(R) Xeon Phi(tm) coprocessors. - See an example in - examples/GettingStarted/sub_string_finder/Makefile. -- Various improvements in source code, scripts and makefiles. - -Changes affecting backward compatibility: - -- tbb::flow::graph has been modified to spawn its tasks; - the old behaviour (task enqueuing) is deprecated. This change may - impact applications that expected a flow graph to make progress - without calling wait_for_all(), which is no longer guaranteed. See - the documentation for more details. -- Changed the return values of the scalable_allocation_mode() function. - -Bugs fixed: - -- Fixed a leak of parallel_reduce body objects when execution is - cancelled or an exception is thrown, as suggested by Darcy Harrison. -- Fixed a race in the task scheduler which can lower the effective - priority despite the existence of higher priority tasks. -- On Linux an error during destruction of the internal thread local - storage no longer results in an exception. - -Open-source contributions integrated: - -- Fixed task_group_context state propagation to unrelated context trees - by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.1 Update 4 -TBB_INTERFACE_VERSION == 6105 - -Changes (w.r.t. Intel TBB 4.1 Update 3): - -- Use /volatile:iso option with VS 2012 to disable extended - semantics for volatile variables. -- Various improvements in affinity_partitioner, scheduler, - tests, examples, makefiles. -- Concurrent_priority_queue class now supports initialization/assignment - via C++11 initializer list feature (std::initializer_list). - -Bugs fixed: - -- Fixed more possible stalls in concurrent invocations of - task_arena::execute(), especially waiting for enqueued tasks. -- Fixed requested number of workers for task_arena(P,0). -- Fixed interoperability with Intel(R) VTune(TM) Amplifier XE in - case of using task_arena::enqueue() from a terminating thread. - -Open-source contributions integrated: - -- Type fixes, cleanups, and code beautification by Raf Schietekat. -- Improvements in atomic operations for big endian platforms - by Raf Schietekat. - ------------------------------------------------------------------------- -Intel TBB 4.1 Update 3 -TBB_INTERFACE_VERSION == 6103 - -Changes (w.r.t. Intel TBB 4.1 Update 2): - -- Binary files for Android* applications were added to the Linux* OS - package. -- Binary files for Windows Store* applications were added to the - Windows* OS package. -- Exact exception propagation (exception_ptr) support on Linux OS is - now turned on by default for GCC 4.4 and higher. -- Stopped implicit use of large memory pages by tbbmalloc (Linux-only). - Now use of large pages must be explicitly enabled with - scalable_allocation_mode() function or TBB_MALLOC_USE_HUGE_PAGES - environment variable. - -Community Preview Features: - -- Extended class task_arena constructor and method initialize() to - allow some concurrency to be reserved strictly for application - threads. -- New methods terminate() and is_active() were added to class - task_arena. - -Bugs fixed: - -- Fixed initialization of hashing helper constant in the hash - containers. -- Fixed possible stalls in concurrent invocations of - task_arena::execute() when no worker thread is available to make - progress. 
-- Fixed incorrect calculation of hardware concurrency in the presence - of inactive processor groups, particularly on systems running - Windows* 8 and Windows* Server 2012. - -Open-source contributions integrated: - -- The fix for the GUI examples on OS X* systems by Raf Schietekat. -- Moved some power-of-2 calculations to functions to improve readability - by Raf Schietekat. -- C++11/Clang support improvements by arcata. -- ARM* platform isolation layer by Steve Capper, Leif Lindholm, Leo Lara - (ARM). - ------------------------------------------------------------------------- -Intel TBB 4.1 Update 2 -TBB_INTERFACE_VERSION == 6102 - -Changes (w.r.t. Intel TBB 4.1 Update 1): - -- Objects up to 128 MB are now cached by the tbbmalloc. Previously - the threshold was 8MB. Objects larger than 128 MB are still - processed by direct OS calls. -- concurrent_unordered_multiset and concurrent_unordered_multimap - have been added, based on Microsoft* PPL prototype. -- Ability to value-initialize a tbb::atomic variable on construction - in C++11, with const expressions properly supported. - -Community Preview Features: - -- Added a possibility to wait until all worker threads terminate. - This is necessary before calling fork() from an application. - -Bugs fixed: - -- Fixed data race in tbbmalloc that might lead to memory leaks - for large object allocations. -- Fixed task_arena::enqueue() to use task_group_context of target arena. -- Improved implementation of 64 bit atomics on ia32. - ------------------------------------------------------------------------- -Intel TBB 4.1 Update 1 -TBB_INTERFACE_VERSION == 6101 - -Changes (w.r.t. Intel TBB 4.1): - -- concurrent_vector class now supports initialization/assignment - via C++11 initializer list feature (std::initializer_list) -- Added implementation of the platform isolation layer based on - Intel compiler atomic built-ins; it is supposed to work on - any platform supported by compiler version 12.1 and newer. -- Using GetNativeSystemInfo() instead of GetSystemInfo() to support - more than 32 processors for 32-bit applications under WOW64. -- The following form of parallel_for: - parallel_for(first, last, [step,] f[, context]) now accepts an - optional partitioner parameter after the function f. - -Backward-incompatible API changes: - -- The library no longer injects tuple in to namespace std. - In previous releases, tuple was injected into namespace std by - flow_graph.h when std::tuple was not available. In this release, - flow_graph.h now uses tbb::flow::tuple. On platforms where - std::tuple is available, tbb::flow::tuple is typedef'ed to - std::tuple. On all other platforms, tbb::flow::tuple provides - a subset of the functionality defined by std::tuple. Users of - flow_graph.h may need to change their uses of std::tuple to - tbb::flow::tuple to ensure compatibility with non-C++11 compliant - compilers. - -Bugs fixed: - -- Fixed local observer to be able to override propagated CPU state and - to provide correct value of task_arena::current_slot() in callbacks. - ------------------------------------------------------------------------- -Intel TBB 4.1 -TBB_INTERFACE_VERSION == 6100 - -Changes (w.r.t. Intel TBB 4.0 Update 5): - -- _WIN32_WINNT must be set to 0x0501 or greater in order to use TBB - on Microsoft* Windows*. -- parallel_deterministic_reduce template function is fully supported. -- TBB headers can be used with C++0x/C++11 mode (-std=c++0x) of GCC - and Intel(R) Compiler. 
-- C++11 std::make_exception_ptr is used where available, instead of - std::copy_exception from earlier C++0x implementations. -- Improvements in the TBB allocator to reduce extra memory consumption. -- Partial refactoring of the task scheduler data structures. -- TBB examples allow more flexible specification of the thread number, - including arithmetic and geometric progression. - -Bugs fixed: - -- On Linux & OS X*, pre-built TBB binaries do not yet support exact - exception propagation via C++11 exception_ptr. To prevent run time - errors, by default TBB headers disable exact exception propagation - even if the C++ implementation provides exception_ptr. - -Community Preview Features: - -- Added: class task_arena, for work submission by multiple application - threads with thread-independent control of concurrency level. -- Added: task_scheduler_observer can be created as local to a master - thread, to observe threads that work on behalf of that master. - Local observers may have new on_scheduler_leaving() callback. - ------------------------------------------------------------------------- -Intel TBB 4.0 Update 5 -TBB_INTERFACE_VERSION == 6005 - -Changes (w.r.t. Intel TBB 4.0 Update 4): - -- Parallel pipeline optimization (directly storing small objects in the - interstage data buffers) limited to trivially-copyable types for - C++11 and a short list of types for earlier compilers. -- _VARIADIC_MAX switch is honored for TBB tuple implementation - and flow::graph nodes based on tuple. -- Support of Cocoa framework was added to the GUI examples on OS X* - systems. - -Bugs fixed: - -- Fixed a tv_nsec overflow bug in condition_variable::wait_for. -- Fixed execution order of enqueued tasks with different priorities. -- Fixed a bug with task priority changes causing lack of progress - for fire-and-forget tasks when TBB was initialized to use 1 thread. -- Fixed duplicate symbol problem when linking multiple compilation - units that include flow_graph.h on VC 10. - ------------------------------------------------------------------------- -Intel TBB 4.0 Update 4 -TBB_INTERFACE_VERSION == 6004 - -Changes (w.r.t. Intel TBB 4.0 Update 3): - -- The TBB memory allocator transparently supports large pages on Linux. -- A new flow_graph example, logic_sim, was added. -- Support for DirectX* 9 was added to GUI examples. - -Community Preview Features: - -- Added: aggregator, a new concurrency control mechanism. - -Bugs fixed: - -- The abort operation on concurrent_bounded_queue now leaves the queue - in a reusable state. If a bad_alloc or bad_last_alloc exception is - thrown while the queue is recovering from an abort, that exception - will be reported instead of user_abort on the thread on which it - occurred, and the queue will not be reusable. -- Steal limiting heuristic fixed to avoid premature stealing disabling - when large amount of __thread data is allocated on thread stack. -- Fixed a low-probability leak of arenas in the task scheduler. -- In STL-compatible allocator classes, the method construct() was fixed - to comply with C++11 requirements. -- Fixed a bug that prevented creation of fixed-size memory pools - smaller than 2M. -- Significantly reduced the amount of warnings from various compilers. - -Open-source contributions integrated: - -- Multiple improvements by Raf Schietekat. -- Basic support for Clang on OS X* by Blas Rodriguez Somoza. -- Fixes for warnings and corner-case bugs by Blas Rodriguez Somoza - and Edward Lam. 
-
------------------------------------------------------------------------
-Intel TBB 4.0 Update 3
-TBB_INTERFACE_VERSION == 6003
-
-Changes (w.r.t. Intel TBB 4.0 Update 2):
-
-- Modifications to the low-level API for memory pools:
- added support for aligned allocations;
- pool policies reworked to allow backward-compatible extensions;
- added a policy to not return memory space till destruction;
- pool_reset() does not return memory space anymore.
-- Class tbb::flow::graph_iterator added to iterate over all nodes
- registered with a graph instance.
-- multioutput_function_node has been renamed multifunction_node.
- multifunction_node and split_node are now fully-supported features.
-- For the tagged join node, the policy for try_put of an item with
- an already existing tag has been defined: the item will be rejected.
-- Matching the behavior on Windows, on other platforms the optional
- shared libraries (libtbbmalloc, libirml) are now also searched
- only in the directory where libtbb is located.
-- The platform isolation layer based on GCC built-ins is extended.
-
-Backward-incompatible API changes:
-
-- A graph reference parameter is now required to be passed to the
- constructors of the following flow graph nodes: overwrite_node,
- write_once_node, broadcast_node, and the CPF or_node.
-- The following tbb::flow node methods and typedefs have been renamed:
- Old -> New
- join_node and or_node:
- inputs() -> input_ports()
- input_ports_tuple_type -> input_ports_type
- multifunction_node and split_node:
- ports_type -> output_ports_type
-
-Bugs fixed:
-
-- Not all logical processors were utilized on systems with more than
- 64 cores split by Windows into several processor groups.
-
------------------------------------------------------------------------
-Intel TBB 4.0 Update 2 commercial-aligned release
-TBB_INTERFACE_VERSION == 6002
-
-Changes (w.r.t. Intel TBB 4.0 Update 1 commercial-aligned release):
-
-- concurrent_bounded_queue now has an abort() operation that releases
- threads involved in pending push or pop operations. The released
- threads will receive a tbb::user_abort exception.
-- Added Community Preview Feature: concurrent_lru_cache container,
- a concurrent implementation of an LRU (least-recently-used) cache.
-
-Bugs fixed:
-
-- Fixed a race condition in the TBB scalable allocator.
-- concurrent_queue counter wraparound bug was fixed, which occurred when
- the number of push and pop operations exceeded ~4 billion on IA32.
-- Fixed races in the TBB scheduler that could put workers asleep too
- early, especially in the presence of affinitized tasks.
-
------------------------------------------------------------------------
-Intel TBB 4.0 Update 1 commercial-aligned release
-TBB_INTERFACE_VERSION == 6000 (forgotten to increment)
-
-Changes (w.r.t. Intel TBB 4.0 commercial-aligned release):
-
-- Memory leaks fixed in the binpack example.
-- Improvements and fixes in the TBB allocator.
-
------------------------------------------------------------------------
-Intel TBB 4.0 commercial-aligned release
-TBB_INTERFACE_VERSION == 6000
-
-Changes (w.r.t. Intel TBB 3.0 Update 8 commercial-aligned release):
-
-- concurrent_priority_queue is now a fully supported feature.
- Capacity control methods were removed.
-- Flow graph is now a fully supported feature.
-- A new memory backend has been implemented in the TBB allocator.
- It can reuse freed memory for both small and large objects, and
- returns unused memory blocks to the OS more actively.
-- Improved partitioning algorithms for parallel_for and parallel_reduce
- to better handle load imbalance.
-- The convex_hull example has been refactored for reproducible
- performance results.
-- The major interface version has changed from 5 to 6.
- Deprecated interfaces might be removed in future releases.
-
-Community Preview Features:
-
-- Added: serial subset, i.e. sequential implementations of TBB generic
- algorithms (currently only provided for parallel_for).
-- Preview of new flow graph nodes:
- or_node (accepts multiple inputs, forwards each input separately
- to all successors),
- split_node (accepts tuples, and forwards each element of a tuple
- to a corresponding successor), and
- multioutput_function_node (accepts one input, and passes the input
- and a tuple of output ports to the function body to support outputs
- to multiple successors).
-- Added: memory pools for more control over the memory source, grouping,
- and collective deallocation.
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 8 commercial-aligned release
-TBB_INTERFACE_VERSION == 5008
-
-Changes (w.r.t. Intel TBB 3.0 Update 7 commercial-aligned release):
-
-- Task priorities became an official feature of TBB,
- not a community preview as before.
-- Atomics API extended, and implementation refactored.
-- Added task::set_parent() method.
-- Added concurrent_unordered_set container.
-
-Open-source contributions integrated:
-
-- PowerPC support by Raf Schietekat.
-- Fix of a potential task pool overrun and other improvements
- in the task scheduler by Raf Schietekat.
-- Fix in parallel_for_each to work with std::set in Visual* C++ 2010.
-
-Community Preview Features:
-
-- Graph community preview feature was renamed to flow graph.
- Multiple improvements in the implementation.
- Binpack example was added for the feature.
-- A number of improvements to concurrent_priority_queue.
- Shortpath example was added for the feature.
-- TBB runtime loaded functionality was added (Windows*-only).
- It allows specifying which versions of TBB should be used,
- as well as setting directories for the library search.
-- parallel_deterministic_reduce template function was added.
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 7 commercial-aligned release
-TBB_INTERFACE_VERSION == 5006 (forgotten to increment)
-
-Changes (w.r.t. Intel TBB 3.0 Update 6 commercial-aligned release):
-
-- Added implementation of the platform isolation layer based on
- GCC atomic built-ins; it is supposed to work on any platform
- where GCC has these built-ins.
-
-Community Preview Features:
-
-- Graph's dining_philosophers example added.
-- A number of improvements to graph and concurrent_priority_queue.
-
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 6 commercial-aligned release
-TBB_INTERFACE_VERSION == 5006
-
-Changes (w.r.t. Intel TBB 3.0 Update 5 commercial-aligned release):
-
-- Added Community Preview feature: task and task group priority, and
- the Fractal example demonstrating it.
-- parallel_pipeline optimized for data items of small and large sizes.
-- Graph's join_node is now parametrized with a tuple of up to 10 types.
-- Improved performance of concurrent_priority_queue.
-
-Open-source contributions integrated:
-
-- Initial NetBSD support by Aleksej Saushev.
-
-Bugs fixed:
-
-- Failure to enable interoperability with the Intel(R) Cilk(tm) Plus
- runtime library, and a crash caused by invoking the interoperability
- layer after one of the libraries was unloaded.
-- Data race that could result in concurrent_unordered_map structure
- corruption after a call to the clear() method.
-- Stack corruption caused by the PIC version of 64-bit CAS compiled by
- the Intel compiler on Linux.
-- Inconsistency of exception propagation mode possible when an
- application built with Microsoft* Visual Studio* 2008 or earlier uses
- TBB built with Microsoft* Visual Studio* 2010.
-- Affinitizing the master thread to a subset of available CPUs after the
- TBB scheduler was initialized tied all worker threads to the same CPUs.
-- Method is_stolen_task() always returned 'false' for affinitized tasks.
-- write_once_node and overwrite_node did not immediately send buffered
- items to successors.
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 5 commercial-aligned release
-TBB_INTERFACE_VERSION == 5005
-
-Changes (w.r.t. Intel TBB 3.0 Update 4 commercial-aligned release):
-
-- Added Community Preview feature: graph.
-- Added automatic propagation of master thread FPU settings to
- TBB worker threads.
-- Added a public function to perform a sequentially consistent full
- memory fence: tbb::atomic_fence() in tbb/atomic.h.
-
-Bugs fixed:
-
-- Data race that could result in scheduler data structures corruption
- when using fire-and-forget tasks.
-- Potential referencing of a destroyed concurrent_hash_map element after
- using the erase(accessor&A) method with A acquired as const_accessor.
-- Fixed a correctness bug in the convex hull example.
-
-Open-source contributions integrated:
-
-- Patch for calls to internal::atomic_do_once() by Andrey Semashev.
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 4 commercial-aligned release
-TBB_INTERFACE_VERSION == 5004
-
-Changes (w.r.t. Intel TBB 3.0 Update 3 commercial-aligned release):
-
-- Added Community Preview feature: concurrent_priority_queue.
-- Fixed library loading to avoid the possibility of remote code
- execution,
- see http://www.microsoft.com/technet/security/advisory/2269637.mspx.
-- Added support of more than 64 cores for appropriate Microsoft*
- Windows* versions. For more details, see
- http://msdn.microsoft.com/en-us/library/dd405503.aspx.
-- Default number of worker threads is adjusted in accordance with
- the process affinity mask.
-
-Bugs fixed:
-
-- Calls of scalable_* functions from inside the allocator library
- caused issues if the functions were overridden by another module.
-- A crash occurred if methods run() and wait() were called concurrently
- for an empty tbb::task_group (1736).
-- The tachyon example exhibited build problems associated with
- bug 554339 on Microsoft* Visual Studio* 2010. Project files were
- modified as a partial workaround to overcome the problem. See
- http://connect.microsoft.com/VisualStudio/feedback/details/554339.
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 3 commercial-aligned release
-TBB_INTERFACE_VERSION == 5003
-
-Changes (w.r.t. Intel TBB 3.0 Update 2 commercial-aligned release):
-
-- cache_aligned_allocator class reworked to use scalable_aligned_malloc.
-- Improved performance of the count() and equal_range() methods
- in concurrent_unordered_map.
-- Improved implementation of 64-bit atomic loads and stores on 32-bit
- platforms, including compilation with VC 7.1.
-- Added implementation of atomic operations on top of the OSAtomic API
- provided by OS X*.
-- Removed gratuitous try/catch blocks surrounding thread function calls
- in tbb_thread.
-- Xcode* projects were added for the sudoku and game_of_life examples.
-- Xcode* projects were updated to work without the TBB framework.
-
-Bugs fixed:
-
-- Fixed a data race in task scheduler destruction that on rare occasions
- could result in memory corruption.
-- Fixed idle spinning in thread-bound filters in tbb::pipeline (1670).
-
-Open-source contributions integrated:
-
-- MinGW-64 basic support by brsomoza (partially).
-- Patch for atomic.h by Andrey Semashev.
-- Support for AIX & GCC on PowerPC by Giannis Papadopoulos.
-- Various improvements by Raf Schietekat.
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 2 commercial-aligned release
-TBB_INTERFACE_VERSION == 5002
-
-Changes (w.r.t. Intel TBB 3.0 Update 1 commercial-aligned release):
-
-- Destructor of the tbb::task_group class throws a missing_wait
- exception if there are tasks running when it is invoked.
-- Interoperability layer with the Intel Cilk Plus runtime library added
- to protect TBB TLS in case of nested usage with Intel Cilk Plus.
-- Compilation fix for dependent template names in concurrent_queue.
-- Memory allocator code refactored to ease development and maintenance.
-
-Bugs fixed:
-
-- Improved interoperability with other Intel software tools on Linux in
- case of dynamic replacement of the memory allocator (1700).
-- Fixed install issues that prevented installation on
- Mac OS* X 10.6.4 (1711).
-
------------------------------------------------------------------------
-Intel TBB 3.0 Update 1 commercial-aligned release
-TBB_INTERFACE_VERSION == 5000 (forgotten to increment)
-
-Changes (w.r.t. Intel TBB 3.0 commercial-aligned release):
-
-- Decreased memory fragmentation caused by allocations bigger than 8K.
-- Lazily allocate worker threads, to avoid creating unnecessary stacks.
-
-Bugs fixed:
-
-- TBB allocator used much more memory than malloc (1703) - see above.
-- Deadlocks happened in some specific initialization scenarios
- of the TBB allocator (1701, 1704).
-- Regression in enumerable_thread_specific: excessive requirements
- for object constructors.
-- A bug in construction of parallel_pipeline filters when the body
- instance was a temporary object.
-- Incorrect usage of memory fences on PowerPC and XBOX360 platforms.
-- A subtle issue in task group context binding that could result
- in the cancellation signal being missed by nested task groups.
-- Incorrect construction of concurrent_unordered_map if the specified
- number of buckets is not a power of two.
-- Broken count() and equal_range() of concurrent_unordered_map.
-- Return type of the postfix form of operator++ for hash map's
- iterators.
-
------------------------------------------------------------------------
-Intel TBB 3.0 commercial-aligned release
-TBB_INTERFACE_VERSION == 5000
-
-Changes (w.r.t. Intel TBB 2.2 Update 3 commercial-aligned release):
-
-- All open-source-release changes down to TBB 2.2 U3 below
- were incorporated into this release.
-
------------------------------------------------------------------------
-20100406 open-source release
-
-Changes (w.r.t. 20100310 open-source release):
-
-- Added support for Microsoft* Visual Studio* 2010, including binaries.
-- Added a PDF file with recommended Design Patterns for TBB.
-- Added parallel_pipeline function and companion classes and functions
- that provide a strongly typed lambda-friendly pipeline interface.
-- Reworked enumerable_thread_specific to use a custom implementation of
- hash map that is more efficient for ETS usage models.
-- Added example for class task_group; see examples/task_group/sudoku.
-- Removed two examples, as they were long outdated and superseded:
- pipeline/text_filter (use pipeline/square);
- parallel_while/parallel_preorder (use parallel_do/parallel_preorder).
-- PDF documentation updated.
-- Other fixes and changes in code, tests, and examples.
-
-Bugs fixed:
-
-- Eliminated build errors with MinGW32.
-- Fixed post-build step and other issues in VS projects for examples.
-- Fixed discrepancy between scalable_realloc and scalable_msize that
- caused crashes with malloc replacement on Windows.
-
------------------------------------------------------------------------
-20100310 open-source release
-
-Changes (w.r.t. Intel TBB 2.2 Update 3 commercial-aligned release):
-
-- Version macros changed in anticipation of a future release.
-- Directory structure aligned with Intel(R) C++ Compiler;
- now TBB binaries reside in <arch>/<os>/[bin|lib]
- (in TBB 2.x, it was [bin|lib]/<arch>/<os>).
-- Visual Studio projects changed for examples: instead of a separate set
- of files for each VS version, now there is a single 'msvs' directory
- that contains workspaces for the MS C++ compiler (*_cl.sln) and
- the Intel C++ compiler (*_icl.sln). Works with VS 2005 and above.
-- The name versioning scheme for backward compatibility was improved;
- now compatibility-breaking changes are done in a separate namespace.
-- Added concurrent_unordered_map implementation based on a prototype
- developed at Microsoft for a future version of PPL.
-- Added PPL-compatible writer-preference RW lock (reader_writer_lock).
-- Added TBB_IMPLEMENT_CPP0X macro to control injection of C++0x names
- implemented in TBB into namespace std.
-- Added almost-C++0x-compatible std::condition_variable, plus a bunch
- of other C++0x classes required by condition_variable.
-- With TBB_IMPLEMENT_CPP0X, tbb_thread can also be used as std::thread.
-- task.cpp was split into several translation units to structure
- TBB scheduler sources layout. Static data layout and library
- initialization logic were also updated.
-- TBB scheduler reworked to prevent master threads from stealing
- work belonging to other masters.
-- Class task was extended with an enqueue() method, and slightly changed
- semantics of methods spawn() and destroy(). For exact semantics,
- refer to the TBB Reference manual.
-- task_group_context now allows for destruction by non-owner threads.
-- Added TBB_USE_EXCEPTIONS macro to control the use of exceptions in TBB
- headers. It turns off (i.e. sets to 0) automatically if the specified
- compiler options disable exception handling.
-- TBB is enabled to run on top of Microsoft's Concurrency Runtime
- on Windows* 7 (via our worker dispatcher known as RML).
-- Removed old unused busy-waiting code in concurrent_queue.
-- Described the advanced build & test options in src/index.html.
-- Warning level for GCC raised with -Wextra and a few other options.
-- Multiple fixes and improvements in code, tests, examples, and docs.
-
-Open-source contributions integrated:
-
-- Xbox support by Roman Lut (Deep Shadows), though further changes are
- required to make it work; e.g. post-2.1 entry points are missing.
-- "Eventcount" by Dmitry Vyukov evolved into concurrent_monitor, - an internal class used in the implementation of concurrent_queue. - ------------------------------------------------------------------------- -Intel TBB 2.2 Update 3 commercial-aligned release -TBB_INTERFACE_VERSION == 4003 - -Changes (w.r.t. Intel TBB 2.2 Update 2 commercial-aligned release): - -- PDF documentation updated. - -Bugs fixed: - -- concurrent_hash_map compatibility issue exposed on Linux in case - two versions of the container were used by different modules. -- enforce 16 byte stack alignment for consistence with GCC; required - to work correctly with 128-bit variables processed by SSE. -- construct() methods of allocator classes now use global operator new. - ------------------------------------------------------------------------- -Intel TBB 2.2 Update 2 commercial-aligned release -TBB_INTERFACE_VERSION == 4002 - -Changes (w.r.t. Intel TBB 2.2 Update 1 commercial-aligned release): - -- parallel_invoke and parallel_for_each now take function objects - by const reference, not by value. -- Building TBB with /MT is supported, to avoid dependency on particular - versions of Visual C++* runtime DLLs. TBB DLLs built with /MT - are located in vc_mt directory. -- Class critical_section introduced. -- Improvements in exception support: new exception classes introduced, - all exceptions are thrown via an out-of-line internal method. -- Improvements and fixes in the TBB allocator and malloc replacement, - including robust memory identification, and more reliable dynamic - function substitution on Windows*. -- Method swap() added to class tbb_thread. -- Methods rehash() and bucket_count() added to concurrent_hash_map. -- Added support for Visual Studio* 2010 Beta2. No special binaries - provided, but CRT-independent DLLs (vc_mt) should work. -- Other fixes and improvements in code, tests, examples, and docs. - -Open-source contributions integrated: - -- The fix to build 32-bit TBB on Mac OS* X 10.6. -- GCC-based port for SPARC Solaris by Michailo Matijkiw, with use of - earlier work by Raf Schietekat. - -Bugs fixed: - -- 159 - TBB build for PowerPC* running Mac OS* X. -- 160 - IBM* Java segfault if used with TBB allocator. -- crash in concurrent_queue (1616). - ------------------------------------------------------------------------- -Intel TBB 2.2 Update 1 commercial-aligned release -TBB_INTERFACE_VERSION == 4001 - -Changes (w.r.t. Intel TBB 2.2 commercial-aligned release): - -- Incorporates all changes from open-source releases below. -- Documentation was updated. -- TBB scheduler auto-initialization now covers all possible use cases. -- concurrent_queue: made argument types of sizeof used in paddings - consistent with those actually used. -- Memory allocator was improved: supported corner case of user's malloc - calling scalable_malloc (non-Windows), corrected processing of - memory allocation requests during tbb memory allocator startup - (Linux). -- Windows malloc replacement has got better support for static objects. -- In pipeline setups that do not allow actual parallelism, execution - by a single thread is guaranteed, idle spinning eliminated, and - performance improved. -- RML refactoring and clean-up. -- New constructor for concurrent_hash_map allows reserving space for - a number of items. -- Operator delete() added to the TBB exception classes. -- Lambda support was improved in parallel_reduce. -- gcc 4.3 warnings were fixed for concurrent_queue. 
-- Fixed a possible initialization deadlock in modules using TBB entities
- during construction of global static objects.
-- Copy constructor in concurrent_hash_map was fixed.
-- Fixed a couple of rare crashes in the scheduler possible before
- in very specific use cases.
-- Fixed a rare crash in the TBB allocator running out of memory.
-- New tests were implemented, including test_lambda.cpp that checks
- support for lambda expressions.
-- A few other small changes in code, tests, and documentation.
-
------------------------------------------------------------------------
-20090809 open-source release
-
-Changes (w.r.t. Intel TBB 2.2 commercial-aligned release):
-
-- Fixed known exception safety issues in concurrent_vector.
-- Better concurrency of simultaneous grow requests in concurrent_vector.
-- TBB allocator further improves performance of large object allocation.
-- Problem with the source of text relocations was fixed on Linux.
-- Fixed bugs related to malloc replacement under Windows.
-- A few other small changes in code and documentation.
-
------------------------------------------------------------------------
-Intel TBB 2.2 commercial-aligned release
-TBB_INTERFACE_VERSION == 4000
-
-Changes (w.r.t. Intel TBB 2.1 U4 commercial-aligned release):
-
-- Incorporates all changes from open-source releases below.
-- Architecture folders renamed from em64t to intel64 and from itanium
- to ia64.
-- Major interface version changed from 3 to 4. Deprecated interfaces
- might be removed in future releases.
-- Parallel algorithms that use partitioners have switched to use
- the auto_partitioner by default.
-- Improved memory allocator performance for allocations bigger than 8K.
-- Added new thread-bound filters functionality for pipeline.
-- New implementation of concurrent_hash_map that improves performance
- significantly.
-- A few other small changes in code and documentation.
-
------------------------------------------------------------------------
-20090511 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Basic support for the MinGW32 development kit.
-- Added tbb::zero_allocator class that initializes memory with zeros.
- It can be used as an adaptor to any STL-compatible allocator class.
-- Added tbb::parallel_for_each template function as an alias to
- parallel_do.
-- Added more overloads for tbb::parallel_for.
-- Added support for exact exception propagation (can only be used with
- compilers that support C++0x std::exception_ptr).
-- tbb::atomic template class can be used with enumerations.
-- mutex, recursive_mutex, spin_mutex, spin_rw_mutex classes extended
- with explicit lock/unlock methods.
-- Fixed size() and grow_to_at_least() methods of tbb::concurrent_vector
- to provide space allocation guarantees. More methods added for
- compatibility with std::vector, including some from C++0x.
-- Preview of a lambda-friendly interface for low-level use of tasks.
-- scalable_msize function added to the scalable allocator (Windows only).
-- Rationalized internal auxiliary functions for spin-waiting and backoff.
-- Several tests underwent substantial refactoring.
-
-Changes affecting backward compatibility:
-
-- Improvements in concurrent_queue, including limited API changes.
- The previous version is deprecated; its functionality is accessible
- via methods of the new tbb::concurrent_bounded_queue class
- (see the sketch after this list).
-- grow* and push_back methods of concurrent_vector changed to return
- iterators; the old semantics is deprecated.
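The backward-compatibility notes above are terse; as an illustration (not part of the original release notes), here is a minimal C++ sketch of the replacement APIs they describe, using only calls from the public headers:

    #include <tbb/concurrent_queue.h>
    #include <tbb/concurrent_vector.h>

    int main() {
        // The old bounded-queue functionality now lives in
        // tbb::concurrent_bounded_queue.
        tbb::concurrent_bounded_queue<int> queue;
        queue.set_capacity(16);   // capacity control
        queue.push(42);           // blocks while the queue is full
        int value = 0;
        queue.pop(value);         // blocks until an item is available

        // push_back (and grow*) now return an iterator to the new element.
        tbb::concurrent_vector<int> vec;
        tbb::concurrent_vector<int>::iterator it = vec.push_back(7);
        return (*it == 7 && value == 42) ? 0 : 1;
    }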
-
------------------------------------------------------------------------
-Intel TBB 2.1 Update 4 commercial-aligned release
-TBB_INTERFACE_VERSION == 3016
-
-Changes (w.r.t. Intel TBB 2.1 U3 commercial-aligned release):
-
-- Added tests for aligned memory allocations and malloc replacement.
-- Several improvements for better bundling with Intel(R) C++ Compiler.
-- A few other small changes in code and documentation.
-
-Bugs fixed:
-
-- 150 - request to build TBB examples with debug info in release mode.
-- Backward compatibility issue with concurrent_queue on Windows.
-- Dependency on VS 2005 SP1 runtime libraries removed.
-- Compilation of GUI examples under Xcode* 3.1 (1577).
-- On Windows, TBB allocator classes can be instantiated with const types
- for compatibility with the MS implementation of STL containers (1566).
-
------------------------------------------------------------------------
-20090313 open-source release
-
-Changes (w.r.t. 20081109 open-source release):
-
-- Includes all changes introduced in the TBB 2.1 Update 2 & Update 3
- commercial-aligned releases (see below for details).
-- Added tbb::parallel_invoke template function. It runs up to 10
- user-defined functions in parallel and waits for them to complete.
-- Added a special library providing the ability to replace the standard
- memory allocation routines in the Microsoft* C/C++ RTL (malloc/free,
- global new/delete, etc.) with the TBB memory allocator.
- Usage details are described in the include/tbb/tbbmalloc_proxy.h file.
-- Task scheduler switched to a new implementation of its core
- functionality (deque-based task pool, new structure of arena slots).
-- Preview of Microsoft* Visual Studio* 2005 project files for
- building the library is available in the build/vsproject folder.
-- Added tests for aligned memory allocations and malloc replacement.
-- Added parallel_for/game_of_life.net example (for Windows only)
- showing TBB usage in a .NET application.
-- A number of other fixes and improvements to code, tests, makefiles,
- examples and documents.
-
-Bugs fixed:
-
-- The same list as in TBB 2.1 Update 4 right above.
-
------------------------------------------------------------------------
-Intel TBB 2.1 Update 3 commercial-aligned release
-TBB_INTERFACE_VERSION == 3015
-
-Changes (w.r.t. Intel TBB 2.1 U2 commercial-aligned release):
-
-- Added support for aligned allocations to the TBB memory allocator.
-- Added a special library to use with LD_PRELOAD on Linux* in order to
- replace the standard memory allocation routines in C/C++ with the
- TBB memory allocator.
-- Added null_mutex and null_rw_mutex: no-op classes interface-compliant
- with other TBB mutexes.
-- Improved performance of parallel_sort, to close most of the serial gap
- with std::sort, and beat it on 2 or more cores.
-- A few other small changes.
-
-Bugs fixed:
-
-- The problem where parallel_for hung after an exception was thrown
- if affinity_partitioner was used (1556).
-- Got rid of VS warnings about mbstowcs deprecation (1560),
- as well as some other warnings.
-- operator== for concurrent_vector::iterator fixed to work correctly
- with different vector instances.
-
------------------------------------------------------------------------
-Intel TBB 2.1 Update 2 commercial-aligned release
-TBB_INTERFACE_VERSION == 3014
-
-Changes (w.r.t.
Intel TBB 2.1 U1 commercial-aligned release):
-
-- Incorporates all open-source-release changes down to TBB 2.1 U1,
- except for:
- - 20081019 addition of enumerable_thread_specific;
-- Warning level for Microsoft* Visual C++* compiler raised to /W4 /Wp64;
- warnings found at this level were cleaned or suppressed.
-- Added TBB_runtime_interface_version API function.
-- Added new example: pipeline/square.
-- Added exception handling and cancellation support
- for parallel_do and pipeline.
-- Added copy constructor and [begin,end) constructor to concurrent_queue.
-- Added some support for the beta version of Intel(R) Parallel Amplifier.
-- Added scripts to set the environment for cross-compilation of 32-bit
- applications on 64-bit Linux with Intel(R) C++ Compiler.
-- Fixed semantics of concurrent_vector::clear() to not deallocate
- internal arrays. Fixed compact() to perform such deallocation later.
-- Fixed the issue with atomic<T> when T is an incomplete type.
-- Improved support for PowerPC* Macintosh*, including the fix
- for a bug in masked compare-and-swap reported by a customer.
-- As usual, a number of other improvements everywhere.
-
------------------------------------------------------------------------
-20081109 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Added new serial out-of-order filter for tbb::pipeline.
-- Fixed the issue with atomic::operator= reported at the forum.
-- Fixed the issue with using tbb::task::self() in a task destructor
- reported at the forum.
-- A number of other improvements to code, tests, makefiles, examples
- and documents.
-
-Open-source contributions integrated:
-- Changes in the memory allocator were partially integrated.
-
------------------------------------------------------------------------
-20081019 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Introduced enumerable_thread_specific. This new class provides a
- wrapper around native thread local storage as well as iterators and
- ranges for accessing the thread local copies (1533).
-- Improved support for Intel(R) Threading Analysis Tools
- on Intel(R) 64 architecture.
-- Dependency on Microsoft* CRT was integrated into the libraries using
- manifests, to avoid issues if called from code that uses a different
- version of the Visual C++* runtime than the library.
-- Introduced new defines TBB_USE_ASSERT, TBB_USE_DEBUG,
- TBB_USE_PERFORMANCE_WARNINGS, TBB_USE_THREADING_TOOLS.
-- A number of other improvements to code, tests, makefiles, examples
- and documents.
-
-Open-source contributions integrated:
-
-- Linker optimization: /incremental:no.
-
------------------------------------------------------------------------
-20080925 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Same fix for a memory leak in the memory allocator as in TBB 2.1 U1.
-- Improved support for lambda functions.
-- Fixed more concurrent_queue issues reported at the forum.
-- A number of other improvements to code, tests, makefiles, examples
- and documents.
-
------------------------------------------------------------------------
-Intel TBB 2.1 Update 1 commercial-aligned release
-TBB_INTERFACE_VERSION == 3013
-
-Changes (w.r.t. Intel TBB 2.1 commercial-aligned release):
-
-- Fixed a small memory leak in the memory allocator.
-- Incorporates all open-source-release changes since TBB 2.1,
- except for:
- - 20080825 changes for parallel_do;
-
------------------------------------------------------------------------
-20080825 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Added exception handling and cancellation support for parallel_do.
-- Added default HashCompare template argument for concurrent_hash_map.
-- Fixed concurrent_queue.clear() issues due to an incorrect assumption
- about clear() being a private method.
-- Added the possibility to use TBB in applications that change
- default calling conventions (Windows* only).
-- Many improvements to code, tests, examples, makefiles and documents.
-
-Bugs fixed:
-
-- 120, 130 - memset declaration missing in concurrent_hash_map.h.
-
------------------------------------------------------------------------
-20080724 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Inline assembly for atomic operations improved for gcc 4.3.
-- A few more improvements to the code.
-
------------------------------------------------------------------------
-20080709 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- operator=() was added to the tbb_thread class according to
- the current working draft for std::thread.
-- Recognizing SPARC* in makefiles for Linux* and Sun Solaris*.
-
-Bugs fixed:
-
-- 127 - concurrent_hash_map::range fixed to split correctly.
-
-Open-source contributions integrated:
-
-- fix_set_midpoint.diff by jyasskin
-- SPARC* support in makefiles by Raf Schietekat
-
------------------------------------------------------------------------
-20080622 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Fixed a hang that rarely happened on Linux
- during deinitialization of the TBB scheduler.
-- Improved support for Intel(R) Thread Checker.
-- A few more improvements to the code.
-
------------------------------------------------------------------------
-Intel TBB 2.1 commercial-aligned release
-TBB_INTERFACE_VERSION == 3011
-
-Changes (w.r.t. Intel TBB 2.0 U3 commercial-aligned release):
-
-- All open-source-release changes down to, and including, TBB 2.0 below,
- were incorporated into this release.
-
------------------------------------------------------------------------
-20080605 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Explicit control of exported symbols by version scripts added on Linux.
-- Interfaces polished for exception handling & algorithm cancellation.
-- Cache behavior improvements in the scalable allocator.
-- Improvements in text_filter, polygon_overlay, and other examples.
-- A lot of other stability improvements in code, tests, and makefiles.
-- First release where binary packages include headers/docs/examples, so
- binary packages are now self-sufficient for using TBB.
-
-Open-source contributions integrated:
-
-- atomics patch (partially).
-- tick_count warning patch.
-
-Bugs fixed:
-
-- 118 - fix for boost compatibility.
-- 123 - fix for tbb_machine.h.
-
------------------------------------------------------------------------
-20080512 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Fixed a problem with backward binary compatibility
- of debug Linux builds.
-- Sun* Studio* support added.
-- soname support added on Linux via a linker script. To restore backward
- binary compatibility, *.so -> *.so.2 softlinks should be created.
-- concurrent_hash_map improvements - added a few new forms of the
- insert() method and fixed the precondition and guarantees of the
- erase() methods. Added runtime warning reporting about a bad hash
- function used for the container. Various improvements for performance
- and concurrency.
-- Cancellation mechanism reworked so that it does not hurt scalability.
-- Algorithm parallel_do reworked. Requirement for Body::argument_type
- definition removed, and the work item argument type can be arbitrarily
- cv-qualified.
-- polygon_overlay example added.
-- A few more improvements to code, tests, examples and Makefiles.
-
-Open-source contributions integrated:
-
-- Soname support patch for Bugzilla #112.
-
-Bugs fixed:
-
-- 112 - fix for soname support.
-
------------------------------------------------------------------------
-Intel TBB 2.0 U3 commercial-aligned release (package 017, April 20, 2008)
-
-Corresponds to commercial 019 (for Linux*, 020; for Mac OS* X, 018)
-packages.
-
-Changes (w.r.t. Intel TBB 2.0 U2 commercial-aligned release):
-
-- Does not contain open-source-release changes below; this release is
- only a minor update of TBB 2.0 U2.
-- Removed spin-waiting in pipeline and concurrent_queue.
-- A few more small bug fixes from open-source releases below.
-
------------------------------------------------------------------------
-20080408 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- count_strings example reworked: new word generator implemented, hash
- function replaced, and tbb_allocator is used with the std::string
- class.
-- Static methods of spin_rw_mutex were replaced by normal member
- functions, and the class name was versioned.
-- tacheon example was renamed to tachyon.
-- Improved support for Intel(R) Thread Checker.
-- A few more minor improvements.
-
-Open-source contributions integrated:
-
-- Two sets of Sun patches for IA Solaris support.
-
------------------------------------------------------------------------
-20080402 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Exception handling and cancellation support for tasks and algorithms
- fully enabled.
-- Exception safety guarantees defined and fixed for all concurrent
- containers.
-- User-defined memory allocator support added to all concurrent
- containers.
-- Performance improvement of concurrent_hash_map, spin_rw_mutex.
-- Critical fix for a rare race condition during scheduler
- initialization/de-initialization.
-- New methods added for concurrent containers to be closer to STL,
- as well as automatic filters removal from pipeline
- and the __TBB_AtomicAND function.
-- The volatile keyword dropped from where it is not really needed.
-- A few more minor improvements.
-
------------------------------------------------------------------------
-20080319 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Support for gcc version 4.3 was added.
-- tbb_thread class, nearly compatible with std::thread expected in
- C++0x, was added.
-
-Bugs fixed:
-
-- 116 - fix for compilation issues with gcc version 4.2.1.
-- 120 - fix for compilation issues with gcc version 4.3.
-
------------------------------------------------------------------------
-20080311 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- An enumerator added for pipeline filter types (serial vs. parallel).
-- New task_scheduler_observer class introduced, to observe when
- threads start and finish interacting with the TBB task scheduler.
-- task_scheduler_init reverted to not use an internal versioned class;
- binary compatibility guaranteed with stable releases only.
-- Various improvements to code, tests, examples and Makefiles.
-
------------------------------------------------------------------------
-20080304 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Task-to-thread affinity support, previously kept under a macro,
- now fully legalized.
-- Work-in-progress on cache_aligned_allocator improvements.
-- Pipeline now really supports a parallel input stage; it is no longer
- serialized.
-- Various improvements to code, tests, examples and Makefiles.
-
-Bugs fixed:
-
-- 119 - fix for scalable_malloc sometimes failing to return a big block.
-- TR575 - fixed a deadlock occurring on Windows in startup/shutdown
- under some conditions.
-
------------------------------------------------------------------------
-20080226 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Introduced tbb_allocator to select between the standard allocator and
- tbb::scalable_allocator when available.
-- Removed spin-waiting in pipeline and concurrent_queue.
-- Improved performance of concurrent_hash_map by using tbb_allocator.
-- Improved support for Intel(R) Thread Checker.
-- Various improvements to code, tests, examples and Makefiles.
-
------------------------------------------------------------------------
-Intel TBB 2.0 U2 commercial-aligned release (package 017, February 14, 2008)
-
-Corresponds to commercial 017 (for Linux*, 018; for Mac OS* X, 016)
-packages.
-
-Changes (w.r.t. Intel TBB 2.0 U1 commercial-aligned release):
-
-- Does not contain open-source-release changes below; this release is
- only a minor update of TBB 2.0 U1.
-- Added support for Microsoft* Visual Studio* 2008, including binary
- libraries and VS2008 projects for examples.
-- Use SwitchToThread() instead of Sleep() to yield threads on Windows*.
-- Enhancements to Doxygen-readable comments in source code.
-- A few more small bug fixes from open-source releases below.
-
-Bugs fixed:
-
-- TR569 - Memory leak in concurrent_queue.
-
------------------------------------------------------------------------
-20080207 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Improvements and minor fixes in VS2008 projects for examples.
-- Improvements in code for gating worker threads that wait for work,
- previously consolidated under #if IMPROVED_GATING, now legalized.
-- Cosmetic changes in code, examples, tests.
-
-Bugs fixed:
-
-- 113 - Iterators and ranges should be convertible to their const
- counterparts.
-- TR569 - Memory leak in concurrent_queue.
-
------------------------------------------------------------------------
-20080122 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Updated examples/parallel_for/seismic to improve the visuals and to
- use the affinity_partitioner (20071127 and forward) for better
- performance.
-- Minor improvements to unittests and performance tests.
-
------------------------------------------------------------------------
-20080115 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Cleanup, simplifications and enhancements to the Makefiles for
- building the libraries (see build/index.html for high-level
- changes) and the examples.
-- Use SwitchToThread() instead of Sleep() to yield threads on Windows*.
-- Engineering work-in-progress on exception safety/support.
-- Engineering work-in-progress on affinity_partitioner for
- parallel_reduce.
-- Engineering work-in-progress on improved gating for worker threads
- (idle workers now block in the OS instead of spinning).
-- Enhancements to Doxygen-readable comments in source code.
-
-Bugs fixed:
-
-- 102 - Support for parallel build with gmake -j.
-- 114 - /Wp64 build warning on Windows*.
-
------------------------------------------------------------------------
-20071218 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Full support for Microsoft* Visual Studio* 2008 in open-source.
- Binaries for vc9/ will be available in future stable releases.
-- New recursive_mutex class.
-- Full support for 32-bit PowerMac including export files for builds.
-- Improvements to parallel_do.
-
------------------------------------------------------------------------
-20071206 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Support for Microsoft* Visual Studio* 2008 in building libraries
- from source as well as in vc9/ projects for examples.
-- Small fixes to the affinity_partitioner first introduced in 20071127.
-- Small fixes to the thread-stack size hook first introduced in 20071127.
-- Engineering work in progress on concurrent_vector.
-- Engineering work in progress on exception behavior.
-- Unittest improvements.
-
------------------------------------------------------------------------
-20071127 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- Task-to-thread affinity support (affinity partitioner) first appears.
-- More work on concurrent_vector.
-- New parallel_do algorithm (function-style version of parallel while)
- and parallel_do/parallel_preorder example.
-- New task_scheduler_init() hooks for getting default_num_threads() and
- for setting the thread stack size.
-- Support for weak memory consistency models in the code base.
-- Futex usage in the task scheduler (Linux).
-- Started adding 32-bit PowerMac support.
-- Intel(R) 9.1 compilers are now the base supported Intel(R) compiler
- version.
-- TBB libraries added to the link line automatically on Microsoft
- Windows* systems via #pragma comment linker directives.
-
-Open-source contributions integrated:
-
-- FreeBSD platform support patches.
-- AIX weak memory model patch.
-
-Bugs fixed:
-
-- 108 - Removed broken affinity.h reference.
-- 101 - Does not build on Debian Lenny (replaced arch with uname -m).
-
------------------------------------------------------------------------
-20071030 open-source release
-
-Changes (w.r.t. previous open-source release):
-
-- More work on concurrent_vector.
-- Better support for building with -Wall -Werror (or not) as desired.
-- A few fixes to eliminate extraneous warnings.
-- Begin introduction of versioning hooks so that the internal/API
- version is tracked via TBB_INTERFACE_VERSION. The newest binary
- libraries should always work with previously-compiled code
- whenever possible (a version-check sketch appears after the
- 20070927 notes below).
-- Engineering work in progress on using futex inside the mutexes (Linux).
-- Engineering work in progress on exception behavior.
-- Engineering work in progress on a new parallel_do algorithm.
-- Unittest improvements.
-
------------------------------------------------------------------------
-20070927 open-source release
-
-Changes (w.r.t. Intel TBB 2.0 U1 commercial-aligned release):
-
-- Minor update to TBB 2.0 U1 below.
-- Begin introduction of new concurrent_vector interfaces not released
- with TBB 2.0 U1.
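The versioning hooks mentioned in the 20071030 notes grew into a stable convention; as an illustration (not part of the original release notes), here is a minimal C++ sketch comparing the interface version the code was compiled against with the version of the loaded binary. Note that the TBB_runtime_interface_version() entry point was only added later (see TBB 2.1 Update 2 above):

    #include <tbb/tbb_stddef.h>
    #include <cstdio>

    int main() {
        // Interface version baked into the headers at compile time.
        std::printf("compiled against: %d\n", TBB_INTERFACE_VERSION);
        // Interface version reported by the loaded library at run time;
        // a mismatch indicates the wrong binary is on the library path.
        std::printf("loaded binary:    %d\n", TBB_runtime_interface_version());
        return 0;
    }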
-
------------------------------------------------------------------------
-Intel TBB 2.0 U1 commercial-aligned release (package 014, October 1, 2007)
-
-Corresponds to commercial 014 (for Linux*, 016) packages.
-
-Changes (w.r.t. Intel TBB 2.0 commercial-aligned release):
-
-- All open-source-release changes down to, and including, TBB 2.0
- below, were incorporated into this release.
-- Made a number of changes to the officially supported OS list:
- Added Linux* OSs:
- Asianux* 3, Debian* 4.0, Fedora Core* 6, Fedora* 7,
- Turbo Linux* 11, Ubuntu* 7.04;
- Dropped Linux* OSs:
- Asianux* 2, Fedora Core* 4, Haansoft* Linux 2006 Server,
- Mandriva/Mandrake* 10.1, Miracle Linux* 4.0,
- Red Flag* DC Server 5.0;
- Only Mac OS* X 10.4.9 (and forward) and Xcode* tool suite 2.4.1 (and
- forward) are now supported.
-- Commercial installers on Linux* fixed to recommend the correct
- binaries to use in more cases, with fewer unnecessary warnings.
-- Changes to eliminate spurious build warnings.
-
-Open-source contributions integrated:
-
-- Two small header guard macro patches; these also fixed bug #94.
-- New blocked_range3d class.
-
-Bugs fixed:
-
-- 93 - Removed misleading comments in task.h.
-- 94 - See above.
-
------------------------------------------------------------------------
-20070815 open-source release
-
-Changes:
-
-- Changes to eliminate spurious build warnings.
-- Engineering work in progress on concurrent_vector allocator behavior.
-- Added hooks to use the Intel(R) compiler code coverage tools.
-
-Open-source contributions integrated:
-
-- Mac OS* X build warning patch.
-
-Bugs fixed:
-
-- 88 - Fixed TBB compilation errors if both VS2005 and Windows SDK are
- installed.
-
------------------------------------------------------------------------
-20070719 open-source release
-
-Changes:
-
-- Minor update to TBB 2.0 commercial-aligned release below.
-- Changes to eliminate spurious build warnings.
-
------------------------------------------------------------------------
-Intel TBB 2.0 commercial-aligned release (package 010, July 19, 2007)
-
-Corresponds to commercial 010 (for Linux*, 012) packages.
-
-- TBB open-source debut release.
-
------------------------------------------------------------------------
-Intel TBB 1.1 commercial release (April 10, 2007)
-
-Changes (w.r.t. Intel TBB 1.0 commercial release):
-
-- Added auto_partitioner, which offers an automatic alternative to
- specifying a grain size parameter for estimating the best granularity
- for tasks.
-- The release was added to the Intel(R) C++ Compiler 10.0 Pro.
-
------------------------------------------------------------------------
-Intel TBB 1.0 Update 2 commercial release
-
-Changes (w.r.t. Intel TBB 1.0 Update 1 commercial release):
-
-- Mac OS* X 64-bit support added.
-- Source packages for commercial releases introduced.
-
------------------------------------------------------------------------
-Intel TBB 1.0 Update 1 commercial-aligned release
-
-Changes (w.r.t. Intel TBB 1.0 commercial release):
-
-- Fix for a critical package issue on Mac OS* X.
-
------------------------------------------------------------------------
-Intel TBB 1.0 commercial release (August 29, 2006)
-
-Changes (w.r.t. Intel TBB 1.0 beta commercial release):
-
-- New namespace (and compatibility headers for the old namespace).
- Namespaces are tbb and tbb::internal, and all classes are in
- underscore_style, not WindowsStyle.
-- New class: scalable_allocator (and cache_aligned_allocator using that
- if it exists).
-- Added parallel_for/tacheon example.
-- Removed C-style casts from headers for better C++ compliance.
-- Bug fixes.
-- Documentation improvements.
-- Improved performance of the concurrent_hash_map class.
-- Upgraded parallel_sort() to support STL-style random-access iterators
- instead of just pointers.
-- The Windows vs7_1 directories renamed to vs7.1 in examples.
-- New class: spin version of reader-writer lock.
-- Added push_back() interface to concurrent_vector.
-
------------------------------------------------------------------------
-Intel TBB 1.0 beta commercial release
-
-Initial release.
-
-Features / APIs:
-
-- Concurrent containers: ConcurrentHashTable, ConcurrentVector,
- ConcurrentQueue.
-- Parallel algorithms: ParallelFor, ParallelReduce, ParallelScan,
- ParallelWhile, Pipeline, ParallelSort.
-- Support: AlignedSpace, BlockedRange (i.e., 1D), BlockedRange2D.
-- Task scheduler with multi-master support.
-- Atomics: read, write, fetch-and-store, fetch-and-add, compare-and-swap.
-- Locks: spin, reader-writer, queuing, OS-wrapper.
-- Memory allocation: STL-style memory allocator that avoids false
- sharing.
-- Timers.
-
-Tools Support:
-- Intel(R) Thread Checker 3.0.
-- Intel(R) Thread Profiler 3.0.
-
-Documentation:
-- First Use Documents: README.txt, INSTALL.txt, Release_Notes.txt,
- Doc_Index.html, Getting_Started.pdf, Tutorial.pdf, Reference.pdf.
-- Class hierarchy HTML pages (Doxygen).
-- Tree of index.html pages for navigating the installed package, esp.
- for the examples.
-
-Examples:
-- One for each of these TBB features: ConcurrentHashTable, ParallelFor,
- ParallelReduce, ParallelWhile, Pipeline, Task.
-- Live copies of examples from Getting_Started.pdf.
-- TestAll example that exercises every class and header in the package
- (i.e., a "liveness test").
-- Compilers: see Release_Notes.txt.
-- APIs: OpenMP, WinThreads, Pthreads.
-
-Packaging:
-- Package for Windows installs IA-32 and EM64T bits.
-- Package for Linux installs IA-32, EM64T and IPF bits.
-- Package for Mac OS* X installs IA-32 bits.
-- All packages support Intel(R) software setup assistant (ISSA) and
- install-time FLEXlm license checking.
-- ISSA support allows the license file to be specified directly in case
- of no Internet connection or problems with IRC or serial #s.
-- Linux installer allows root or non-root, RPM or non-RPM installs.
-- FLEXlm license servers (for those who need floating/counted licenses)
- are provided separately on Intel(R) Premier.
-
------------------------------------------------------------------------
-Intel, the Intel logo, Xeon, Intel Xeon Phi, and Cilk are registered
-trademarks or trademarks of Intel Corporation or its subsidiaries in
-the United States and other countries.
-
-* Other names and brands may be claimed as the property of others.
diff --git a/lib/3rdParty/tbb/LICENSE b/lib/3rdParty/tbb/LICENSE
deleted file mode 100644
index 261eeb9e..00000000
--- a/lib/3rdParty/tbb/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/lib/3rdParty/tbb/README b/lib/3rdParty/tbb/README deleted file mode 100644 index fcc87af0..00000000 --- a/lib/3rdParty/tbb/README +++ /dev/null @@ -1,11 +0,0 @@ -Intel(R) Threading Building Blocks - README - -See index.html for directions and documentation. - -If source is present (./Makefile and src/ directories), -type 'gmake' in this directory to build and test. - -See examples/index.html for runnable examples and directions. - -See http://threadingbuildingblocks.org for full documentation -and software information. diff --git a/lib/3rdParty/tbb/bin/x64/v140/irml/irml.dll b/lib/3rdParty/tbb/bin/x64/v140/irml/irml.dll deleted file mode 100644 index eff1eeee..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/irml/irml.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x64/v140/irml/irml_debug.dll b/lib/3rdParty/tbb/bin/x64/v140/irml/irml_debug.dll deleted file mode 100644 index c6863995..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/irml/irml_debug.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x64/v140/tbb.dll b/lib/3rdParty/tbb/bin/x64/v140/tbb.dll deleted file mode 100644 index 655e6515..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/tbb.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x64/v140/tbb_debug.dll b/lib/3rdParty/tbb/bin/x64/v140/tbb_debug.dll deleted file mode 100644 index bad9f08b..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/tbb_debug.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x64/v140/tbb_preview.dll b/lib/3rdParty/tbb/bin/x64/v140/tbb_preview.dll deleted file mode 100644 index 73518080..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/tbb_preview.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x64/v140/tbbmalloc.dll b/lib/3rdParty/tbb/bin/x64/v140/tbbmalloc.dll deleted file mode 100644 index cee43368..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/tbbmalloc.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x64/v140/tbbmalloc_proxy.dll b/lib/3rdParty/tbb/bin/x64/v140/tbbmalloc_proxy.dll deleted file mode 100644 index 7e533095..00000000 Binary files a/lib/3rdParty/tbb/bin/x64/v140/tbbmalloc_proxy.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/irml/irml.dll b/lib/3rdParty/tbb/bin/x86/v140/irml/irml.dll deleted file mode 100644 index a04c1b5f..00000000 Binary files a/lib/3rdParty/tbb/bin/x86/v140/irml/irml.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/irml/irml_debug.dll b/lib/3rdParty/tbb/bin/x86/v140/irml/irml_debug.dll deleted file mode 100644 index fb94ec32..00000000 Binary files a/lib/3rdParty/tbb/bin/x86/v140/irml/irml_debug.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/tbb.dll b/lib/3rdParty/tbb/bin/x86/v140/tbb.dll deleted file mode 100644 index 20ca289a..00000000 Binary files a/lib/3rdParty/tbb/bin/x86/v140/tbb.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/tbb_debug.dll b/lib/3rdParty/tbb/bin/x86/v140/tbb_debug.dll deleted file mode 100644 index 96c5011a..00000000 Binary files a/lib/3rdParty/tbb/bin/x86/v140/tbb_debug.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/tbb_preview.dll b/lib/3rdParty/tbb/bin/x86/v140/tbb_preview.dll deleted file mode 100644 index 20903025..00000000 Binary files a/lib/3rdParty/tbb/bin/x86/v140/tbb_preview.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/tbbmalloc.dll b/lib/3rdParty/tbb/bin/x86/v140/tbbmalloc.dll deleted file mode 100644 index 99c6495b..00000000 Binary files 
a/lib/3rdParty/tbb/bin/x86/v140/tbbmalloc.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/bin/x86/v140/tbbmalloc_proxy.dll b/lib/3rdParty/tbb/bin/x86/v140/tbbmalloc_proxy.dll deleted file mode 100644 index cae643fc..00000000 Binary files a/lib/3rdParty/tbb/bin/x86/v140/tbbmalloc_proxy.dll and /dev/null differ diff --git a/lib/3rdParty/tbb/include/index.html b/lib/3rdParty/tbb/include/index.html deleted file mode 100644 index b0962e01..00000000 --- a/lib/3rdParty/tbb/include/index.html +++ /dev/null @@ -1,25 +0,0 @@ - - - -

-Overview
-
-Include files for Intel® Threading Building Blocks (Intel® TBB).
-
-Directories
-
-tbb
-    Include files for Intel TBB classes and functions.
-serial/tbb
-    Include files for a sequential implementation of the parallel_for algorithm.
-
-Up to parent directory
-
-Copyright © 2005-2017 Intel Corporation. All Rights Reserved.
-
-Intel is a registered trademark or trademark of Intel Corporation
-or its subsidiaries in the United States and other countries.

-* Other names and brands may be claimed as the property of others. - - diff --git a/lib/3rdParty/tbb/include/serial/tbb/parallel_for.h b/lib/3rdParty/tbb/include/serial/tbb/parallel_for.h deleted file mode 100644 index c8f9a790..00000000 --- a/lib/3rdParty/tbb/include/serial/tbb/parallel_for.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_SERIAL_parallel_for_H -#define __TBB_SERIAL_parallel_for_H - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include -#include // required to construct std exception classes - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -#include "tbb_annotate.h" - -#ifndef __TBB_NORMAL_EXECUTION -#include "tbb/blocked_range.h" -#include "tbb/partitioner.h" -#endif - -namespace tbb { -namespace serial { -namespace interface9 { - -// parallel_for serial annotated implementation - -template< typename Range, typename Body, typename Partitioner > -class start_for : tbb::internal::no_copy { - Range my_range; - const Body my_body; - typename Partitioner::task_partition_type my_partition; - void execute(); - - //! Constructor for root task. - start_for( const Range& range, const Body& body, Partitioner& partitioner ) : - my_range( range ), - my_body( body ), - my_partition( partitioner ) - { - } - - //! Splitting constructor used to generate children. - /** this becomes left child. Newly constructed object is right child. */ - start_for( start_for& parent_, typename Partitioner::split_type& split_obj ) : - my_range( parent_.my_range, split_obj ), - my_body( parent_.my_body ), - my_partition( parent_.my_partition, split_obj ) - { - } - -public: - static void run( const Range& range, const Body& body, Partitioner& partitioner ) { - if( !range.empty() ) { - ANNOTATE_SITE_BEGIN( tbb_parallel_for ); - { - start_for a( range, body, partitioner ); - a.execute(); - } - ANNOTATE_SITE_END( tbb_parallel_for ); - } - } -}; - -template< typename Range, typename Body, typename Partitioner > -void start_for< Range, Body, Partitioner >::execute() { - if( !my_range.is_divisible() || !my_partition.is_divisible() ) { - ANNOTATE_TASK_BEGIN( tbb_parallel_for_range ); - { - my_body( my_range ); - } - ANNOTATE_TASK_END( tbb_parallel_for_range ); - } else { - typename Partitioner::split_type split_obj; - start_for b( *this, split_obj ); - this->execute(); // Execute the left interval first to keep the serial order. - b.execute(); // Execute the right interval then. - } -} - -//! Parallel iteration over range with default partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body ) { - serial::interface9::start_for::run(range,body,__TBB_DEFAULT_PARTITIONER()); -} - -//! Parallel iteration over range with simple partitioner. 
-/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { - serial::interface9::start_for::run(range,body,partitioner); -} - -//! Parallel iteration over range with auto_partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { - serial::interface9::start_for::run(range,body,partitioner); -} - -//! Parallel iteration over range with static_partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { - serial::interface9::start_for::run(range,body,partitioner); -} - -//! Parallel iteration over range with affinity_partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { - serial::interface9::start_for::run(range,body,partitioner); -} - -//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner (ignored) -template -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& ) { - if (step <= 0 ) - throw std::invalid_argument( "nonpositive_step" ); - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - ANNOTATE_SITE_BEGIN( tbb_parallel_for ); - for( Index i = first; i < last; i = i + step ) { - ANNOTATE_TASK_BEGIN( tbb_parallel_for_iteration ); - { f( i ); } - ANNOTATE_TASK_END( tbb_parallel_for_iteration ); - } - ANNOTATE_SITE_END( tbb_parallel_for ); - } -} - -//! Parallel iteration over a range of integers with explicit step and default partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f) { - parallel_for_impl(first, last, step, f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with explicit step and simple partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& p) { - parallel_for_impl(first, last, step, f, p); -} -//! Parallel iteration over a range of integers with explicit step and auto partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& p) { - parallel_for_impl(first, last, step, f, p); -} -//! Parallel iteration over a range of integers with explicit step and static partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& p) { - parallel_for_impl(first, last, step, f, p); -} -//! Parallel iteration over a range of integers with explicit step and affinity partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& p) { - parallel_for_impl(first, last, step, f, p); -} - -//! Parallel iteration over a range of integers with default step and default partitioner -template -void parallel_for(Index first, Index last, const Function& f) { - parallel_for_impl(first, last, static_cast(1), f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with default step and simple partitioner -template -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& p) { - parallel_for_impl(first, last, static_cast(1), f, p); -} -//! 
Parallel iteration over a range of integers with default step and auto partitioner -template - void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& p) { - parallel_for_impl(first, last, static_cast(1), f, p); -} -//! Parallel iteration over a range of integers with default step and static partitioner -template -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& p) { - parallel_for_impl(first, last, static_cast(1), f, p); -} -//! Parallel iteration over a range of integers with default step and affinity_partitioner -template -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& p) { - parallel_for_impl(first, last, static_cast(1), f, p); -} - -} // namespace interfaceX - -using interface9::parallel_for; - -} // namespace serial - -#ifndef __TBB_NORMAL_EXECUTION -using serial::interface9::parallel_for; -#endif - -} // namespace tbb - -#endif /* __TBB_SERIAL_parallel_for_H */ diff --git a/lib/3rdParty/tbb/include/serial/tbb/tbb_annotate.h b/lib/3rdParty/tbb/include/serial/tbb/tbb_annotate.h deleted file mode 100644 index 81c8d3a4..00000000 --- a/lib/3rdParty/tbb/include/serial/tbb/tbb_annotate.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_annotate_H -#define __TBB_annotate_H - -// Macros used by the Intel(R) Parallel Advisor. -#ifdef __TBB_NORMAL_EXECUTION - #define ANNOTATE_SITE_BEGIN( site ) - #define ANNOTATE_SITE_END( site ) - #define ANNOTATE_TASK_BEGIN( task ) - #define ANNOTATE_TASK_END( task ) - #define ANNOTATE_LOCK_ACQUIRE( lock ) - #define ANNOTATE_LOCK_RELEASE( lock ) -#else - #include -#endif - -#endif /* __TBB_annotate_H */ diff --git a/lib/3rdParty/tbb/include/tbb/aggregator.h b/lib/3rdParty/tbb/include/tbb/aggregator.h deleted file mode 100644 index 6aecbb74..00000000 --- a/lib/3rdParty/tbb/include/tbb/aggregator.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
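A usage sketch, not part of this patch: the serial parallel_for header deleted above mirrors the parallel API, so call sites compile unchanged against either the parallel TBB headers or the serial fallback. The function and container names below are hypothetical.

#include <cstddef>
#include <vector>
#include "tbb/blocked_range.h"
#include "tbb/parallel_for.h"

void scale(std::vector<float>& data, float k) {
    // Range form: the body is invoked once per subrange after recursive splitting.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] *= k;
        });
    // Integer form with an explicit step, matching the overloads deleted above.
    tbb::parallel_for(std::size_t(0), data.size(), std::size_t(2),
                      [&](std::size_t i) { data[i] += 1.0f; });
}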
- - - - -*/ - -#ifndef __TBB__aggregator_H -#define __TBB__aggregator_H - -#if !TBB_PREVIEW_AGGREGATOR -#error Set TBB_PREVIEW_AGGREGATOR before including aggregator.h -#endif - -#include "atomic.h" -#include "tbb_profiling.h" - -namespace tbb { -namespace interface6 { - -using namespace tbb::internal; - -class aggregator_operation { - template friend class aggregator_ext; - uintptr_t status; - aggregator_operation* my_next; -public: - enum aggregator_operation_status { agg_waiting=0, agg_finished }; - aggregator_operation() : status(agg_waiting), my_next(NULL) {} - /// Call start before handling this operation - void start() { call_itt_notify(acquired, &status); } - /// Call finish when done handling this operation - /** The operation will be released to its originating thread, and possibly deleted. */ - void finish() { itt_store_word_with_release(status, uintptr_t(agg_finished)); } - aggregator_operation* next() { return itt_hide_load_word(my_next);} - void set_next(aggregator_operation* n) { itt_hide_store_word(my_next, n); } -}; - -namespace internal { - -class basic_operation_base : public aggregator_operation { - friend class basic_handler; - virtual void apply_body() = 0; -public: - basic_operation_base() : aggregator_operation() {} - virtual ~basic_operation_base() {} -}; - -template -class basic_operation : public basic_operation_base, no_assign { - const Body& my_body; - void apply_body() __TBB_override { my_body(); } -public: - basic_operation(const Body& b) : basic_operation_base(), my_body(b) {} -}; - -class basic_handler { -public: - basic_handler() {} - void operator()(aggregator_operation* op_list) const { - while (op_list) { - // ITT note: &(op_list->status) tag is used to cover accesses to the operation data. - // The executing thread "acquires" the tag (see start()) and then performs - // the associated operation w/o triggering a race condition diagnostics. - // A thread that created the operation is waiting for its status (see execute_impl()), - // so when this thread is done with the operation, it will "release" the tag - // and update the status (see finish()) to give control back to the waiting thread. - basic_operation_base& request = static_cast(*op_list); - // IMPORTANT: need to advance op_list to op_list->next() before calling request.finish() - op_list = op_list->next(); - request.start(); - request.apply_body(); - request.finish(); - } - } -}; - -} // namespace internal - -//! Aggregator base class and expert interface -/** An aggregator for collecting operations coming from multiple sources and executing - them serially on a single thread. */ -template -class aggregator_ext : tbb::internal::no_copy { -public: - aggregator_ext(const handler_type& h) : handler_busy(0), handle_operations(h) { mailbox = NULL; } - - //! EXPERT INTERFACE: Enter a user-made operation into the aggregator's mailbox. - /** Details of user-made operations must be handled by user-provided handler */ - void process(aggregator_operation *op) { execute_impl(*op); } - -protected: - /** Place operation in mailbox, then either handle mailbox or wait for the operation - to be completed by a different thread. */ - void execute_impl(aggregator_operation& op) { - aggregator_operation* res; - - // ITT note: &(op.status) tag is used to cover accesses to this operation. 
This - // thread has created the operation, and now releases it so that the handler - // thread may handle the associated operation w/o triggering a race condition; - // thus this tag will be acquired just before the operation is handled in the - // handle_operations functor. - call_itt_notify(releasing, &(op.status)); - // insert the operation into the list - do { - // ITT may flag the following line as a race; it is a false positive: - // This is an atomic read; we don't provide itt_hide_load_word for atomics - op.my_next = res = mailbox; // NOT A RACE - } while (mailbox.compare_and_swap(&op, res) != res); - if (!res) { // first in the list; handle the operations - // ITT note: &mailbox tag covers access to the handler_busy flag, which this - // waiting handler thread will try to set before entering handle_operations. - call_itt_notify(acquired, &mailbox); - start_handle_operations(); - __TBB_ASSERT(op.status, NULL); - } - else { // not first; wait for op to be ready - call_itt_notify(prepare, &(op.status)); - spin_wait_while_eq(op.status, uintptr_t(aggregator_operation::agg_waiting)); - itt_load_word_with_acquire(op.status); - } - } - - -private: - //! An atomically updated list (aka mailbox) of aggregator_operations - atomic mailbox; - - //! Controls thread access to handle_operations - /** Behaves as boolean flag where 0=false, 1=true */ - uintptr_t handler_busy; - - handler_type handle_operations; - - //! Trigger the handling of operations when the handler is free - void start_handle_operations() { - aggregator_operation *pending_operations; - - // ITT note: &handler_busy tag covers access to mailbox as it is passed - // between active and waiting handlers. Below, the waiting handler waits until - // the active handler releases, and the waiting handler acquires &handler_busy as - // it becomes the active_handler. The release point is at the end of this - // function, when all operations in mailbox have been handled by the - // owner of this aggregator. - call_itt_notify(prepare, &handler_busy); - // get handler_busy: only one thread can possibly spin here at a time - spin_wait_until_eq(handler_busy, uintptr_t(0)); - call_itt_notify(acquired, &handler_busy); - // acquire fence not necessary here due to causality rule and surrounding atomics - __TBB_store_with_release(handler_busy, uintptr_t(1)); - - // ITT note: &mailbox tag covers access to the handler_busy flag itself. - // Capturing the state of the mailbox signifies that handler_busy has been - // set and a new active handler will now process that list's operations. - call_itt_notify(releasing, &mailbox); - // grab pending_operations - pending_operations = mailbox.fetch_and_store(NULL); - - // handle all the operations - handle_operations(pending_operations); - - // release the handler - itt_store_word_with_release(handler_busy, uintptr_t(0)); - } -}; - -//! Basic aggregator interface -class aggregator : private aggregator_ext { -public: - aggregator() : aggregator_ext(internal::basic_handler()) {} - //! BASIC INTERFACE: Enter a function for exclusive execution by the aggregator. 
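A sketch of the basic aggregator interface being deleted here, assuming the preview macro the header checks for; `pq` and the function names are hypothetical. All threads funnel their operations through one aggregator, so the shared container itself needs no lock.

#define TBB_PREVIEW_AGGREGATOR 1
#include <queue>
#include "tbb/aggregator.h"

std::priority_queue<int> pq;   // shared, but only ever touched inside execute()
tbb::aggregator agg;

void push(int v) {
    agg.execute([&] { pq.push(v); });                // runs exclusively
}

int pop_top() {                                      // assumes pq is non-empty
    int top = 0;
    agg.execute([&] { top = pq.top(); pq.pop(); });
    return top;
}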
- /** The calling thread stores the function object in a basic_operation and - places the operation in the aggregator's mailbox */ - template - void execute(const Body& b) { - internal::basic_operation op(b); - this->execute_impl(op); - } -}; - -} // namespace interface6 - -using interface6::aggregator; -using interface6::aggregator_ext; -using interface6::aggregator_operation; - -} // namespace tbb - -#endif // __TBB__aggregator_H diff --git a/lib/3rdParty/tbb/include/tbb/aligned_space.h b/lib/3rdParty/tbb/include/tbb/aligned_space.h deleted file mode 100644 index 56fd85f3..00000000 --- a/lib/3rdParty/tbb/include/tbb/aligned_space.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_aligned_space_H -#define __TBB_aligned_space_H - -#include "tbb_stddef.h" -#include "tbb_machine.h" - -namespace tbb { - -//! Block of space aligned sufficiently to construct an array T with N elements. -/** The elements are not constructed or destroyed by this class. - @ingroup memory_allocation */ -template -class aligned_space { -private: - typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type; - element_type array[(sizeof(T)*N+sizeof(element_type)-1)/sizeof(element_type)]; -public: - //! Pointer to beginning of array - T* begin() {return internal::punned_cast(this);} - - //! Pointer to one past last element in array. - T* end() {return begin()+N;} -}; - -} // namespace tbb - -#endif /* __TBB_aligned_space_H */ diff --git a/lib/3rdParty/tbb/include/tbb/atomic.h b/lib/3rdParty/tbb/include/tbb/atomic.h deleted file mode 100644 index 72ec534e..00000000 --- a/lib/3rdParty/tbb/include/tbb/atomic.h +++ /dev/null @@ -1,558 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_atomic_H -#define __TBB_atomic_H - -#include - -#if _MSC_VER -#define __TBB_LONG_LONG __int64 -#else -#define __TBB_LONG_LONG long long -#endif /* _MSC_VER */ - -#include "tbb_machine.h" - -#if _MSC_VER && !__INTEL_COMPILER - // Suppress overzealous compiler warnings till the end of the file - #pragma warning (push) - #pragma warning (disable: 4244 4267 4512) -#endif - -namespace tbb { - -//! Specifies memory semantics. -enum memory_semantics { - //! Sequential consistency - full_fence, - //! Acquire - acquire, - //! Release - release, - //! No ordering - relaxed -}; - -//! 
@cond INTERNAL -namespace internal { - -#if __TBB_ALIGNAS_PRESENT - #define __TBB_DECL_ATOMIC_FIELD(t,f,a) alignas(a) t f; -#elif __TBB_ATTRIBUTE_ALIGNED_PRESENT - #define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a))); -#elif __TBB_DECLSPEC_ALIGN_PRESENT - #define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f; -#else - #error Do not know syntax for forcing alignment. -#endif - -template -struct atomic_rep; // Primary template declared, but never defined. - -template<> -struct atomic_rep<1> { // Specialization - typedef int8_t word; -}; -template<> -struct atomic_rep<2> { // Specialization - typedef int16_t word; -}; -template<> -struct atomic_rep<4> { // Specialization -#if _MSC_VER && !_WIN64 - // Work-around that avoids spurious /Wp64 warnings - typedef intptr_t word; -#else - typedef int32_t word; -#endif -}; -#if __TBB_64BIT_ATOMICS -template<> -struct atomic_rep<8> { // Specialization - typedef int64_t word; -}; -#endif - -template -struct aligned_storage; - -//the specializations are needed to please MSVC syntax of __declspec(align()) which accept _literal_ constants only -#if __TBB_ATOMIC_CTORS - #define ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(S) \ - template \ - struct aligned_storage { \ - __TBB_DECL_ATOMIC_FIELD(value_type,my_value,S) \ - aligned_storage() = default ; \ - constexpr aligned_storage(value_type value):my_value(value){} \ - }; \ - -#else - #define ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(S) \ - template \ - struct aligned_storage { \ - __TBB_DECL_ATOMIC_FIELD(value_type,my_value,S) \ - }; \ - -#endif - -template -struct aligned_storage { - value_type my_value; -#if __TBB_ATOMIC_CTORS - aligned_storage() = default ; - constexpr aligned_storage(value_type value):my_value(value){} -#endif -}; - -ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(2) -ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(4) -#if __TBB_64BIT_ATOMICS -ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(8) -#endif - -template -struct atomic_traits; // Primary template declared, but not defined. 
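A minimal standalone sketch (not the TBB source) of the size-keyed specialization pattern atomic_rep<S> uses above: pick a same-sized integral word for any 1-, 2-, 4-, or 8-byte payload, so one set of machine primitives can serve them all.

#include <cstdint>

template <unsigned Size> struct word_for;                      // primary: never defined
template <> struct word_for<1> { typedef std::int8_t  type; };
template <> struct word_for<2> { typedef std::int16_t type; };
template <> struct word_for<4> { typedef std::int32_t type; };
template <> struct word_for<8> { typedef std::int64_t type; };

static_assert(sizeof(word_for<sizeof(float)>::type) == sizeof(float),
              "a float is manipulated through a same-sized integer word");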
- -#define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M) \ - template<> struct atomic_traits { \ - typedef atomic_rep::word word; \ - inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \ - return __TBB_machine_cmpswp##S##M(location,new_value,comparand); \ - } \ - inline static word fetch_and_add( volatile void* location, word addend ) { \ - return __TBB_machine_fetchadd##S##M(location,addend); \ - } \ - inline static word fetch_and_store( volatile void* location, word value ) { \ - return __TBB_machine_fetchstore##S##M(location,value); \ - } \ - }; - -#define __TBB_DECL_ATOMIC_PRIMITIVES(S) \ - template \ - struct atomic_traits { \ - typedef atomic_rep::word word; \ - inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \ - return __TBB_machine_cmpswp##S(location,new_value,comparand); \ - } \ - inline static word fetch_and_add( volatile void* location, word addend ) { \ - return __TBB_machine_fetchadd##S(location,addend); \ - } \ - inline static word fetch_and_store( volatile void* location, word value ) { \ - return __TBB_machine_fetchstore##S(location,value); \ - } \ - }; - -template -struct atomic_load_store_traits; // Primary template declaration - -#define __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(M) \ - template<> struct atomic_load_store_traits { \ - template \ - inline static T load( const volatile T& location ) { \ - return __TBB_load_##M( location ); \ - } \ - template \ - inline static void store( volatile T& location, T value ) { \ - __TBB_store_##M( location, value ); \ - } \ - } - -#if __TBB_USE_FENCED_ATOMICS -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,full_fence) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,full_fence) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,full_fence) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,relaxed) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,relaxed) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,relaxed) -#if __TBB_64BIT_ATOMICS -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,full_fence) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release) -__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,relaxed) -#endif -#else /* !__TBB_USE_FENCED_ATOMICS */ -__TBB_DECL_ATOMIC_PRIMITIVES(1) -__TBB_DECL_ATOMIC_PRIMITIVES(2) -__TBB_DECL_ATOMIC_PRIMITIVES(4) -#if __TBB_64BIT_ATOMICS -__TBB_DECL_ATOMIC_PRIMITIVES(8) -#endif -#endif /* !__TBB_USE_FENCED_ATOMICS */ - -__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(full_fence); -__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(acquire); -__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(release); -__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(relaxed); - -//! Additive inverse of 1 for type T. -/** Various compilers issue various warnings if -1 is used with various integer types. - The baroque expression below avoids all the warnings (we hope). */ -#define __TBB_MINUS_ONE(T) (T(T(0)-T(1))) - -//! Base class that provides basic functionality for atomic without fetch_and_add. -/** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor, - and can be copied/compared by memcpy/memcmp. 
*/ -template -struct atomic_impl { -protected: - aligned_storage my_storage; -private: - //TODO: rechecks on recent versions of gcc if union is still the _only_ way to do a conversion without warnings - //! Union type used to convert type T to underlying integral type. - template - union converter { - typedef typename atomic_rep::word bits_type; - converter(){} - converter(value_type a_value) : value(a_value) {} - value_type value; - bits_type bits; - }; - - template - static typename converter::bits_type to_bits(value_t value){ - return converter(value).bits; - } - template - static value_t to_value(typename converter::bits_type bits){ - converter u; - u.bits = bits; - return u.value; - } - - template - union ptr_converter; //Primary template declared, but never defined. - - template - union ptr_converter { - ptr_converter(){} - ptr_converter(value_t* a_value) : value(a_value) {} - value_t* value; - uintptr_t bits; - }; - //TODO: check if making to_bits accepting reference (thus unifying it with to_bits_ref) - //does not hurt performance - template - static typename converter::bits_type & to_bits_ref(value_t& value){ - //TODO: this #ifdef is temporary workaround, as union conversion seems to fail - //on suncc for 64 bit types for 32 bit target - #if !__SUNPRO_CC - return *(typename converter::bits_type*)ptr_converter(&value).bits; - #else - return *(typename converter::bits_type*)(&value); - #endif - } - - -public: - typedef T value_type; - -#if __TBB_ATOMIC_CTORS - atomic_impl() = default ; - constexpr atomic_impl(value_type value):my_storage(value){} -#endif - template - value_type fetch_and_store( value_type value ) { - return to_value( - internal::atomic_traits::fetch_and_store( &my_storage.my_value, to_bits(value) ) - ); - } - - value_type fetch_and_store( value_type value ) { - return fetch_and_store(value); - } - - template - value_type compare_and_swap( value_type value, value_type comparand ) { - return to_value( - internal::atomic_traits::compare_and_swap( &my_storage.my_value, to_bits(value), to_bits(comparand) ) - ); - } - - value_type compare_and_swap( value_type value, value_type comparand ) { - return compare_and_swap(value,comparand); - } - - operator value_type() const volatile { // volatile qualifier here for backwards compatibility - return to_value( - __TBB_load_with_acquire( to_bits_ref(my_storage.my_value) ) - ); - } - - template - value_type load () const { - return to_value( - internal::atomic_load_store_traits::load( to_bits_ref(my_storage.my_value) ) - ); - } - - value_type load () const { - return load(); - } - - template - void store ( value_type value ) { - internal::atomic_load_store_traits::store( to_bits_ref(my_storage.my_value), to_bits(value)); - } - - void store ( value_type value ) { - store( value ); - } - -protected: - value_type store_with_release( value_type rhs ) { - //TODO: unify with store - __TBB_store_with_release( to_bits_ref(my_storage.my_value), to_bits(rhs) ); - return rhs; - } -}; - -//! Base class that provides basic functionality for atomic with fetch_and_add. -/** I is the underlying type. - D is the difference type. - StepType should be char if I is an integral type, and T if I is a T*. 
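An aside on the converter union above: it moves a value's bits into a same-sized integer word. A std::memcpy is the strictly portable spelling of that conversion, as sketched below; the union form relies on guarantees from the compilers TBB supports.

#include <cstdint>
#include <cstring>

inline std::uint32_t float_to_bits(float value) {
    std::uint32_t bits;
    std::memcpy(&bits, &value, sizeof bits);   // well-defined, byte-wise copy
    return bits;
}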
*/ -template -struct atomic_impl_with_arithmetic: atomic_impl { -public: - typedef I value_type; -#if __TBB_ATOMIC_CTORS - atomic_impl_with_arithmetic() = default ; - constexpr atomic_impl_with_arithmetic(value_type value): atomic_impl(value){} -#endif - template - value_type fetch_and_add( D addend ) { - return value_type(internal::atomic_traits::fetch_and_add( &this->my_storage.my_value, addend*sizeof(StepType) )); - } - - value_type fetch_and_add( D addend ) { - return fetch_and_add(addend); - } - - template - value_type fetch_and_increment() { - return fetch_and_add(1); - } - - value_type fetch_and_increment() { - return fetch_and_add(1); - } - - template - value_type fetch_and_decrement() { - return fetch_and_add(__TBB_MINUS_ONE(D)); - } - - value_type fetch_and_decrement() { - return fetch_and_add(__TBB_MINUS_ONE(D)); - } - -public: - value_type operator+=( D value ) { - return fetch_and_add(value)+value; - } - - value_type operator-=( D value ) { - // Additive inverse of value computed using binary minus, - // instead of unary minus, for sake of avoiding compiler warnings. - return operator+=(D(0)-value); - } - - value_type operator++() { - return fetch_and_add(1)+1; - } - - value_type operator--() { - return fetch_and_add(__TBB_MINUS_ONE(D))-1; - } - - value_type operator++(int) { - return fetch_and_add(1); - } - - value_type operator--(int) { - return fetch_and_add(__TBB_MINUS_ONE(D)); - } -}; - -} /* Internal */ -//! @endcond - -//! Primary template for atomic. -/** See the Reference for details. - @ingroup synchronization */ -template -struct atomic: internal::atomic_impl { -#if __TBB_ATOMIC_CTORS - atomic() = default; - constexpr atomic(T arg): internal::atomic_impl(arg) {} -#endif - T operator=( T rhs ) { - // "this" required here in strict ISO C++ because store_with_release is a dependent name - return this->store_with_release(rhs); - } - atomic& operator=( const atomic& rhs ) {this->store_with_release(rhs); return *this;} -}; - -#if __TBB_ATOMIC_CTORS - #define __TBB_DECL_ATOMIC(T) \ - template<> struct atomic: internal::atomic_impl_with_arithmetic { \ - atomic() = default; \ - constexpr atomic(T arg): internal::atomic_impl_with_arithmetic(arg) {} \ - \ - T operator=( T rhs ) {return store_with_release(rhs);} \ - atomic& operator=( const atomic& rhs ) {store_with_release(rhs); return *this;} \ - }; -#else - #define __TBB_DECL_ATOMIC(T) \ - template<> struct atomic: internal::atomic_impl_with_arithmetic { \ - T operator=( T rhs ) {return store_with_release(rhs);} \ - atomic& operator=( const atomic& rhs ) {store_with_release(rhs); return *this;} \ - }; -#endif - -#if __TBB_64BIT_ATOMICS -//TODO: consider adding non-default (and atomic) copy constructor for 32bit platform -__TBB_DECL_ATOMIC(__TBB_LONG_LONG) -__TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG) -#else -// test_atomic will verify that sizeof(long long)==8 -#endif -__TBB_DECL_ATOMIC(long) -__TBB_DECL_ATOMIC(unsigned long) - -#if _MSC_VER && !_WIN64 -#if __TBB_ATOMIC_CTORS -/* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option. - It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T) - with an operator=(U) that explicitly converts the U to a T. Types T and U should be - type synonyms on the platform. Type U should be the wider variant of T from the - perspective of /Wp64. 
*/ -#define __TBB_DECL_ATOMIC_ALT(T,U) \ - template<> struct atomic: internal::atomic_impl_with_arithmetic { \ - atomic() = default ; \ - constexpr atomic(T arg): internal::atomic_impl_with_arithmetic(arg) {} \ - T operator=( U rhs ) {return store_with_release(T(rhs));} \ - atomic& operator=( const atomic& rhs ) {store_with_release(rhs); return *this;} \ - }; -#else -#define __TBB_DECL_ATOMIC_ALT(T,U) \ - template<> struct atomic: internal::atomic_impl_with_arithmetic { \ - T operator=( U rhs ) {return store_with_release(T(rhs));} \ - atomic& operator=( const atomic& rhs ) {store_with_release(rhs); return *this;} \ - }; -#endif -__TBB_DECL_ATOMIC_ALT(unsigned,size_t) -__TBB_DECL_ATOMIC_ALT(int,ptrdiff_t) -#else -__TBB_DECL_ATOMIC(unsigned) -__TBB_DECL_ATOMIC(int) -#endif /* _MSC_VER && !_WIN64 */ - -__TBB_DECL_ATOMIC(unsigned short) -__TBB_DECL_ATOMIC(short) -__TBB_DECL_ATOMIC(char) -__TBB_DECL_ATOMIC(signed char) -__TBB_DECL_ATOMIC(unsigned char) - -#if !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED) -__TBB_DECL_ATOMIC(wchar_t) -#endif /* _MSC_VER||!defined(_NATIVE_WCHAR_T_DEFINED) */ - -//! Specialization for atomic with arithmetic and operator->. -template struct atomic: internal::atomic_impl_with_arithmetic { -#if __TBB_ATOMIC_CTORS - atomic() = default ; - constexpr atomic(T* arg): internal::atomic_impl_with_arithmetic(arg) {} -#endif - T* operator=( T* rhs ) { - // "this" required here in strict ISO C++ because store_with_release is a dependent name - return this->store_with_release(rhs); - } - atomic& operator=( const atomic& rhs ) { - this->store_with_release(rhs); return *this; - } - T* operator->() const { - return (*this); - } -}; - -//! Specialization for atomic, for sake of not allowing arithmetic or operator->. -template<> struct atomic: internal::atomic_impl { -#if __TBB_ATOMIC_CTORS - atomic() = default ; - constexpr atomic(void* arg): internal::atomic_impl(arg) {} -#endif - void* operator=( void* rhs ) { - // "this" required here in strict ISO C++ because store_with_release is a dependent name - return this->store_with_release(rhs); - } - atomic& operator=( const atomic& rhs ) { - this->store_with_release(rhs); return *this; - } -}; - -// Helpers to workaround ugly syntax of calling template member function of a -// template class with template argument dependent on template parameters. - -template -T load ( const atomic& a ) { return a.template load(); } - -template -void store ( atomic& a, T value ) { a.template store(value); } - -namespace interface6{ -//! Make an atomic for use in an initialization (list), as an alternative to zero-initialization or normal assignment. 
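A usage sketch for these atomic<T> specializations and the make_atomic helper declared just below (this is the pre-C++11 tbb::atomic, not std::atomic); `counter` and `demo` are hypothetical names.

#include "tbb/atomic.h"

tbb::atomic<int> counter = tbb::make_atomic(0);   // initialization helper

int demo() {
    counter.fetch_and_add(5);                     // returns the old value
    counter.compare_and_swap(10, 5);              // CAS: new = 10, comparand = 5
    ++counter;                                    // fetch_and_add(1) + 1
    return counter.load<tbb::acquire>();          // explicit memory semantics
}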
-template -atomic make_atomic(T t) { - atomic a; - store(a,t); - return a; -} -} -using interface6::make_atomic; - -namespace internal { -template -void swap(atomic & lhs, atomic & rhs){ - T tmp = load(lhs); - store(lhs,load(rhs)); - store(rhs,tmp); -} - -// only to aid in the gradual conversion of ordinary variables to proper atomics -template -inline atomic& as_atomic( T& t ) { - return (atomic&)t; -} -} // namespace tbb::internal - -} // namespace tbb - -#if _MSC_VER && !__INTEL_COMPILER - #pragma warning (pop) -#endif // warnings are restored - -#endif /* __TBB_atomic_H */ diff --git a/lib/3rdParty/tbb/include/tbb/blocked_range.h b/lib/3rdParty/tbb/include/tbb/blocked_range.h deleted file mode 100644 index 9f24cd2b..00000000 --- a/lib/3rdParty/tbb/include/tbb/blocked_range.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_blocked_range_H -#define __TBB_blocked_range_H - -#include "tbb_stddef.h" - -namespace tbb { - -/** \page range_req Requirements on range concept - Class \c R implementing the concept of range must define: - - \code R::R( const R& ); \endcode Copy constructor - - \code R::~R(); \endcode Destructor - - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges - - \code bool R::empty() const; \endcode True if range is empty - - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. -**/ - -//! A range over which to iterate. -/** @ingroup algorithms */ -template -class blocked_range { -public: - //! Type of a value - /** Called a const_iterator for sake of algorithms that need to treat a blocked_range - as an STL container. */ - typedef Value const_iterator; - - //! Type for size of a range - typedef std::size_t size_type; - - //! Construct range with default-constructed values for begin and end. - /** Requires that Value have a default constructor. */ - blocked_range() : my_end(), my_begin() {} - - //! Construct range over half-open interval [begin,end), with the given grainsize. - blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : - my_end(end_), my_begin(begin_), my_grainsize(grainsize_) - { - __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); - } - - //! Beginning of range. - const_iterator begin() const {return my_begin;} - - //! One past last value in range. - const_iterator end() const {return my_end;} - - //! 
Size of the range - /** Unspecified if end() - friend class blocked_range2d; - - template - friend class blocked_range3d; -}; - -} // namespace tbb - -#endif /* __TBB_blocked_range_H */ diff --git a/lib/3rdParty/tbb/include/tbb/blocked_range2d.h b/lib/3rdParty/tbb/include/tbb/blocked_range2d.h deleted file mode 100644 index f1b9f35d..00000000 --- a/lib/3rdParty/tbb/include/tbb/blocked_range2d.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_blocked_range2d_H -#define __TBB_blocked_range2d_H - -#include "tbb_stddef.h" -#include "blocked_range.h" - -namespace tbb { - -//! A 2-dimensional range that models the Range concept. -/** @ingroup algorithms */ -template -class blocked_range2d { -public: - //! Type for size of an iteration range - typedef blocked_range row_range_type; - typedef blocked_range col_range_type; - -private: - row_range_type my_rows; - col_range_type my_cols; - -public: - - blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, - ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : - my_rows(row_begin,row_end,row_grainsize), - my_cols(col_begin,col_end,col_grainsize) - { - } - - blocked_range2d( RowValue row_begin, RowValue row_end, - ColValue col_begin, ColValue col_end ) : - my_rows(row_begin,row_end), - my_cols(col_begin,col_end) - { - } - - //! True if range is empty - bool empty() const { - // Yes, it is a logical OR here, not AND. - return my_rows.empty() || my_cols.empty(); - } - - //! True if range is divisible into two pieces. - bool is_divisible() const { - return my_rows.is_divisible() || my_cols.is_divisible(); - } - - blocked_range2d( blocked_range2d& r, split ) : - my_rows(r.my_rows), - my_cols(r.my_cols) - { - split split_obj; - do_split(r, split_obj); - } - -#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES - //! Static field to support proportional split - static const bool is_splittable_in_proportion = true; - - blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, proportion); - } -#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */ - - template - void do_split( blocked_range2d& r, Split& split_obj ) - { - if( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); - } - } - - //! The rows of the iteration space - const row_range_type& rows() const {return my_rows;} - - //! 
The columns of the iteration space - const col_range_type& cols() const {return my_cols;} -}; - -} // namespace tbb - -#endif /* __TBB_blocked_range2d_H */ diff --git a/lib/3rdParty/tbb/include/tbb/blocked_range3d.h b/lib/3rdParty/tbb/include/tbb/blocked_range3d.h deleted file mode 100644 index c62565ee..00000000 --- a/lib/3rdParty/tbb/include/tbb/blocked_range3d.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_blocked_range3d_H -#define __TBB_blocked_range3d_H - -#include "tbb_stddef.h" -#include "blocked_range.h" - -namespace tbb { - -//! A 3-dimensional range that models the Range concept. -/** @ingroup algorithms */ -template -class blocked_range3d { -public: - //! Type for size of an iteration range - typedef blocked_range page_range_type; - typedef blocked_range row_range_type; - typedef blocked_range col_range_type; - -private: - page_range_type my_pages; - row_range_type my_rows; - col_range_type my_cols; - -public: - - blocked_range3d( PageValue page_begin, PageValue page_end, - RowValue row_begin, RowValue row_end, - ColValue col_begin, ColValue col_end ) : - my_pages(page_begin,page_end), - my_rows(row_begin,row_end), - my_cols(col_begin,col_end) - { - } - - blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, - RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, - ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : - my_pages(page_begin,page_end,page_grainsize), - my_rows(row_begin,row_end,row_grainsize), - my_cols(col_begin,col_end,col_grainsize) - { - } - - //! True if range is empty - bool empty() const { - // Yes, it is a logical OR here, not AND. - return my_pages.empty() || my_rows.empty() || my_cols.empty(); - } - - //! True if range is divisible into two pieces. - bool is_divisible() const { - return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); - } - - blocked_range3d( blocked_range3d& r, split ) : - my_pages(r.my_pages), - my_rows(r.my_rows), - my_cols(r.my_cols) - { - split split_obj; - do_split(r, split_obj); - } - -#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES - //! 
Static field to support proportional split - static const bool is_splittable_in_proportion = true; - - blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : - my_pages(r.my_pages), - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, proportion); - } -#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */ - - template - void do_split( blocked_range3d& r, Split& split_obj) - { - if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { - if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); - } - } else { - if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); - } - } - } - - //! The pages of the iteration space - const page_range_type& pages() const {return my_pages;} - - //! The rows of the iteration space - const row_range_type& rows() const {return my_rows;} - - //! The columns of the iteration space - const col_range_type& cols() const {return my_cols;} - -}; - -} // namespace tbb - -#endif /* __TBB_blocked_range3d_H */ diff --git a/lib/3rdParty/tbb/include/tbb/cache_aligned_allocator.h b/lib/3rdParty/tbb/include/tbb/cache_aligned_allocator.h deleted file mode 100644 index d435e785..00000000 --- a/lib/3rdParty/tbb/include/tbb/cache_aligned_allocator.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_cache_aligned_allocator_H -#define __TBB_cache_aligned_allocator_H - -#include -#include "tbb_stddef.h" -#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC - #include // std::forward -#endif - -namespace tbb { - -//! @cond INTERNAL -namespace internal { - //! Cache/sector line size. - /** @ingroup memory_allocation */ - size_t __TBB_EXPORTED_FUNC NFS_GetLineSize(); - - //! Allocate memory on cache/sector line boundary. - /** @ingroup memory_allocation */ - void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint ); - - //! Free memory allocated by NFS_Allocate. - /** Freeing a NULL pointer is allowed, but has no effect. - @ingroup memory_allocation */ - void __TBB_EXPORTED_FUNC NFS_Free( void* ); -} -//! @endcond - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Workaround for erroneous "unreferenced parameter" warning in method destroy. - #pragma warning (push) - #pragma warning (disable: 4100) -#endif - -//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 -/** The members are ordered the same way they are in section 20.4.1 - of the ISO C++ standard. 
- @ingroup memory_allocation */ -template -class cache_aligned_allocator { -public: - typedef typename internal::allocator_type::value_type value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - template struct rebind { - typedef cache_aligned_allocator other; - }; - - cache_aligned_allocator() throw() {} - cache_aligned_allocator( const cache_aligned_allocator& ) throw() {} - template cache_aligned_allocator(const cache_aligned_allocator&) throw() {} - - pointer address(reference x) const {return &x;} - const_pointer address(const_reference x) const {return &x;} - - //! Allocate space for n objects, starting on a cache/sector line. - pointer allocate( size_type n, const void* hint=0 ) { - // The "hint" argument is always ignored in NFS_Allocate thus const_cast shouldn't hurt - return pointer(internal::NFS_Allocate( n, sizeof(value_type), const_cast(hint) )); - } - - //! Free block of memory that starts on a cache line - void deallocate( pointer p, size_type ) { - internal::NFS_Free(p); - } - - //! Largest value for which method allocate might succeed. - size_type max_size() const throw() { - return (~size_t(0)-internal::NFS_MaxLineSize)/sizeof(value_type); - } - - //! Copy-construct value at location pointed to by p. -#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC - template - void construct(U *p, Args&&... args) - { ::new((void *)p) U(std::forward(args)...); } -#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC -#if __TBB_CPP11_RVALUE_REF_PRESENT - void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));} -#endif - void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);} -#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC - - //! Destroy value at location pointed to by p. - void destroy( pointer p ) {p->~value_type();} -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4100 is back - -//! Analogous to std::allocator, as defined in ISO C++ Standard, Section 20.4.1 -/** @ingroup memory_allocation */ -template<> -class cache_aligned_allocator { -public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; - template struct rebind { - typedef cache_aligned_allocator other; - }; -}; - -template -inline bool operator==( const cache_aligned_allocator&, const cache_aligned_allocator& ) {return true;} - -template -inline bool operator!=( const cache_aligned_allocator&, const cache_aligned_allocator& ) {return false;} - -} // namespace tbb - -#endif /* __TBB_cache_aligned_allocator_H */ diff --git a/lib/3rdParty/tbb/include/tbb/combinable.h b/lib/3rdParty/tbb/include/tbb/combinable.h deleted file mode 100644 index 0063dbb4..00000000 --- a/lib/3rdParty/tbb/include/tbb/combinable.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
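A sketch for the cache_aligned_allocator removed above: it drops into any standard container so the allocated block starts on its own cache/sector line, trading a little memory for freedom from false sharing with neighboring allocations. `aligned_vec` is a hypothetical alias.

#include <vector>
#include "tbb/cache_aligned_allocator.h"

typedef std::vector<double, tbb::cache_aligned_allocator<double> > aligned_vec;

aligned_vec make_buffer() {
    return aligned_vec(1024, 0.0);   // 1024 doubles on cache-aligned storage
}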
- - - - -*/ - -#ifndef __TBB_combinable_H -#define __TBB_combinable_H - -#include "enumerable_thread_specific.h" -#include "cache_aligned_allocator.h" - -namespace tbb { -/** \name combinable - **/ -//@{ -//! Thread-local storage with optional reduction -/** @ingroup containers */ - template - class combinable { - - private: - typedef typename tbb::cache_aligned_allocator my_alloc; - typedef typename tbb::enumerable_thread_specific my_ets_type; - my_ets_type my_ets; - - public: - - combinable() { } - - template - explicit combinable( finit _finit) : my_ets(_finit) { } - - //! destructor - ~combinable() { } - - combinable( const combinable& other) : my_ets(other.my_ets) { } - -#if __TBB_ETS_USE_CPP11 - combinable( combinable&& other) : my_ets( std::move(other.my_ets)) { } -#endif - - combinable & operator=( const combinable & other) { - my_ets = other.my_ets; - return *this; - } - -#if __TBB_ETS_USE_CPP11 - combinable & operator=( combinable && other) { - my_ets=std::move(other.my_ets); - return *this; - } -#endif - - void clear() { my_ets.clear(); } - - T& local() { return my_ets.local(); } - - T& local(bool & exists) { return my_ets.local(exists); } - - // combine_func_t has signature T(T,T) or T(const T&, const T&) - template - T combine(combine_func_t f_combine) { return my_ets.combine(f_combine); } - - // combine_func_t has signature void(T) or void(const T&) - template - void combine_each(combine_func_t f_combine) { my_ets.combine_each(f_combine); } - - }; -} // namespace tbb -#endif /* __TBB_combinable_H */ diff --git a/lib/3rdParty/tbb/include/tbb/compat/condition_variable b/lib/3rdParty/tbb/include/tbb/compat/condition_variable deleted file mode 100644 index 43edfc03..00000000 --- a/lib/3rdParty/tbb/include/tbb/compat/condition_variable +++ /dev/null @@ -1,476 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_condition_variable_H -#define __TBB_condition_variable_H - -#if _WIN32||_WIN64 -#include "../machine/windows_api.h" - -namespace tbb { -namespace interface5 { -namespace internal { -struct condition_variable_using_event -{ - //! Event for blocking waiting threads. - HANDLE event; - //! Protects invariants involving n_waiters, release_count, and epoch. - CRITICAL_SECTION mutex; - //! Number of threads waiting on this condition variable - int n_waiters; - //! Number of threads remaining that should no longer wait on this condition variable. - int release_count; - //! To keep threads from waking up prematurely with earlier signals. 
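// ---------------------------------------------------------------------------
// [Editorial aside] The classic use of the combinable deleted above:
// per-thread partial sums merged once at the end. A sketch; the array and
// its length are illustrative parameters.
#include "tbb/combinable.h"
#include "tbb/parallel_for.h"

int parallel_sum(const int* a, int n) {
    tbb::combinable<int> partial([] { return 0; });  // per-thread initial value
    tbb::parallel_for(0, n, [&](int i) { partial.local() += a[i]; });
    return partial.combine([](int x, int y) { return x + y; });  // T(T,T) form
}
// ---------------------------------------------------------------------------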
- unsigned epoch; -}; -}}} // namespace tbb::interface5::internal - -#ifndef CONDITION_VARIABLE_INIT -typedef void* CONDITION_VARIABLE; -typedef CONDITION_VARIABLE* PCONDITION_VARIABLE; -#endif - -#else /* if not _WIN32||_WIN64 */ -#include // some systems need it for ETIMEDOUT -#include -#if __linux__ -#include -#else /* generic Unix */ -#include -#endif -#endif /* _WIN32||_WIN64 */ - -#include "../tbb_stddef.h" -#include "../mutex.h" -#include "../tbb_thread.h" -#include "../tbb_exception.h" -#include "../tbb_profiling.h" - -namespace tbb { - -namespace interface5 { - -// C++0x standard working draft 30.4.3 -// Lock tag types -struct defer_lock_t { }; //! do not acquire ownership of the mutex -struct try_to_lock_t { }; //! try to acquire ownership of the mutex without blocking -struct adopt_lock_t { }; //! assume the calling thread has already -const defer_lock_t defer_lock = {}; -const try_to_lock_t try_to_lock = {}; -const adopt_lock_t adopt_lock = {}; - -// C++0x standard working draft 30.4.3.1 -//! lock_guard -template -class lock_guard : tbb::internal::no_copy { -public: - //! mutex type - typedef M mutex_type; - - //! Constructor - /** precondition: If mutex_type is not a recursive mutex, the calling thread - does not own the mutex m. */ - explicit lock_guard(mutex_type& m) : pm(m) {m.lock();} - - //! Adopt_lock constructor - /** precondition: the calling thread owns the mutex m. */ - lock_guard(mutex_type& m, adopt_lock_t) : pm(m) {} - - //! Destructor - ~lock_guard() { pm.unlock(); } -private: - mutex_type& pm; -}; - -// C++0x standard working draft 30.4.3.2 -//! unique_lock -template -class unique_lock : tbb::internal::no_copy { - friend class condition_variable; -public: - typedef M mutex_type; - - // 30.4.3.2.1 construct/copy/destroy - // NB: Without constructors that take an r-value reference to a unique_lock, the following constructor is of little use. - //! Constructor - /** postcondition: pm==0 && owns==false */ - unique_lock() : pm(NULL), owns(false) {} - - //! Constructor - /** precondition: if mutex_type is not a recursive mutex, the calling thread - does not own the mutex m. If the precondition is not met, a deadlock occurs. - postcondition: pm==&m and owns==true */ - explicit unique_lock(mutex_type& m) : pm(&m) {m.lock(); owns=true;} - - //! Defer_lock constructor - /** postcondition: pm==&m and owns==false */ - unique_lock(mutex_type& m, defer_lock_t) : pm(&m), owns(false) {} - - //! Try_to_lock constructor - /** precondition: if mutex_type is not a recursive mutex, the calling thread - does not own the mutex m. If the precondition is not met, a deadlock occurs. - postcondition: pm==&m and owns==res where res is the value returned by - the call to m.try_lock(). */ - unique_lock(mutex_type& m, try_to_lock_t) : pm(&m) {owns = m.try_lock();} - - //! Adopt_lock constructor - /** precondition: the calling thread owns the mutex. If it does not, mutex->unlock() would fail. - postcondition: pm==&m and owns==true */ - unique_lock(mutex_type& m, adopt_lock_t) : pm(&m), owns(true) {} - - //! Timed unique_lock acquisition. - /** To avoid requiring support for namespace chrono, this method deviates from the working draft in that - it uses tbb::tick_count::interval_t to specify the time duration. */ - unique_lock(mutex_type& m, const tick_count::interval_t &i) : pm(&m) {owns = try_lock_for( i );} - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! 
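// ---------------------------------------------------------------------------
// [Editorial aside] The lock tag types above at work: defer_lock constructs a
// unique_lock that owns nothing until lock() is called. A sketch against
// tbb::mutex, the mutex type this compat header is written for.
#include "tbb/mutex.h"
#include "tbb/compat/condition_variable"  // defines interface5::unique_lock

void deferred_locking(tbb::mutex& m) {
    tbb::interface5::unique_lock<tbb::mutex> lk(m, tbb::interface5::defer_lock);
    // lk.owns_lock() == false here; acquire only when actually needed:
    lk.lock();
}   // destructor unlocks, because lk now owns the mutex
// ---------------------------------------------------------------------------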
Move constructor - /** postconditions: pm == src_p.pm and owns == src_p.owns (where src_p is the state of src just prior to this - construction), src.pm == 0 and src.owns == false. */ - unique_lock(unique_lock && src): pm(NULL), owns(false) {this->swap(src);} - - //! Move assignment - /** effects: If owns calls pm->unlock(). - Postconditions: pm == src_p.pm and owns == src_p.owns (where src_p is the state of src just prior to this - assignment), src.pm == 0 and src.owns == false. */ - unique_lock& operator=(unique_lock && src) { - if (owns) - this->unlock(); - pm = NULL; - this->swap(src); - return *this; - } -#endif // __TBB_CPP11_RVALUE_REF_PRESENT - - //! Destructor - ~unique_lock() { if( owns ) pm->unlock(); } - - // 30.4.3.2.2 locking - //! Lock the mutex and own it. - void lock() { - if( pm ) { - if( !owns ) { - pm->lock(); - owns = true; - } else - throw_exception_v4( tbb::internal::eid_possible_deadlock ); - } else - throw_exception_v4( tbb::internal::eid_operation_not_permitted ); - __TBB_ASSERT( owns, NULL ); - } - - //! Try to lock the mutex. - /** If successful, note that this lock owns it. Otherwise, set it false. */ - bool try_lock() { - if( pm ) { - if( !owns ) - owns = pm->try_lock(); - else - throw_exception_v4( tbb::internal::eid_possible_deadlock ); - } else - throw_exception_v4( tbb::internal::eid_operation_not_permitted ); - return owns; - } - - //! Try to lock the mutex. - bool try_lock_for( const tick_count::interval_t &i ); - - //! Unlock the mutex - /** And note that this lock no longer owns it. */ - void unlock() { - if( owns ) { - pm->unlock(); - owns = false; - } else - throw_exception_v4( tbb::internal::eid_operation_not_permitted ); - __TBB_ASSERT( !owns, NULL ); - } - - // 30.4.3.2.3 modifiers - //! Swap the two unique locks - void swap(unique_lock& u) { - mutex_type* t_pm = u.pm; u.pm = pm; pm = t_pm; - bool t_owns = u.owns; u.owns = owns; owns = t_owns; - } - - //! Release control over the mutex. - mutex_type* release() { - mutex_type* o_pm = pm; - pm = NULL; - owns = false; - return o_pm; - } - - // 30.4.3.2.4 observers - //! Does this lock own the mutex? - bool owns_lock() const { return owns; } - - // TODO: Un-comment 'explicit' when the last non-C++0x compiler support is dropped - //! Does this lock own the mutex? - /*explicit*/ operator bool() const { return owns; } - - //! Return the mutex that this lock currently has. - mutex_type* mutex() const { return pm; } - -private: - mutex_type* pm; - bool owns; -}; - -template -bool unique_lock::try_lock_for( const tick_count::interval_t &i) -{ - const int unique_lock_tick = 100; /* microseconds; 0.1 milliseconds */ - // the smallest wait-time is 0.1 milliseconds. - bool res = pm->try_lock(); - int duration_in_micro; - if( !res && (duration_in_micro=int(i.seconds()*1e6))>unique_lock_tick ) { - tick_count::interval_t i_100( double(unique_lock_tick)/1e6 /* seconds */); // 100 microseconds = 0.1*10E-3 - do { - this_tbb_thread::sleep(i_100); // sleep for 100 micro seconds - duration_in_micro -= unique_lock_tick; - res = pm->try_lock(); - } while( !res && duration_in_micro>unique_lock_tick ); - } - return (owns=res); -} - -//! 
Swap the two unique locks that have the mutexes of same type -template -void swap(unique_lock& x, unique_lock& y) { x.swap( y ); } - -namespace internal { - -#if _WIN32||_WIN64 -union condvar_impl_t { - condition_variable_using_event cv_event; - CONDITION_VARIABLE cv_native; -}; -void __TBB_EXPORTED_FUNC internal_initialize_condition_variable( condvar_impl_t& cv ); -void __TBB_EXPORTED_FUNC internal_destroy_condition_variable( condvar_impl_t& cv ); -void __TBB_EXPORTED_FUNC internal_condition_variable_notify_one( condvar_impl_t& cv ); -void __TBB_EXPORTED_FUNC internal_condition_variable_notify_all( condvar_impl_t& cv ); -bool __TBB_EXPORTED_FUNC internal_condition_variable_wait( condvar_impl_t& cv, mutex* mtx, const tick_count::interval_t* i = NULL ); - -#else /* if !(_WIN32||_WIN64), i.e., POSIX threads */ -typedef pthread_cond_t condvar_impl_t; -#endif - -} // namespace internal - -//! cv_status -/** C++0x standard working draft 30.5 */ -enum cv_status { no_timeout, timeout }; - -//! condition variable -/** C++0x standard working draft 30.5.1 - @ingroup synchronization */ -class condition_variable : tbb::internal::no_copy { -public: - //! Constructor - condition_variable() { -#if _WIN32||_WIN64 - internal_initialize_condition_variable( my_cv ); -#else - pthread_cond_init( &my_cv, NULL ); -#endif - } - - //! Destructor - ~condition_variable() { - //precondition: There shall be no thread blocked on *this. -#if _WIN32||_WIN64 - internal_destroy_condition_variable( my_cv ); -#else - pthread_cond_destroy( &my_cv ); -#endif - } - - //! Notify one thread and wake it up - void notify_one() { -#if _WIN32||_WIN64 - internal_condition_variable_notify_one( my_cv ); -#else - pthread_cond_signal( &my_cv ); -#endif - } - - //! Notify all threads - void notify_all() { -#if _WIN32||_WIN64 - internal_condition_variable_notify_all( my_cv ); -#else - pthread_cond_broadcast( &my_cv ); -#endif - } - - //! Release the mutex associated with the lock and wait on this condition variable - void wait(unique_lock& lock); - - //! Wait on this condition variable while pred is false - template - void wait(unique_lock& lock, Predicate pred) { - while( !pred() ) - wait( lock ); - } - - //! Timed version of wait() - cv_status wait_for(unique_lock& lock, const tick_count::interval_t &i ); - - //! Timed version of the predicated wait - /** The loop terminates when pred() returns true or when the time duration specified by rel_time (i) has elapsed. */ - template - bool wait_for(unique_lock& lock, const tick_count::interval_t &i, Predicate pred) - { - while( !pred() ) { - cv_status st = wait_for( lock, i ); - if( st==timeout ) - return pred(); - } - return true; - } - - // C++0x standard working draft. 
30.2.3 - typedef internal::condvar_impl_t* native_handle_type; - - native_handle_type native_handle() { return (native_handle_type) &my_cv; } - -private: - internal::condvar_impl_t my_cv; -}; - - -#if _WIN32||_WIN64 -inline void condition_variable::wait( unique_lock& lock ) -{ - __TBB_ASSERT( lock.owns, NULL ); - lock.owns = false; - if( !internal_condition_variable_wait( my_cv, lock.mutex() ) ) { - int ec = GetLastError(); - // on Windows 7, SleepConditionVariableCS() may return ERROR_TIMEOUT while the doc says it returns WAIT_TIMEOUT - __TBB_ASSERT_EX( ec!=WAIT_TIMEOUT&&ec!=ERROR_TIMEOUT, NULL ); - lock.owns = true; - throw_exception_v4( tbb::internal::eid_condvar_wait_failed ); - } - lock.owns = true; -} - -inline cv_status condition_variable::wait_for( unique_lock& lock, const tick_count::interval_t& i ) -{ - cv_status rc = no_timeout; - __TBB_ASSERT( lock.owns, NULL ); - lock.owns = false; - // condvar_wait could be SleepConditionVariableCS (or SleepConditionVariableSRW) or our own pre-vista cond_var_wait() - if( !internal_condition_variable_wait( my_cv, lock.mutex(), &i ) ) { - int ec = GetLastError(); - if( ec==WAIT_TIMEOUT || ec==ERROR_TIMEOUT ) - rc = timeout; - else { - lock.owns = true; - throw_exception_v4( tbb::internal::eid_condvar_wait_failed ); - } - } - lock.owns = true; - return rc; -} - -#else /* !(_WIN32||_WIN64) */ -inline void condition_variable::wait( unique_lock& lock ) -{ - __TBB_ASSERT( lock.owns, NULL ); - lock.owns = false; - if( pthread_cond_wait( &my_cv, lock.mutex()->native_handle() ) ) { - lock.owns = true; - throw_exception_v4( tbb::internal::eid_condvar_wait_failed ); - } - // upon successful return, the mutex has been locked and is owned by the calling thread. - lock.owns = true; -} - -inline cv_status condition_variable::wait_for( unique_lock& lock, const tick_count::interval_t& i ) -{ -#if __linux__ - struct timespec req; - double sec = i.seconds(); - clock_gettime( CLOCK_REALTIME, &req ); - req.tv_sec += static_cast(sec); - req.tv_nsec += static_cast( (sec - static_cast(sec))*1e9 ); -#else /* generic Unix */ - struct timeval tv; - struct timespec req; - double sec = i.seconds(); - int status = gettimeofday(&tv, NULL); - __TBB_ASSERT_EX( status==0, "gettimeofday failed" ); - req.tv_sec = tv.tv_sec + static_cast(sec); - req.tv_nsec = tv.tv_usec*1000 + static_cast( (sec - static_cast(sec))*1e9 ); -#endif /*(choice of OS) */ - if( req.tv_nsec>=1e9 ) { - req.tv_sec += 1; - req.tv_nsec -= static_cast(1e9); - } - __TBB_ASSERT( 0<=req.tv_nsec && req.tv_nsec<1e9, NULL ); - - int ec; - cv_status rc = no_timeout; - __TBB_ASSERT( lock.owns, NULL ); - lock.owns = false; - if( ( ec=pthread_cond_timedwait( &my_cv, lock.mutex()->native_handle(), &req ) ) ) { - if( ec==ETIMEDOUT ) - rc = timeout; - else { - __TBB_ASSERT( lock.try_lock()==false, NULL ); - lock.owns = true; - throw_exception_v4( tbb::internal::eid_condvar_wait_failed ); - } - } - lock.owns = true; - return rc; -} -#endif /* !(_WIN32||_WIN64) */ - -} // namespace interface5 - -__TBB_DEFINE_PROFILING_SET_NAME(interface5::condition_variable) - -} // namespace tbb - -#if TBB_IMPLEMENT_CPP0X - -namespace std { - -using tbb::interface5::defer_lock_t; -using tbb::interface5::try_to_lock_t; -using tbb::interface5::adopt_lock_t; -using tbb::interface5::defer_lock; -using tbb::interface5::try_to_lock; -using tbb::interface5::adopt_lock; -using tbb::interface5::lock_guard; -using tbb::interface5::unique_lock; -using tbb::interface5::swap; /* this is for void std::swap(unique_lock&,unique_lock&) */ -using 
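// ---------------------------------------------------------------------------
// [Editorial aside] End-to-end sketch of the compat condition_variable whose
// wait/wait_for implementations appear above. The flag and function names
// are illustrative.
#include "tbb/mutex.h"
#include "tbb/compat/condition_variable"

tbb::mutex m;
tbb::interface5::condition_variable cv;
bool ready = false;

void consumer() {
    tbb::interface5::unique_lock<tbb::mutex> lk(m);
    cv.wait(lk, [] { return ready; });  // the predicated wait defined above
}

void producer() {
    {
        tbb::interface5::lock_guard<tbb::mutex> lk(m);
        ready = true;
    }
    cv.notify_one();  // pthread_cond_signal / Win32 event path shown above
}
// ---------------------------------------------------------------------------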
tbb::interface5::condition_variable;
-using tbb::interface5::cv_status;
-using tbb::interface5::timeout;
-using tbb::interface5::no_timeout;
-
-} // namespace std
-
-#endif /* TBB_IMPLEMENT_CPP0X */
-
-#endif /* __TBB_condition_variable_H */
diff --git a/lib/3rdParty/tbb/include/tbb/compat/ppl.h b/lib/3rdParty/tbb/include/tbb/compat/ppl.h
deleted file mode 100644
index 840dfb22..00000000
--- a/lib/3rdParty/tbb/include/tbb/compat/ppl.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_compat_ppl_H
-#define __TBB_compat_ppl_H
-
-#include "../task_group.h"
-#include "../parallel_invoke.h"
-#include "../parallel_for_each.h"
-#include "../parallel_for.h"
-#include "../tbb_exception.h"
-#include "../critical_section.h"
-#include "../reader_writer_lock.h"
-#include "../combinable.h"
-
-namespace Concurrency {
-
-#if __TBB_TASK_GROUP_CONTEXT
-    using tbb::task_handle;
-    using tbb::task_group_status;
-    using tbb::task_group;
-    using tbb::structured_task_group;
-    using tbb::invalid_multiple_scheduling;
-    using tbb::missing_wait;
-    using tbb::make_task;
-
-    using tbb::not_complete;
-    using tbb::complete;
-    using tbb::canceled;
-
-    using tbb::is_current_task_group_canceling;
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-    using tbb::parallel_invoke;
-    using tbb::strict_ppl::parallel_for;
-    using tbb::parallel_for_each;
-    using tbb::critical_section;
-    using tbb::reader_writer_lock;
-    using tbb::combinable;
-
-    using tbb::improper_lock;
-
-} // namespace Concurrency
-
-#endif /* __TBB_compat_ppl_H */
diff --git a/lib/3rdParty/tbb/include/tbb/compat/thread b/lib/3rdParty/tbb/include/tbb/compat/thread
deleted file mode 100644
index 0edd9289..00000000
--- a/lib/3rdParty/tbb/include/tbb/compat/thread
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
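// ---------------------------------------------------------------------------
// [Editorial aside] What the ppl.h shim deleted above provided: PPL spellings
// mapped onto TBB. A sketch; the two worker functions are illustrative.
#include "tbb/compat/ppl.h"

void load_images();
void load_models();

void startup() {
    // Resolves to tbb::parallel_invoke through the using-declarations above.
    Concurrency::parallel_invoke(load_images, load_models);
}
// ---------------------------------------------------------------------------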
-
-
-
-*/
-
-#ifndef __TBB_thread_H
-#define __TBB_thread_H
-
-#include "../tbb_config.h"
-
-#if TBB_IMPLEMENT_CPP0X
-
-#include "../tbb_thread.h"
-
-namespace std {
-
-typedef tbb::tbb_thread thread;
-
-namespace this_thread {
-    using tbb::this_tbb_thread::get_id;
-    using tbb::this_tbb_thread::yield;
-
-    inline void sleep_for(const tbb::tick_count::interval_t& rel_time) {
-        tbb::internal::thread_sleep_v3( rel_time );
-    }
-}
-
-} // namespace std
-
-#else /* TBB_IMPLEMENT_CPP0X */
-
-#define __TBB_COMPAT_THREAD_RECURSION_PROTECTOR 1
-#include <thread>
-#undef __TBB_COMPAT_THREAD_RECURSION_PROTECTOR
-
-#endif /* TBB_IMPLEMENT_CPP0X */
-
-#else /* __TBB_thread_H */
-
-#if __TBB_COMPAT_THREAD_RECURSION_PROTECTOR
-#error The tbb/compat/thread header attempts to include itself. \
-    Please make sure that {TBBROOT}/include/tbb/compat is NOT in include paths.
-#endif
-
-#endif /* __TBB_thread_H */
diff --git a/lib/3rdParty/tbb/include/tbb/compat/tuple b/lib/3rdParty/tbb/include/tbb/compat/tuple
deleted file mode 100644
index 5767c49e..00000000
--- a/lib/3rdParty/tbb/include/tbb/compat/tuple
+++ /dev/null
@@ -1,488 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_tuple_H
-#define __TBB_tuple_H
-
-#include <utility>
-#include "../tbb_stddef.h"
-
-// build preprocessor variables for varying number of arguments
-// Need the leading comma so the empty __TBB_T_PACK will not cause a syntax error.
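// ---------------------------------------------------------------------------
// [Editorial aside] The compat/thread shim deleted above gave pre-C++11
// builds a std::thread spelling backed by tbb_thread. A sketch that assumes
// a toolchain where TBB_IMPLEMENT_CPP0X evaluates to 1.
#include "tbb/compat/thread"
#include "tbb/tick_count.h"

void worker() {
    // Note the deviation kept by the shim: sleep_for takes a tick_count
    // interval rather than a std::chrono duration.
    std::this_thread::sleep_for(tbb::tick_count::interval_t(0.5));
}

void spawn_and_join() {
    std::thread t(worker);  // really tbb::tbb_thread, per the typedef above
    t.join();
}
// ---------------------------------------------------------------------------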
-#if __TBB_VARIADIC_MAX <= 5 -#define __TBB_T_PACK -#define __TBB_U_PACK -#define __TBB_TYPENAME_T_PACK -#define __TBB_TYPENAME_U_PACK -#define __TBB_NULL_TYPE_PACK -#define __TBB_REF_T_PARAM_PACK -#define __TBB_CONST_REF_T_PARAM_PACK -#define __TBB_T_PARAM_LIST_PACK -#define __TBB_CONST_NULL_REF_PACK -// -#elif __TBB_VARIADIC_MAX == 6 -#define __TBB_T_PACK ,__T5 -#define __TBB_U_PACK ,__U5 -#define __TBB_TYPENAME_T_PACK , typename __T5 -#define __TBB_TYPENAME_U_PACK , typename __U5 -#define __TBB_NULL_TYPE_PACK , null_type -#define __TBB_REF_T_PARAM_PACK ,__T5& t5 -#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5 -#define __TBB_T_PARAM_LIST_PACK ,t5 -#define __TBB_CONST_NULL_REF_PACK , const null_type& -// -#elif __TBB_VARIADIC_MAX == 7 -#define __TBB_T_PACK ,__T5, __T6 -#define __TBB_U_PACK ,__U5, __U6 -#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6 -#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6 -#define __TBB_NULL_TYPE_PACK , null_type, null_type -#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6 -#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5, const __T6& t6 -#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 -#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type& -// -#elif __TBB_VARIADIC_MAX == 8 -#define __TBB_T_PACK ,__T5, __T6, __T7 -#define __TBB_U_PACK ,__U5, __U6, __U7 -#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6, typename __T7 -#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6, typename __U7 -#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type -#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7 -#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7 -#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 -#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type& -// -#elif __TBB_VARIADIC_MAX == 9 -#define __TBB_T_PACK ,__T5, __T6, __T7, __T8 -#define __TBB_U_PACK ,__U5, __U6, __U7, __U8 -#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8 -#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8 -#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type -#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8 -#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8 -#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8 -#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type& -// -#elif __TBB_VARIADIC_MAX >= 10 -#define __TBB_T_PACK ,__T5, __T6, __T7, __T8, __T9 -#define __TBB_U_PACK ,__U5, __U6, __U7, __U8, __U9 -#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8, typename __T9 -#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8, typename __U9 -#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type, null_type -#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8, __T9& t9 -#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8, const __T9& t9 -#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8 ,t9 -#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&, const null_type& -#endif - - - -namespace tbb { -namespace interface5 { - -namespace internal { -struct null_type { }; -} -using internal::null_type; - -// tuple forward 
declaration -template = 6 -, typename __T5=null_type -#if __TBB_VARIADIC_MAX >= 7 -, typename __T6=null_type -#if __TBB_VARIADIC_MAX >= 8 -, typename __T7=null_type -#if __TBB_VARIADIC_MAX >= 9 -, typename __T8=null_type -#if __TBB_VARIADIC_MAX >= 10 -, typename __T9=null_type -#endif -#endif -#endif -#endif -#endif -> -class tuple; - -namespace internal { - -// const null_type temp -inline const null_type cnull() { return null_type(); } - -// cons forward declaration -template struct cons; - -// type of a component of the cons -template -struct component { - typedef typename __T::tail_type next; - typedef typename component<__N-1,next>::type type; -}; - -template -struct component<0,__T> { - typedef typename __T::head_type type; -}; - -template<> -struct component<0,null_type> { - typedef null_type type; -}; - -// const version of component - -template -struct component<__N, const __T> -{ - typedef typename __T::tail_type next; - typedef const typename component<__N-1,next>::type type; -}; - -template -struct component<0, const __T> -{ - typedef const typename __T::head_type type; -}; - - -// helper class for getting components of cons -template< int __N> -struct get_helper { -template -inline static typename component<__N, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) { - return get_helper<__N-1>::get(ti.tail); -} -template -inline static typename component<__N, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) { - return get_helper<__N-1>::get(ti.tail); -} -}; - -template<> -struct get_helper<0> { -template -inline static typename component<0, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) { - return ti.head; -} -template -inline static typename component<0, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) { - return ti.head; -} -}; - -// traits adaptor -template -struct tuple_traits { - typedef cons <__T0, typename tuple_traits<__T1, __T2, __T3, __T4 __TBB_T_PACK , null_type>::U > U; -}; - -template -struct tuple_traits<__T0, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > { - typedef cons<__T0, null_type> U; -}; - -template<> -struct tuple_traits { - typedef null_type U; -}; - - -// core cons defs -template -struct cons{ - - typedef __HT head_type; - typedef __TT tail_type; - - head_type head; - tail_type tail; - - static const int length = 1 + tail_type::length; - - // default constructors - explicit cons() : head(), tail() { } - - // non-default constructors - cons(head_type& h, const tail_type& t) : head(h), tail(t) { } - - template - cons(const __T0& t0, const __T1& t1, const __T2& t2, const __T3& t3, const __T4& t4 __TBB_CONST_REF_T_PARAM_PACK) : - head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK, cnull()) { } - - template - cons(__T0& t0, __T1& t1, __T2& t2, __T3& t3, __T4& t4 __TBB_REF_T_PARAM_PACK) : - head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK , cnull()) { } - - template - cons(const cons<__HT1,__TT1>& other) : head(other.head), tail(other.tail) { } - - cons& operator=(const cons& other) { head = other.head; tail = other.tail; return *this; } - - friend bool operator==(const cons& me, const cons& other) { - return me.head == other.head && me.tail == other.tail; - } - friend bool operator<(const cons& me, const cons& other) { - return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail); - } - friend bool operator>(const cons& me, const cons& other) { return other=(const cons& me, const cons& other) { return !(meother); } - - template - friend bool operator==(const cons<__HT,__TT>& me, const 
cons<__HT1,__TT1>& other) { - return me.head == other.head && me.tail == other.tail; - } - - template - friend bool operator<(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { - return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail); - } - - template - friend bool operator>(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return other - friend bool operator!=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me==other); } - - template - friend bool operator>=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me - friend bool operator<=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me>other); } - - -}; // cons - - -template -struct cons<__HT,null_type> { - - typedef __HT head_type; - typedef null_type tail_type; - - head_type head; - - static const int length = 1; - - // default constructor - cons() : head() { /*std::cout << "default constructor 1\n";*/ } - - cons(const null_type&, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head() { /*std::cout << "default constructor 2\n";*/ } - - // non-default constructor - template - cons(__T1& t1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t1) { /*std::cout << "non-default a1, t1== " << t1 << "\n";*/} - - cons(head_type& h, const null_type& = null_type() ) : head(h) { } - cons(const head_type& t0, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t0) { } - - // converting constructor - template - cons(__HT1 h1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(h1) { } - - // copy constructor - template - cons( const cons<__HT1, null_type>& other) : head(other.head) { } - - // assignment operator - cons& operator=(const cons& other) { head = other.head; return *this; } - - friend bool operator==(const cons& me, const cons& other) { return me.head == other.head; } - friend bool operator<(const cons& me, const cons& other) { return me.head < other.head; } - friend bool operator>(const cons& me, const cons& other) { return otherother); } - friend bool operator>=(const cons& me, const cons& other) {return !(me - friend bool operator==(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { - return me.head == other.head; - } - - template - friend bool operator<(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { - return me.head < other.head; - } - - template - friend bool operator>(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return other - friend bool operator!=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me==other); } - - template - friend bool operator<=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me>other); } - - template - friend bool operator>=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me -struct cons { typedef null_type tail_type; static const int length = 0; }; - -// wrapper for default constructor -template -inline const __T wrap_dcons(__T*) { return __T(); } - -} // namespace internal - -// tuple definition -template -class tuple : public internal::tuple_traits<__T0, __T1, __T2, __T3, __T4 __TBB_T_PACK >::U { - // friends - template friend class tuple_size; - template friend struct tuple_element; - - // stl components - typedef 
tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > value_type; - typedef value_type *pointer; - typedef const value_type *const_pointer; - typedef value_type &reference; - typedef const value_type &const_reference; - typedef size_t size_type; - - typedef typename internal::tuple_traits<__T0,__T1,__T2,__T3, __T4 __TBB_T_PACK >::U my_cons; - -public: - tuple(const __T0& t0=internal::wrap_dcons((__T0*)NULL) - ,const __T1& t1=internal::wrap_dcons((__T1*)NULL) - ,const __T2& t2=internal::wrap_dcons((__T2*)NULL) - ,const __T3& t3=internal::wrap_dcons((__T3*)NULL) - ,const __T4& t4=internal::wrap_dcons((__T4*)NULL) -#if __TBB_VARIADIC_MAX >= 6 - ,const __T5& t5=internal::wrap_dcons((__T5*)NULL) -#if __TBB_VARIADIC_MAX >= 7 - ,const __T6& t6=internal::wrap_dcons((__T6*)NULL) -#if __TBB_VARIADIC_MAX >= 8 - ,const __T7& t7=internal::wrap_dcons((__T7*)NULL) -#if __TBB_VARIADIC_MAX >= 9 - ,const __T8& t8=internal::wrap_dcons((__T8*)NULL) -#if __TBB_VARIADIC_MAX >= 10 - ,const __T9& t9=internal::wrap_dcons((__T9*)NULL) -#endif -#endif -#endif -#endif -#endif - ) : - my_cons(t0,t1,t2,t3,t4 __TBB_T_PARAM_LIST_PACK) { } - - template - struct internal_tuple_element { - typedef typename internal::component<__N,my_cons>::type type; - }; - - template - typename internal_tuple_element<__N>::type& get() { return internal::get_helper<__N>::get(*this); } - - template - typename internal_tuple_element<__N>::type const& get() const { return internal::get_helper<__N>::get(*this); } - - template - tuple& operator=(const internal::cons<__U1,__U2>& other) { - my_cons::operator=(other); - return *this; - } - - template - tuple& operator=(const std::pair<__U1,__U2>& other) { - // __TBB_ASSERT(tuple_size::value == 2, "Invalid size for pair to tuple assignment"); - this->head = other.first; - this->tail.head = other.second; - return *this; - } - - friend bool operator==(const tuple& me, const tuple& other) {return static_cast(me)==(other);} - friend bool operator<(const tuple& me, const tuple& other) {return static_cast(me)<(other);} - friend bool operator>(const tuple& me, const tuple& other) {return static_cast(me)>(other);} - friend bool operator!=(const tuple& me, const tuple& other) {return static_cast(me)!=(other);} - friend bool operator>=(const tuple& me, const tuple& other) {return static_cast(me)>=(other);} - friend bool operator<=(const tuple& me, const tuple& other) {return static_cast(me)<=(other);} - -}; // tuple - -// empty tuple -template<> -class tuple : public null_type { -}; - -// helper classes - -template < typename __T> -class tuple_size { -public: - static const size_t value = 1 + tuple_size::value; -}; - -template <> -class tuple_size > { -public: - static const size_t value = 0; -}; - -template <> -class tuple_size { -public: - static const size_t value = 0; -}; - -template -struct tuple_element { - typedef typename internal::component<__N, typename __T::my_cons>::type type; -}; - -template -inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type& - get(tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); } - -template -inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type const& - get(const tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); } - -} // interface5 -} // tbb - -#if !__TBB_CPP11_TUPLE_PRESENT -namespace tbb { - namespace flow { - using tbb::interface5::tuple; - using tbb::interface5::tuple_size; - using 
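// ---------------------------------------------------------------------------
// [Editorial aside] The tuple emulation deleted above in its pre-C++11 role
// (flow-graph code saw it as tbb::flow::tuple). A minimal sketch:
#include "tbb/compat/tuple"

void tuple_demo() {
    tbb::interface5::tuple<int, double> t(1, 2.5);  // trailing slots: null_type
    int first = tbb::interface5::get<0>(t);         // routed via get_helper
    double second = tbb::interface5::get<1>(t);
    (void)first; (void)second;
}
// ---------------------------------------------------------------------------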
tbb::interface5::tuple_element; - using tbb::interface5::get; - } -} -#endif - -#undef __TBB_T_PACK -#undef __TBB_U_PACK -#undef __TBB_TYPENAME_T_PACK -#undef __TBB_TYPENAME_U_PACK -#undef __TBB_NULL_TYPE_PACK -#undef __TBB_REF_T_PARAM_PACK -#undef __TBB_CONST_REF_T_PARAM_PACK -#undef __TBB_T_PARAM_LIST_PACK -#undef __TBB_CONST_NULL_REF_PACK - -#endif /* __TBB_tuple_H */ diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_hash_map.h b/lib/3rdParty/tbb/include/tbb/concurrent_hash_map.h deleted file mode 100644 index 09b2765b..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_hash_map.h +++ /dev/null @@ -1,1510 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_concurrent_hash_map_H -#define __TBB_concurrent_hash_map_H - -#include "tbb_stddef.h" - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include -#include // Need std::pair -#include // Need std::memset -#include __TBB_STD_SWAP_HEADER - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -#include "cache_aligned_allocator.h" -#include "tbb_allocator.h" -#include "spin_rw_mutex.h" -#include "atomic.h" -#include "tbb_exception.h" -#include "tbb_profiling.h" -#include "internal/_tbb_hash_compare_impl.h" -#if __TBB_INITIALIZER_LISTS_PRESENT -#include -#endif -#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS -#include -#endif -#if __TBB_STATISTICS -#include -#endif - -namespace tbb { - -namespace interface5 { - - template, typename A = tbb_allocator > > - class concurrent_hash_map; - - //! @cond INTERNAL - namespace internal { - using namespace tbb::internal; - - - //! Type of a hash code. - typedef size_t hashcode_t; - //! Node base type - struct hash_map_node_base : tbb::internal::no_copy { - //! Mutex type - typedef spin_rw_mutex mutex_t; - //! Scoped lock type for mutex - typedef mutex_t::scoped_lock scoped_t; - //! Next node in chain - hash_map_node_base *next; - mutex_t mutex; - }; - //! Incompleteness flag value - static hash_map_node_base *const rehash_req = reinterpret_cast(size_t(3)); - //! Rehashed empty bucket flag - static hash_map_node_base *const empty_rehashed = reinterpret_cast(size_t(0)); - //! base class of concurrent_hash_map - class hash_map_base { - public: - //! Size type - typedef size_t size_type; - //! Type of a hash code. - typedef size_t hashcode_t; - //! Segment index type - typedef size_t segment_index_t; - //! Node base type - typedef hash_map_node_base node_base; - //! Bucket type - struct bucket : tbb::internal::no_copy { - //! Mutex type for buckets - typedef spin_rw_mutex mutex_t; - //! Scoped lock type for mutex - typedef mutex_t::scoped_lock scoped_t; - mutex_t mutex; - node_base *node_list; - }; - //! Count of segments in the first block - static size_type const embedded_block = 1; - //! 
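// ---------------------------------------------------------------------------
// [Editorial aside] The per-bucket lock type the concurrent_hash_map below
// builds on, shown in isolation. A sketch; the guarded variable is
// illustrative.
#include "tbb/spin_rw_mutex.h"

tbb::spin_rw_mutex rw;
int shared_value = 0;

int read_value() {
    tbb::spin_rw_mutex::scoped_lock guard(rw, /*write=*/false);  // shared
    return shared_value;
}

void write_value(int v) {
    tbb::spin_rw_mutex::scoped_lock guard(rw, /*write=*/true);   // exclusive
    shared_value = v;
}
// ---------------------------------------------------------------------------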
Count of segments in the first block - static size_type const embedded_buckets = 1< my_mask; - //! Segment pointers table. Also prevents false sharing between my_mask and my_size - segments_table_t my_table; - //! Size of container in stored items - atomic my_size; // It must be in separate cache line from my_mask due to performance effects - //! Zero segment - bucket my_embedded_segment[embedded_buckets]; -#if __TBB_STATISTICS - atomic my_info_resizes; // concurrent ones - mutable atomic my_info_restarts; // race collisions - atomic my_info_rehashes; // invocations of rehash_bucket -#endif - //! Constructor - hash_map_base() { - std::memset( this, 0, pointers_per_table*sizeof(segment_ptr_t) // 32*4=128 or 64*8=512 - + sizeof(my_size) + sizeof(my_mask) // 4+4 or 8+8 - + embedded_buckets*sizeof(bucket) ); // n*8 or n*16 - for( size_type i = 0; i < embedded_block; i++ ) // fill the table - my_table[i] = my_embedded_segment + segment_base(i); - my_mask = embedded_buckets - 1; - __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); -#if __TBB_STATISTICS - my_info_resizes = 0; // concurrent ones - my_info_restarts = 0; // race collisions - my_info_rehashes = 0; // invocations of rehash_bucket -#endif - } - - //! @return segment index of given index in the array - static segment_index_t segment_index_of( size_type index ) { - return segment_index_t( __TBB_Log2( index|1 ) ); - } - - //! @return the first array index of given segment - static segment_index_t segment_base( segment_index_t k ) { - return (segment_index_t(1)<(ptr) > uintptr_t(63); - } - - //! Initialize buckets - static void init_buckets( segment_ptr_t ptr, size_type sz, bool is_initial ) { - if( is_initial ) std::memset(ptr, 0, sz*sizeof(bucket) ); - else for(size_type i = 0; i < sz; i++, ptr++) { - *reinterpret_cast(&ptr->mutex) = 0; - ptr->node_list = rehash_req; - } - } - - //! Add node @arg n to bucket @arg b - static void add_to_bucket( bucket *b, node_base *n ) { - __TBB_ASSERT(b->node_list != rehash_req, NULL); - n->next = b->node_list; - b->node_list = n; // its under lock and flag is set - } - - //! Exception safety helper - struct enable_segment_failsafe : tbb::internal::no_copy { - segment_ptr_t *my_segment_ptr; - enable_segment_failsafe(segments_table_t &table, segment_index_t k) : my_segment_ptr(&table[k]) {} - ~enable_segment_failsafe() { - if( my_segment_ptr ) *my_segment_ptr = 0; // indicate no allocation in progress - } - }; - - //! 
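// ---------------------------------------------------------------------------
// [Editorial aside] The segment arithmetic above, restated self-contained:
// buckets live in lazily allocated segments, segment k (k >= 1) covers
// indices [2^k, 2^(k+1)), and segment 0 holds the embedded buckets.
#include <cassert>
#include <cstddef>

static std::size_t log2_floor(std::size_t x) {
    std::size_t r = 0;
    while (x >>= 1) ++r;
    return r;
}
static std::size_t segment_index_of(std::size_t index) {
    return log2_floor(index | 1);                   // as in the code above
}
static std::size_t segment_base(std::size_t k) {
    return (std::size_t(1) << k) & ~std::size_t(1);
}

int main() {
    assert(segment_index_of(0) == 0 && segment_index_of(1) == 0);
    assert(segment_index_of(2) == 1 && segment_base(1) == 2);
    assert(segment_index_of(5) == 2 && segment_base(2) == 4);
    return 0;
}
// ---------------------------------------------------------------------------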
Enable segment - void enable_segment( segment_index_t k, bool is_initial = false ) { - __TBB_ASSERT( k, "Zero segment must be embedded" ); - enable_segment_failsafe watchdog( my_table, k ); - cache_aligned_allocator alloc; - size_type sz; - __TBB_ASSERT( !is_valid(my_table[k]), "Wrong concurrent assignment"); - if( k >= first_block ) { - sz = segment_size( k ); - segment_ptr_t ptr = alloc.allocate( sz ); - init_buckets( ptr, sz, is_initial ); - itt_hide_store_word( my_table[k], ptr ); - sz <<= 1;// double it to get entire capacity of the container - } else { // the first block - __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); - sz = segment_size( first_block ); - segment_ptr_t ptr = alloc.allocate( sz - embedded_buckets ); - init_buckets( ptr, sz - embedded_buckets, is_initial ); - ptr -= segment_base(embedded_block); - for(segment_index_t i = embedded_block; i < first_block; i++) // calc the offsets - itt_hide_store_word( my_table[i], ptr + segment_base(i) ); - } - itt_store_word_with_release( my_mask, sz-1 ); - watchdog.my_segment_ptr = 0; - } - - //! Get bucket by (masked) hashcode - bucket *get_bucket( hashcode_t h ) const throw() { // TODO: add throw() everywhere? - segment_index_t s = segment_index_of( h ); - h -= segment_base(s); - segment_ptr_t seg = my_table[s]; - __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); - return &seg[h]; - } - - // internal serial rehashing helper - void mark_rehashed_levels( hashcode_t h ) throw () { - segment_index_t s = segment_index_of( h ); - while( segment_ptr_t seg = my_table[++s] ) - if( seg[h].node_list == rehash_req ) { - seg[h].node_list = empty_rehashed; - mark_rehashed_levels( h + ((hashcode_t)1<node_list) != rehash_req ) - { -#if __TBB_STATISTICS - my_info_restarts++; // race collisions -#endif - return true; - } - } - return false; - } - - //! Insert a node and check for load factor. @return segment index to enable. - segment_index_t insert_new_node( bucket *b, node_base *n, hashcode_t mask ) { - size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted - add_to_bucket( b, n ); - // check load factor - if( sz >= mask ) { // TODO: add custom load_factor - segment_index_t new_seg = __TBB_Log2( mask+1 ); //optimized segment_index_of - __TBB_ASSERT( is_valid(my_table[new_seg-1]), "new allocations must not publish new mask until segment has allocated"); - static const segment_ptr_t is_allocating = (segment_ptr_t)2; - if( !itt_hide_load_word(my_table[new_seg]) - && as_atomic(my_table[new_seg]).compare_and_swap(is_allocating, NULL) == NULL ) - return new_seg; // The value must be processed - } - return 0; - } - - //! Prepare enough segments for number of buckets - void reserve(size_type buckets) { - if( !buckets-- ) return; - bool is_initial = !my_size; - for( size_type m = my_mask; buckets > m; m = my_mask ) - enable_segment( segment_index_of( m+1 ), is_initial ); - } - //! Swap hash_map_bases - void internal_swap(hash_map_base &table) { - using std::swap; - swap(this->my_mask, table.my_mask); - swap(this->my_size, table.my_size); - for(size_type i = 0; i < embedded_buckets; i++) - swap(this->my_embedded_segment[i].node_list, table.my_embedded_segment[i].node_list); - for(size_type i = embedded_block; i < pointers_per_table; i++) - swap(this->my_table[i], table.my_table[i]); - } - }; - - template - class hash_map_range; - - //! Meets requirements of a forward iterator for STL */ - /** Value is either the T or const T type of the container. 
- @ingroup containers */ - template - class hash_map_iterator - : public std::iterator - { - typedef Container map_type; - typedef typename Container::node node; - typedef hash_map_base::node_base node_base; - typedef hash_map_base::bucket bucket; - - template - friend bool operator==( const hash_map_iterator& i, const hash_map_iterator& j ); - - template - friend bool operator!=( const hash_map_iterator& i, const hash_map_iterator& j ); - - template - friend ptrdiff_t operator-( const hash_map_iterator& i, const hash_map_iterator& j ); - - template - friend class hash_map_iterator; - - template - friend class hash_map_range; - - void advance_to_next_bucket() { // TODO?: refactor to iterator_base class - size_t k = my_index+1; - __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); - while( k <= my_map->my_mask ) { - // Following test uses 2's-complement wizardry - if( k&(k-2) ) // not the beginning of a segment - ++my_bucket; - else my_bucket = my_map->get_bucket( k ); - my_node = static_cast( my_bucket->node_list ); - if( hash_map_base::is_valid(my_node) ) { - my_index = k; return; - } - ++k; - } - my_bucket = 0; my_node = 0; my_index = k; // the end - } -#if !defined(_MSC_VER) || defined(__INTEL_COMPILER) - template - friend class interface5::concurrent_hash_map; -#else - public: // workaround -#endif - //! concurrent_hash_map over which we are iterating. - const Container *my_map; - - //! Index in hash table for current item - size_t my_index; - - //! Pointer to bucket - const bucket *my_bucket; - - //! Pointer to node that has current item - node *my_node; - - hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ); - - public: - //! Construct undefined iterator - hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} - hash_map_iterator( const hash_map_iterator &other ) : - my_map(other.my_map), - my_index(other.my_index), - my_bucket(other.my_bucket), - my_node(other.my_node) - {} - Value& operator*() const { - __TBB_ASSERT( hash_map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); - return my_node->item; - } - Value* operator->() const {return &operator*();} - hash_map_iterator& operator++(); - - //! Post increment - hash_map_iterator operator++(int) { - hash_map_iterator old(*this); - operator++(); - return old; - } - }; - - template - hash_map_iterator::hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ) : - my_map(&map), - my_index(index), - my_bucket(b), - my_node( static_cast(n) ) - { - if( b && !hash_map_base::is_valid(n) ) - advance_to_next_bucket(); - } - - template - hash_map_iterator& hash_map_iterator::operator++() { - my_node = static_cast( my_node->next ); - if( !my_node ) advance_to_next_bucket(); - return *this; - } - - template - bool operator==( const hash_map_iterator& i, const hash_map_iterator& j ) { - return i.my_node == j.my_node && i.my_map == j.my_map; - } - - template - bool operator!=( const hash_map_iterator& i, const hash_map_iterator& j ) { - return i.my_node != j.my_node || i.my_map != j.my_map; - } - - //! Range class used with concurrent_hash_map - /** @ingroup containers */ - template - class hash_map_range { - typedef typename Iterator::map_type map_type; - Iterator my_begin; - Iterator my_end; - mutable Iterator my_midpoint; - size_t my_grainsize; - //! Set my_midpoint to point approximately half way between my_begin and my_end. - void set_midpoint() const; - template friend class hash_map_range; - public: - //! 
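// ---------------------------------------------------------------------------
// [Editorial aside] The iterator/range machinery above is what lets a whole
// map be visited in parallel. A sketch; safe only while no other thread is
// inserting or erasing concurrently.
#include <string>
#include "tbb/concurrent_hash_map.h"
#include "tbb/parallel_for.h"

typedef tbb::concurrent_hash_map<std::string, int> map_t;

void increment_all(map_t& m) {
    tbb::parallel_for(m.range(), [](const map_t::range_type& r) {
        for (map_t::range_type::iterator it = r.begin(); it != r.end(); ++it)
            it->second += 1;  // subranges are disjoint, each element seen once
    });
}
// ---------------------------------------------------------------------------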
Type for size of a range - typedef std::size_t size_type; - typedef typename Iterator::value_type value_type; - typedef typename Iterator::reference reference; - typedef typename Iterator::difference_type difference_type; - typedef Iterator iterator; - - //! True if range is empty. - bool empty() const {return my_begin==my_end;} - - //! True if range can be partitioned into two subranges. - bool is_divisible() const { - return my_midpoint!=my_end; - } - //! Split range. - hash_map_range( hash_map_range& r, split ) : - my_end(r.my_end), - my_grainsize(r.my_grainsize) - { - r.my_end = my_begin = r.my_midpoint; - __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); - __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); - set_midpoint(); - r.set_midpoint(); - } - //! type conversion - template - hash_map_range( hash_map_range& r) : - my_begin(r.my_begin), - my_end(r.my_end), - my_midpoint(r.my_midpoint), - my_grainsize(r.my_grainsize) - {} - //! Init range with container and grainsize specified - hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : - my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list ) ), - my_end( Iterator( map, map.my_mask + 1, 0, 0 ) ), - my_grainsize( grainsize_ ) - { - __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); - set_midpoint(); - } - const Iterator& begin() const {return my_begin;} - const Iterator& end() const {return my_end;} - //! The grain size for this range. - size_type grainsize() const {return my_grainsize;} - }; - - template - void hash_map_range::set_midpoint() const { - // Split by groups of nodes - size_t m = my_end.my_index-my_begin.my_index; - if( m > my_grainsize ) { - m = my_begin.my_index + m/2u; - hash_map_base::bucket *b = my_begin.my_map->get_bucket(m); - my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list); - } else { - my_midpoint = my_end; - } - __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, - "my_begin is after my_midpoint" ); - __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, - "my_midpoint is after my_end" ); - __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, - "[my_begin, my_midpoint) range should not be empty" ); - } - - } // internal -//! @endcond - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress "conditional expression is constant" warning. - #pragma warning( push ) - #pragma warning( disable: 4127 ) -#endif - -//! Unordered map from Key to T. -/** concurrent_hash_map is associative container with concurrent access. - -@par Compatibility - The class meets all Container Requirements from C++ Standard (See ISO/IEC 14882:2003(E), clause 23.1). - -@par Exception Safety - - Hash function is not permitted to throw an exception. User-defined types Key and T are forbidden from throwing an exception in destructors. - - If exception happens during insert() operations, it has no effect (unless exception raised by HashCompare::hash() function during grow_segment). - - If exception happens during operator=() operation, the container can have a part of source items, and methods size() and empty() can return wrong results. - -@par Changes since TBB 2.1 - - Replaced internal algorithm and data structure. Patent is pending. 
- - Added buckets number argument for constructor - -@par Changes since TBB 2.0 - - Fixed exception-safety - - Added template argument for allocator - - Added allocator argument in constructors - - Added constructor from a range of iterators - - Added several new overloaded insert() methods - - Added get_allocator() - - Added swap() - - Added count() - - Added overloaded erase(accessor &) and erase(const_accessor&) - - Added equal_range() [const] - - Added [const_]pointer, [const_]reference, and allocator_type types - - Added global functions: operator==(), operator!=(), and swap() - - @ingroup containers */ -template -class concurrent_hash_map : protected internal::hash_map_base { - template - friend class internal::hash_map_iterator; - - template - friend class internal::hash_map_range; - -public: - typedef Key key_type; - typedef T mapped_type; - typedef std::pair value_type; - typedef hash_map_base::size_type size_type; - typedef ptrdiff_t difference_type; - typedef value_type *pointer; - typedef const value_type *const_pointer; - typedef value_type &reference; - typedef const value_type &const_reference; - typedef internal::hash_map_iterator iterator; - typedef internal::hash_map_iterator const_iterator; - typedef internal::hash_map_range range_type; - typedef internal::hash_map_range const_range_type; - typedef Allocator allocator_type; - -protected: - friend class const_accessor; - struct node; - typedef typename Allocator::template rebind::other node_allocator_type; - node_allocator_type my_allocator; - HashCompare my_hash_compare; - - struct node : public node_base { - value_type item; - node( const Key &key ) : item(key, T()) {} - node( const Key &key, const T &t ) : item(key, t) {} -#if __TBB_CPP11_RVALUE_REF_PRESENT - node( const Key &key, T &&t ) : item(key, std::move(t)) {} - node( value_type&& i ) : item(std::move(i)){} -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - node( Args&&... args ) : item(std::forward(args)...) {} -#if __TBB_COPY_FROM_NON_CONST_REF_BROKEN - node( value_type& i ) : item(const_cast(i)) {} -#endif //__TBB_COPY_FROM_NON_CONST_REF_BROKEN -#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - node( const value_type& i ) : item(i) {} - - // exception-safe allocation, see C++ Standard 2003, clause 5.3.4p17 - void *operator new( size_t /*size*/, node_allocator_type &a ) { - void *ptr = a.allocate(1); - if(!ptr) - tbb::internal::throw_exception(tbb::internal::eid_bad_alloc); - return ptr; - } - // match placement-new form above to be called if exception thrown in constructor - void operator delete( void *ptr, node_allocator_type &a ) { a.deallocate(static_cast(ptr),1); } - }; - - void delete_node( node_base *n ) { - my_allocator.destroy( static_cast(n) ); - my_allocator.deallocate( static_cast(n), 1); - } - - static node* allocate_node_copy_construct(node_allocator_type& allocator, const Key &key, const T * t){ - return new( allocator ) node(key, *t); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - static node* allocate_node_move_construct(node_allocator_type& allocator, const Key &key, const T * t){ - return new( allocator ) node(key, std::move(*const_cast(t))); - } -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - static node* allocate_node_emplace_construct(node_allocator_type& allocator, Args&&... 
args){ - return new( allocator ) node(std::forward(args)...); - } -#endif //#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif - - static node* allocate_node_default_construct(node_allocator_type& allocator, const Key &key, const T * ){ - return new( allocator ) node(key); - } - - static node* do_not_allocate_node(node_allocator_type& , const Key &, const T * ){ - __TBB_ASSERT(false,"this dummy function should not be called"); - return NULL; - } - - node *search_bucket( const key_type &key, bucket *b ) const { - node *n = static_cast( b->node_list ); - while( is_valid(n) && !my_hash_compare.equal(key, n->item.first) ) - n = static_cast( n->next ); - __TBB_ASSERT(n != internal::rehash_req, "Search can be executed only for rehashed bucket"); - return n; - } - - //! bucket accessor is to find, rehash, acquire a lock, and access a bucket - class bucket_accessor : public bucket::scoped_t { - bucket *my_b; - public: - bucket_accessor( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { acquire( base, h, writer ); } - //! find a bucket by masked hashcode, optionally rehash, and acquire the lock - inline void acquire( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { - my_b = base->get_bucket( h ); - // TODO: actually, notification is unnecessary here, just hiding double-check - if( itt_load_word_with_acquire(my_b->node_list) == internal::rehash_req - && try_acquire( my_b->mutex, /*write=*/true ) ) - { - if( my_b->node_list == internal::rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing - } - else bucket::scoped_t::acquire( my_b->mutex, writer ); - __TBB_ASSERT( my_b->node_list != internal::rehash_req, NULL); - } - //! check whether bucket is locked for write - bool is_writer() { return bucket::scoped_t::is_writer; } - //! get bucket pointer - bucket *operator() () { return my_b; } - }; - - // TODO refactor to hash_base - void rehash_bucket( bucket *b_new, const hashcode_t h ) { - __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); - __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); - __TBB_store_with_release(b_new->node_list, internal::empty_rehashed); // mark rehashed - hashcode_t mask = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit -#if __TBB_STATISTICS - my_info_rehashes++; // invocations of rehash_bucket -#endif - - bucket_accessor b_old( this, h & mask ); - - mask = (mask<<1) | 1; // get full mask for new bucket - __TBB_ASSERT( (mask&(mask+1))==0 && (h & mask) == h, NULL ); - restart: - for( node_base **p = &b_old()->node_list, *n = __TBB_load_with_acquire(*p); is_valid(n); n = *p ) { - hashcode_t c = my_hash_compare.hash( static_cast(n)->item.first ); -#if TBB_USE_ASSERT - hashcode_t bmask = h & (mask>>1); - bmask = bmask==0? 1 : ( 1u<<(__TBB_Log2( bmask )+1 ) ) - 1; // minimal mask of parent bucket - __TBB_ASSERT( (c & bmask) == (h & bmask), "hash() function changed for key in table" ); -#endif - if( (c & mask) == h ) { - if( !b_old.is_writer() ) - if( !b_old.upgrade_to_writer() ) { - goto restart; // node ptr can be invalid due to concurrent erase - } - *p = n->next; // exclude from b_old - add_to_bucket( b_new, n ); - } else p = &n->next; // iterate to next item - } - } - - struct call_clear_on_leave { - concurrent_hash_map* my_ch_map; - call_clear_on_leave( concurrent_hash_map* a_ch_map ) : my_ch_map(a_ch_map) {} - void dismiss() {my_ch_map = 0;} - ~call_clear_on_leave(){ - if (my_ch_map){ - my_ch_map->clear(); - } - } - }; -public: - - class accessor; - //! 
Combines data access, locking, and garbage collection. - class const_accessor : private node::scoped_t /*which derived from no_copy*/ { - friend class concurrent_hash_map; - friend class accessor; - public: - //! Type of value - typedef const typename concurrent_hash_map::value_type value_type; - - //! True if result is empty. - bool empty() const { return !my_node; } - - //! Set to null - void release() { - if( my_node ) { - node::scoped_t::release(); - my_node = 0; - } - } - - //! Return reference to associated value in hash table. - const_reference operator*() const { - __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); - return my_node->item; - } - - //! Return pointer to associated value in hash table. - const_pointer operator->() const { - return &operator*(); - } - - //! Create empty result - const_accessor() : my_node(NULL) {} - - //! Destroy result after releasing the underlying reference. - ~const_accessor() { - my_node = NULL; // scoped lock's release() is called in its destructor - } - protected: - bool is_writer() { return node::scoped_t::is_writer; } - node *my_node; - hashcode_t my_hash; - }; - - //! Allows write access to elements and combines data access, locking, and garbage collection. - class accessor: public const_accessor { - public: - //! Type of value - typedef typename concurrent_hash_map::value_type value_type; - - //! Return reference to associated value in hash table. - reference operator*() const { - __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); - return this->my_node->item; - } - - //! Return pointer to associated value in hash table. - pointer operator->() const { - return &operator*(); - } - }; - - //! Construct empty table. - explicit concurrent_hash_map( const allocator_type &a = allocator_type() ) - : internal::hash_map_base(), my_allocator(a) - {} - - //! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. - concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) - : my_allocator(a) - { - reserve( n ); - } - - //! Copy constructor - concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a = allocator_type() ) - : internal::hash_map_base(), my_allocator(a) - { - internal_copy(table); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move constructor - concurrent_hash_map( concurrent_hash_map &&table ) - : internal::hash_map_base(), my_allocator(std::move(table.get_allocator())) - { - swap(table); - } - - //! Move constructor - concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) - : internal::hash_map_base(), my_allocator(a) - { - if (a == table.get_allocator()){ - this->swap(table); - }else{ - call_clear_on_leave scope_guard(this); - internal_copy(std::make_move_iterator(table.begin()), std::make_move_iterator(table.end())); - scope_guard.dismiss(); - } - } -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - //! Construction with copying iteration range and given allocator instance - template - concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) - : my_allocator(a) - { - reserve( std::distance(first, last) ); // TODO: load_factor? - internal_copy(first, last); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
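The accessor classes above are the core of this deleted container's locking model: a const_accessor holds a shared read lock on a single element, an accessor holds an exclusive write lock, and both release the lock in their destructor (or on an explicit release()). A minimal usage sketch against a system-installed classic TBB, which the build now relies on instead of this vendored copy; the key name "hits" is illustrative:

    #include <tbb/concurrent_hash_map.h>
    #include <cstdio>
    #include <string>

    typedef tbb::concurrent_hash_map<std::string, int> table_t;

    int main() {
        table_t table;
        {
            table_t::accessor a;          // exclusive write lock on the element
            table.insert(a, "hits");      // default-constructs the mapped int if the key is new
            a->second += 1;               // safe mutation while the lock is held
        }                                 // destructor releases the lock
        {
            table_t::const_accessor ca;   // shared read lock; other readers may hold it too
            if (table.find(ca, "hits"))
                std::printf("hits = %d\n", ca->second);
        }
        return 0;
    }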
- concurrent_hash_map( std::initializer_list il, const allocator_type &a = allocator_type() ) - : my_allocator(a) - { - reserve(il.size()); - internal_copy(il.begin(), il.end()); - } - -#endif //__TBB_INITIALIZER_LISTS_PRESENT - - //! Assignment - concurrent_hash_map& operator=( const concurrent_hash_map &table ) { - if( this!=&table ) { - clear(); - internal_copy(table); - } - return *this; - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move Assignment - concurrent_hash_map& operator=( concurrent_hash_map &&table ) { - if(this != &table){ - typedef typename tbb::internal::allocator_traits::propagate_on_container_move_assignment pocma_t; - if(pocma_t::value || this->my_allocator == table.my_allocator) { - concurrent_hash_map trash (std::move(*this)); - //TODO: swapping allocators here may be a problem, replace with single direction moving iff pocma is set - this->swap(table); - } else { - //do per element move - concurrent_hash_map moved_copy(std::move(table), this->my_allocator); - this->swap(moved_copy); - } - } - return *this; - } -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Assignment - concurrent_hash_map& operator=( std::initializer_list il ) { - clear(); - reserve(il.size()); - internal_copy(il.begin(), il.end()); - return *this; - } -#endif //__TBB_INITIALIZER_LISTS_PRESENT - - - //! Rehashes and optionally resizes the whole table. - /** Useful to optimize performance before or after concurrent operations. - Also enables using of find() and count() concurrent methods in serial context. */ - void rehash(size_type n = 0); - - //! Clear table - void clear(); - - //! Clear table and destroy it. - ~concurrent_hash_map() { clear(); } - - //------------------------------------------------------------------------ - // Parallel algorithm support - //------------------------------------------------------------------------ - range_type range( size_type grainsize=1 ) { - return range_type( *this, grainsize ); - } - const_range_type range( size_type grainsize=1 ) const { - return const_range_type( *this, grainsize ); - } - - //------------------------------------------------------------------------ - // STL support - not thread-safe methods - //------------------------------------------------------------------------ - iterator begin() { return iterator( *this, 0, my_embedded_segment, my_embedded_segment->node_list ); } - iterator end() { return iterator( *this, 0, 0, 0 ); } - const_iterator begin() const { return const_iterator( *this, 0, my_embedded_segment, my_embedded_segment->node_list ); } - const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } - std::pair equal_range( const Key& key ) { return internal_equal_range( key, end() ); } - std::pair equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } - - //! Number of items in table. - size_type size() const { return my_size; } - - //! True if size()==0. - bool empty() const { return my_size == 0; } - - //! Upper bound on size. - size_type max_size() const {return (~size_type(0))/sizeof(node);} - - //! Returns the current number of buckets - size_type bucket_count() const { return my_mask+1; } - - //! return allocator object - allocator_type get_allocator() const { return this->my_allocator; } - - //! swap two instances. 
Iterators are invalidated - void swap( concurrent_hash_map &table ); - - //------------------------------------------------------------------------ - // concurrent map operations - //------------------------------------------------------------------------ - - //! Return count of items (0 or 1) - size_type count( const Key &key ) const { - return const_cast(this)->lookup(/*insert*/false, key, NULL, NULL, /*write=*/false, &do_not_allocate_node ); - } - - //! Find item and acquire a read lock on the item. - /** Return true if item is found, false otherwise. */ - bool find( const_accessor &result, const Key &key ) const { - result.release(); - return const_cast(this)->lookup(/*insert*/false, key, NULL, &result, /*write=*/false, &do_not_allocate_node ); - } - - //! Find item and acquire a write lock on the item. - /** Return true if item is found, false otherwise. */ - bool find( accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/false, key, NULL, &result, /*write=*/true, &do_not_allocate_node ); - } - - //! Insert item (if not already present) and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/true, key, NULL, &result, /*write=*/false, &allocate_node_default_construct ); - } - - //! Insert item (if not already present) and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/true, key, NULL, &result, /*write=*/true, &allocate_node_default_construct ); - } - - //! Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, const value_type &value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); - } - - //! Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, const value_type &value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); - } - - //! Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - bool insert( const value_type &value ) { - return lookup(/*insert*/true, value.first, &value.second, NULL, /*write=*/false, &allocate_node_copy_construct ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, value_type && value ) { - return generic_move_insert(result, std::move(value)); - } - - //! Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, value_type && value ) { - return generic_move_insert(result, std::move(value)); - } - - //! Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - bool insert( value_type && value ) { - return generic_move_insert(accessor_not_used(), std::move(value)); - } - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - //! 
Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - template - bool emplace( const_accessor &result, Args&&... args ) { - return generic_emplace(result, std::forward(args)...); - } - - //! Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - template - bool emplace( accessor &result, Args&&... args ) { - return generic_emplace(result, std::forward(args)...); - } - - //! Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - template - bool emplace( Args&&... args ) { - return generic_emplace(accessor_not_used(), std::forward(args)...); - } -#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - //! Insert range [first, last) - template - void insert( I first, I last ) { - for ( ; first != last; ++first ) - insert( *first ); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Insert initializer list - void insert( std::initializer_list il ) { - insert( il.begin(), il.end() ); - } -#endif //__TBB_INITIALIZER_LISTS_PRESENT - - //! Erase item. - /** Return true if item was erased by particularly this call. */ - bool erase( const Key& key ); - - //! Erase item by const_accessor. - /** Return true if item was erased by particularly this call. */ - bool erase( const_accessor& item_accessor ) { - return exclude( item_accessor ); - } - - //! Erase item by accessor. - /** Return true if item was erased by particularly this call. */ - bool erase( accessor& item_accessor ) { - return exclude( item_accessor ); - } - -protected: - //! Insert or find item and optionally acquire a lock on the item. - bool lookup(bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(node_allocator_type& , const Key &, const T * ), node *tmp_n = 0 ) ; - - struct accessor_not_used { void release(){}}; - friend const_accessor* accessor_location( accessor_not_used const& ){ return NULL;} - friend const_accessor* accessor_location( const_accessor & a ) { return &a;} - - friend bool is_write_access_needed( accessor const& ) { return true;} - friend bool is_write_access_needed( const_accessor const& ) { return false;} - friend bool is_write_access_needed( accessor_not_used const& ) { return false;} - -#if __TBB_CPP11_RVALUE_REF_PRESENT - template - bool generic_move_insert( Accessor && result, value_type && value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); - } - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - bool generic_emplace( Accessor && result, Args &&... args ) { - result.release(); - node * node_ptr = allocate_node_emplace_construct(my_allocator, std::forward(args)...); - return lookup(/*insert*/true, node_ptr->item.first, NULL, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); - } -#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - //! delete item by accessor - bool exclude( const_accessor &item_accessor ); - - //! Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) - template - std::pair internal_equal_range( const Key& key, I end ) const; - - //! Copy "source" to *this, where *this must start out empty. 
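Per the doc comments above, insert() returns true only for the one call that actually created the element, so it doubles as a once-per-key initializer under the element's lock; erase() likewise reports whether this particular call removed the item. A minimal sketch of that round trip (tbb::parallel_for with a lambda is an assumption, requiring C++11 and linking with -ltbb):

    #include <tbb/concurrent_hash_map.h>
    #include <tbb/parallel_for.h>
    #include <cstdio>

    typedef tbb::concurrent_hash_map<int, int> table_t;

    int main() {
        table_t table;
        tbb::parallel_for(0, 1000, [&](int i) {
            table_t::accessor a;
            if (table.insert(a, i % 10))  // true only for the thread that created the element
                a->second = 0;            // once-per-key initialization
            a->second += 1;               // counted under the per-element write lock
        });
        std::printf("distinct keys: %d\n", (int)table.size());
        table.erase(3);                   // true iff this particular call removed the item
        return 0;
    }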
- void internal_copy( const concurrent_hash_map& source ); - - template - void internal_copy( I first, I last ); - - //! Fast find when no concurrent erasure is used. For internal use inside TBB only! - /** Return pointer to item with given key, or NULL if no such item exists. - Must not be called concurrently with erasure operations. */ - const_pointer internal_fast_find( const Key& key ) const { - hashcode_t h = my_hash_compare.hash( key ); - hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); - node *n; - restart: - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - bucket *b = get_bucket( h & m ); - // TODO: actually, notification is unnecessary here, just hiding double-check - if( itt_load_word_with_acquire(b->node_list) == internal::rehash_req ) - { - bucket::scoped_t lock; - if( lock.try_acquire( b->mutex, /*write=*/true ) ) { - if( b->node_list == internal::rehash_req) - const_cast(this)->rehash_bucket( b, h & m ); //recursive rehashing - } - else lock.acquire( b->mutex, /*write=*/false ); - __TBB_ASSERT(b->node_list!=internal::rehash_req,NULL); - } - n = search_bucket( key, b ); - if( n ) - return &n->item; - else if( check_mask_race( h, m ) ) - goto restart; - return 0; - } -}; - -template -bool concurrent_hash_map::lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(node_allocator_type& , const Key&, const T*), node *tmp_n ) { - __TBB_ASSERT( !result || !result->my_node, NULL ); - bool return_value; - hashcode_t const h = my_hash_compare.hash( key ); - hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); - segment_index_t grow_segment = 0; - node *n; - restart: - {//lock scope - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - return_value = false; - // get bucket - bucket_accessor b( this, h & m ); - - // find a node - n = search_bucket( key, b() ); - if( op_insert ) { - // [opt] insert a key - if( !n ) { - if( !tmp_n ) { - tmp_n = allocate_node(my_allocator, key, t); - } - if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion - // Rerun search_list, in case another thread inserted the item during the upgrade. - n = search_bucket( key, b() ); - if( is_valid(n) ) { // unfortunately, it did - b.downgrade_to_reader(); - goto exists; - } - } - if( check_mask_race(h, m) ) - goto restart; // b.release() is done in ~b(). - // insert and set flag to grow the container - grow_segment = insert_new_node( b(), n = tmp_n, m ); - tmp_n = 0; - return_value = true; - } - } else { // find or count - if( !n ) { - if( check_mask_race( h, m ) ) - goto restart; // b.release() is done in ~b(). TODO: replace by continue - return false; - } - return_value = true; - } - exists: - if( !result ) goto check_growth; - // TODO: the following seems as generic/regular operation - // acquire the item - if( !result->try_acquire( n->mutex, write ) ) { - for( tbb::internal::atomic_backoff backoff(true);; ) { - if( result->try_acquire( n->mutex, write ) ) break; - if( !backoff.bounded_pause() ) { - // the wait takes really long, restart the operation - b.release(); - __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" 
); - __TBB_Yield(); - m = (hashcode_t) itt_load_word_with_acquire( my_mask ); - goto restart; - } - } - } - }//lock scope - result->my_node = n; - result->my_hash = h; -check_growth: - // [opt] grow the container - if( grow_segment ) { -#if __TBB_STATISTICS - my_info_resizes++; // concurrent ones -#endif - enable_segment( grow_segment ); - } - if( tmp_n ) // if op_insert only - delete_node( tmp_n ); - return return_value; -} - -template -template -std::pair concurrent_hash_map::internal_equal_range( const Key& key, I end_ ) const { - hashcode_t h = my_hash_compare.hash( key ); - hashcode_t m = my_mask; - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - h &= m; - bucket *b = get_bucket( h ); - while( b->node_list == internal::rehash_req ) { - m = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit - b = get_bucket( h &= m ); - } - node *n = search_bucket( key, b ); - if( !n ) - return std::make_pair(end_, end_); - iterator lower(*this, h, b, n), upper(lower); - return std::make_pair(lower, ++upper); -} - -template -bool concurrent_hash_map::exclude( const_accessor &item_accessor ) { - __TBB_ASSERT( item_accessor.my_node, NULL ); - node_base *const n = item_accessor.my_node; - hashcode_t const h = item_accessor.my_hash; - hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); - do { - // get bucket - bucket_accessor b( this, h & m, /*writer=*/true ); - node_base **p = &b()->node_list; - while( *p && *p != n ) - p = &(*p)->next; - if( !*p ) { // someone else was first - if( check_mask_race( h, m ) ) - continue; - item_accessor.release(); - return false; - } - __TBB_ASSERT( *p == n, NULL ); - *p = n->next; // remove from container - my_size--; - break; - } while(true); - if( !item_accessor.is_writer() ) // need to get exclusive lock - item_accessor.upgrade_to_writer(); // return value means nothing here - item_accessor.release(); - delete_node( n ); // Only one thread can delete it - return true; -} - -template -bool concurrent_hash_map::erase( const Key &key ) { - node_base *n; - hashcode_t const h = my_hash_compare.hash( key ); - hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); -restart: - {//lock scope - // get bucket - bucket_accessor b( this, h & m ); - search: - node_base **p = &b()->node_list; - n = *p; - while( is_valid(n) && !my_hash_compare.equal(key, static_cast(n)->item.first ) ) { - p = &n->next; - n = *p; - } - if( !n ) { // not found, but mask could be changed - if( check_mask_race( h, m ) ) - goto restart; - return false; - } - else if( !b.is_writer() && !b.upgrade_to_writer() ) { - if( check_mask_race( h, m ) ) // contended upgrade, check mask - goto restart; - goto search; - } - *p = n->next; - my_size--; - } - { - typename node::scoped_t item_locker( n->mutex, /*write=*/true ); - } - // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! 
- delete_node( n ); // Only one thread can delete it due to write lock on the bucket - return true; -} - -template -void concurrent_hash_map::swap(concurrent_hash_map &table) { - //TODO: respect C++11 allocator_traits::propogate_on_constainer_swap - using std::swap; - swap(this->my_allocator, table.my_allocator); - swap(this->my_hash_compare, table.my_hash_compare); - internal_swap(table); -} - -template -void concurrent_hash_map::rehash(size_type sz) { - reserve( sz ); // TODO: add reduction of number of buckets as well - hashcode_t mask = my_mask; - hashcode_t b = (mask+1)>>1; // size or first index of the last segment - __TBB_ASSERT((b&(b-1))==0, NULL); // zero or power of 2 - bucket *bp = get_bucket( b ); // only the last segment should be scanned for rehashing - for(; b <= mask; b++, bp++ ) { - node_base *n = bp->node_list; - __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" ); - __TBB_ASSERT( *reinterpret_cast(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); - if( n == internal::rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one - hashcode_t h = b; bucket *b_old = bp; - do { - __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); - hashcode_t m = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit - b_old = get_bucket( h &= m ); - } while( b_old->node_list == internal::rehash_req ); - // now h - is index of the root rehashed bucket b_old - mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments - for( node_base **p = &b_old->node_list, *q = *p; is_valid(q); q = *p ) { - hashcode_t c = my_hash_compare.hash( static_cast(q)->item.first ); - if( (c & mask) != h ) { // should be rehashed - *p = q->next; // exclude from b_old - bucket *b_new = get_bucket( c & mask ); - __TBB_ASSERT( b_new->node_list != internal::rehash_req, "hash() function changed for key in table or internal error" ); - add_to_bucket( b_new, q ); - } else p = &q->next; // iterate to next item - } - } - } -#if TBB_USE_PERFORMANCE_WARNINGS - int current_size = int(my_size), buckets = int(mask)+1, empty_buckets = 0, overpopulated_buckets = 0; // usage statistics - static bool reported = false; -#endif -#if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS - for( b = 0; b <= mask; b++ ) {// only last segment should be scanned for rehashing - if( b & (b-2) ) ++bp; // not the beginning of a segment - else bp = get_bucket( b ); - node_base *n = bp->node_list; - __TBB_ASSERT( *reinterpret_cast(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); - __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed, "Broken internal structure" ); -#if TBB_USE_PERFORMANCE_WARNINGS - if( n == internal::empty_rehashed ) empty_buckets++; - else if( n->next ) overpopulated_buckets++; -#endif -#if TBB_USE_ASSERT - for( ; is_valid(n); n = n->next ) { - hashcode_t h = my_hash_compare.hash( static_cast(n)->item.first ) & mask; - __TBB_ASSERT( h == b, "hash() function changed for key in table or internal error" ); - } -#endif - } -#endif // TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS -#if TBB_USE_PERFORMANCE_WARNINGS - if( buckets > current_size) empty_buckets -= buckets - current_size; - else overpopulated_buckets -= current_size - buckets; // TODO: load_factor? 
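The rehash() implementation above finishes the container's lazy, per-bucket rehashing in one serial pass; as its doc comment notes, that is what makes later serial find()/count() calls cheap and race-free after a bulk load. A hedged sketch of that idiom (bulk_load is a hypothetical helper, not part of the deleted source):

    #include <tbb/concurrent_hash_map.h>
    #include <utility>

    typedef tbb::concurrent_hash_map<int, int> table_t;

    // Hypothetical helper: fill serially, then rehash once so subsequent
    // serial lookups never meet a rehash-required bucket.
    void bulk_load(table_t& table, int n) {
        for (int i = 0; i < n; ++i)
            table.insert(std::make_pair(i, i * i));
        table.rehash();   // finish all pending per-bucket rehashing in one pass
    }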
- if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size || 2*overpopulated_buckets > current_size ) ) { - tbb::internal::runtime_warning( - "Performance is not optimal because the hash function produces bad randomness in lower bits in %s.\nSize: %d Empties: %d Overlaps: %d", -#if __TBB_USE_OPTIONAL_RTTI - typeid(*this).name(), -#else - "concurrent_hash_map", -#endif - current_size, empty_buckets, overpopulated_buckets ); - reported = true; - } -#endif -} - -template -void concurrent_hash_map::clear() { - hashcode_t m = my_mask; - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); -#if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS -#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS - int current_size = int(my_size), buckets = int(m)+1, empty_buckets = 0, overpopulated_buckets = 0; // usage statistics - static bool reported = false; -#endif - bucket *bp = 0; - // check consistency - for( segment_index_t b = 0; b <= m; b++ ) { - if( b & (b-2) ) ++bp; // not the beginning of a segment - else bp = get_bucket( b ); - node_base *n = bp->node_list; - __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" ); - __TBB_ASSERT( *reinterpret_cast(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during clear() execution" ); -#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS - if( n == internal::empty_rehashed ) empty_buckets++; - else if( n == internal::rehash_req ) buckets--; - else if( n->next ) overpopulated_buckets++; -#endif -#if __TBB_EXTRA_DEBUG - for(; is_valid(n); n = n->next ) { - hashcode_t h = my_hash_compare.hash( static_cast(n)->item.first ); - h &= m; - __TBB_ASSERT( h == b || get_bucket(h)->node_list == internal::rehash_req, "hash() function changed for key in table or internal error" ); - } -#endif - } -#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS -#if __TBB_STATISTICS - printf( "items=%d buckets: capacity=%d rehashed=%d empty=%d overpopulated=%d" - " concurrent: resizes=%u rehashes=%u restarts=%u\n", - current_size, int(m+1), buckets, empty_buckets, overpopulated_buckets, - unsigned(my_info_resizes), unsigned(my_info_rehashes), unsigned(my_info_restarts) ); - my_info_resizes = 0; // concurrent ones - my_info_restarts = 0; // race collisions - my_info_rehashes = 0; // invocations of rehash_bucket -#endif - if( buckets > current_size) empty_buckets -= buckets - current_size; - else overpopulated_buckets -= current_size - buckets; // TODO: load_factor? - if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size || 2*overpopulated_buckets > current_size ) ) { - tbb::internal::runtime_warning( - "Performance is not optimal because the hash function produces bad randomness in lower bits in %s.\nSize: %d Empties: %d Overlaps: %d", -#if __TBB_USE_OPTIONAL_RTTI - typeid(*this).name(), -#else - "concurrent_hash_map", -#endif - current_size, empty_buckets, overpopulated_buckets ); - reported = true; - } -#endif -#endif//TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS - my_size = 0; - segment_index_t s = segment_index_of( m ); - __TBB_ASSERT( s+1 == pointers_per_table || !my_table[s+1], "wrong mask or concurrent grow" ); - cache_aligned_allocator alloc; - do { - __TBB_ASSERT( is_valid( my_table[s] ), "wrong mask or concurrent grow" ); - segment_ptr_t buckets_ptr = my_table[s]; - size_type sz = segment_size( s ? 
s : 1 ); - for( segment_index_t i = 0; i < sz; i++ ) - for( node_base *n = buckets_ptr[i].node_list; is_valid(n); n = buckets_ptr[i].node_list ) { - buckets_ptr[i].node_list = n->next; - delete_node( n ); - } - if( s >= first_block) // the first segment or the next - alloc.deallocate( buckets_ptr, sz ); - else if( s == embedded_block && embedded_block != first_block ) - alloc.deallocate( buckets_ptr, segment_size(first_block)-embedded_buckets ); - if( s >= embedded_block ) my_table[s] = 0; - } while(s-- > 0); - my_mask = embedded_buckets - 1; -} - -template -void concurrent_hash_map::internal_copy( const concurrent_hash_map& source ) { - reserve( source.my_size ); // TODO: load_factor? - hashcode_t mask = source.my_mask; - if( my_mask == mask ) { // optimized version - bucket *dst = 0, *src = 0; - bool rehash_required = false; - for( hashcode_t k = 0; k <= mask; k++ ) { - if( k & (k-2) ) ++dst,src++; // not the beginning of a segment - else { dst = get_bucket( k ); src = source.get_bucket( k ); } - __TBB_ASSERT( dst->node_list != internal::rehash_req, "Invalid bucket in destination table"); - node *n = static_cast( src->node_list ); - if( n == internal::rehash_req ) { // source is not rehashed, items are in previous buckets - rehash_required = true; - dst->node_list = internal::rehash_req; - } else for(; n; n = static_cast( n->next ) ) { - add_to_bucket( dst, new( my_allocator ) node(n->item.first, n->item.second) ); - ++my_size; // TODO: replace by non-atomic op - } - } - if( rehash_required ) rehash(); - } else internal_copy( source.begin(), source.end() ); -} - -template -template -void concurrent_hash_map::internal_copy(I first, I last) { - hashcode_t m = my_mask; - for(; first != last; ++first) { - hashcode_t h = my_hash_compare.hash( (*first).first ); - bucket *b = get_bucket( h & m ); - __TBB_ASSERT( b->node_list != internal::rehash_req, "Invalid bucket in destination table"); - node *n = new( my_allocator ) node(*first); - add_to_bucket( b, n ); - ++my_size; // TODO: replace by non-atomic op - } -} - -} // namespace interface5 - -using interface5::concurrent_hash_map; - - -template -inline bool operator==(const concurrent_hash_map &a, const concurrent_hash_map &b) { - if(a.size() != b.size()) return false; - typename concurrent_hash_map::const_iterator i(a.begin()), i_end(a.end()); - typename concurrent_hash_map::const_iterator j, j_end(b.end()); - for(; i != i_end; ++i) { - j = b.equal_range(i->first).first; - if( j == j_end || !(i->second == j->second) ) return false; - } - return true; -} - -template -inline bool operator!=(const concurrent_hash_map &a, const concurrent_hash_map &b) -{ return !(a == b); } - -template -inline void swap(concurrent_hash_map &a, concurrent_hash_map &b) -{ a.swap( b ); } - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning( pop ) -#endif // warning 4127 is back - -} // namespace tbb - -#endif /* __TBB_concurrent_hash_map_H */ diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_lru_cache.h b/lib/3rdParty/tbb/include/tbb/concurrent_lru_cache.h deleted file mode 100644 index 8aacf241..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_lru_cache.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
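This next deleted header is a preview feature, so callers must define TBB_PREVIEW_CONCURRENT_LRU_CACHE before including it; operator[] computes a missing value through the user-supplied function and hands back a reference-counted handle that keeps the item out of the eviction list while alive. A minimal sketch against a system TBB (expensive_compute and the history size of 8 are illustrative):

    #define TBB_PREVIEW_CONCURRENT_LRU_CACHE 1
    #include <tbb/concurrent_lru_cache.h>
    #include <cstdio>

    static int expensive_compute(int key) { return key * key; }   // user-supplied value function

    int main() {
        typedef tbb::concurrent_lru_cache<int, int> cache_t;
        cache_t cache(&expensive_compute, 8);   // keep at most 8 unused values in LRU history

        cache_t::handle h = cache[5];           // computed on first use, cached afterwards
        std::printf("5 -> %d\n", h.value());
        return 0;                               // handle's destructor returns the item to the LRU list
    }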
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_concurrent_lru_cache_H -#define __TBB_concurrent_lru_cache_H - -#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE - #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h -#endif - -#include -#include - -#include "tbb_stddef.h" -#include "atomic.h" -#include "internal/_aggregator_impl.h" - -namespace tbb{ -namespace interface6 { - - -template -class concurrent_lru_cache : internal::no_assign{ -private: - typedef concurrent_lru_cache self_type; - typedef value_functor_type value_function_type; - typedef std::size_t ref_counter_type; - struct map_value_type; - typedef std::map map_storage_type; - typedef std::list lru_list_type; - struct map_value_type { - value_type my_value; - ref_counter_type my_ref_counter; - typename lru_list_type::iterator my_lru_list_iterator; - bool my_is_ready; - - map_value_type (value_type const& a_value, ref_counter_type a_ref_counter, typename lru_list_type::iterator a_lru_list_iterator, bool a_is_ready) - : my_value(a_value), my_ref_counter(a_ref_counter), my_lru_list_iterator (a_lru_list_iterator), my_is_ready(a_is_ready) - {} - }; - - class handle_object; - - struct aggregator_operation; - typedef aggregator_operation aggregated_operation_type; - typedef tbb::internal::aggregating_functor aggregator_function_type; - friend class tbb::internal::aggregating_functor; - typedef tbb::internal::aggregator aggregator_type; - -private: - value_function_type my_value_function; - std::size_t const my_number_of_lru_history_items; - map_storage_type my_map_storage; - lru_list_type my_lru_list; - aggregator_type my_aggregator; - -public: - typedef handle_object handle; - -public: - concurrent_lru_cache(value_function_type f, std::size_t number_of_lru_history_items) - : my_value_function(f),my_number_of_lru_history_items(number_of_lru_history_items) - { - my_aggregator.initialize_handler(aggregator_function_type(this)); - } - - handle_object operator[](key_type k){ - retrieve_aggregator_operation op(k); - my_aggregator.execute(&op); - if (op.is_new_value_needed()){ - op.result().second.my_value = my_value_function(k); - __TBB_store_with_release(op.result().second.my_is_ready, true); - }else{ - tbb::internal::spin_wait_while_eq(op.result().second.my_is_ready,false); - } - return handle_object(*this,op.result()); - } -private: - void signal_end_of_usage(typename map_storage_type::reference value_ref){ - signal_end_of_usage_aggregator_operation op(value_ref); - my_aggregator.execute(&op); - } - -private: - struct handle_move_t:no_assign{ - concurrent_lru_cache & my_cache_ref; - typename map_storage_type::reference my_map_record_ref; - handle_move_t(concurrent_lru_cache & cache_ref, typename map_storage_type::reference value_ref):my_cache_ref(cache_ref),my_map_record_ref(value_ref) {}; - }; - class handle_object { - concurrent_lru_cache * my_cache_pointer; - typename map_storage_type::reference my_map_record_ref; - public: - handle_object(concurrent_lru_cache & cache_ref, typename map_storage_type::reference value_ref):my_cache_pointer(&cache_ref), my_map_record_ref(value_ref) {} - handle_object(handle_move_t 
m):my_cache_pointer(&m.my_cache_ref), my_map_record_ref(m.my_map_record_ref){} - operator handle_move_t(){ return move(*this);} - value_type& value(){ - __TBB_ASSERT(my_cache_pointer,"get value from moved from object?"); - return my_map_record_ref.second.my_value; - } - ~handle_object(){ - if (my_cache_pointer){ - my_cache_pointer->signal_end_of_usage(my_map_record_ref); - } - } - private: - friend handle_move_t move(handle_object& h){ - return handle_object::move(h); - } - static handle_move_t move(handle_object& h){ - __TBB_ASSERT(h.my_cache_pointer,"move from the same object twice ?"); - concurrent_lru_cache * cache_pointer = h.my_cache_pointer; - h.my_cache_pointer = NULL; - return handle_move_t(*cache_pointer,h.my_map_record_ref); - } - private: - void operator=(handle_object&); -#if __SUNPRO_CC - // Presumably due to a compiler error, private copy constructor - // breaks expressions like handle h = cache[key]; - public: -#endif - handle_object(handle_object &); - }; -private: - //TODO: looks like aggregator_operation is a perfect match for statically typed variant type - struct aggregator_operation : tbb::internal::aggregated_operation{ - enum e_op_type {op_retive, op_signal_end_of_usage}; - //TODO: try to use pointer to function apply_visitor here - //TODO: try virtual functions and measure the difference - e_op_type my_operation_type; - aggregator_operation(e_op_type operation_type): my_operation_type(operation_type) {} - void cast_and_handle(self_type& container ){ - if (my_operation_type==op_retive){ - static_cast(this)->handle(container); - }else{ - static_cast(this)->handle(container); - } - } - }; - struct retrieve_aggregator_operation : aggregator_operation, private internal::no_assign { - key_type my_key; - typename map_storage_type::pointer my_result_map_record_pointer; - bool my_is_new_value_needed; - retrieve_aggregator_operation(key_type key):aggregator_operation(aggregator_operation::op_retive),my_key(key),my_is_new_value_needed(false){} - void handle(self_type& container ){ - my_result_map_record_pointer = & container.retrieve_serial(my_key,my_is_new_value_needed); - } - typename map_storage_type::reference result(){ return * my_result_map_record_pointer; } - bool is_new_value_needed(){return my_is_new_value_needed;} - }; - struct signal_end_of_usage_aggregator_operation : aggregator_operation, private internal::no_assign { - typename map_storage_type::reference my_map_record_ref; - signal_end_of_usage_aggregator_operation(typename map_storage_type::reference map_record_ref):aggregator_operation(aggregator_operation::op_signal_end_of_usage),my_map_record_ref(map_record_ref){} - void handle(self_type& container ){ - container.signal_end_of_usage_serial(my_map_record_ref); - } - }; - -private: - void handle_operations(aggregator_operation* op_list){ - while(op_list){ - op_list->cast_and_handle(*this); - aggregator_operation* tmp = op_list; - op_list=op_list->next; - tbb::internal::itt_store_word_with_release(tmp->status, uintptr_t(1)); - } - } - -private: - typename map_storage_type::reference retrieve_serial(key_type k, bool& is_new_value_needed){ - typename map_storage_type::iterator it = my_map_storage.find(k); - if (it == my_map_storage.end()){ - it = my_map_storage.insert(it,std::make_pair(k,map_value_type(value_type(),0,my_lru_list.end(),false))); - is_new_value_needed = true; - }else { - typename lru_list_type::iterator list_it = it->second.my_lru_list_iterator; - if (list_it!=my_lru_list.end()) { - __TBB_ASSERT(!it->second.my_ref_counter,"item to be evicted 
should not have a live references"); - //item is going to be used. Therefore it is not a subject for eviction - //so - remove it from LRU history. - my_lru_list.erase(list_it); - it->second.my_lru_list_iterator= my_lru_list.end(); - } - } - ++(it->second.my_ref_counter); - return *it; - } - - void signal_end_of_usage_serial(typename map_storage_type::reference map_record_ref){ - typename map_storage_type::iterator it = my_map_storage.find(map_record_ref.first); - __TBB_ASSERT(it!=my_map_storage.end(),"cache should not return past-end iterators to outer world"); - __TBB_ASSERT(&(*it) == &map_record_ref,"dangling reference has been returned to outside world? data race ?"); - __TBB_ASSERT( my_lru_list.end()== std::find(my_lru_list.begin(),my_lru_list.end(),it), - "object in use should not be in list of unused objects "); - if (! --(it->second.my_ref_counter)){ - //it was the last reference so put it to the LRU history - if (my_lru_list.size()>=my_number_of_lru_history_items){ - //evict items in order to get a space - size_t number_of_elements_to_evict = 1 + my_lru_list.size() - my_number_of_lru_history_items; - for (size_t i=0; isecond.my_ref_counter,"item to be evicted should not have a live references"); - my_lru_list.pop_back(); - my_map_storage.erase(it_to_evict); - } - } - my_lru_list.push_front(it); - it->second.my_lru_list_iterator = my_lru_list.begin(); - } - } -}; -} // namespace interface6 - -using interface6::concurrent_lru_cache; - -} // namespace tbb -#endif //__TBB_concurrent_lru_cache_H diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_priority_queue.h b/lib/3rdParty/tbb/include/tbb/concurrent_priority_queue.h deleted file mode 100644 index 89297d85..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_priority_queue.h +++ /dev/null @@ -1,490 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_concurrent_priority_queue_H -#define __TBB_concurrent_priority_queue_H - -#include "atomic.h" -#include "cache_aligned_allocator.h" -#include "tbb_exception.h" -#include "tbb_stddef.h" -#include "tbb_profiling.h" -#include "internal/_aggregator_impl.h" -#include -#include -#include -#include __TBB_STD_SWAP_HEADER - -#if __TBB_INITIALIZER_LISTS_PRESENT - #include -#endif - -#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT - #include -#endif - -namespace tbb { -namespace interface5 { -namespace internal { -#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT - template::value> - struct use_element_copy_constructor { - typedef tbb::internal::true_type type; - }; - template - struct use_element_copy_constructor { - typedef tbb::internal::false_type type; - }; -#else - template - struct use_element_copy_constructor { - typedef tbb::internal::true_type type; - }; -#endif -} // namespace internal - -using namespace tbb::internal; - -//! Concurrent priority queue -template , typename A=cache_aligned_allocator > -class concurrent_priority_queue { - public: - //! Element type in the queue. - typedef T value_type; - - //! 
Reference type - typedef T& reference; - - //! Const reference type - typedef const T& const_reference; - - //! Integral type for representing size of the queue. - typedef size_t size_type; - - //! Difference type for iterator - typedef ptrdiff_t difference_type; - - //! Allocator type - typedef A allocator_type; - - //! Constructs a new concurrent_priority_queue with default capacity - explicit concurrent_priority_queue(const allocator_type& a = allocator_type()) : mark(0), my_size(0), data(a) - { - my_aggregator.initialize_handler(my_functor_t(this)); - } - - //! Constructs a new concurrent_priority_queue with init_sz capacity - explicit concurrent_priority_queue(size_type init_capacity, const allocator_type& a = allocator_type()) : - mark(0), my_size(0), data(a) - { - data.reserve(init_capacity); - my_aggregator.initialize_handler(my_functor_t(this)); - } - - //! [begin,end) constructor - template - concurrent_priority_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : - mark(0), data(begin, end, a) - { - my_aggregator.initialize_handler(my_functor_t(this)); - heapify(); - my_size = data.size(); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Constructor from std::initializer_list - concurrent_priority_queue(std::initializer_list init_list, const allocator_type &a = allocator_type()) : - mark(0),data(init_list.begin(), init_list.end(), a) - { - my_aggregator.initialize_handler(my_functor_t(this)); - heapify(); - my_size = data.size(); - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - - //! Copy constructor - /** This operation is unsafe if there are pending concurrent operations on the src queue. */ - explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark), - my_size(src.my_size), data(src.data.begin(), src.data.end(), src.data.get_allocator()) - { - my_aggregator.initialize_handler(my_functor_t(this)); - heapify(); - } - - //! Copy constructor with specific allocator - /** This operation is unsafe if there are pending concurrent operations on the src queue. */ - concurrent_priority_queue(const concurrent_priority_queue& src, const allocator_type& a) : mark(src.mark), - my_size(src.my_size), data(src.data.begin(), src.data.end(), a) - { - my_aggregator.initialize_handler(my_functor_t(this)); - heapify(); - } - - //! Assignment operator - /** This operation is unsafe if there are pending concurrent operations on the src queue. */ - concurrent_priority_queue& operator=(const concurrent_priority_queue& src) { - if (this != &src) { - vector_t(src.data.begin(), src.data.end(), src.data.get_allocator()).swap(data); - mark = src.mark; - my_size = src.my_size; - } - return *this; - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move constructor - /** This operation is unsafe if there are pending concurrent operations on the src queue. */ - concurrent_priority_queue(concurrent_priority_queue&& src) : mark(src.mark), - my_size(src.my_size), data(std::move(src.data)) - { - my_aggregator.initialize_handler(my_functor_t(this)); - } - - //! Move constructor with specific allocator - /** This operation is unsafe if there are pending concurrent operations on the src queue. */ - concurrent_priority_queue(concurrent_priority_queue&& src, const allocator_type& a) : mark(src.mark), - my_size(src.my_size), -#if __TBB_ALLOCATOR_TRAITS_PRESENT - data(std::move(src.data), a) -#else - // Some early version of C++11 STL vector does not have a constructor of vector(vector&& , allocator). 
- // It seems that the reason is absence of support of allocator_traits (stateful allocators). - data(a) -#endif //__TBB_ALLOCATOR_TRAITS_PRESENT - { - my_aggregator.initialize_handler(my_functor_t(this)); -#if !__TBB_ALLOCATOR_TRAITS_PRESENT - if (a != src.data.get_allocator()){ - data.reserve(src.data.size()); - data.assign(std::make_move_iterator(src.data.begin()), std::make_move_iterator(src.data.end())); - }else{ - data = std::move(src.data); - } -#endif //!__TBB_ALLOCATOR_TRAITS_PRESENT - } - - //! Move assignment operator - /** This operation is unsafe if there are pending concurrent operations on the src queue. */ - concurrent_priority_queue& operator=( concurrent_priority_queue&& src) { - if (this != &src) { - mark = src.mark; - my_size = src.my_size; -#if !__TBB_ALLOCATOR_TRAITS_PRESENT - if (data.get_allocator() != src.data.get_allocator()){ - vector_t(std::make_move_iterator(src.data.begin()), std::make_move_iterator(src.data.end()), data.get_allocator()).swap(data); - }else -#endif //!__TBB_ALLOCATOR_TRAITS_PRESENT - { - data = std::move(src.data); - } - } - return *this; - } -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - //! Assign the queue from [begin,end) range, not thread-safe - template - void assign(InputIterator begin, InputIterator end) { - vector_t(begin, end, data.get_allocator()).swap(data); - mark = 0; - my_size = data.size(); - heapify(); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Assign the queue from std::initializer_list, not thread-safe - void assign(std::initializer_list il) { this->assign(il.begin(), il.end()); } - - //! Assign from std::initializer_list, not thread-safe - concurrent_priority_queue& operator=(std::initializer_list il) { - this->assign(il.begin(), il.end()); - return *this; - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - - //! Returns true if empty, false otherwise - /** Returned value may not reflect results of pending operations. - This operation reads shared data and will trigger a race condition. */ - bool empty() const { return size()==0; } - - //! Returns the current number of elements contained in the queue - /** Returned value may not reflect results of pending operations. - This operation reads shared data and will trigger a race condition. */ - size_type size() const { return __TBB_load_with_acquire(my_size); } - - //! Pushes elem onto the queue, increasing capacity of queue if necessary - /** This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - void push(const_reference elem) { -#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT - __TBB_STATIC_ASSERT( std::is_copy_constructible::value, "The type is not copy constructible. Copying push operation is impossible." ); -#endif - cpq_operation op_data(elem, PUSH_OP); - my_aggregator.execute(&op_data); - if (op_data.status == FAILED) // exception thrown - throw_exception(eid_bad_alloc); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Pushes elem onto the queue, increasing capacity of queue if necessary - /** This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - void push(value_type &&elem) { - cpq_operation op_data(elem, PUSH_RVALUE_OP); - my_aggregator.execute(&op_data); - if (op_data.status == FAILED) // exception thrown - throw_exception(eid_bad_alloc); - } - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - //! 
Constructs a new element using args as the arguments for its construction and pushes it onto the queue */ - /** This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - template - void emplace(Args&&... args) { - push(value_type(std::forward(args)...)); - } -#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */ -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! Gets a reference to and removes highest priority element - /** If a highest priority element was found, sets elem and returns true, - otherwise returns false. - This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - bool try_pop(reference elem) { - cpq_operation op_data(POP_OP); - op_data.elem = &elem; - my_aggregator.execute(&op_data); - return op_data.status==SUCCEEDED; - } - - //! Clear the queue; not thread-safe - /** This operation is unsafe if there are pending concurrent operations on the queue. - Resets size, effectively emptying queue; does not free space. - May not clear elements added in pending operations. */ - void clear() { - data.clear(); - mark = 0; - my_size = 0; - } - - //! Swap this queue with another; not thread-safe - /** This operation is unsafe if there are pending concurrent operations on the queue. */ - void swap(concurrent_priority_queue& q) { - using std::swap; - data.swap(q.data); - swap(mark, q.mark); - swap(my_size, q.my_size); - } - - //! Return allocator object - allocator_type get_allocator() const { return data.get_allocator(); } - - private: - enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; - enum operation_status { WAIT=0, SUCCEEDED, FAILED }; - - class cpq_operation : public aggregated_operation { - public: - operation_type type; - union { - value_type *elem; - size_type sz; - }; - cpq_operation(const_reference e, operation_type t) : - type(t), elem(const_cast(&e)) {} - cpq_operation(operation_type t) : type(t) {} - }; - - class my_functor_t { - concurrent_priority_queue *cpq; - public: - my_functor_t() {} - my_functor_t(concurrent_priority_queue *cpq_) : cpq(cpq_) {} - void operator()(cpq_operation* op_list) { - cpq->handle_operations(op_list); - } - }; - - typedef tbb::internal::aggregator< my_functor_t, cpq_operation > aggregator_t; - aggregator_t my_aggregator; - //! Padding added to avoid false sharing - char padding1[NFS_MaxLineSize - sizeof(aggregator_t)]; - //! The point at which unsorted elements begin - size_type mark; - __TBB_atomic size_type my_size; - Compare compare; - //! Padding added to avoid false sharing - char padding2[NFS_MaxLineSize - (2*sizeof(size_type)) - sizeof(Compare)]; - //! Storage for the heap of elements in queue, plus unheapified elements - /** data has the following structure: - - binary unheapified - heap elements - ____|_______|____ - | | | - v v v - [_|...|_|_|...|_| |...| ] - 0 ^ ^ ^ - | | |__capacity - | |__my_size - |__mark - - Thus, data stores the binary heap starting at position 0 through - mark-1 (it may be empty). Then there are 0 or more elements - that have not yet been inserted into the heap, in positions - mark through my_size-1. */ - typedef std::vector vector_t; - vector_t data; - - void handle_operations(cpq_operation *op_list) { - cpq_operation *tmp, *pop_list=NULL; - - __TBB_ASSERT(mark == data.size(), NULL); - - // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. - while (op_list) { - // ITT note: &(op_list->status) tag is used to cover accesses to op_list - // node. 
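push(), emplace(), and try_pop() above are all funneled through the aggregator, so they may be called concurrently; try_pop() returns false instead of blocking when nothing is available. A small sketch of that contract (assuming C++11 lambdas and a system TBB):

    #include <tbb/concurrent_priority_queue.h>
    #include <tbb/parallel_for.h>
    #include <cstdio>

    int main() {
        tbb::concurrent_priority_queue<int> pq;   // max-heap under the default std::less

        tbb::parallel_for(0, 100, [&](int i) { pq.push(i); });  // concurrent pushes

        int top;
        while (pq.try_pop(top))          // returns false instead of blocking when empty
            std::printf("%d\n", top);    // a serial drain yields descending order
        return 0;
    }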
This thread is going to handle the operation, and so will acquire it - // and perform the associated operation w/o triggering a race condition; the - // thread that created the operation is waiting on the status field, so when - // this thread is done with the operation, it will perform a - // store_with_release to give control back to the waiting thread in - // aggregator::insert_operation. - call_itt_notify(acquired, &(op_list->status)); - __TBB_ASSERT(op_list->type != INVALID_OP, NULL); - tmp = op_list; - op_list = itt_hide_load_word(op_list->next); - if (tmp->type == POP_OP) { - if (mark < data.size() && - compare(data[0], data[data.size()-1])) { - // there are newly pushed elems and the last one - // is higher than top - *(tmp->elem) = tbb::internal::move(data[data.size()-1]); - __TBB_store_with_release(my_size, my_size-1); - itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED)); - data.pop_back(); - __TBB_ASSERT(mark<=data.size(), NULL); - } - else { // no convenient item to pop; postpone - itt_hide_store_word(tmp->next, pop_list); - pop_list = tmp; - } - } else { // PUSH_OP or PUSH_RVALUE_OP - __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation" ); - __TBB_TRY{ - if (tmp->type == PUSH_OP) { - push_back_helper(*(tmp->elem), typename internal::use_element_copy_constructor::type()); - } else { - data.push_back(tbb::internal::move(*(tmp->elem))); - } - __TBB_store_with_release(my_size, my_size + 1); - itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED)); - } __TBB_CATCH(...) { - itt_store_word_with_release(tmp->status, uintptr_t(FAILED)); - } - } - } - - // second pass processes pop operations - while (pop_list) { - tmp = pop_list; - pop_list = itt_hide_load_word(pop_list->next); - __TBB_ASSERT(tmp->type == POP_OP, NULL); - if (data.empty()) { - itt_store_word_with_release(tmp->status, uintptr_t(FAILED)); - } - else { - __TBB_ASSERT(mark<=data.size(), NULL); - if (mark < data.size() && - compare(data[0], data[data.size()-1])) { - // there are newly pushed elems and the last one is - // higher than top - *(tmp->elem) = tbb::internal::move(data[data.size()-1]); - __TBB_store_with_release(my_size, my_size-1); - itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED)); - data.pop_back(); - } - else { // extract top and push last element down heap - *(tmp->elem) = tbb::internal::move(data[0]); - __TBB_store_with_release(my_size, my_size-1); - itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED)); - reheap(); - } - } - } - - // heapify any leftover pushed elements before doing the next - // batch of operations - if (mark0) mark = 1; - for (; mark>1; - if (!compare(data[parent], to_place)) break; - data[cur_pos] = tbb::internal::move(data[parent]); - cur_pos = parent; - } while( cur_pos ); - data[cur_pos] = tbb::internal::move(to_place); - } - } - - //! Re-heapify after an extraction - /** Re-heapify by pushing last element down the heap from the root. 
*/ - void reheap() { - size_type cur_pos=0, child=1; - - while (child < mark) { - size_type target = child; - if (child+1 < mark && compare(data[child], data[child+1])) - ++target; - // target now has the higher priority child - if (compare(data[target], data[data.size()-1])) break; - data[cur_pos] = tbb::internal::move(data[target]); - cur_pos = target; - child = (cur_pos<<1)+1; - } - if (cur_pos != data.size()-1) - data[cur_pos] = tbb::internal::move(data[data.size()-1]); - data.pop_back(); - if (mark > data.size()) mark = data.size(); - } - - void push_back_helper(const T& t, tbb::internal::true_type) { - data.push_back(t); - } - - void push_back_helper(const T&, tbb::internal::false_type) { - __TBB_ASSERT( false, "The type is not copy constructible. Copying push operation is impossible." ); - } -}; - -} // namespace interface5 - -using interface5::concurrent_priority_queue; - -} // namespace tbb - -#endif /* __TBB_concurrent_priority_queue_H */ diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_queue.h b/lib/3rdParty/tbb/include/tbb/concurrent_queue.h deleted file mode 100644 index 81db58a3..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_queue.h +++ /dev/null @@ -1,458 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_concurrent_queue_H -#define __TBB_concurrent_queue_H - -#include "internal/_concurrent_queue_impl.h" - -namespace tbb { - -namespace strict_ppl { - -//! A high-performance thread-safe non-blocking concurrent queue. -/** Multiple threads may each push and pop concurrently. - Assignment construction is not allowed. - @ingroup containers */ -template > -class concurrent_queue: public internal::concurrent_queue_base_v3 { - template friend class internal::concurrent_queue_iterator; - - //! Allocator type - typedef typename A::template rebind::other page_allocator_type; - page_allocator_type my_allocator; - - //! Allocates a block of size n (bytes) - virtual void *allocate_block( size_t n ) __TBB_override { - void *b = reinterpret_cast(my_allocator.allocate( n )); - if( !b ) - internal::throw_exception(internal::eid_bad_alloc); - return b; - } - - //! Deallocates block created by allocate_block. - virtual void deallocate_block( void *b, size_t n ) __TBB_override { - my_allocator.deallocate( reinterpret_cast(b), n ); - } - - static void copy_construct_item(T* location, const void* src){ - new (location) T(*static_cast(src)); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - static void move_construct_item(T* location, const void* src) { - new (location) T( std::move(*static_cast(const_cast(src))) ); - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ -public: - //! Element type in the queue. - typedef T value_type; - - //! Reference type - typedef T& reference; - - //! Const reference type - typedef const T& const_reference; - - //! Integral type for representing size of the queue. - typedef size_t size_type; - - //! Difference type for iterator - typedef ptrdiff_t difference_type; - - //! 
Allocator type - typedef A allocator_type; - - //! Construct empty queue - explicit concurrent_queue(const allocator_type& a = allocator_type()) : - my_allocator( a ) - { - } - - //! [begin,end) constructor - template - concurrent_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : - my_allocator( a ) - { - for( ; begin != end; ++begin ) - this->push(*begin); - } - - //! Copy constructor - concurrent_queue( const concurrent_queue& src, const allocator_type& a = allocator_type()) : - internal::concurrent_queue_base_v3(), my_allocator( a ) - { - this->assign( src, copy_construct_item ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move constructors - concurrent_queue( concurrent_queue&& src ) : - internal::concurrent_queue_base_v3(), my_allocator( std::move(src.my_allocator) ) - { - this->internal_swap( src ); - } - - concurrent_queue( concurrent_queue&& src, const allocator_type& a ) : - internal::concurrent_queue_base_v3(), my_allocator( a ) - { - // checking that memory allocated by one instance of allocator can be deallocated - // with another - if( my_allocator == src.my_allocator) { - this->internal_swap( src ); - } else { - // allocators are different => performing per-element move - this->assign( src, move_construct_item ); - src.clear(); - } - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! Destroy queue - ~concurrent_queue(); - - //! Enqueue an item at tail of queue. - void push( const T& source ) { - this->internal_push( &source, copy_construct_item ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - void push( T&& source ) { - this->internal_push( &source, move_construct_item ); - } - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - void emplace( Arguments&&... args ) { - push( T(std::forward( args )...) ); - } -#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! Attempt to dequeue an item from head of queue. - /** Does not wait for item to become available. - Returns true if successful; false otherwise. */ - bool try_pop( T& result ) { - return this->internal_try_pop( &result ); - } - - //! Return the number of items in the queue; thread unsafe - size_type unsafe_size() const {return this->internal_size();} - - //! Equivalent to size()==0. - bool empty() const {return this->internal_empty();} - - //! Clear the queue. not thread-safe. - void clear() ; - - //! Return allocator object - allocator_type get_allocator() const { return this->my_allocator; } - - typedef internal::concurrent_queue_iterator iterator; - typedef internal::concurrent_queue_iterator const_iterator; - - //------------------------------------------------------------------------ - // The iterators are intended only for debugging. They are slow and not thread safe. - //------------------------------------------------------------------------ - iterator unsafe_begin() {return iterator(*this);} - iterator unsafe_end() {return iterator();} - const_iterator unsafe_begin() const {return const_iterator(*this);} - const_iterator unsafe_end() const {return const_iterator();} -} ; - -template -concurrent_queue::~concurrent_queue() { - clear(); - this->internal_finish_clear(); -} - -template -void concurrent_queue::clear() { - T value; - while( !empty() ) try_pop(value); -} - -} // namespace strict_ppl - -//! A high-performance thread-safe blocking concurrent bounded queue. -/** This is the pre-PPL TBB concurrent queue which supports boundedness and blocking semantics. 
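For context on what this hunk deletes: strict_ppl::concurrent_queue is the unbounded, non-blocking variant, and try_pop is its only dequeue operation. A minimal usage sketch, assuming a system-installed TBB still provides the header:

    #include <tbb/concurrent_queue.h>
    #include <iostream>

    int main() {
        tbb::concurrent_queue<int> q;   // unbounded, non-blocking
        for (int i = 0; i < 4; ++i)
            q.push(i);                  // safe from multiple threads
        int v;
        while (q.try_pop(v))            // never blocks; returns false when empty
            std::cout << v << '\n';
        return 0;
    }

unsafe_size() and the unsafe_* iterators are named that way because they are not thread-safe; they are intended for debugging only.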
- Note that method names agree with the PPL-style concurrent queue. - Multiple threads may each push and pop concurrently. - Assignment construction is not allowed. - @ingroup containers */ -template > -class concurrent_bounded_queue: public internal::concurrent_queue_base_v8 { - template friend class internal::concurrent_queue_iterator; - - //! Allocator type - typedef typename A::template rebind::other page_allocator_type; - page_allocator_type my_allocator; - - typedef typename concurrent_queue_base_v3::padded_page padded_page; - typedef typename concurrent_queue_base_v3::copy_specifics copy_specifics; - - //! Class used to ensure exception-safety of method "pop" - class destroyer: internal::no_copy { - T& my_value; - public: - destroyer( T& value ) : my_value(value) {} - ~destroyer() {my_value.~T();} - }; - - T& get_ref( page& p, size_t index ) { - __TBB_ASSERT( index(static_cast(&p))->last)[index]; - } - - virtual void copy_item( page& dst, size_t index, const void* src ) __TBB_override { - new( &get_ref(dst,index) ) T(*static_cast(src)); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - virtual void move_item( page& dst, size_t index, const void* src ) __TBB_override { - new( &get_ref(dst,index) ) T( std::move(*static_cast(const_cast(src))) ); - } -#else - virtual void move_item( page&, size_t, const void* ) __TBB_override { - __TBB_ASSERT( false, "Unreachable code" ); - } -#endif - - virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) __TBB_override { - new( &get_ref(dst,dindex) ) T( get_ref( const_cast(src), sindex ) ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - virtual void move_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) __TBB_override { - new( &get_ref(dst,dindex) ) T( std::move(get_ref( const_cast(src), sindex )) ); - } -#else - virtual void move_page_item( page&, size_t, const page&, size_t ) __TBB_override { - __TBB_ASSERT( false, "Unreachable code" ); - } -#endif - - virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) __TBB_override { - T& from = get_ref(src,index); - destroyer d(from); - *static_cast(dst) = tbb::internal::move( from ); - } - - virtual page *allocate_page() __TBB_override { - size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T); - page *p = reinterpret_cast(my_allocator.allocate( n )); - if( !p ) - internal::throw_exception(internal::eid_bad_alloc); - return p; - } - - virtual void deallocate_page( page *p ) __TBB_override { - size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T); - my_allocator.deallocate( reinterpret_cast(p), n ); - } - -public: - //! Element type in the queue. - typedef T value_type; - - //! Allocator type - typedef A allocator_type; - - //! Reference type - typedef T& reference; - - //! Const reference type - typedef const T& const_reference; - - //! Integral type for representing size of the queue. - /** Note that the size_type is a signed integral type. - This is because the size can be negative if there are pending pops without corresponding pushes. */ - typedef std::ptrdiff_t size_type; - - //! Difference type for iterator - typedef std::ptrdiff_t difference_type; - - //! Construct empty queue - explicit concurrent_bounded_queue(const allocator_type& a = allocator_type()) : - concurrent_queue_base_v8( sizeof(T) ), my_allocator( a ) - { - } - - //! 
Copy constructor - concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a = allocator_type()) - : concurrent_queue_base_v8( sizeof(T) ), my_allocator( a ) - { - assign( src ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move constructors - concurrent_bounded_queue( concurrent_bounded_queue&& src ) - : concurrent_queue_base_v8( sizeof(T) ), my_allocator( std::move(src.my_allocator) ) - { - internal_swap( src ); - } - - concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) - : concurrent_queue_base_v8( sizeof(T) ), my_allocator( a ) - { - // checking that memory allocated by one instance of allocator can be deallocated - // with another - if( my_allocator == src.my_allocator) { - this->internal_swap( src ); - } else { - // allocators are different => performing per-element move - this->move_content( src ); - src.clear(); - } - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! [begin,end) constructor - template - concurrent_bounded_queue( InputIterator begin, InputIterator end, - const allocator_type& a = allocator_type()) - : concurrent_queue_base_v8( sizeof(T) ), my_allocator( a ) - { - for( ; begin != end; ++begin ) - internal_push_if_not_full(&*begin); - } - - //! Destroy queue - ~concurrent_bounded_queue(); - - //! Enqueue an item at tail of queue. - void push( const T& source ) { - internal_push( &source ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move an item at tail of queue. - void push( T&& source ) { - internal_push_move( &source ); - } - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - void emplace( Arguments&&... args ) { - push( T(std::forward( args )...) ); - } -#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */ -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! Dequeue item from head of queue. - /** Block until an item becomes available, and then dequeue it. */ - void pop( T& destination ) { - internal_pop( &destination ); - } - -#if TBB_USE_EXCEPTIONS - //! Abort all pending queue operations - void abort() { - internal_abort(); - } -#endif - - //! Enqueue an item at tail of queue if queue is not already full. - /** Does not wait for queue to become not full. - Returns true if item is pushed; false if queue was already full. */ - bool try_push( const T& source ) { - return internal_push_if_not_full( &source ); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move an item at tail of queue if queue is not already full. - /** Does not wait for queue to become not full. - Returns true if item is pushed; false if queue was already full. */ - bool try_push( T&& source ) { - return internal_push_move_if_not_full( &source ); - } -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - bool try_emplace( Arguments&&... args ) { - return try_push( T(std::forward( args )...) ); - } -#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */ -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! Attempt to dequeue an item from head of queue. - /** Does not wait for item to become available. - Returns true if successful; false otherwise. */ - bool try_pop( T& destination ) { - return internal_pop_if_present( &destination ); - } - - //! Return number of pushes minus number of pops. - /** Note that the result can be negative if there are pops waiting for the - corresponding pushes. The result can also exceed capacity() if there - are push operations in flight. */ - size_type size() const {return internal_size();} - - //! Equivalent to size()<=0. - bool empty() const {return internal_empty();} - - //! 
Maximum number of allowed elements - size_type capacity() const { - return my_capacity; - } - - //! Set the capacity - /** Setting the capacity to 0 causes subsequent try_push operations to always fail, - and subsequent push operations to block forever. */ - void set_capacity( size_type new_capacity ) { - internal_set_capacity( new_capacity, sizeof(T) ); - } - - //! return allocator object - allocator_type get_allocator() const { return this->my_allocator; } - - //! clear the queue. not thread-safe. - void clear() ; - - typedef internal::concurrent_queue_iterator iterator; - typedef internal::concurrent_queue_iterator const_iterator; - - //------------------------------------------------------------------------ - // The iterators are intended only for debugging. They are slow and not thread safe. - //------------------------------------------------------------------------ - iterator unsafe_begin() {return iterator(*this);} - iterator unsafe_end() {return iterator();} - const_iterator unsafe_begin() const {return const_iterator(*this);} - const_iterator unsafe_end() const {return const_iterator();} - -}; - -template -concurrent_bounded_queue::~concurrent_bounded_queue() { - clear(); - internal_finish_clear(); -} - -template -void concurrent_bounded_queue::clear() { - T value; - while( try_pop(value) ) /*noop*/; -} - -using strict_ppl::concurrent_queue; - -} // namespace tbb - -#endif /* __TBB_concurrent_queue_H */ diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_unordered_map.h b/lib/3rdParty/tbb/include/tbb/concurrent_unordered_map.h deleted file mode 100644 index c959a7ec..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_unordered_map.h +++ /dev/null @@ -1,299 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -/* Container implementations in this header are based on PPL implementations - provided by Microsoft. 
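The capacity semantics deleted above are worth a concrete illustration. A minimal single-threaded sketch, again assuming a system-installed TBB:

    #include <tbb/concurrent_queue.h>
    #include <cassert>

    int main() {
        tbb::concurrent_bounded_queue<int> q;
        q.set_capacity(1);        // bound the queue at one element
        assert(q.try_push(1));    // fits
        assert(!q.try_push(2));   // full: try_push fails rather than blocking
        int v;
        q.pop(v);                 // blocking pop; returns immediately here
        assert(v == 1);
        return 0;
    }

Note that size() is signed: it can go negative while pops are blocked waiting for matching pushes.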
*/ - -#ifndef __TBB_concurrent_unordered_map_H -#define __TBB_concurrent_unordered_map_H - -#include "internal/_concurrent_unordered_impl.h" - -namespace tbb -{ - -namespace interface5 { - -// Template class for hash map traits -template -class concurrent_unordered_map_traits -{ -protected: - typedef std::pair value_type; - typedef Key key_type; - typedef Hash_compare hash_compare; - typedef typename Allocator::template rebind::other allocator_type; - enum { allow_multimapping = Allow_multimapping }; - - concurrent_unordered_map_traits() : my_hash_compare() {} - concurrent_unordered_map_traits(const hash_compare& hc) : my_hash_compare(hc) {} - - template - static const Key& get_key(const std::pair& value) { - return (value.first); - } - - hash_compare my_hash_compare; // the comparator predicate for keys -}; - -template , typename Key_equality = std::equal_to, - typename Allocator = tbb::tbb_allocator > > -class concurrent_unordered_map : - public internal::concurrent_unordered_base< concurrent_unordered_map_traits, Allocator, false> > -{ - // Base type definitions - typedef internal::hash_compare hash_compare; - typedef concurrent_unordered_map_traits traits_type; - typedef internal::concurrent_unordered_base< traits_type > base_type; -#if __TBB_EXTRA_DEBUG -public: -#endif - using traits_type::allow_multimapping; -public: - using base_type::end; - using base_type::find; - using base_type::insert; - - // Type definitions - typedef Key key_type; - typedef typename base_type::value_type value_type; - typedef T mapped_type; - typedef Hasher hasher; - typedef Key_equality key_equal; - typedef hash_compare key_compare; - - typedef typename base_type::allocator_type allocator_type; - typedef typename base_type::pointer pointer; - typedef typename base_type::const_pointer const_pointer; - typedef typename base_type::reference reference; - typedef typename base_type::const_reference const_reference; - - typedef typename base_type::size_type size_type; - typedef typename base_type::difference_type difference_type; - - typedef typename base_type::iterator iterator; - typedef typename base_type::const_iterator const_iterator; - typedef typename base_type::iterator local_iterator; - typedef typename base_type::const_iterator const_local_iterator; - - // Construction/destruction/copying - explicit concurrent_unordered_map(size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - {} - - explicit concurrent_unordered_map(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a) - {} - - template - concurrent_unordered_map(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - { - insert(first, last); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! 
Constructor from initializer_list - concurrent_unordered_map(std::initializer_list il, size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - { - this->insert(il.begin(),il.end()); - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - -#if __TBB_CPP11_RVALUE_REF_PRESENT -#if !__TBB_IMPLICIT_MOVE_PRESENT - concurrent_unordered_map(const concurrent_unordered_map& table) - : base_type(table) - {} - - concurrent_unordered_map& operator=(const concurrent_unordered_map& table) - { - return static_cast(base_type::operator=(table)); - } - - concurrent_unordered_map(concurrent_unordered_map&& table) - : base_type(std::move(table)) - {} - - concurrent_unordered_map& operator=(concurrent_unordered_map&& table) - { - return static_cast(base_type::operator=(std::move(table))); - } -#endif //!__TBB_IMPLICIT_MOVE_PRESENT - - concurrent_unordered_map(concurrent_unordered_map&& table, const Allocator& a) : base_type(std::move(table), a) - {} -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - concurrent_unordered_map(const concurrent_unordered_map& table, const Allocator& a) - : base_type(table, a) - {} - - // Observers - mapped_type& operator[](const key_type& key) - { - iterator where = find(key); - - if (where == end()) - { - where = insert(std::pair(key, mapped_type())).first; - } - - return ((*where).second); - } - - mapped_type& at(const key_type& key) - { - iterator where = find(key); - - if (where == end()) - { - tbb::internal::throw_exception(tbb::internal::eid_invalid_key); - } - - return ((*where).second); - } - - const mapped_type& at(const key_type& key) const - { - const_iterator where = find(key); - - if (where == end()) - { - tbb::internal::throw_exception(tbb::internal::eid_invalid_key); - } - - return ((*where).second); - } -}; - -template < typename Key, typename T, typename Hasher = tbb::tbb_hash, typename Key_equality = std::equal_to, - typename Allocator = tbb::tbb_allocator > > -class concurrent_unordered_multimap : - public internal::concurrent_unordered_base< concurrent_unordered_map_traits< Key, T, - internal::hash_compare, Allocator, true> > -{ - // Base type definitions - typedef internal::hash_compare hash_compare; - typedef concurrent_unordered_map_traits traits_type; - typedef internal::concurrent_unordered_base base_type; -#if __TBB_EXTRA_DEBUG -public: -#endif - using traits_type::allow_multimapping; -public: - using base_type::insert; - - // Type definitions - typedef Key key_type; - typedef typename base_type::value_type value_type; - typedef T mapped_type; - typedef Hasher hasher; - typedef Key_equality key_equal; - typedef hash_compare key_compare; - - typedef typename base_type::allocator_type allocator_type; - typedef typename base_type::pointer pointer; - typedef typename base_type::const_pointer const_pointer; - typedef typename base_type::reference reference; - typedef typename base_type::const_reference const_reference; - - typedef typename base_type::size_type size_type; - typedef typename base_type::difference_type difference_type; - - typedef typename base_type::iterator iterator; - typedef typename base_type::const_iterator const_iterator; - typedef typename base_type::iterator local_iterator; - typedef typename base_type::const_iterator const_local_iterator; - - // Construction/destruction/copying - explicit concurrent_unordered_multimap(size_type n_of_buckets = 
base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - {} - - explicit concurrent_unordered_multimap(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a) - {} - - template - concurrent_unordered_multimap(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets,key_compare(_Hasher,_Key_equality), a) - { - insert(first, last); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Constructor from initializer_list - concurrent_unordered_multimap(std::initializer_list il, size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - { - this->insert(il.begin(),il.end()); - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - -#if __TBB_CPP11_RVALUE_REF_PRESENT -#if !__TBB_IMPLICIT_MOVE_PRESENT - concurrent_unordered_multimap(const concurrent_unordered_multimap& table) - : base_type(table) - {} - - concurrent_unordered_multimap& operator=(const concurrent_unordered_multimap& table) - { - return static_cast(base_type::operator=(table)); - } - - concurrent_unordered_multimap(concurrent_unordered_multimap&& table) - : base_type(std::move(table)) - {} - - concurrent_unordered_multimap& operator=(concurrent_unordered_multimap&& table) - { - return static_cast(base_type::operator=(std::move(table))); - } -#endif //!__TBB_IMPLICIT_MOVE_PRESENT - - concurrent_unordered_multimap(concurrent_unordered_multimap&& table, const Allocator& a) : base_type(std::move(table), a) - {} -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - concurrent_unordered_multimap(const concurrent_unordered_multimap& table, const Allocator& a) - : base_type(table, a) - {} -}; -} // namespace interface5 - -using interface5::concurrent_unordered_map; -using interface5::concurrent_unordered_multimap; - -} // namespace tbb - -#endif// __TBB_concurrent_unordered_map_H diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_unordered_set.h b/lib/3rdParty/tbb/include/tbb/concurrent_unordered_set.h deleted file mode 100644 index a26fee1a..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_unordered_set.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -/* Container implementations in this header are based on PPL implementations - provided by Microsoft. 
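Before the set variants below, a minimal sketch of the map semantics documented above (operator[] default-constructs a missing mapped value, at() throws on a missing key), assuming a system-installed TBB:

    #include <tbb/concurrent_unordered_map.h>
    #include <string>
    #include <utility>
    #include <iostream>

    int main() {
        tbb::concurrent_unordered_map<std::string, int> counts;
        counts["face"] += 1;   // inserts {"face", 0} first if the key is absent
        counts.insert(std::make_pair(std::string("eye"), 2));
        std::cout << counts.at("eye") << '\n';   // throws for a missing key
        // insert/find/operator[] are safe concurrently; erasure is only
        // available as unsafe_erase and must be serialized by the caller.
        return 0;
    }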
*/ - -#ifndef __TBB_concurrent_unordered_set_H -#define __TBB_concurrent_unordered_set_H - -#include "internal/_concurrent_unordered_impl.h" - -namespace tbb -{ - -namespace interface5 { - -// Template class for hash set traits -template -class concurrent_unordered_set_traits -{ -protected: - typedef Key value_type; - typedef Key key_type; - typedef Hash_compare hash_compare; - typedef typename Allocator::template rebind::other allocator_type; - enum { allow_multimapping = Allow_multimapping }; - - concurrent_unordered_set_traits() : my_hash_compare() {} - concurrent_unordered_set_traits(const hash_compare& hc) : my_hash_compare(hc) {} - - static const Key& get_key(const value_type& value) { - return value; - } - - hash_compare my_hash_compare; // the comparator predicate for keys -}; - -template , typename Key_equality = std::equal_to, typename Allocator = tbb::tbb_allocator > -class concurrent_unordered_set : public internal::concurrent_unordered_base< concurrent_unordered_set_traits, Allocator, false> > -{ - // Base type definitions - typedef internal::hash_compare hash_compare; - typedef concurrent_unordered_set_traits traits_type; - typedef internal::concurrent_unordered_base< traits_type > base_type; -#if __TBB_EXTRA_DEBUG -public: -#endif - using traits_type::allow_multimapping; -public: - using base_type::insert; - - // Type definitions - typedef Key key_type; - typedef typename base_type::value_type value_type; - typedef Key mapped_type; - typedef Hasher hasher; - typedef Key_equality key_equal; - typedef hash_compare key_compare; - - typedef typename base_type::allocator_type allocator_type; - typedef typename base_type::pointer pointer; - typedef typename base_type::const_pointer const_pointer; - typedef typename base_type::reference reference; - typedef typename base_type::const_reference const_reference; - - typedef typename base_type::size_type size_type; - typedef typename base_type::difference_type difference_type; - - typedef typename base_type::iterator iterator; - typedef typename base_type::const_iterator const_iterator; - typedef typename base_type::iterator local_iterator; - typedef typename base_type::const_iterator const_local_iterator; - - // Construction/destruction/copying - explicit concurrent_unordered_set(size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(), - const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a) - {} - - explicit concurrent_unordered_set(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a) - {} - - template - concurrent_unordered_set(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(), - const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a) - { - insert(first, last); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! 
Constructor from initializer_list - concurrent_unordered_set(std::initializer_list il, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(), - const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a) - { - this->insert(il.begin(),il.end()); - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - -#if __TBB_CPP11_RVALUE_REF_PRESENT -#if !__TBB_IMPLICIT_MOVE_PRESENT - concurrent_unordered_set(const concurrent_unordered_set& table) - : base_type(table) - {} - - concurrent_unordered_set& operator=(const concurrent_unordered_set& table) - { - return static_cast(base_type::operator=(table)); - } - - concurrent_unordered_set(concurrent_unordered_set&& table) - : base_type(std::move(table)) - {} - - concurrent_unordered_set& operator=(concurrent_unordered_set&& table) - { - return static_cast(base_type::operator=(std::move(table))); - } -#endif //!__TBB_IMPLICIT_MOVE_PRESENT - - concurrent_unordered_set(concurrent_unordered_set&& table, const Allocator& a) - : base_type(std::move(table), a) - {} -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - concurrent_unordered_set(const concurrent_unordered_set& table, const Allocator& a) - : base_type(table, a) - {} - -}; - -template , typename Key_equality = std::equal_to, - typename Allocator = tbb::tbb_allocator > -class concurrent_unordered_multiset : - public internal::concurrent_unordered_base< concurrent_unordered_set_traits, Allocator, true> > -{ - // Base type definitions - typedef internal::hash_compare hash_compare; - typedef concurrent_unordered_set_traits traits_type; - typedef internal::concurrent_unordered_base< traits_type > base_type; -#if __TBB_EXTRA_DEBUG -public: -#endif - using traits_type::allow_multimapping; -public: - using base_type::insert; - - // Type definitions - typedef Key key_type; - typedef typename base_type::value_type value_type; - typedef Key mapped_type; - typedef Hasher hasher; - typedef Key_equality key_equal; - typedef hash_compare key_compare; - - typedef typename base_type::allocator_type allocator_type; - typedef typename base_type::pointer pointer; - typedef typename base_type::const_pointer const_pointer; - typedef typename base_type::reference reference; - typedef typename base_type::const_reference const_reference; - - typedef typename base_type::size_type size_type; - typedef typename base_type::difference_type difference_type; - - typedef typename base_type::iterator iterator; - typedef typename base_type::const_iterator const_iterator; - typedef typename base_type::iterator local_iterator; - typedef typename base_type::const_iterator const_local_iterator; - - // Construction/destruction/copying - explicit concurrent_unordered_multiset(size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - {} - - explicit concurrent_unordered_multiset(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a) - {} - - template - concurrent_unordered_multiset(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number, - const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(), - const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a) - { - insert(first, last); - } - -#if 
__TBB_INITIALIZER_LISTS_PRESENT - //! Constructor from initializer_list - concurrent_unordered_multiset(std::initializer_list il, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(), - const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type()) - : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a) - { - this->insert(il.begin(),il.end()); - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - -#if __TBB_CPP11_RVALUE_REF_PRESENT -#if !__TBB_IMPLICIT_MOVE_PRESENT - concurrent_unordered_multiset(const concurrent_unordered_multiset& table) - : base_type(table) - {} - - concurrent_unordered_multiset& operator=(const concurrent_unordered_multiset& table) - { - return static_cast(base_type::operator=(table)); - } - - concurrent_unordered_multiset(concurrent_unordered_multiset&& table) - : base_type(std::move(table)) - {} - - concurrent_unordered_multiset& operator=(concurrent_unordered_multiset&& table) - { - return static_cast(base_type::operator=(std::move(table))); - } -#endif //!__TBB_IMPLICIT_MOVE_PRESENT - - concurrent_unordered_multiset(concurrent_unordered_multiset&& table, const Allocator& a) - : base_type(std::move(table), a) - { - } -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - - concurrent_unordered_multiset(const concurrent_unordered_multiset& table, const Allocator& a) - : base_type(table, a) - {} -}; -} // namespace interface5 - -using interface5::concurrent_unordered_set; -using interface5::concurrent_unordered_multiset; - -} // namespace tbb - -#endif// __TBB_concurrent_unordered_set_H diff --git a/lib/3rdParty/tbb/include/tbb/concurrent_vector.h b/lib/3rdParty/tbb/include/tbb/concurrent_vector.h deleted file mode 100644 index 370129ef..00000000 --- a/lib/3rdParty/tbb/include/tbb/concurrent_vector.h +++ /dev/null @@ -1,1374 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
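For completeness, the set/multiset pair removed above differs only in the Allow_multimapping traits flag. A minimal sketch, assuming a system-installed TBB:

    #include <tbb/concurrent_unordered_set.h>
    #include <iostream>

    int main() {
        tbb::concurrent_unordered_set<int> s;
        s.insert(7);
        s.insert(7);    // duplicate ignored: allow_multimapping is false
        tbb::concurrent_unordered_multiset<int> ms;
        ms.insert(7);
        ms.insert(7);   // duplicates kept: allow_multimapping is true
        std::cout << s.size() << ' ' << ms.size() << '\n';   // prints "1 2"
        return 0;
    }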
- - - - -*/ - -#ifndef __TBB_concurrent_vector_H -#define __TBB_concurrent_vector_H - -#include "tbb_stddef.h" -#include "tbb_exception.h" -#include "atomic.h" -#include "cache_aligned_allocator.h" -#include "blocked_range.h" -#include "tbb_machine.h" -#include "tbb_profiling.h" -#include -#include // for memset() -#include __TBB_STD_SWAP_HEADER - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include -#include - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -#if _MSC_VER==1500 && !__INTEL_COMPILER - // VS2008/VC9 seems to have an issue; limits pull in math.h - #pragma warning( push ) - #pragma warning( disable: 4985 ) -#endif -#include /* std::numeric_limits */ -#if _MSC_VER==1500 && !__INTEL_COMPILER - #pragma warning( pop ) -#endif - -#if __TBB_INITIALIZER_LISTS_PRESENT - #include -#endif - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Workaround for overzealous compiler warnings in /Wp64 mode - #pragma warning (push) -#if defined(_Wp64) - #pragma warning (disable: 4267) -#endif - #pragma warning (disable: 4127) //warning C4127: conditional expression is constant -#endif - -namespace tbb { - -template > -class concurrent_vector; - -//! @cond INTERNAL -namespace internal { - - template - class vector_iterator; - - //! Bad allocation marker - static void *const vector_allocation_error_flag = reinterpret_cast(size_t(63)); - - //! Exception helper function - template - void handle_unconstructed_elements(T* array, size_t n_of_elements){ - std::memset( array, 0, n_of_elements * sizeof( T ) ); - } - - //! Base class of concurrent vector implementation. - /** @ingroup containers */ - class concurrent_vector_base_v3 { - protected: - - // Basic types declarations - typedef size_t segment_index_t; - typedef size_t size_type; - - // Using enumerations due to Mac linking problems of static const variables - enum { - // Size constants - default_initial_segments = 1, // 2 initial items - //! Number of slots for segment pointers inside the class - pointers_per_short_table = 3, // to fit into 8 words of entire structure - pointers_per_long_table = sizeof(segment_index_t) * 8 // one segment per bit - }; - - struct segment_not_used {}; - struct segment_allocated {}; - struct segment_allocation_failed {}; - - class segment_t; - class segment_value_t { - void* array; - private: - //TODO: More elegant way to grant access to selected functions _only_? - friend class segment_t; - explicit segment_value_t(void* an_array):array(an_array) {} - public: - friend bool operator==(segment_value_t const& lhs, segment_not_used ) { return lhs.array == 0;} - friend bool operator==(segment_value_t const& lhs, segment_allocated) { return lhs.array > internal::vector_allocation_error_flag;} - friend bool operator==(segment_value_t const& lhs, segment_allocation_failed) { return lhs.array == internal::vector_allocation_error_flag;} - template - friend bool operator!=(segment_value_t const& lhs, argument_type arg) { return ! (lhs == arg);} - - template - T* pointer() const { return static_cast(const_cast(array)); } - }; - - friend void enforce_segment_allocated(segment_value_t const& s, internal::exception_id exception = eid_bad_last_alloc){ - if(s != segment_allocated()){ - internal::throw_exception(exception); - } - } - - // Segment pointer. 
- class segment_t { - atomic array; - public: - segment_t(){ store(segment_not_used());} - //Copy ctor and assignment operator are defined to ease using of stl algorithms. - //These algorithms usually not a synchronization point, so, semantic is - //intentionally relaxed here. - segment_t(segment_t const& rhs ){ array.store(rhs.array.load());} - - void swap(segment_t & rhs ){ - tbb::internal::swap(array, rhs.array); - } - - segment_t& operator=(segment_t const& rhs ){ - array.store(rhs.array.load()); - return *this; - } - - template - segment_value_t load() const { return segment_value_t(array.load());} - - template - void store(segment_not_used) { - array.store(0); - } - - template - void store(segment_allocation_failed) { - __TBB_ASSERT(load() != segment_allocated(),"transition from \"allocated\" to \"allocation failed\" state looks non-logical"); - array.store(internal::vector_allocation_error_flag); - } - - template - void store(void* allocated_segment_pointer) __TBB_NOEXCEPT(true) { - __TBB_ASSERT(segment_value_t(allocated_segment_pointer) == segment_allocated(), - "other overloads of store should be used for marking segment as not_used or allocation_failed" ); - array.store(allocated_segment_pointer); - } - -#if TBB_USE_ASSERT - ~segment_t() { - __TBB_ASSERT(load() != segment_allocated(), "should have been freed by clear" ); - } -#endif /* TBB_USE_ASSERT */ - }; - friend void swap(segment_t & , segment_t & ) __TBB_NOEXCEPT(true); - - // Data fields - - //! allocator function pointer - void* (*vector_allocator_ptr)(concurrent_vector_base_v3 &, size_t); - - //! count of segments in the first block - atomic my_first_block; - - //! Requested size of vector - atomic my_early_size; - - //! Pointer to the segments table - atomic my_segment; - - //! embedded storage of segment pointers - segment_t my_storage[pointers_per_short_table]; - - // Methods - - concurrent_vector_base_v3() { - //Here the semantic is intentionally relaxed. - //The reason this is next: - //Object that is in middle of construction (i.e. its constructor is not yet finished) - //cannot be used concurrently until the construction is finished. - //Thus to flag other threads that construction is finished, some synchronization with - //acquire-release semantic should be done by the (external) code that uses the vector. - //So, no need to do the synchronization inside the vector. - - my_early_size.store(0); - my_first_block.store(0); // here is not default_initial_segments - my_segment.store(my_storage); - } - - __TBB_EXPORTED_METHOD ~concurrent_vector_base_v3(); - - //these helpers methods use the fact that segments are allocated so - //that every segment size is a (increasing) power of 2. - //with one exception 0 segment has size of 2 as well segment 1; - //e.g. size of segment with index of 3 is 2^3=8; - static segment_index_t segment_index_of( size_type index ) { - return segment_index_t( __TBB_Log2( index|1 ) ); - } - - static segment_index_t segment_base( segment_index_t k ) { - return (segment_index_t(1)< - friend class vector_iterator; - - }; - - inline void swap(concurrent_vector_base_v3::segment_t & lhs, concurrent_vector_base_v3::segment_t & rhs) __TBB_NOEXCEPT(true) { - lhs.swap(rhs); - } - - typedef concurrent_vector_base_v3 concurrent_vector_base; - - //! Meets requirements of a forward iterator for STL and a Value for a blocked_range.*/ - /** Value is either the T or const T type of the container. - @ingroup containers */ - template - class vector_iterator - { - //! concurrent_vector over which we are iterating. 
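The power-of-two segment layout described by the helper comments above is easy to check in isolation. A standalone sketch of the same arithmetic; log2_floor is a portable stand-in for the internal __TBB_Log2 macro:

    #include <cstddef>
    #include <cstdio>

    // Segment k starts at segment_base(k); segments 0 and 1 each hold
    // 2 elements, and segment k >= 1 holds 2^k elements.
    static std::size_t log2_floor(std::size_t x) {
        std::size_t r = 0;
        while (x >>= 1) ++r;
        return r;
    }
    static std::size_t segment_index_of(std::size_t index) {
        return log2_floor(index | 1);
    }
    static std::size_t segment_base(std::size_t k) {
        return (std::size_t(1) << k) & ~std::size_t(1);
    }

    int main() {
        for (std::size_t i : {0, 1, 2, 3, 4, 7, 8})
            std::printf("index %zu -> segment %zu (base %zu)\n",
                        i, segment_index_of(i), segment_base(segment_index_of(i)));
        // indices 0-1 -> segment 0 (base 0); 2-3 -> segment 1 (base 2);
        // 4-7 -> segment 2 (base 4); 8-15 -> segment 3 (base 8)
        return 0;
    }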
- Container* my_vector; - - //! Index into the vector - size_t my_index; - - //! Caches my_vector->internal_subscript(my_index) - /** NULL if cached value is not available */ - mutable Value* my_item; - - template - friend vector_iterator operator+( ptrdiff_t offset, const vector_iterator& v ); - - template - friend bool operator==( const vector_iterator& i, const vector_iterator& j ); - - template - friend bool operator<( const vector_iterator& i, const vector_iterator& j ); - - template - friend ptrdiff_t operator-( const vector_iterator& i, const vector_iterator& j ); - - template - friend class internal::vector_iterator; - -#if !__TBB_TEMPLATE_FRIENDS_BROKEN - template - friend class tbb::concurrent_vector; -#else -public: -#endif - - vector_iterator( const Container& vector, size_t index, void *ptr = 0 ) : - my_vector(const_cast(&vector)), - my_index(index), - my_item(static_cast(ptr)) - {} - - public: - //! Default constructor - vector_iterator() : my_vector(NULL), my_index(~size_t(0)), my_item(NULL) {} - - vector_iterator( const vector_iterator& other ) : - my_vector(other.my_vector), - my_index(other.my_index), - my_item(other.my_item) - {} - - vector_iterator operator+( ptrdiff_t offset ) const { - return vector_iterator( *my_vector, my_index+offset ); - } - vector_iterator &operator+=( ptrdiff_t offset ) { - my_index+=offset; - my_item = NULL; - return *this; - } - vector_iterator operator-( ptrdiff_t offset ) const { - return vector_iterator( *my_vector, my_index-offset ); - } - vector_iterator &operator-=( ptrdiff_t offset ) { - my_index-=offset; - my_item = NULL; - return *this; - } - Value& operator*() const { - Value* item = my_item; - if( !item ) { - item = my_item = &my_vector->internal_subscript(my_index); - } - __TBB_ASSERT( item==&my_vector->internal_subscript(my_index), "corrupt cache" ); - return *item; - } - Value& operator[]( ptrdiff_t k ) const { - return my_vector->internal_subscript(my_index+k); - } - Value* operator->() const {return &operator*();} - - //! Pre increment - vector_iterator& operator++() { - size_t element_index = ++my_index; - if( my_item ) { - //TODO: consider using of knowledge about "first_block optimization" here as well? - if( concurrent_vector_base::is_first_element_in_segment(element_index)) { - //if the iterator crosses a segment boundary, the pointer become invalid - //as possibly next segment is in another memory location - my_item= NULL; - } else { - ++my_item; - } - } - return *this; - } - - //! Pre decrement - vector_iterator& operator--() { - __TBB_ASSERT( my_index>0, "operator--() applied to iterator already at beginning of concurrent_vector" ); - size_t element_index = my_index--; - if( my_item ) { - if(concurrent_vector_base::is_first_element_in_segment(element_index)) { - //if the iterator crosses a segment boundary, the pointer become invalid - //as possibly next segment is in another memory location - my_item= NULL; - } else { - --my_item; - } - } - return *this; - } - - //! Post increment - vector_iterator operator++(int) { - vector_iterator result = *this; - operator++(); - return result; - } - - //! 
Post decrement - vector_iterator operator--(int) { - vector_iterator result = *this; - operator--(); - return result; - } - - // STL support - - typedef ptrdiff_t difference_type; - typedef Value value_type; - typedef Value* pointer; - typedef Value& reference; - typedef std::random_access_iterator_tag iterator_category; - }; - - template - vector_iterator operator+( ptrdiff_t offset, const vector_iterator& v ) { - return vector_iterator( *v.my_vector, v.my_index+offset ); - } - - template - bool operator==( const vector_iterator& i, const vector_iterator& j ) { - return i.my_index==j.my_index && i.my_vector == j.my_vector; - } - - template - bool operator!=( const vector_iterator& i, const vector_iterator& j ) { - return !(i==j); - } - - template - bool operator<( const vector_iterator& i, const vector_iterator& j ) { - return i.my_index - bool operator>( const vector_iterator& i, const vector_iterator& j ) { - return j - bool operator>=( const vector_iterator& i, const vector_iterator& j ) { - return !(i - bool operator<=( const vector_iterator& i, const vector_iterator& j ) { - return !(j - ptrdiff_t operator-( const vector_iterator& i, const vector_iterator& j ) { - return ptrdiff_t(i.my_index)-ptrdiff_t(j.my_index); - } - - template - class allocator_base { - public: - typedef typename A::template - rebind::other allocator_type; - allocator_type my_allocator; - - allocator_base(const allocator_type &a = allocator_type() ) : my_allocator(a) {} - - }; - -} // namespace internal -//! @endcond - -//! Concurrent vector container -/** concurrent_vector is a container having the following main properties: - - It provides random indexed access to its elements. The index of the first element is 0. - - It ensures safe concurrent growing its size (different threads can safely append new elements). - - Adding new elements does not invalidate existing iterators and does not change indices of existing items. - -@par Compatibility - The class meets all Container Requirements and Reversible Container Requirements from - C++ Standard (See ISO/IEC 14882:2003(E), clause 23.1). But it doesn't meet - Sequence Requirements due to absence of insert() and erase() methods. - -@par Exception Safety - Methods working with memory allocation and/or new elements construction can throw an - exception if allocator fails to allocate memory or element's default constructor throws one. - Concurrent vector's element of type T must conform to the following requirements: - - Throwing an exception is forbidden for destructor of T. - - Default constructor of T must not throw an exception OR its non-virtual destructor must safely work when its object memory is zero-initialized. - . - Otherwise, the program's behavior is undefined. -@par - If an exception happens inside growth or assignment operation, an instance of the vector becomes invalid unless it is stated otherwise in the method documentation. - Invalid state means: - - There are no guarantees that all items were initialized by a constructor. The rest of items is zero-filled, including item where exception happens. - - An invalid vector instance cannot be repaired; it is unable to grow anymore. - - Size and capacity reported by the vector are incorrect, and calculated as if the failed operation were successful. - - Attempt to access not allocated elements using operator[] or iterators results in access violation or segmentation fault exception, and in case of using at() method a C++ exception is thrown. - . 
- If a concurrent grow operation successfully completes, all the elements it has added to the vector will remain valid and accessible even if one of subsequent grow operations fails. - -@par Fragmentation - Unlike an STL vector, a concurrent_vector does not move existing elements if it needs - to allocate more memory. The container is divided into a series of contiguous arrays of - elements. The first reservation, growth, or assignment operation determines the size of - the first array. Using small number of elements as initial size incurs fragmentation that - may increase element access time. Internal layout can be optimized by method compact() that - merges several smaller arrays into one solid. - -@par Changes since TBB 2.1 - - Fixed guarantees of concurrent_vector::size() and grow_to_at_least() methods to assure elements are allocated. - - Methods end()/rbegin()/back() are partly thread-safe since they use size() to get the end of vector - - Added resize() methods (not thread-safe) - - Added cbegin/cend/crbegin/crend methods - - Changed return type of methods grow* and push_back to iterator - -@par Changes since TBB 2.0 - - Implemented exception-safety guarantees - - Added template argument for allocator - - Added allocator argument in constructors - - Faster index calculation - - First growth call specifies a number of segments to be merged in the first allocation. - - Fixed memory blow up for swarm of vector's instances of small size - - Added grow_by(size_type n, const_reference t) growth using copying constructor to init new items. - - Added STL-like constructors. - - Added operators ==, < and derivatives - - Added at() method, approved for using after an exception was thrown inside the vector - - Added get_allocator() method. - - Added assign() methods - - Added compact() method to defragment first segments - - Added swap() method - - range() defaults on grainsize = 1 supporting auto grainsize algorithms. 
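Since the change list above ends with the range() default, here is the idiom that enables, sketched under the assumption that tbb/parallel_for.h is also available:

    #include <tbb/concurrent_vector.h>
    #include <tbb/parallel_for.h>

    int main() {
        tbb::concurrent_vector<int> v(100, 1);   // 100 elements, all set to 1
        // range() returns a blocked_range-style range with grainsize 1,
        // so auto-partitioning algorithms may split it as finely as needed.
        tbb::parallel_for(v.range(),
            [](const tbb::concurrent_vector<int>::range_type& r) {
                for (auto it = r.begin(); it != r.end(); ++it)
                    *it *= 2;
            });
        return 0;
    }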
- - @ingroup containers */ -template -class concurrent_vector: protected internal::allocator_base, - private internal::concurrent_vector_base { -private: - template - class generic_range_type: public blocked_range { - public: - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef I iterator; - typedef ptrdiff_t difference_type; - generic_range_type( I begin_, I end_, size_t grainsize_ = 1) : blocked_range(begin_,end_,grainsize_) {} - template - generic_range_type( const generic_range_type& r) : blocked_range(r.begin(),r.end(),r.grainsize()) {} - generic_range_type( generic_range_type& r, split ) : blocked_range(r,split()) {} - }; - - template - friend class internal::vector_iterator; - -public: - //------------------------------------------------------------------------ - // STL compatible types - //------------------------------------------------------------------------ - typedef internal::concurrent_vector_base_v3::size_type size_type; - typedef typename internal::allocator_base::allocator_type allocator_type; - - typedef T value_type; - typedef ptrdiff_t difference_type; - typedef T& reference; - typedef const T& const_reference; - typedef T *pointer; - typedef const T *const_pointer; - - typedef internal::vector_iterator iterator; - typedef internal::vector_iterator const_iterator; - -#if !defined(_MSC_VER) || _CPPLIB_VER>=300 - // Assume ISO standard definition of std::reverse_iterator - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#else - // Use non-standard std::reverse_iterator - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#endif /* defined(_MSC_VER) && (_MSC_VER<1300) */ - - //------------------------------------------------------------------------ - // Parallel algorithm support - //------------------------------------------------------------------------ - typedef generic_range_type range_type; - typedef generic_range_type const_range_type; - - //------------------------------------------------------------------------ - // STL compatible constructors & destructors - //------------------------------------------------------------------------ - - //! Construct empty vector. - explicit concurrent_vector(const allocator_type &a = allocator_type()) - : internal::allocator_base(a), internal::concurrent_vector_base() - { - vector_allocator_ptr = &internal_allocator; - } - - //Constructors are not required to have synchronization - //(for more details see comment in the concurrent_vector_base constructor). -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Constructor from initializer_list - concurrent_vector(std::initializer_list init_list, const allocator_type &a = allocator_type()) - : internal::allocator_base(a), internal::concurrent_vector_base() - { - vector_allocator_ptr = &internal_allocator; - __TBB_TRY { - internal_assign_iterators(init_list.begin(), init_list.end()); - } __TBB_CATCH(...) { - segment_t *table = my_segment.load();; - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load()); - __TBB_RETHROW(); - } - - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - - //! Copying constructor - concurrent_vector( const concurrent_vector& vector, const allocator_type& a = allocator_type() ) - : internal::allocator_base(a), internal::concurrent_vector_base() - { - vector_allocator_ptr = &internal_allocator; - __TBB_TRY { - internal_copy(vector, sizeof(T), ©_array); - } __TBB_CATCH(...) 
{ - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load()); - __TBB_RETHROW(); - } - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move constructor - //TODO add __TBB_NOEXCEPT(true) and static_assert(std::has_nothrow_move_constructor::value) - concurrent_vector( concurrent_vector&& source) - : internal::allocator_base(std::move(source)), internal::concurrent_vector_base() - { - vector_allocator_ptr = &internal_allocator; - concurrent_vector_base_v3::internal_swap(source); - } - - concurrent_vector( concurrent_vector&& source, const allocator_type& a) - : internal::allocator_base(a), internal::concurrent_vector_base() - { - vector_allocator_ptr = &internal_allocator; - //C++ standard requires instances of an allocator being compared for equality, - //which means that memory allocated by one instance is possible to deallocate with the other one. - if (a == source.my_allocator) { - concurrent_vector_base_v3::internal_swap(source); - } else { - __TBB_TRY { - internal_copy(source, sizeof(T), &move_array); - } __TBB_CATCH(...) { - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load()); - __TBB_RETHROW(); - } - } - } - -#endif - - //! Copying constructor for vector with different allocator type - template - concurrent_vector( const concurrent_vector& vector, const allocator_type& a = allocator_type() ) - : internal::allocator_base(a), internal::concurrent_vector_base() - { - vector_allocator_ptr = &internal_allocator; - __TBB_TRY { - internal_copy(vector.internal_vector_base(), sizeof(T), ©_array); - } __TBB_CATCH(...) { - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load() ); - __TBB_RETHROW(); - } - } - - //! Construction with initial size specified by argument n - explicit concurrent_vector(size_type n) - { - vector_allocator_ptr = &internal_allocator; - __TBB_TRY { - internal_resize( n, sizeof(T), max_size(), NULL, &destroy_array, &initialize_array ); - } __TBB_CATCH(...) { - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load() ); - __TBB_RETHROW(); - } - } - - //! Construction with initial size specified by argument n, initialization by copying of t, and given allocator instance - concurrent_vector(size_type n, const_reference t, const allocator_type& a = allocator_type()) - : internal::allocator_base(a) - { - vector_allocator_ptr = &internal_allocator; - __TBB_TRY { - internal_resize( n, sizeof(T), max_size(), static_cast(&t), &destroy_array, &initialize_array_by ); - } __TBB_CATCH(...) { - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load() ); - __TBB_RETHROW(); - } - } - - //! Construction with copying iteration range and given allocator instance - template - concurrent_vector(I first, I last, const allocator_type &a = allocator_type()) - : internal::allocator_base(a) - { - vector_allocator_ptr = &internal_allocator; - __TBB_TRY { - internal_assign_range(first, last, static_cast::is_integer> *>(0) ); - } __TBB_CATCH(...) { - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load() ); - __TBB_RETHROW(); - } - } - - //! 
Assignment - concurrent_vector& operator=( const concurrent_vector& vector ) { - if( this != &vector ) - internal_assign(vector, sizeof(T), &destroy_array, &assign_array, ©_array); - return *this; - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //TODO: add __TBB_NOEXCEPT() - //! Move assignment - concurrent_vector& operator=( concurrent_vector&& other ) { - __TBB_ASSERT(this != &other, "Move assignment to itself is prohibited "); - typedef typename tbb::internal::allocator_traits::propagate_on_container_move_assignment pocma_t; - if(pocma_t::value || this->my_allocator == other.my_allocator) { - concurrent_vector trash (std::move(*this)); - internal_swap(other); - if (pocma_t::value) { - this->my_allocator = std::move(other.my_allocator); - } - } else { - internal_assign(other, sizeof(T), &destroy_array, &move_assign_array, &move_array); - } - return *this; - } -#endif - //TODO: add an template assignment operator? (i.e. with different element type) - - //! Assignment for vector with different allocator type - template - concurrent_vector& operator=( const concurrent_vector& vector ) { - if( static_cast( this ) != static_cast( &vector ) ) - internal_assign(vector.internal_vector_base(), - sizeof(T), &destroy_array, &assign_array, ©_array); - return *this; - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! Assignment for initializer_list - concurrent_vector& operator=( std::initializer_list init_list ) { - internal_clear(&destroy_array); - internal_assign_iterators(init_list.begin(), init_list.end()); - return *this; - } -#endif //#if __TBB_INITIALIZER_LISTS_PRESENT - - //------------------------------------------------------------------------ - // Concurrent operations - //------------------------------------------------------------------------ - //! Grow by "delta" elements. - /** Returns iterator pointing to the first new element. */ - iterator grow_by( size_type delta ) { - return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size.load()); - } - - //! Grow by "delta" elements using copying constructor. - /** Returns iterator pointing to the first new element. */ - iterator grow_by( size_type delta, const_reference t ) { - return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast(&t) ) : my_early_size.load()); - } - - /** Returns iterator pointing to the first new element. */ - template - iterator grow_by( I first, I last ) { - typename std::iterator_traits::difference_type delta = std::distance(first, last); - __TBB_ASSERT( delta >= 0, NULL); - - return iterator(*this, delta ? internal_grow_by(delta, sizeof(T), ©_range, static_cast(&first)) : my_early_size.load()); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - /** Returns iterator pointing to the first new element. */ - iterator grow_by( std::initializer_list init_list ) { - return grow_by( init_list.begin(), init_list.end() ); - } -#endif //#if __TBB_INITIALIZER_LISTS_PRESENT - - //! Append minimal sequence of elements such that size()>=n. - /** The new elements are default constructed. Blocks until all elements in range [0..n) are allocated. - May return while other elements are being constructed by other threads. - Returns iterator that points to beginning of appended sequence. - If no elements were appended, returns iterator pointing to nth element. 
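A minimal sketch of the growth interface documented here (push_back, grow_by, and the grow_to_at_least method that follows), assuming a system-installed TBB:

    #include <tbb/concurrent_vector.h>
    #include <cassert>

    int main() {
        tbb::concurrent_vector<int> v;
        v.push_back(42);            // returns an iterator to the new element
        auto it = v.grow_by(3, 7);  // append three copies of 7
        assert(*it == 7);           // iterator to the first appended element
        v.grow_to_at_least(10);     // default-construct elements up to size 10
        assert(v.size() >= 10);
        return 0;
    }

All three growth calls are safe to issue concurrently from multiple threads; indices of existing elements never change.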
*/ - iterator grow_to_at_least( size_type n ) { - size_type m=0; - if( n ) { - m = internal_grow_to_at_least_with_result( n, sizeof(T), &initialize_array, NULL ); - if( m>n ) m=n; - } - return iterator(*this, m); - }; - - /** Analogous to grow_to_at_least( size_type n ) with exception that the new - elements are initialized by copying of t instead of default construction. */ - iterator grow_to_at_least( size_type n, const_reference t ) { - size_type m=0; - if( n ) { - m = internal_grow_to_at_least_with_result( n, sizeof(T), &initialize_array_by, &t); - if( m>n ) m=n; - } - return iterator(*this, m); - }; - - //! Push item - /** Returns iterator pointing to the new element. */ - iterator push_back( const_reference item ) - { - push_back_helper prolog(*this); - new(prolog.internal_push_back_result()) T(item); - return prolog.return_iterator_and_dismiss(); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Push item, move-aware - /** Returns iterator pointing to the new element. */ - iterator push_back( T&& item ) - { - push_back_helper prolog(*this); - new(prolog.internal_push_back_result()) T(std::move(item)); - return prolog.return_iterator_and_dismiss(); - } -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - //! Push item, create item "in place" with provided arguments - /** Returns iterator pointing to the new element. */ - template - iterator emplace_back( Args&&... args ) - { - push_back_helper prolog(*this); - new(prolog.internal_push_back_result()) T(std::forward(args)...); - return prolog.return_iterator_and_dismiss(); - } -#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif //__TBB_CPP11_RVALUE_REF_PRESENT - //! Get reference to element at given index. - /** This method is thread-safe for concurrent reads, and also while growing the vector, - as long as the calling thread has checked that index < size(). */ - reference operator[]( size_type index ) { - return internal_subscript(index); - } - - //! Get const reference to element at given index. - const_reference operator[]( size_type index ) const { - return internal_subscript(index); - } - - //! Get reference to element at given index. Throws exceptions on errors. - reference at( size_type index ) { - return internal_subscript_with_exceptions(index); - } - - //! Get const reference to element at given index. Throws exceptions on errors. - const_reference at( size_type index ) const { - return internal_subscript_with_exceptions(index); - } - - //! Get range for iterating with parallel algorithms - range_type range( size_t grainsize = 1 ) { - return range_type( begin(), end(), grainsize ); - } - - //! Get const range for iterating with parallel algorithms - const_range_type range( size_t grainsize = 1 ) const { - return const_range_type( begin(), end(), grainsize ); - } - - //------------------------------------------------------------------------ - // Capacity - //------------------------------------------------------------------------ - //! Return size of vector. It may include elements under construction - size_type size() const { - size_type sz = my_early_size, cp = internal_capacity(); - return cp < sz ? cp : sz; - } - - //! Return false if vector is not empty or has elements under construction at least. - bool empty() const {return !my_early_size;} - - //! Maximum size to which array can grow without allocating more memory. Concurrent allocations are not included in the value. - size_type capacity() const {return internal_capacity();} - - //! Allocate enough space to grow to size n without having to allocate more memory later. 
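The methods above (grow_by, grow_to_at_least, push_back, emplace_back, operator[], at, range) form the concurrently-safe surface of concurrent_vector. A minimal usage sketch of that API, assuming a TBB build is linked; the int payload and iteration counts are illustrative only:

#include <tbb/concurrent_vector.h>
#include <tbb/parallel_for.h>
#include <cstdio>

int main() {
    tbb::concurrent_vector<int> v;
    // Many threads may append at once; elements never move once
    // constructed, so returned iterators stay valid during growth.
    tbb::parallel_for(0, 1000, [&](int i) {
        v.push_back(i);        // append one element
        if (i % 100 == 0)
            v.grow_by(2, -1);  // append two elements copy-initialized from -1
    });
    // size() may include elements still under construction while growth
    // is in flight, so inspect contents only after the parallel phase.
    std::printf("%zu elements\n", v.size());
    return 0;
}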
- /** Like most of the methods provided for STL compatibility, this method is *not* thread safe. - The capacity afterwards may be bigger than the requested reservation. */ - void reserve( size_type n ) { - if( n ) - internal_reserve(n, sizeof(T), max_size()); - } - - //! Resize the vector. Not thread-safe. - void resize( size_type n ) { - internal_resize( n, sizeof(T), max_size(), NULL, &destroy_array, &initialize_array ); - } - - //! Resize the vector, copy t for new elements. Not thread-safe. - void resize( size_type n, const_reference t ) { - internal_resize( n, sizeof(T), max_size(), static_cast(&t), &destroy_array, &initialize_array_by ); - } - - //! Optimize memory usage and fragmentation. - void shrink_to_fit(); - - //! Upper bound on argument to reserve. - size_type max_size() const {return (~size_type(0))/sizeof(T);} - - //------------------------------------------------------------------------ - // STL support - //------------------------------------------------------------------------ - - //! start iterator - iterator begin() {return iterator(*this,0);} - //! end iterator - iterator end() {return iterator(*this,size());} - //! start const iterator - const_iterator begin() const {return const_iterator(*this,0);} - //! end const iterator - const_iterator end() const {return const_iterator(*this,size());} - //! start const iterator - const_iterator cbegin() const {return const_iterator(*this,0);} - //! end const iterator - const_iterator cend() const {return const_iterator(*this,size());} - //! reverse start iterator - reverse_iterator rbegin() {return reverse_iterator(end());} - //! reverse end iterator - reverse_iterator rend() {return reverse_iterator(begin());} - //! reverse start const iterator - const_reverse_iterator rbegin() const {return const_reverse_iterator(end());} - //! reverse end const iterator - const_reverse_iterator rend() const {return const_reverse_iterator(begin());} - //! reverse start const iterator - const_reverse_iterator crbegin() const {return const_reverse_iterator(end());} - //! reverse end const iterator - const_reverse_iterator crend() const {return const_reverse_iterator(begin());} - //! the first item - reference front() { - __TBB_ASSERT( size()>0, NULL); - const segment_value_t& segment_value = my_segment[0].template load(); - return (segment_value.template pointer())[0]; - } - //! the first item const - const_reference front() const { - __TBB_ASSERT( size()>0, NULL); - const segment_value_t& segment_value = my_segment[0].template load(); - return (segment_value.template pointer())[0]; - } - //! the last item - reference back() { - __TBB_ASSERT( size()>0, NULL); - return internal_subscript( size()-1 ); - } - //! the last item const - const_reference back() const { - __TBB_ASSERT( size()>0, NULL); - return internal_subscript( size()-1 ); - } - //! return allocator object - allocator_type get_allocator() const { return this->my_allocator; } - - //! assign n items by copying t item - void assign(size_type n, const_reference t) { - clear(); - internal_resize( n, sizeof(T), max_size(), static_cast(&t), &destroy_array, &initialize_array_by ); - } - - //! assign range [first, last) - template - void assign(I first, I last) { - clear(); internal_assign_range( first, last, static_cast::is_integer> *>(0) ); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! 
assigns an initializer list - void assign(std::initializer_list init_list) { - clear(); internal_assign_iterators( init_list.begin(), init_list.end()); - } -#endif //# __TBB_INITIALIZER_LISTS_PRESENT - - //! swap two instances - void swap(concurrent_vector &vector) { - using std::swap; - if( this != &vector ) { - concurrent_vector_base_v3::internal_swap(static_cast(vector)); - swap(this->my_allocator, vector.my_allocator); - } - } - - //! Clear container while keeping memory allocated. - /** To free up the memory, use in conjunction with method compact(). Not thread safe **/ - void clear() { - internal_clear(&destroy_array); - } - - //! Clear and destroy vector. - ~concurrent_vector() { - segment_t *table = my_segment.load(); - internal_free_segments( table, internal_clear(&destroy_array), my_first_block.load() ); - // base class destructor call should be then - } - - const internal::concurrent_vector_base_v3 &internal_vector_base() const { return *this; } -private: - //! Allocate k items - static void *internal_allocator(internal::concurrent_vector_base_v3 &vb, size_t k) { - return static_cast&>(vb).my_allocator.allocate(k); - } - //! Free k segments from table - void internal_free_segments(segment_t table[], segment_index_t k, segment_index_t first_block); - - //! Get reference to element at given index. - T& internal_subscript( size_type index ) const; - - //! Get reference to element at given index with errors checks - T& internal_subscript_with_exceptions( size_type index ) const; - - //! assign n items by copying t - void internal_assign_n(size_type n, const_pointer p) { - internal_resize( n, sizeof(T), max_size(), static_cast(p), &destroy_array, p? &initialize_array_by : &initialize_array ); - } - - //! helper class - template class is_integer_tag; - - //! assign integer items by copying when arguments are treated as iterators. See C++ Standard 2003 23.1.1p9 - template - void internal_assign_range(I first, I last, is_integer_tag *) { - internal_assign_n(static_cast(first), &static_cast(last)); - } - //! inline proxy assign by iterators - template - void internal_assign_range(I first, I last, is_integer_tag *) { - internal_assign_iterators(first, last); - } - //! assign by iterators - template - void internal_assign_iterators(I first, I last); - - //these functions are marked __TBB_EXPORTED_FUNC as they are called from within the library - - //! Construct n instances of T, starting at "begin". - static void __TBB_EXPORTED_FUNC initialize_array( void* begin, const void*, size_type n ); - - //! Copy-construct n instances of T, starting at "begin". - static void __TBB_EXPORTED_FUNC initialize_array_by( void* begin, const void* src, size_type n ); - - //! Copy-construct n instances of T by copying single element pointed to by src, starting at "dst". - static void __TBB_EXPORTED_FUNC copy_array( void* dst, const void* src, size_type n ); - -#if __TBB_MOVE_IF_NOEXCEPT_PRESENT - //! Either opy or move-construct n instances of T, starting at "dst" by copying according element of src array. - static void __TBB_EXPORTED_FUNC move_array_if_noexcept( void* dst, const void* src, size_type n ); -#endif //__TBB_MOVE_IF_NO_EXCEPT_PRESENT - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! Move-construct n instances of T, starting at "dst" by copying according element of src array. - static void __TBB_EXPORTED_FUNC move_array( void* dst, const void* src, size_type n ); - - //! Move-assign (using operator=) n instances of T, starting at "dst" by assigning according element of src array. 
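The assign/swap/clear group above belongs to the STL-compatibility surface and, unlike the growth methods, must not be called concurrently with other operations. A short single-threaded sketch of those semantics (values are illustrative):

#include <tbb/concurrent_vector.h>
#include <cassert>

int main() {
    tbb::concurrent_vector<int> a, b;
    a.assign(4, 7);             // a = {7,7,7,7}: clears, then copy-fills
    int raw[] = {1, 2, 3};
    b.assign(raw, raw + 3);     // iterator-range form
    a.swap(b);                  // swaps segment tables and allocators
    assert(a.size() == 3 && b.size() == 4);
    b.clear();                  // destroys elements but keeps the memory,
    assert(b.empty());          // as the comment on clear() above notes
    return 0;
}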
- static void __TBB_EXPORTED_FUNC move_assign_array( void* dst, const void* src, size_type n ); -#endif - //! Copy-construct n instances of T, starting at "dst" by iterator range of [p_type_erased_iterator, p_type_erased_iterator+n). - template - static void __TBB_EXPORTED_FUNC copy_range( void* dst, const void* p_type_erased_iterator, size_type n ); - - //! Assign (using operator=) n instances of T, starting at "dst" by assigning according element of src array. - static void __TBB_EXPORTED_FUNC assign_array( void* dst, const void* src, size_type n ); - - //! Destroy n instances of T, starting at "begin". - static void __TBB_EXPORTED_FUNC destroy_array( void* begin, size_type n ); - - //! Exception-aware helper class for filling a segment by exception-danger operators of user class - class internal_loop_guide : internal::no_copy { - public: - const pointer array; - const size_type n; - size_type i; - - static const T* as_const_pointer(const void *ptr) { return static_cast(ptr); } - static T* as_pointer(const void *src) { return static_cast(const_cast(src)); } - - internal_loop_guide(size_type ntrials, void *ptr) - : array(as_pointer(ptr)), n(ntrials), i(0) {} - void init() { for(; i < n; ++i) new( &array[i] ) T(); } - void init(const void *src) { for(; i < n; ++i) new( &array[i] ) T(*as_const_pointer(src)); } - void copy(const void *src) { for(; i < n; ++i) new( &array[i] ) T(as_const_pointer(src)[i]); } - void assign(const void *src) { for(; i < n; ++i) array[i] = as_const_pointer(src)[i]; } -#if __TBB_CPP11_RVALUE_REF_PRESENT - void move_assign(const void *src) { for(; i < n; ++i) array[i] = std::move(as_pointer(src)[i]); } - void move_construct(const void *src) { for(; i < n; ++i) new( &array[i] ) T( std::move(as_pointer(src)[i]) ); } -#endif -#if __TBB_MOVE_IF_NOEXCEPT_PRESENT - void move_construct_if_noexcept(const void *src) { for(; i < n; ++i) new( &array[i] ) T( std::move_if_noexcept(as_pointer(src)[i]) ); } -#endif //__TBB_MOVE_IF_NOEXCEPT_PRESENT - - //TODO: rename to construct_range - template void iterate(I &src) { for(; i < n; ++i, ++src) new( &array[i] ) T( *src ); } - ~internal_loop_guide() { - if(i < n) {// if an exception was raised, fill the rest of items with zeros - internal::handle_unconstructed_elements(array+i, n-i); - } - } - }; - - struct push_back_helper : internal::no_copy{ - struct element_construction_guard : internal::no_copy{ - pointer element; - - element_construction_guard(pointer an_element) : element (an_element){} - void dismiss(){ element = NULL; } - ~element_construction_guard(){ - if (element){ - internal::handle_unconstructed_elements(element, 1); - } - } - }; - - concurrent_vector & v; - size_type k; - element_construction_guard g; - - push_back_helper(concurrent_vector & vector) : - v(vector), - g (static_cast(v.internal_push_back(sizeof(T),k))) - {} - - pointer internal_push_back_result(){ return g.element;} - iterator return_iterator_and_dismiss(){ - pointer ptr = g.element; - g.dismiss(); - return iterator(v, k, ptr); - } - }; -}; - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning (push) -#pragma warning (disable: 4701) // potentially uninitialized local variable "old" -#endif -template -void concurrent_vector::shrink_to_fit() { - internal_segments_table old; - __TBB_TRY { - internal_array_op2 copy_or_move_array = -#if __TBB_MOVE_IF_NOEXCEPT_PRESENT - &move_array_if_noexcept -#else - ©_array -#endif - ; - if( internal_compact( sizeof(T), &old, &destroy_array, copy_or_move_array ) ) - internal_free_segments( old.table, 
pointers_per_long_table, old.first_block ); // free joined and unnecessary segments - } __TBB_CATCH(...) { - if( old.first_block ) // free segment allocated for compacting. Only for support of exceptions in ctor of user T[ype] - internal_free_segments( old.table, 1, old.first_block ); - __TBB_RETHROW(); - } -} -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning (pop) -#endif // warning 4701 is back - -template -void concurrent_vector::internal_free_segments(segment_t table[], segment_index_t k, segment_index_t first_block) { - // Free the arrays - while( k > first_block ) { - --k; - segment_value_t segment_value = table[k].load(); - table[k].store(segment_not_used()); - if( segment_value == segment_allocated() ) // check for correct segment pointer - this->my_allocator.deallocate( (segment_value.pointer()), segment_size(k) ); - } - segment_value_t segment_value = table[0].load(); - if( segment_value == segment_allocated() ) { - __TBB_ASSERT( first_block > 0, NULL ); - while(k > 0) table[--k].store(segment_not_used()); - this->my_allocator.deallocate( (segment_value.pointer()), segment_size(first_block) ); - } -} - -template -T& concurrent_vector::internal_subscript( size_type index ) const { - //TODO: unify both versions of internal_subscript - __TBB_ASSERT( index < my_early_size, "index out of bounds" ); - size_type j = index; - segment_index_t k = segment_base_index_of( j ); - __TBB_ASSERT( my_segment.load() != my_storage || k < pointers_per_short_table, "index is being allocated" ); - //no need in load with acquire (load) since thread works in own space or gets - //the information about added elements via some form of external synchronization - //TODO: why not make a load of my_segment relaxed as well ? - //TODO: add an assertion that my_segment[k] is properly aligned to please ITT - segment_value_t segment_value = my_segment[k].template load(); - __TBB_ASSERT( segment_value != segment_allocation_failed(), "the instance is broken by bad allocation. Use at() instead" ); - __TBB_ASSERT( segment_value != segment_not_used(), "index is being allocated" ); - return (( segment_value.pointer()))[j]; -} - -template -T& concurrent_vector::internal_subscript_with_exceptions( size_type index ) const { - if( index >= my_early_size ) - internal::throw_exception(internal::eid_out_of_range); // throw std::out_of_range - size_type j = index; - segment_index_t k = segment_base_index_of( j ); - //TODO: refactor this condition into separate helper function, e.g. fits_into_small_table - if( my_segment.load() == my_storage && k >= pointers_per_short_table ) - internal::throw_exception(internal::eid_segment_range_error); // throw std::range_error - // no need in load with acquire (load) since thread works in own space or gets - //the information about added elements via some form of external synchronization - //TODO: why not make a load of my_segment relaxed as well ? 
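internal_subscript above only asserts on a bad index or a broken segment, which is why its message steers callers toward at(); internal_subscript_with_exceptions (continued below) converts those conditions into exceptions instead. A sketch contrasting the two accessors (illustrative values):

#include <tbb/concurrent_vector.h>
#include <cstdio>
#include <stdexcept>

int main() {
    tbb::concurrent_vector<int> v;
    v.grow_by(3, 42);                    // v = {42, 42, 42}
    // operator[] checks nothing in release builds; the caller must
    // guarantee index < size() before dereferencing.
    std::printf("%d\n", v[1]);
    // at() throws std::out_of_range for a bad index instead of asserting.
    try {
        std::printf("%d\n", v.at(10));
    } catch (const std::out_of_range& e) {
        std::printf("caught: %s\n", e.what());
    }
    return 0;
}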
- //TODO: add an assertion that my_segment[k] is properly aligned to please ITT - segment_value_t segment_value = my_segment[k].template load(); - enforce_segment_allocated(segment_value, internal::eid_index_range_error); - return (segment_value.pointer())[j]; -} - -template template -void concurrent_vector::internal_assign_iterators(I first, I last) { - __TBB_ASSERT(my_early_size == 0, NULL); - size_type n = std::distance(first, last); - if( !n ) return; - internal_reserve(n, sizeof(T), max_size()); - my_early_size = n; - segment_index_t k = 0; - //TODO: unify segment iteration code with concurrent_base_v3::helper - size_type sz = segment_size( my_first_block ); - while( sz < n ) { - internal_loop_guide loop(sz, my_segment[k].template load().template pointer()); - loop.iterate(first); - n -= sz; - if( !k ) k = my_first_block; - else { ++k; sz <<= 1; } - } - internal_loop_guide loop(n, my_segment[k].template load().template pointer()); - loop.iterate(first); -} - -template -void concurrent_vector::initialize_array( void* begin, const void *, size_type n ) { - internal_loop_guide loop(n, begin); loop.init(); -} - -template -void concurrent_vector::initialize_array_by( void* begin, const void *src, size_type n ) { - internal_loop_guide loop(n, begin); loop.init(src); -} - -template -void concurrent_vector::copy_array( void* dst, const void* src, size_type n ) { - internal_loop_guide loop(n, dst); loop.copy(src); -} - -#if __TBB_CPP11_RVALUE_REF_PRESENT -template -void concurrent_vector::move_array( void* dst, const void* src, size_type n ) { - internal_loop_guide loop(n, dst); loop.move_construct(src); -} -template -void concurrent_vector::move_assign_array( void* dst, const void* src, size_type n ) { - internal_loop_guide loop(n, dst); loop.move_assign(src); -} -#endif - -#if __TBB_MOVE_IF_NOEXCEPT_PRESENT -template -void concurrent_vector::move_array_if_noexcept( void* dst, const void* src, size_type n ) { - internal_loop_guide loop(n, dst); loop.move_construct_if_noexcept(src); -} -#endif //__TBB_MOVE_IF_NOEXCEPT_PRESENT - -template -template -void concurrent_vector::copy_range( void* dst, const void* p_type_erased_iterator, size_type n ){ - I & iterator ((*const_cast(static_cast(p_type_erased_iterator)))); - internal_loop_guide loop(n, dst); loop.iterate(iterator); -} - -template -void concurrent_vector::assign_array( void* dst, const void* src, size_type n ) { - internal_loop_guide loop(n, dst); loop.assign(src); -} - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Workaround for overzealous compiler warning - #pragma warning (push) - #pragma warning (disable: 4189) -#endif -template -void concurrent_vector::destroy_array( void* begin, size_type n ) { - T* array = static_cast(begin); - for( size_type j=n; j>0; --j ) - array[j-1].~T(); // destructors are supposed to not throw any exceptions -} -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4189 is back - -// concurrent_vector's template functions -template -inline bool operator==(const concurrent_vector &a, const concurrent_vector &b) { - //TODO: call size() only once per vector (in operator==) - // Simply: return a.size() == b.size() && std::equal(a.begin(), a.end(), b.begin()); - if(a.size() != b.size()) return false; - typename concurrent_vector::const_iterator i(a.begin()); - typename concurrent_vector::const_iterator j(b.begin()); - for(; i != a.end(); ++i, ++j) - if( !(*i == *j) ) return false; - return true; -} - -template -inline bool operator!=(const concurrent_vector 
&a, const concurrent_vector &b) -{ return !(a == b); } - -template -inline bool operator<(const concurrent_vector &a, const concurrent_vector &b) -{ return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); } - -template -inline bool operator>(const concurrent_vector &a, const concurrent_vector &b) -{ return b < a; } - -template -inline bool operator<=(const concurrent_vector &a, const concurrent_vector &b) -{ return !(b < a); } - -template -inline bool operator>=(const concurrent_vector &a, const concurrent_vector &b) -{ return !(a < b); } - -template -inline void swap(concurrent_vector &a, concurrent_vector &b) -{ a.swap( b ); } - -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4267,4127 are back - -#endif /* __TBB_concurrent_vector_H */ diff --git a/lib/3rdParty/tbb/include/tbb/critical_section.h b/lib/3rdParty/tbb/include/tbb/critical_section.h deleted file mode 100644 index 324b3e13..00000000 --- a/lib/3rdParty/tbb/include/tbb/critical_section.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef _TBB_CRITICAL_SECTION_H_ -#define _TBB_CRITICAL_SECTION_H_ - -#if _WIN32||_WIN64 -#include "machine/windows_api.h" -#else -#include -#include -#endif // _WIN32||WIN64 - -#include "tbb_stddef.h" -#include "tbb_thread.h" -#include "tbb_exception.h" - -#include "tbb_profiling.h" - -namespace tbb { - - namespace internal { -class critical_section_v4 : internal::no_copy { -#if _WIN32||_WIN64 - CRITICAL_SECTION my_impl; -#else - pthread_mutex_t my_impl; -#endif - tbb_thread::id my_tid; -public: - - void __TBB_EXPORTED_METHOD internal_construct(); - - critical_section_v4() { -#if _WIN32||_WIN64 - InitializeCriticalSectionEx( &my_impl, 4000, 0 ); -#else - pthread_mutex_init(&my_impl, NULL); -#endif - internal_construct(); - } - - ~critical_section_v4() { - __TBB_ASSERT(my_tid == tbb_thread::id(), "Destroying a still-held critical section"); -#if _WIN32||_WIN64 - DeleteCriticalSection(&my_impl); -#else - pthread_mutex_destroy(&my_impl); -#endif - } - - class scoped_lock : internal::no_copy { - private: - critical_section_v4 &my_crit; - public: - scoped_lock( critical_section_v4& lock_me) :my_crit(lock_me) { - my_crit.lock(); - } - - ~scoped_lock() { - my_crit.unlock(); - } - }; - - void lock() { - tbb_thread::id local_tid = this_tbb_thread::get_id(); - if(local_tid == my_tid) throw_exception( eid_improper_lock ); -#if _WIN32||_WIN64 - EnterCriticalSection( &my_impl ); -#else - int rval = pthread_mutex_lock(&my_impl); - __TBB_ASSERT_EX(!rval, "critical_section::lock: pthread_mutex_lock failed"); -#endif - __TBB_ASSERT(my_tid == tbb_thread::id(), NULL); - my_tid = local_tid; - } - - bool try_lock() { - bool gotlock; - tbb_thread::id local_tid = this_tbb_thread::get_id(); - if(local_tid == my_tid) return false; -#if _WIN32||_WIN64 - gotlock = TryEnterCriticalSection( &my_impl ) != 0; -#else - int rval = 
pthread_mutex_trylock(&my_impl); - // valid returns are 0 (locked) and [EBUSY] - __TBB_ASSERT(rval == 0 || rval == EBUSY, "critical_section::trylock: pthread_mutex_trylock failed"); - gotlock = rval == 0; -#endif - if(gotlock) { - my_tid = local_tid; - } - return gotlock; - } - - void unlock() { - __TBB_ASSERT(this_tbb_thread::get_id() == my_tid, "thread unlocking critical_section is not thread that locked it"); - my_tid = tbb_thread::id(); -#if _WIN32||_WIN64 - LeaveCriticalSection( &my_impl ); -#else - int rval = pthread_mutex_unlock(&my_impl); - __TBB_ASSERT_EX(!rval, "critical_section::unlock: pthread_mutex_unlock failed"); -#endif - } - - static const bool is_rw_mutex = false; - static const bool is_recursive_mutex = false; - static const bool is_fair_mutex = true; -}; // critical_section_v4 -} // namespace internal -typedef internal::critical_section_v4 critical_section; - -__TBB_DEFINE_PROFILING_SET_NAME(critical_section) -} // namespace tbb -#endif // _TBB_CRITICAL_SECTION_H_ diff --git a/lib/3rdParty/tbb/include/tbb/enumerable_thread_specific.h b/lib/3rdParty/tbb/include/tbb/enumerable_thread_specific.h deleted file mode 100644 index f31f533b..00000000 --- a/lib/3rdParty/tbb/include/tbb/enumerable_thread_specific.h +++ /dev/null @@ -1,1137 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_enumerable_thread_specific_H -#define __TBB_enumerable_thread_specific_H - -#include "atomic.h" -#include "concurrent_vector.h" -#include "tbb_thread.h" -#include "tbb_allocator.h" -#include "cache_aligned_allocator.h" -#include "aligned_space.h" -#include "internal/_template_helpers.h" -#include "internal/_tbb_hash_compare_impl.h" -#include "tbb_profiling.h" -#include // for memcpy - -#if _WIN32||_WIN64 -#include "machine/windows_api.h" -#else -#include -#endif - -#define __TBB_ETS_USE_CPP11 \ - (__TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT \ - && __TBB_CPP11_DECLTYPE_PRESENT && __TBB_CPP11_LAMBDAS_PRESENT) - -namespace tbb { - -//! enum for selecting between single key and key-per-instance versions -enum ets_key_usage_type { ets_key_per_instance, ets_no_key }; - -namespace interface6 { - - // Forward declaration to use in internal classes - template - class enumerable_thread_specific; - - //! 
@cond - namespace internal { - - using namespace tbb::internal; - - template - class ets_base: tbb::internal::no_copy { - protected: - typedef tbb_thread::id key_type; -#if __TBB_PROTECTED_NESTED_CLASS_BROKEN - public: -#endif - struct slot; - - struct array { - array* next; - size_t lg_size; - slot& at( size_t k ) { - return ((slot*)(void*)(this+1))[k]; - } - size_t size() const {return size_t(1)<>(8*sizeof(size_t)-lg_size); - } - }; - struct slot { - key_type key; - void* ptr; - bool empty() const {return key == key_type();} - bool match( key_type k ) const {return key == k;} - bool claim( key_type k ) { - // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size - return atomic_compare_and_swap(key, k, key_type()) == key_type(); - } - }; -#if __TBB_PROTECTED_NESTED_CLASS_BROKEN - protected: -#endif - - //! Root of linked list of arrays of decreasing size. - /** NULL if and only if my_count==0. - Each array in the list is half the size of its predecessor. */ - atomic my_root; - atomic my_count; - virtual void* create_local() = 0; - virtual void* create_array(size_t _size) = 0; // _size in bytes - virtual void free_array(void* ptr, size_t _size) = 0; // _size in bytes - array* allocate( size_t lg_size ) { - size_t n = size_t(1)<(create_array( sizeof(array)+n*sizeof(slot) )); - a->lg_size = lg_size; - std::memset( a+1, 0, n*sizeof(slot) ); - return a; - } - void free(array* a) { - size_t n = size_t(1)<<(a->lg_size); - free_array( (void *)a, size_t(sizeof(array)+n*sizeof(slot)) ); - } - - ets_base() {my_root=NULL; my_count=0;} - virtual ~ets_base(); // g++ complains if this is not virtual - void* table_lookup( bool& exists ); - void table_clear(); - // The following functions are not used in concurrent context, - // so we don't need synchronization and ITT annotations there. - void table_elementwise_copy( const ets_base& other, - void*(*add_element)(ets_base&, void*) ) { - __TBB_ASSERT(!my_root,NULL); - __TBB_ASSERT(!my_count,NULL); - if( !other.my_root ) return; - array* root = my_root = allocate(other.my_root->lg_size); - root->next = NULL; - my_count = other.my_count; - size_t mask = root->mask(); - for( array* r=other.my_root; r; r=r->next ) { - for( size_t i=0; isize(); ++i ) { - slot& s1 = r->at(i); - if( !s1.empty() ) { - for( size_t j = root->start(tbb::tbb_hash()(s1.key)); ; j=(j+1)&mask ) { - slot& s2 = root->at(j); - if( s2.empty() ) { - s2.ptr = add_element(*this, s1.ptr); - s2.key = s1.key; - break; - } - else if( s2.match(s1.key) ) - break; - } - } - } - } - } - void table_swap( ets_base& other ) { - __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); - tbb::internal::swap(my_root, other.my_root); - tbb::internal::swap(my_count, other.my_count); - } - }; - - template - ets_base::~ets_base() { - __TBB_ASSERT(!my_root, NULL); - } - - template - void ets_base::table_clear() { - while( array* r = my_root ) { - my_root = r->next; - free(r); - } - my_count = 0; - } - - template - void* ets_base::table_lookup( bool& exists ) { - const key_type k = tbb::this_tbb_thread::get_id(); - - __TBB_ASSERT(k != key_type(),NULL); - void* found; - size_t h = tbb::tbb_hash()(k); - for( array* r=my_root; r; r=r->next ) { - call_itt_notify(acquired,r); - size_t mask=r->mask(); - for(size_t i = r->start(h); ;i=(i+1)&mask) { - slot& s = r->at(i); - if( s.empty() ) break; - if( s.match(k) ) { - if( r==my_root ) { - // Success at top level - exists = true; - return s.ptr; - } else { - // Success at some other level. Need to insert at top level. 
- exists = true; - found = s.ptr; - goto insert; - } - } - } - } - // Key does not yet exist. The density of slots in the table does not exceed 0.5, - // for if this will occur a new table is allocated with double the current table - // size, which is swapped in as the new root table. So an empty slot is guaranteed. - exists = false; - found = create_local(); - { - size_t c = ++my_count; - array* r = my_root; - call_itt_notify(acquired,r); - if( !r || c>r->size()/2 ) { - size_t s = r ? r->lg_size : 2; - while( c>size_t(1)<<(s-1) ) ++s; - array* a = allocate(s); - for(;;) { - a->next = r; - call_itt_notify(releasing,a); - array* new_r = my_root.compare_and_swap(a,r); - if( new_r==r ) break; - call_itt_notify(acquired, new_r); - if( new_r->lg_size>=s ) { - // Another thread inserted an equal or bigger array, so our array is superfluous. - free(a); - break; - } - r = new_r; - } - } - } - insert: - // Whether a slot has been found in an older table, or if it has been inserted at this level, - // it has already been accounted for in the total. Guaranteed to be room for it, and it is - // not present, so search for empty slot and use it. - array* ir = my_root; - call_itt_notify(acquired, ir); - size_t mask = ir->mask(); - for(size_t i = ir->start(h);;i=(i+1)&mask) { - slot& s = ir->at(i); - if( s.empty() ) { - if( s.claim(k) ) { - s.ptr = found; - return found; - } - } - } - } - - //! Specialization that exploits native TLS - template <> - class ets_base: protected ets_base { - typedef ets_base super; -#if _WIN32||_WIN64 -#if __TBB_WIN8UI_SUPPORT - typedef DWORD tls_key_t; - void create_key() { my_key = FlsAlloc(NULL); } - void destroy_key() { FlsFree(my_key); } - void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } - void* get_tls() { return (void *)FlsGetValue(my_key); } -#else - typedef DWORD tls_key_t; - void create_key() { my_key = TlsAlloc(); } - void destroy_key() { TlsFree(my_key); } - void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } - void* get_tls() { return (void *)TlsGetValue(my_key); } -#endif -#else - typedef pthread_key_t tls_key_t; - void create_key() { pthread_key_create(&my_key, NULL); } - void destroy_key() { pthread_key_delete(my_key); } - void set_tls( void * value ) const { pthread_setspecific(my_key, value); } - void* get_tls() const { return pthread_getspecific(my_key); } -#endif - tls_key_t my_key; - virtual void* create_local() __TBB_override = 0; - virtual void* create_array(size_t _size) __TBB_override = 0; // _size in bytes - virtual void free_array(void* ptr, size_t _size) __TBB_override = 0; // size in bytes - protected: - ets_base() {create_key();} - ~ets_base() {destroy_key();} - void* table_lookup( bool& exists ) { - void* found = get_tls(); - if( found ) { - exists=true; - } else { - found = super::table_lookup(exists); - set_tls(found); - } - return found; - } - void table_clear() { - destroy_key(); - create_key(); - super::table_clear(); - } - void table_swap( ets_base& other ) { - using std::swap; - __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); - swap(my_key, other.my_key); - super::table_swap(other); - } - }; - - //! Random access iterator for traversing the thread local copies. - template< typename Container, typename Value > - class enumerable_thread_specific_iterator -#if defined(_WIN64) && defined(_MSC_VER) - // Ensure that Microsoft's internal template function _Val_type works correctly. - : public std::iterator -#endif /* defined(_WIN64) && defined(_MSC_VER) */ - { - //! 
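table_lookup above implements an open-addressed, grow-only hash table keyed by thread id, in which an empty slot is claimed with a compare-and-swap; the ets_key_per_instance specialization merely puts a native-TLS fast path in front of it. What follows is not TBB code but a deliberately stripped-down, fixed-size illustration of that claiming scheme using std::atomic; the real implementation chains arrays of doubling size and keeps slot density below 0.5 so a free slot always exists:

#include <atomic>
#include <cstddef>
#include <functional>

struct Slot {
    std::atomic<std::size_t> key{0};   // 0 plays the role of key_type(): empty
    void* ptr{nullptr};
};

static const std::size_t kMask = 255;  // table of 256 slots, a power of two
static Slot slots[kMask + 1];

// Each thread passes its own unique nonzero key (its thread id in ets_base),
// so two threads never race on the same key value.
void* lookup_or_insert(std::size_t k, void* fresh_local) {
    for (std::size_t i = std::hash<std::size_t>()(k) & kMask; ; i = (i + 1) & kMask) {
        std::size_t cur = slots[i].key.load(std::memory_order_acquire);
        if (cur == k)
            return slots[i].ptr;            // this thread's slot, found
        if (cur == 0) {                     // empty slot: try to claim it
            std::size_t expected = 0;
            if (slots[i].key.compare_exchange_strong(expected, k)) {
                slots[i].ptr = fresh_local; // only thread k ever reads this ptr
                return fresh_local;
            }
            // another thread claimed this slot with its own key; keep probing
        }
    }
}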
current position in the concurrent_vector - - Container *my_container; - typename Container::size_type my_index; - mutable Value *my_value; - - template - friend enumerable_thread_specific_iterator - operator+( ptrdiff_t offset, const enumerable_thread_specific_iterator& v ); - - template - friend bool operator==( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ); - - template - friend bool operator<( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ); - - template - friend ptrdiff_t operator-( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ); - - template - friend class enumerable_thread_specific_iterator; - - public: - - enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : - my_container(&const_cast(container)), my_index(index), my_value(NULL) {} - - //! Default constructor - enumerable_thread_specific_iterator() : my_container(NULL), my_index(0), my_value(NULL) {} - - template - enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator& other ) : - my_container( other.my_container ), my_index( other.my_index), my_value( const_cast(other.my_value) ) {} - - enumerable_thread_specific_iterator operator+( ptrdiff_t offset ) const { - return enumerable_thread_specific_iterator(*my_container, my_index + offset); - } - - enumerable_thread_specific_iterator &operator+=( ptrdiff_t offset ) { - my_index += offset; - my_value = NULL; - return *this; - } - - enumerable_thread_specific_iterator operator-( ptrdiff_t offset ) const { - return enumerable_thread_specific_iterator( *my_container, my_index-offset ); - } - - enumerable_thread_specific_iterator &operator-=( ptrdiff_t offset ) { - my_index -= offset; - my_value = NULL; - return *this; - } - - Value& operator*() const { - Value* value = my_value; - if( !value ) { - value = my_value = (*my_container)[my_index].value(); - } - __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); - return *value; - } - - Value& operator[]( ptrdiff_t k ) const { - return (*my_container)[my_index + k].value; - } - - Value* operator->() const {return &operator*();} - - enumerable_thread_specific_iterator& operator++() { - ++my_index; - my_value = NULL; - return *this; - } - - enumerable_thread_specific_iterator& operator--() { - --my_index; - my_value = NULL; - return *this; - } - - //! Post increment - enumerable_thread_specific_iterator operator++(int) { - enumerable_thread_specific_iterator result = *this; - ++my_index; - my_value = NULL; - return result; - } - - //! 
Post decrement - enumerable_thread_specific_iterator operator--(int) { - enumerable_thread_specific_iterator result = *this; - --my_index; - my_value = NULL; - return result; - } - - // STL support - typedef ptrdiff_t difference_type; - typedef Value value_type; - typedef Value* pointer; - typedef Value& reference; - typedef std::random_access_iterator_tag iterator_category; - }; - - template - enumerable_thread_specific_iterator - operator+( ptrdiff_t offset, const enumerable_thread_specific_iterator& v ) { - return enumerable_thread_specific_iterator( v.my_container, v.my_index + offset ); - } - - template - bool operator==( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return i.my_index==j.my_index && i.my_container == j.my_container; - } - - template - bool operator!=( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return !(i==j); - } - - template - bool operator<( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return i.my_index - bool operator>( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return j - bool operator>=( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return !(i - bool operator<=( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return !(j - ptrdiff_t operator-( const enumerable_thread_specific_iterator& i, - const enumerable_thread_specific_iterator& j ) { - return i.my_index-j.my_index; - } - - template - class segmented_iterator -#if defined(_WIN64) && defined(_MSC_VER) - : public std::iterator -#endif - { - template - friend bool operator==(const segmented_iterator& i, const segmented_iterator& j); - - template - friend bool operator!=(const segmented_iterator& i, const segmented_iterator& j); - - template - friend class segmented_iterator; - - public: - - segmented_iterator() {my_segcont = NULL;} - - segmented_iterator( const SegmentedContainer& _segmented_container ) : - my_segcont(const_cast(&_segmented_container)), - outer_iter(my_segcont->end()) { } - - ~segmented_iterator() {} - - typedef typename SegmentedContainer::iterator outer_iterator; - typedef typename SegmentedContainer::value_type InnerContainer; - typedef typename InnerContainer::iterator inner_iterator; - - // STL support - typedef ptrdiff_t difference_type; - typedef Value value_type; - typedef typename SegmentedContainer::size_type size_type; - typedef Value* pointer; - typedef Value& reference; - typedef std::input_iterator_tag iterator_category; - - // Copy Constructor - template - segmented_iterator(const segmented_iterator& other) : - my_segcont(other.my_segcont), - outer_iter(other.outer_iter), - // can we assign a default-constructed iterator to inner if we're at the end? - inner_iter(other.inner_iter) - {} - - // assignment - template - segmented_iterator& operator=( const segmented_iterator& other) { - if(this != &other) { - my_segcont = other.my_segcont; - outer_iter = other.outer_iter; - if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; - } - return *this; - } - - // allow assignment of outer iterator to segmented iterator. Once it is - // assigned, move forward until a non-empty inner container is found or - // the end of the outer container is reached. 
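segmented_iterator is the machinery behind flattened2d and flatten2d, defined near the end of this header: it walks an outer container of inner containers, skipping the empty inner ones. A usage sketch, assuming each thread collects results into a thread-local std::vector (the element type and bounds are illustrative):

#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <cstdio>
#include <vector>

int main() {
    // One lazily created std::vector per thread.
    tbb::enumerable_thread_specific<std::vector<int> > locals;
    tbb::parallel_for(0, 100, [&](int i) {
        locals.local().push_back(i);   // no locking: per-thread container
    });
    // flatten2d presents all thread-local vectors as one sequence; its
    // iterator is the segmented_iterator above.
    int sum = 0;
    for (int x : tbb::flatten2d(locals))
        sum += x;
    std::printf("sum = %d\n", sum);    // 0 + 1 + ... + 99 = 4950
    return 0;
}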
- segmented_iterator& operator=(const outer_iterator& new_outer_iter) { - __TBB_ASSERT(my_segcont != NULL, NULL); - // check that this iterator points to something inside the segmented container - for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { - if( !outer_iter->empty() ) { - inner_iter = outer_iter->begin(); - break; - } - } - return *this; - } - - // pre-increment - segmented_iterator& operator++() { - advance_me(); - return *this; - } - - // post-increment - segmented_iterator operator++(int) { - segmented_iterator tmp = *this; - operator++(); - return tmp; - } - - bool operator==(const outer_iterator& other_outer) const { - __TBB_ASSERT(my_segcont != NULL, NULL); - return (outer_iter == other_outer && - (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); - } - - bool operator!=(const outer_iterator& other_outer) const { - return !operator==(other_outer); - - } - - // (i)* RHS - reference operator*() const { - __TBB_ASSERT(my_segcont != NULL, NULL); - __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); - __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen - return *inner_iter; - } - - // i-> - pointer operator->() const { return &operator*();} - - private: - SegmentedContainer* my_segcont; - outer_iterator outer_iter; - inner_iterator inner_iter; - - void advance_me() { - __TBB_ASSERT(my_segcont != NULL, NULL); - __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers - __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. - ++inner_iter; - while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { - inner_iter = outer_iter->begin(); - } - } - }; // segmented_iterator - - template - bool operator==( const segmented_iterator& i, - const segmented_iterator& j ) { - if(i.my_segcont != j.my_segcont) return false; - if(i.my_segcont == NULL) return true; - if(i.outer_iter != j.outer_iter) return false; - if(i.outer_iter == i.my_segcont->end()) return true; - return i.inner_iter == j.inner_iter; - } - - // != - template - bool operator!=( const segmented_iterator& i, - const segmented_iterator& j ) { - return !(i==j); - } - - template - struct construct_by_default: tbb::internal::no_assign { - void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. - construct_by_default( int ) {} - }; - - template - struct construct_by_exemplar: tbb::internal::no_assign { - const T exemplar; - void construct(void*where) {new(where) T(exemplar);} - construct_by_exemplar( const T& t ) : exemplar(t) {} -#if __TBB_ETS_USE_CPP11 - construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} -#endif - }; - - template - struct construct_by_finit: tbb::internal::no_assign { - Finit f; - void construct(void* where) {new(where) T(f());} - construct_by_finit( const Finit& f_ ) : f(f_) {} -#if __TBB_ETS_USE_CPP11 - construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} -#endif - }; - -#if __TBB_ETS_USE_CPP11 - template - struct construct_by_args: tbb::internal::no_assign { - internal::stored_pack pack; - void construct(void* where) { - internal::call( [where](const typename strip
<P>
::type&... args ){ - new(where) T(args...); - }, pack ); - } - construct_by_args( P&& ... args ) : pack(std::forward
<P>
(args)...) {} - }; -#endif - - // storage for initialization function pointer - // TODO: consider removing the template parameter T here and in callback_leaf - template - class callback_base { - public: - // Clone *this - virtual callback_base* clone() const = 0; - // Destruct and free *this - virtual void destroy() = 0; - // Need virtual destructor to satisfy GCC compiler warning - virtual ~callback_base() { } - // Construct T at where - virtual void construct(void* where) = 0; - }; - - template - class callback_leaf: public callback_base, Constructor { -#if __TBB_ETS_USE_CPP11 - template callback_leaf( P&& ... params ) : Constructor(std::forward
<P>
(params)...) {} -#else - template callback_leaf( const X& x ) : Constructor(x) {} -#endif - // TODO: make the construction/destruction consistent (use allocator.construct/destroy) - typedef typename tbb::tbb_allocator my_allocator_type; - - callback_base* clone() const __TBB_override { - return make(*this); - } - - void destroy() __TBB_override { - my_allocator_type().destroy(this); - my_allocator_type().deallocate(this,1); - } - - void construct(void* where) __TBB_override { - Constructor::construct(where); - } - public: -#if __TBB_ETS_USE_CPP11 - template - static callback_base* make( P&& ... params ) { - void* where = my_allocator_type().allocate(1); - return new(where) callback_leaf( std::forward
<P>
(params)... ); - } -#else - template - static callback_base* make( const X& x ) { - void* where = my_allocator_type().allocate(1); - return new(where) callback_leaf(x); - } -#endif - }; - - //! Template for recording construction of objects in table - /** All maintenance of the space will be done explicitly on push_back, - and all thread local copies must be destroyed before the concurrent - vector is deleted. - - The flag is_built is initialized to false. When the local is - successfully-constructed, set the flag to true or call value_committed(). - If the constructor throws, the flag will be false. - */ - template - struct ets_element { - tbb::aligned_space my_space; - bool is_built; - ets_element() { is_built = false; } // not currently-built - U* value() { return my_space.begin(); } - U* value_committed() { is_built = true; return my_space.begin(); } - ~ets_element() { - if(is_built) { - my_space.begin()->~U(); - is_built = false; - } - } - }; - - // A predicate that can be used for a compile-time compatibility check of ETS instances - // Ideally, it should have been declared inside the ETS class, but unfortunately - // in that case VS2013 does not enable the variadic constructor. - template struct is_compatible_ets { static const bool value = false; }; - template - struct is_compatible_ets< T, enumerable_thread_specific > { static const bool value = internal::is_same_type::value; }; - -#if __TBB_ETS_USE_CPP11 - // A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression - template - class is_callable_no_args { - private: - typedef char yes[1]; - typedef char no [2]; - - template static yes& decide( decltype(declval()())* ); - template static no& decide(...); - public: - static const bool value = (sizeof(decide(NULL)) == sizeof(yes)); - }; -#endif - - } // namespace internal - //! @endcond - - //! The enumerable_thread_specific container - /** enumerable_thread_specific has the following properties: - - thread-local copies are lazily created, with default, exemplar or function initialization. - - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. - - the contained objects need not have operator=() defined if combine is not used. - - enumerable_thread_specific containers may be copy-constructed or assigned. - - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. - - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods - - @par Segmented iterator - When the thread-local objects are containers with input_iterators defined, a segmented iterator may - be used to iterate over all the elements of all thread-local copies. - - @par combine and combine_each - - Both methods are defined for enumerable_thread_specific. - - combine() requires the type T have operator=() defined. - - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) - - Both are evaluated in serial context (the methods are assumed to be non-benign.) - - @ingroup containers */ - template , - ets_key_usage_type ETS_key_type=ets_no_key > - class enumerable_thread_specific: internal::ets_base { - - template friend class enumerable_thread_specific; - - typedef internal::padded< internal::ets_element > padded_element; - - //! 
A generic range, used to create range objects from the iterators - template - class generic_range_type: public blocked_range { - public: - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef I iterator; - typedef ptrdiff_t difference_type; - generic_range_type( I begin_, I end_, size_t grainsize_ = 1) : blocked_range(begin_,end_,grainsize_) {} - template - generic_range_type( const generic_range_type& r) : blocked_range(r.begin(),r.end(),r.grainsize()) {} - generic_range_type( generic_range_type& r, split ) : blocked_range(r,split()) {} - }; - - typedef typename Allocator::template rebind< padded_element >::other padded_allocator_type; - typedef tbb::concurrent_vector< padded_element, padded_allocator_type > internal_collection_type; - - internal::callback_base *my_construct_callback; - - internal_collection_type my_locals; - - // TODO: consider unifying the callback mechanism for all create_local* methods below - // (likely non-compatible and requires interface version increase) - void* create_local() __TBB_override { - padded_element& lref = *my_locals.grow_by(1); - my_construct_callback->construct(lref.value()); - return lref.value_committed(); - } - - static void* create_local_by_copy( internal::ets_base& base, void* p ) { - enumerable_thread_specific& ets = static_cast(base); - padded_element& lref = *ets.my_locals.grow_by(1); - new(lref.value()) T(*static_cast(p)); - return lref.value_committed(); - } - -#if __TBB_ETS_USE_CPP11 - static void* create_local_by_move( internal::ets_base& base, void* p ) { - enumerable_thread_specific& ets = static_cast(base); - padded_element& lref = *ets.my_locals.grow_by(1); - new(lref.value()) T(std::move(*static_cast(p))); - return lref.value_committed(); - } -#endif - - typedef typename Allocator::template rebind< uintptr_t >::other array_allocator_type; - - // _size is in bytes - void* create_array(size_t _size) __TBB_override { - size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); - return array_allocator_type().allocate(nelements); - } - - void free_array( void* _ptr, size_t _size) __TBB_override { - size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); - array_allocator_type().deallocate( reinterpret_cast(_ptr),nelements); - } - - public: - - //! Basic types - typedef Allocator allocator_type; - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; - typedef typename internal_collection_type::size_type size_type; - typedef typename internal_collection_type::difference_type difference_type; - - // Iterator types - typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, value_type > iterator; - typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, const value_type > const_iterator; - - // Parallel range types - typedef generic_range_type< iterator > range_type; - typedef generic_range_type< const_iterator > const_range_type; - - //! Default constructor. Each local instance of T is default constructed. - enumerable_thread_specific() : my_construct_callback( - internal::callback_leaf >::make(/*dummy argument*/0) - ){} - - //! Constructor with initializer functor. Each local instance of T is constructed by T(finit()). - template ::type>::value>::type -#endif - > - explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( - internal::callback_leaf >::make( tbb::internal::move(finit) ) - ){} - - //! 
Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. - explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( - internal::callback_leaf >::make( exemplar ) - ){} - -#if __TBB_ETS_USE_CPP11 - explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( - internal::callback_leaf >::make( std::move(exemplar) ) - ){} - - //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) - template ::type>::value - && !internal::is_compatible_ets::type>::value - && !internal::is_same_type::type>::value - >::type> - enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( - internal::callback_leaf >::make( std::forward(arg1), std::forward
<P>
(args)... ) - ){} -#endif - - //! Destructor - ~enumerable_thread_specific() { - if(my_construct_callback) my_construct_callback->destroy(); - // Deallocate the hash table before overridden free_array() becomes inaccessible - this->internal::ets_base::table_clear(); - } - - //! returns reference to local, discarding exists - reference local() { - bool exists; - return local(exists); - } - - //! Returns reference to calling thread's local copy, creating one if necessary - reference local(bool& exists) { - void* ptr = this->table_lookup(exists); - return *(T*)ptr; - } - - //! Get the number of local copies - size_type size() const { return my_locals.size(); } - - //! true if there have been no local copies created - bool empty() const { return my_locals.empty(); } - - //! begin iterator - iterator begin() { return iterator( my_locals, 0 ); } - //! end iterator - iterator end() { return iterator(my_locals, my_locals.size() ); } - - //! begin const iterator - const_iterator begin() const { return const_iterator(my_locals, 0); } - - //! end const iterator - const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } - - //! Get range for parallel algorithms - range_type range( size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } - - //! Get const range for parallel algorithms - const_range_type range( size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } - - //! Destroys local copies - void clear() { - my_locals.clear(); - this->table_clear(); - // callback is not destroyed - } - - private: - - template - void internal_copy(const enumerable_thread_specific& other) { -#if __TBB_ETS_USE_CPP11 && TBB_USE_ASSERT - // this tests is_compatible_ets - __TBB_STATIC_ASSERT( (internal::is_compatible_ets::type>::value), "is_compatible_ets fails" ); -#endif - // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. - my_construct_callback = other.my_construct_callback->clone(); - __TBB_ASSERT(my_locals.size()==0,NULL); - my_locals.reserve(other.size()); - this->table_elementwise_copy( other, create_local_by_copy ); - } - - void internal_swap(enumerable_thread_specific& other) { - using std::swap; - __TBB_ASSERT( this!=&other, NULL ); - swap(my_construct_callback, other.my_construct_callback); - // concurrent_vector::swap() preserves storage space, - // so addresses to the vector kept in ETS hash table remain valid. 
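Taken together, the public surface above amounts to a simple workflow: each thread accumulates through local(), and a serial combine() or combine_each() reduces the copies afterwards. A minimal sketch using the functor-initialized constructor (names and bounds are illustrative):

#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <cstdio>

int main() {
    // Each thread's local copy is constructed from the functor's result.
    tbb::enumerable_thread_specific<long> partial([] { return 0L; });
    tbb::parallel_for(1, 1001, [&](int i) {
        partial.local() += i;          // contention-free per-thread add
    });
    // combine() runs serially after the parallel phase and requires the
    // element type to be copy-assignable, as documented above.
    long total = partial.combine([](long a, long b) { return a + b; });
    std::printf("total = %ld\n", total);  // 500500
    return 0;
}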
- swap(my_locals, other.my_locals); - this->internal::ets_base::table_swap(other); - } - -#if __TBB_ETS_USE_CPP11 - template - void internal_move(enumerable_thread_specific&& other) { -#if TBB_USE_ASSERT - // this tests is_compatible_ets - __TBB_STATIC_ASSERT( (internal::is_compatible_ets::type>::value), "is_compatible_ets fails" ); -#endif - my_construct_callback = other.my_construct_callback; - other.my_construct_callback = NULL; - __TBB_ASSERT(my_locals.size()==0,NULL); - my_locals.reserve(other.size()); - this->table_elementwise_copy( other, create_local_by_move ); - } -#endif - - public: - - enumerable_thread_specific( const enumerable_thread_specific& other ) - : internal::ets_base() /* prevents GCC warnings with -Wextra */ - { - internal_copy(other); - } - - template - enumerable_thread_specific( const enumerable_thread_specific& other ) - { - internal_copy(other); - } - -#if __TBB_ETS_USE_CPP11 - enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() - { - internal_swap(other); - } - - template - enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() - { - internal_move(std::move(other)); - } -#endif - - enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) - { - if( this != &other ) { - this->clear(); - my_construct_callback->destroy(); - internal_copy( other ); - } - return *this; - } - - template - enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) - { - __TBB_ASSERT( static_cast(this)!=static_cast(&other), NULL ); // Objects of different types - this->clear(); - my_construct_callback->destroy(); - internal_copy(other); - return *this; - } - -#if __TBB_ETS_USE_CPP11 - enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) - { - if( this != &other ) - internal_swap(other); - return *this; - } - - template - enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) - { - __TBB_ASSERT( static_cast(this)!=static_cast(&other), NULL ); // Objects of different types - this->clear(); - my_construct_callback->destroy(); - internal_move(std::move(other)); - return *this; - } -#endif - - // combine_func_t has signature T(T,T) or T(const T&, const T&) - template - T combine(combine_func_t f_combine) { - if(begin() == end()) { - internal::ets_element location; - my_construct_callback->construct(location.value()); - return *location.value_committed(); - } - const_iterator ci = begin(); - T my_result = *ci; - while(++ci != end()) - my_result = f_combine( my_result, *ci ); - return my_result; - } - - // combine_func_t takes T by value or by [const] reference, and returns nothing - template - void combine_each(combine_func_t f_combine) { - for(iterator ci = begin(); ci != end(); ++ci) { - f_combine( *ci ); - } - } - - }; // enumerable_thread_specific - - template< typename Container > - class flattened2d { - - // This intermediate typedef is to address issues with VC7.1 compilers - typedef typename Container::value_type conval_type; - - public: - - //! 
Basic types
-    typedef typename conval_type::size_type size_type;
-    typedef typename conval_type::difference_type difference_type;
-    typedef typename conval_type::allocator_type allocator_type;
-    typedef typename conval_type::value_type value_type;
-    typedef typename conval_type::reference reference;
-    typedef typename conval_type::const_reference const_reference;
-    typedef typename conval_type::pointer pointer;
-    typedef typename conval_type::const_pointer const_pointer;
-
-    typedef typename internal::segmented_iterator<Container, value_type> iterator;
-    typedef typename internal::segmented_iterator<Container, const value_type> const_iterator;
-
-    flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) :
-        my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { }
-
-    explicit flattened2d( const Container &c ) :
-        my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { }
-
-    iterator begin() { return iterator(*my_container) = my_begin; }
-    iterator end() { return iterator(*my_container) = my_end; }
-    const_iterator begin() const { return const_iterator(*my_container) = my_begin; }
-    const_iterator end() const { return const_iterator(*my_container) = my_end; }
-
-    size_type size() const {
-        size_type tot_size = 0;
-        for(typename Container::const_iterator i = my_begin; i != my_end; ++i) {
-            tot_size += i->size();
-        }
-        return tot_size;
-    }
-
-private:
-
-    Container *my_container;
-    typename Container::const_iterator my_begin;
-    typename Container::const_iterator my_end;
-
-};
-
-template <typename Container>
-flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
-    return flattened2d<Container>(c, b, e);
-}
-
-template <typename Container>
-flattened2d<Container> flatten2d(const Container &c) {
-    return flattened2d<Container>(c);
-}
-
-} // interface6
-
-namespace internal {
-using interface6::internal::segmented_iterator;
-}
-
-using interface6::enumerable_thread_specific;
-using interface6::flattened2d;
-using interface6::flatten2d;
-
-} // namespace tbb
-
-#endif
diff --git a/lib/3rdParty/tbb/include/tbb/flow_graph.h b/lib/3rdParty/tbb/include/tbb/flow_graph.h
deleted file mode 100644
index 7b03c0e2..00000000
--- a/lib/3rdParty/tbb/include/tbb/flow_graph.h
+++ /dev/null
@@ -1,4075 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_flow_graph_H
-#define __TBB_flow_graph_H
-
-#include "tbb_stddef.h"
-#include "atomic.h"
-#include "spin_mutex.h"
-#include "null_mutex.h"
-#include "spin_rw_mutex.h"
-#include "null_rw_mutex.h"
-#include "task.h"
-#include "cache_aligned_allocator.h"
-#include "tbb_exception.h"
-#include "internal/_template_helpers.h"
-#include "internal/_aggregator_impl.h"
-#include "tbb_profiling.h"
-#include "task_arena.h"
-#include "flow_graph_abstractions.h"
-
-#if __TBB_PREVIEW_ASYNC_MSG
-#include <vector>    // std::vector in internal::async_storage
-#include <memory>    // std::shared_ptr in async_msg
-#endif
-
-#if __TBB_PREVIEW_STREAMING_NODE
-// For streaming_node
-#include <array>            // std::array
-#include <unordered_map>    // std::unordered_map
-#include <type_traits>      // std::decay, std::true_type, std::false_type
-#endif // __TBB_PREVIEW_STREAMING_NODE
-
-#if TBB_DEPRECATED_FLOW_ENQUEUE
-#define FLOW_SPAWN(a) tbb::task::enqueue((a))
-#else
-#define FLOW_SPAWN(a) tbb::task::spawn((a))
-#endif
-
-// use the VC10 or gcc version of tuple if it is available.
-#if __TBB_CPP11_TUPLE_PRESENT
-    #include <tuple>
-namespace tbb {
-    namespace flow {
-        using std::tuple;
-        using std::tuple_size;
-        using std::tuple_element;
-        using std::get;
-    }
-}
-#else
-    #include "compat/tuple"
-#endif
-
-#include <list>
-#include <queue>
-
-/** @file
-  \brief The graph related classes and functions
-
-  There are some applications that best express dependencies as messages
-  passed between nodes in a graph. These messages may contain data or
-  simply act as signals that a predecessor has completed. The graph
-  class and its associated node classes can be used to express such
-  applications.
-*/
-
-namespace tbb {
-namespace flow {
-
-//! An enumeration that provides the two most common concurrency levels: unlimited and serial
-enum concurrency { unlimited = 0, serial = 1 };
-
-namespace internal {
-static tbb::task * const SUCCESSFULLY_ENQUEUED = (task *)-1;
-}
-
-namespace interface9 {
-
-using tbb::flow::internal::SUCCESSFULLY_ENQUEUED;
-
-namespace internal {
-    template< typename T, typename M > class successor_cache;
-    template< typename T, typename M > class broadcast_cache;
-    template< typename T, typename M > class round_robin_cache;
-    template< typename T, typename M > class predecessor_cache;
-    template< typename T, typename M > class reservable_predecessor_cache;
-}
-
-//A generic null type
-struct null_type {};
-
-//! An empty class used for messages that mean "I'm done"
-class continue_msg {};
-
-template< typename T > class sender;
-template< typename T > class receiver;
-class continue_receiver;
-
-template< typename T > class limiter_node;  // needed for resetting decrementer
-template< typename R, typename B > class run_and_put_task;
-
-// flags to modify the behavior of the graph reset().  Can be combined.
-enum reset_flags {
-    rf_reset_protocol = 0,
-    rf_reset_bodies   = 1<<0,  // delete the current node body, reset to a copy of the initial node body.
-    rf_clear_edges    = 1<<1   // delete edges
-};
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-//* holder of edges both for caches and for those nodes which do not have predecessor caches.
-// C == receiver< ... > or sender< ... >, depending.
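A minimal usage sketch of the public API this header declares (node names and values are illustrative; assumes TBB is linked): two function_nodes joined by make_edge, drained with wait_for_all().

    #include "tbb/flow_graph.h"
    #include <iostream>

    int main() {
        tbb::flow::graph g;
        tbb::flow::function_node<int, int> square( g, tbb::flow::unlimited,
            []( int v ) { return v * v; } );
        tbb::flow::function_node<int, tbb::flow::continue_msg> print( g, tbb::flow::serial,
            []( int v ) -> tbb::flow::continue_msg {
                std::cout << v << '\n';           // serial: at most one invocation at a time
                return tbb::flow::continue_msg();
            } );
        tbb::flow::make_edge( square, print );    // square's output feeds print
        for ( int i = 0; i < 4; ++i ) square.try_put( i );
        g.wait_for_all();                         // block until the graph is idle
        return 0;
    }

As the enum's comment says, the reset_flags above can be combined: g.reset( rf_clear_edges | rf_reset_bodies ) both removes edges and restores each node's initial body.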
-namespace internal { -template -class edge_container { - -public: - typedef std::list > edge_list_type; - - void add_edge( C &s) { - built_edges.push_back( &s ); - } - - void delete_edge( C &s) { - for ( typename edge_list_type::iterator i = built_edges.begin(); i != built_edges.end(); ++i ) { - if ( *i == &s ) { - (void)built_edges.erase(i); - return; // only remove one predecessor per request - } - } - } - - void copy_edges( edge_list_type &v) { - v = built_edges; - } - - size_t edge_count() { - return (size_t)(built_edges.size()); - } - - void clear() { - built_edges.clear(); - } - - // methods remove the statement from all predecessors/successors liste in the edge - // container. - template< typename S > void sender_extract( S &s ); - template< typename R > void receiver_extract( R &r ); - -private: - edge_list_type built_edges; -}; // class edge_container -} // namespace internal -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - -#if __TBB_PREVIEW_ASYNC_MSG - -#include "internal/_flow_graph_async_msg_impl.h" - -namespace internal { - -class untyped_receiver; - -class untyped_sender { - template< typename, typename > friend class internal::predecessor_cache; - template< typename, typename > friend class internal::reservable_predecessor_cache; -public: - //! The successor type for this node - typedef untyped_receiver successor_type; - - virtual ~untyped_sender() {} - - // NOTE: Following part of PUBLIC section is copy-paste from original sender class - - // TODO: Prevent untyped successor registration - - //! Add a new successor to this node - virtual bool register_successor( successor_type &r ) = 0; - - //! Removes a successor from this node - virtual bool remove_successor( successor_type &r ) = 0; - - //! Releases the reserved item - virtual bool try_release( ) { return false; } - - //! Consumes the reserved item - virtual bool try_consume( ) { return false; } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - //! interface to record edges for traversal & deletion - typedef internal::edge_container built_successors_type; - typedef built_successors_type::edge_list_type successor_list_type; - virtual built_successors_type &built_successors() = 0; - virtual void internal_add_built_successor( successor_type & ) = 0; - virtual void internal_delete_built_successor( successor_type & ) = 0; - virtual void copy_successors( successor_list_type &) = 0; - virtual size_t successor_count() = 0; -#endif -protected: - //! Request an item from the sender - template< typename X > - bool try_get( X &t ) { - return try_get_wrapper( internal::async_helpers::to_void_ptr(t), internal::async_helpers::is_async_type ); - } - - //! Reserves an item in the sender - template< typename X > - bool try_reserve( X &t ) { - return try_reserve_wrapper( internal::async_helpers::to_void_ptr(t), internal::async_helpers::is_async_type ); - } - - virtual bool try_get_wrapper( void* p, bool is_async ) = 0; - virtual bool try_reserve_wrapper( void* p, bool is_async ) = 0; -}; - -class untyped_receiver { - template< typename, typename > friend class run_and_put_task; - template< typename > friend class limiter_node; - - template< typename, typename > friend class internal::broadcast_cache; - template< typename, typename > friend class internal::round_robin_cache; - template< typename, typename > friend class internal::successor_cache; - -#if __TBB_PREVIEW_OPENCL_NODE - template< typename, typename > friend class proxy_dependency_receiver; -#endif /* __TBB_PREVIEW_OPENCL_NODE */ -public: - //! 
The predecessor type for this node - typedef untyped_sender predecessor_type; - - //! Destructor - virtual ~untyped_receiver() {} - - //! Put an item to the receiver - template - bool try_put(const X& t) { - task *res = try_put_task(t); - if (!res) return false; - if (res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res); - return true; - } - - // NOTE: Following part of PUBLIC section is copy-paste from original receiver class - - // TODO: Prevent untyped predecessor registration - - //! Add a predecessor to the node - virtual bool register_predecessor( predecessor_type & ) { return false; } - - //! Remove a predecessor from the node - virtual bool remove_predecessor( predecessor_type & ) { return false; } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef internal::edge_container built_predecessors_type; - typedef built_predecessors_type::edge_list_type predecessor_list_type; - virtual built_predecessors_type &built_predecessors() = 0; - virtual void internal_add_built_predecessor( predecessor_type & ) = 0; - virtual void internal_delete_built_predecessor( predecessor_type & ) = 0; - virtual void copy_predecessors( predecessor_list_type & ) = 0; - virtual size_t predecessor_count() = 0; -#endif -protected: - template - task *try_put_task(const X& t) { - return try_put_task_wrapper( internal::async_helpers::to_void_ptr(t), internal::async_helpers::is_async_type ); - } - - virtual task* try_put_task_wrapper( const void* p, bool is_async ) = 0; - - // NOTE: Following part of PROTECTED and PRIVATE sections is copy-paste from original receiver class - - //! put receiver back in initial state - virtual void reset_receiver(reset_flags f = rf_reset_protocol) = 0; - - virtual bool is_continue_receiver() { return false; } -}; - -} // namespace internal - -//! Pure virtual template class that defines a sender of messages of type T -template< typename T > -class sender : public internal::untyped_sender { -public: - //! The output type of this sender - typedef T output_type; - - typedef typename internal::async_helpers::filtered_type filtered_type; - - //! Request an item from the sender - virtual bool try_get( T & ) { return false; } - - //! Reserves an item in the sender - virtual bool try_reserve( T & ) { return false; } - -protected: - virtual bool try_get_wrapper( void* p, bool is_async ) __TBB_override { - // Both async OR both are NOT async - if ( internal::async_helpers::is_async_type == is_async ) { - return try_get( internal::async_helpers::from_void_ptr(p) ); - } - // Else: this (T) is async OR incoming 't' is async - __TBB_ASSERT(false, "async_msg interface does not support 'pull' protocol in try_get()"); - return false; - } - - virtual bool try_reserve_wrapper( void* p, bool is_async ) __TBB_override { - // Both async OR both are NOT async - if ( internal::async_helpers::is_async_type == is_async ) { - return try_reserve( internal::async_helpers::from_void_ptr(p) ); - } - // Else: this (T) is async OR incoming 't' is async - __TBB_ASSERT(false, "async_msg interface does not support 'pull' protocol in try_reserve()"); - return false; - } -}; // class sender - -//! Pure virtual template class that defines a receiver of messages of type T -template< typename T > -class receiver : public internal::untyped_receiver { - template< typename > friend class internal::async_storage; - template< typename, typename > friend struct internal::async_helpers; -public: - //! The input type of this receiver - typedef T input_type; - - typedef typename internal::async_helpers::filtered_type filtered_type; - - //! 
Put an item to the receiver - bool try_put( const typename internal::async_helpers::filtered_type& t ) { - return internal::untyped_receiver::try_put(t); - } - - bool try_put( const typename internal::async_helpers::async_type& t ) { - return internal::untyped_receiver::try_put(t); - } - -protected: - virtual task* try_put_task_wrapper( const void *p, bool is_async ) __TBB_override { - return internal::async_helpers::try_put_task_wrapper_impl(this, p, is_async); - } - - //! Put item to successor; return task to run the successor if possible. - virtual task *try_put_task(const T& t) = 0; - -}; // class receiver - -#else // __TBB_PREVIEW_ASYNC_MSG - -//! Pure virtual template class that defines a sender of messages of type T -template< typename T > -class sender { -public: - //! The output type of this sender - typedef T output_type; - - //! The successor type for this node - typedef receiver successor_type; - - virtual ~sender() {} - - // NOTE: Following part of PUBLIC section is partly copy-pasted in sender under #if __TBB_PREVIEW_ASYNC_MSG - - //! Add a new successor to this node - virtual bool register_successor( successor_type &r ) = 0; - - //! Removes a successor from this node - virtual bool remove_successor( successor_type &r ) = 0; - - //! Request an item from the sender - virtual bool try_get( T & ) { return false; } - - //! Reserves an item in the sender - virtual bool try_reserve( T & ) { return false; } - - //! Releases the reserved item - virtual bool try_release( ) { return false; } - - //! Consumes the reserved item - virtual bool try_consume( ) { return false; } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - //! interface to record edges for traversal & deletion - typedef typename internal::edge_container built_successors_type; - typedef typename built_successors_type::edge_list_type successor_list_type; - virtual built_successors_type &built_successors() = 0; - virtual void internal_add_built_successor( successor_type & ) = 0; - virtual void internal_delete_built_successor( successor_type & ) = 0; - virtual void copy_successors( successor_list_type &) = 0; - virtual size_t successor_count() = 0; -#endif -}; // class sender - -//! Pure virtual template class that defines a receiver of messages of type T -template< typename T > -class receiver { -public: - //! The input type of this receiver - typedef T input_type; - - //! The predecessor type for this node - typedef sender predecessor_type; - - //! Destructor - virtual ~receiver() {} - - //! Put an item to the receiver - bool try_put( const T& t ) { - task *res = try_put_task(t); - if (!res) return false; - if (res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res); - return true; - } - - //! put item to successor; return task to run the successor if possible. -protected: - template< typename R, typename B > friend class run_and_put_task; - template< typename X, typename Y > friend class internal::broadcast_cache; - template< typename X, typename Y > friend class internal::round_robin_cache; - virtual task *try_put_task(const T& t) = 0; -public: - // NOTE: Following part of PUBLIC and PROTECTED sections is copy-pasted in receiver under #if __TBB_PREVIEW_ASYNC_MSG - - //! Add a predecessor to the node - virtual bool register_predecessor( predecessor_type & ) { return false; } - - //! 
Remove a predecessor from the node - virtual bool remove_predecessor( predecessor_type & ) { return false; } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename internal::edge_container built_predecessors_type; - typedef typename built_predecessors_type::edge_list_type predecessor_list_type; - virtual built_predecessors_type &built_predecessors() = 0; - virtual void internal_add_built_predecessor( predecessor_type & ) = 0; - virtual void internal_delete_built_predecessor( predecessor_type & ) = 0; - virtual void copy_predecessors( predecessor_list_type & ) = 0; - virtual size_t predecessor_count() = 0; -#endif - -protected: - //! put receiver back in initial state - template friend class limiter_node; - virtual void reset_receiver(reset_flags f = rf_reset_protocol) = 0; - - template friend class internal::successor_cache; - virtual bool is_continue_receiver() { return false; } - -#if __TBB_PREVIEW_OPENCL_NODE - template< typename, typename > friend class proxy_dependency_receiver; -#endif /* __TBB_PREVIEW_OPENCL_NODE */ -}; // class receiver - -#endif // __TBB_PREVIEW_ASYNC_MSG - -// enqueue left task if necessary. Returns the non-enqueued task if there is one. -static inline tbb::task *combine_tasks( tbb::task * left, tbb::task * right) { - // if no RHS task, don't change left. - if(right == NULL) return left; - // right != NULL - if(left == NULL) return right; - if(left == SUCCESSFULLY_ENQUEUED) return right; - // left contains a task - if(right != SUCCESSFULLY_ENQUEUED) { - // both are valid tasks - FLOW_SPAWN(*left); - return right; - } - return left; -} - -//! Base class for receivers of completion messages -/** These receivers automatically reset, but cannot be explicitly waited on */ -class continue_receiver : public receiver< continue_msg > { -public: - - //! The input type - typedef continue_msg input_type; - - //! The predecessor type for this node - typedef receiver::predecessor_type predecessor_type; - - //! Constructor - explicit continue_receiver( int number_of_predecessors = 0 ) { - my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; - my_current_count = 0; - } - - //! Copy constructor - continue_receiver( const continue_receiver& src ) : receiver() { - my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; - my_current_count = 0; - } - - //! Increments the trigger threshold - bool register_predecessor( predecessor_type & ) __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - ++my_predecessor_count; - return true; - } - - //! Decrements the trigger threshold - /** Does not check to see if the removal of the predecessor now makes the current count - exceed the new threshold. So removing a predecessor while the graph is active can cause - unexpected results. 
*/ - bool remove_predecessor( predecessor_type & ) __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - --my_predecessor_count; - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef internal::edge_container built_predecessors_type; - typedef built_predecessors_type::edge_list_type predecessor_list_type; - built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; } - - void internal_add_built_predecessor( predecessor_type &s) __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - my_built_predecessors.add_edge( s ); - } - - void internal_delete_built_predecessor( predecessor_type &s) __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - my_built_predecessors.delete_edge(s); - } - - void copy_predecessors( predecessor_list_type &v) __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - my_built_predecessors.copy_edges(v); - } - - size_t predecessor_count() __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - return my_built_predecessors.edge_count(); - } - -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - -protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - // execute body is supposed to be too small to create a task for. - task *try_put_task( const input_type & ) __TBB_override { - { - spin_mutex::scoped_lock l(my_mutex); - if ( ++my_current_count < my_predecessor_count ) - return SUCCESSFULLY_ENQUEUED; - else - my_current_count = 0; - } - task * res = execute(); - return res? res : SUCCESSFULLY_ENQUEUED; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - // continue_receiver must contain its own built_predecessors because it does - // not have a node_cache. - built_predecessors_type my_built_predecessors; -#endif - spin_mutex my_mutex; - int my_predecessor_count; - int my_current_count; - int my_initial_predecessor_count; - // the friend declaration in the base class did not eliminate the "protected class" - // error in gcc 4.1.2 - template friend class limiter_node; - - void reset_receiver( reset_flags f ) __TBB_override { - my_current_count = 0; - if (f & rf_clear_edges) { -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - my_built_predecessors.clear(); -#endif - my_predecessor_count = my_initial_predecessor_count; - } - } - - //! Does whatever should happen when the threshold is reached - /** This should be very fast or else spawn a task. This is - called while the sender is blocked in the try_put(). 
*/ - virtual task * execute() = 0; - template friend class internal::successor_cache; - bool is_continue_receiver() __TBB_override { return true; } - -}; // class continue_receiver -} // interface9 - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - template - K key_from_message( const T &t ) { - return t.key(); - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - - using interface9::sender; - using interface9::receiver; - using interface9::continue_receiver; -} // flow -} // tbb - -#include "internal/_flow_graph_trace_impl.h" -#include "internal/_tbb_hash_compare_impl.h" - -namespace tbb { -namespace flow { -namespace interface9 { - -#include "internal/_flow_graph_impl.h" -#include "internal/_flow_graph_types_impl.h" -using namespace internal::graph_policy_namespace; - -class graph; -class graph_node; - -template -class graph_iterator { - friend class graph; - friend class graph_node; -public: - typedef size_t size_type; - typedef GraphNodeType value_type; - typedef GraphNodeType* pointer; - typedef GraphNodeType& reference; - typedef const GraphNodeType& const_reference; - typedef std::forward_iterator_tag iterator_category; - - //! Default constructor - graph_iterator() : my_graph(NULL), current_node(NULL) {} - - //! Copy constructor - graph_iterator(const graph_iterator& other) : - my_graph(other.my_graph), current_node(other.current_node) - {} - - //! Assignment - graph_iterator& operator=(const graph_iterator& other) { - if (this != &other) { - my_graph = other.my_graph; - current_node = other.current_node; - } - return *this; - } - - //! Dereference - reference operator*() const; - - //! Dereference - pointer operator->() const; - - //! Equality - bool operator==(const graph_iterator& other) const { - return ((my_graph == other.my_graph) && (current_node == other.current_node)); - } - - //! Inequality - bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } - - //! Pre-increment - graph_iterator& operator++() { - internal_forward(); - return *this; - } - - //! Post-increment - graph_iterator operator++(int) { - graph_iterator result = *this; - operator++(); - return result; - } - -private: - // the graph over which we are iterating - GraphContainerType *my_graph; - // pointer into my_graph's my_nodes list - pointer current_node; - - //! Private initializing constructor for begin() and end() iterators - graph_iterator(GraphContainerType *g, bool begin); - void internal_forward(); -}; // class graph_iterator - -//! 
The graph class -/** This class serves as a handle to the graph */ -class graph : tbb::internal::no_copy, public graph_proxy { - friend class graph_node; - - template< typename Body > - class run_task : public task { - public: - run_task( Body& body ) : my_body(body) {} - task *execute() __TBB_override { - my_body(); - return NULL; - } - private: - Body my_body; - }; - - template< typename Receiver, typename Body > - class run_and_put_task : public task { - public: - run_and_put_task( Receiver &r, Body& body ) : my_receiver(r), my_body(body) {} - task *execute() __TBB_override { - task *res = my_receiver.try_put_task( my_body() ); - if (res == SUCCESSFULLY_ENQUEUED) res = NULL; - return res; - } - private: - Receiver &my_receiver; - Body my_body; - }; - typedef std::list task_list_type; - - class wait_functor { - task* graph_root_task; - public: - wait_functor( task* t ) : graph_root_task(t) {} - void operator()() const { graph_root_task->wait_for_all(); } - }; - - void prepare_task_arena( bool reinit = false ) { - if (reinit) { - __TBB_ASSERT( my_task_arena, "task arena is NULL"); - my_task_arena->terminate(); - my_task_arena->initialize(tbb::task_arena::attach()); - } else { - __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); - my_task_arena = new tbb::task_arena(tbb::task_arena::attach()); - } - if (!my_task_arena->is_active()) // failed to attach - my_task_arena->initialize(); // create a new, default-initialized arena - __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); - } - -public: - //! Constructs a graph with isolated task_group_context - graph() : my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { - prepare_task_arena(); - own_context = true; - cancelled = false; - caught_exception = false; - my_context = new task_group_context(); - my_root_task = ( new ( task::allocate_root(*my_context) ) empty_task ); - my_root_task->set_ref_count(1); - tbb::internal::fgt_graph( this ); - my_is_active = true; - } - - //! Constructs a graph with use_this_context as context - explicit graph(task_group_context& use_this_context) : - my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { - prepare_task_arena(); - own_context = false; - my_root_task = ( new ( task::allocate_root(*my_context) ) empty_task ); - my_root_task->set_ref_count(1); - tbb::internal::fgt_graph( this ); - my_is_active = true; - } - - //! Destroys the graph. - /** Calls wait_for_all, then destroys the root task and context. */ - ~graph() { - wait_for_all(); - my_root_task->set_ref_count(0); - task::destroy( *my_root_task ); - if (own_context) delete my_context; - delete my_task_arena; - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) { - tbb::internal::fgt_graph_desc( this, name ); - } -#endif - - //! Used to register that an external entity may still interact with the graph. - /** The graph will not return from wait_for_all until a matching number of decrement_wait_count calls - is made. */ - void increment_wait_count() { - if (my_root_task) - my_root_task->increment_ref_count(); - } - - //! Deregisters an external entity that may have interacted with the graph. - /** The graph will not return from wait_for_all until all the number of decrement_wait_count calls - matches the number of increment_wait_count calls. 
*/ - void decrement_wait_count() { - if (my_root_task) - my_root_task->decrement_ref_count(); - } - - void reserve_wait() __TBB_override { - increment_wait_count(); - } - - void release_wait() __TBB_override { - decrement_wait_count(); - } - - //! Spawns a task that runs a body and puts its output to a specific receiver - /** The task is spawned as a child of the graph. This is useful for running tasks - that need to block a wait_for_all() on the graph. For example a one-off source. */ - template< typename Receiver, typename Body > - void run( Receiver &r, Body body ) { - if(is_active()) { - FLOW_SPAWN( (* new ( task::allocate_additional_child_of( *root_task() ) ) - run_and_put_task< Receiver, Body >( r, body )) ); - } - } - - //! Spawns a task that runs a function object - /** The task is spawned as a child of the graph. This is useful for running tasks - that need to block a wait_for_all() on the graph. For example a one-off source. */ - template< typename Body > - void run( Body body ) { - if(is_active()) { - FLOW_SPAWN( * new ( task::allocate_additional_child_of( *root_task() ) ) run_task< Body >( body ) ); - } - } - - //! Wait until graph is idle and decrement_wait_count calls equals increment_wait_count calls. - /** The waiting thread will go off and steal work while it is block in the wait_for_all. */ - void wait_for_all() { - cancelled = false; - caught_exception = false; - if (my_root_task) { -#if TBB_USE_EXCEPTIONS - try { -#endif - my_task_arena->execute(wait_functor(my_root_task)); - cancelled = my_context->is_group_execution_cancelled(); -#if TBB_USE_EXCEPTIONS - } - catch(...) { - my_root_task->set_ref_count(1); - my_context->reset(); - caught_exception = true; - cancelled = true; - throw; - } -#endif - // TODO: the "if" condition below is just a work-around to support the concurrent wait - // mode. The cancellation and exception mechanisms are still broken in this mode. - // Consider using task group not to re-implement the same functionality. - if ( !(my_context->traits() & task_group_context::concurrent_wait) ) { - my_context->reset(); // consistent with behavior in catch() - my_root_task->set_ref_count(1); - } - } - } - - //! Returns the root task of the graph - task * root_task() { - return my_root_task; - } - - void set_active(bool a = true) { - my_is_active = a; - } - - bool is_active() { - return my_is_active; - } - - void add_task_to_reset_list(task *tp) { - my_reset_task_list.push_back(tp); - } - - // ITERATORS - template - friend class graph_iterator; - - // Graph iterator typedefs - typedef graph_iterator iterator; - typedef graph_iterator const_iterator; - - // Graph iterator constructors - //! start iterator - iterator begin() { return iterator(this, true); } - //! end iterator - iterator end() { return iterator(this, false); } - //! start const iterator - const_iterator begin() const { return const_iterator(this, true); } - //! end const iterator - const_iterator end() const { return const_iterator(this, false); } - //! start const iterator - const_iterator cbegin() const { return const_iterator(this, true); } - //! end const iterator - const_iterator cend() const { return const_iterator(this, false); } - - //! return status of graph execution - bool is_cancelled() { return cancelled; } - bool exception_thrown() { return caught_exception; } - - // thread-unsafe state reset. 
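    // A hedged sketch of the intended use of run() (the body is illustrative):
    //     tbb::flow::graph g;
    //     g.run( []{ /* one-off work, e.g. seeding the graph */ } );
    //     g.wait_for_all();   // also waits for the body above, since run()
    //                         // spawned it as a child of the graph's root task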
- void reset(reset_flags f = rf_reset_protocol); - -private: - task *my_root_task; - task_group_context *my_context; - bool own_context; - bool cancelled; - bool caught_exception; - bool my_is_active; - task_list_type my_reset_task_list; - - graph_node *my_nodes, *my_nodes_last; - - spin_mutex nodelist_mutex; - void register_node(graph_node *n); - void remove_node(graph_node *n); - - template < typename Input, typename Output, typename Policy, typename Allocator > - friend class async_node; - task_arena* my_task_arena; -}; // class graph - -template -graph_iterator::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) -{ - if (begin) current_node = my_graph->my_nodes; - //else it is an end iterator by default -} - -template -typename graph_iterator::reference graph_iterator::operator*() const { - __TBB_ASSERT(current_node, "graph_iterator at end"); - return *operator->(); -} - -template -typename graph_iterator::pointer graph_iterator::operator->() const { - return current_node; -} - -template -void graph_iterator::internal_forward() { - if (current_node) current_node = current_node->next; -} - -//! The base of all graph nodes. -class graph_node : tbb::internal::no_copy { - friend class graph; - template - friend class graph_iterator; -protected: - graph& my_graph; - graph_node *next, *prev; -public: - explicit graph_node(graph& g) : my_graph(g) { - my_graph.register_node(this); - } - virtual ~graph_node() { - my_graph.remove_node(this); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - virtual void set_name( const char *name ) = 0; -#endif - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - virtual void extract( ) = 0; -#endif - -protected: - // performs the reset on an individual node. - virtual void reset_node(reset_flags f=rf_reset_protocol) = 0; -}; // class graph_node - -inline void graph::register_node(graph_node *n) { - n->next = NULL; - { - spin_mutex::scoped_lock lock(nodelist_mutex); - n->prev = my_nodes_last; - if (my_nodes_last) my_nodes_last->next = n; - my_nodes_last = n; - if (!my_nodes) my_nodes = n; - } -} - -inline void graph::remove_node(graph_node *n) { - { - spin_mutex::scoped_lock lock(nodelist_mutex); - __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); - if (n->prev) n->prev->next = n->next; - if (n->next) n->next->prev = n->prev; - if (my_nodes_last == n) my_nodes_last = n->prev; - if (my_nodes == n) my_nodes = n->next; - } - n->prev = n->next = NULL; -} - -inline void graph::reset( reset_flags f ) { - // reset context - set_active(false); - if(my_context) my_context->reset(); - cancelled = false; - caught_exception = false; - // reset all the nodes comprising the graph - for(iterator ii = begin(); ii != end(); ++ii) { - graph_node *my_p = &(*ii); - my_p->reset_node(f); - } - // Reattach the arena. Might be useful to run the graph in a particular task_arena - // while not limiting graph lifetime to a single task_arena::execute() call. - prepare_task_arena( /*reinit=*/true ); - set_active(true); - // now spawn the tasks necessary to start the graph - for(task_list_type::iterator rti = my_reset_task_list.begin(); rti != my_reset_task_list.end(); ++rti) { - FLOW_SPAWN(*(*rti)); - } - my_reset_task_list.clear(); -} - -#include "internal/_flow_graph_node_impl.h" - -//! An executable node that acts as a source, i.e. it has no predecessors -template < typename Output > -class source_node : public graph_node, public sender< Output > { -public: - //! The type of the output message, which is complete - typedef Output output_type; - - //! 
The type of successors of this node - typedef typename sender::successor_type successor_type; - - //Source node has no input type - typedef null_type input_type; - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename sender::built_successors_type built_successors_type; - typedef typename sender::successor_list_type successor_list_type; -#endif - - //! Constructor for a node with a successor - template< typename Body > - source_node( graph &g, Body body, bool is_active = true ) - : graph_node(g), my_active(is_active), init_my_active(is_active), - my_body( new internal::source_body_leaf< output_type, Body>(body) ), - my_init_body( new internal::source_body_leaf< output_type, Body>(body) ), - my_reserved(false), my_has_cached_item(false) - { - my_successors.set_owner(this); - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_SOURCE_NODE, &this->my_graph, - static_cast *>(this), this->my_body ); - } - - //! Copy constructor - source_node( const source_node& src ) : - graph_node(src.my_graph), sender(), - my_active(src.init_my_active), - init_my_active(src.init_my_active), my_body( src.my_init_body->clone() ), my_init_body(src.my_init_body->clone() ), - my_reserved(false), my_has_cached_item(false) - { - my_successors.set_owner(this); - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_SOURCE_NODE, &this->my_graph, - static_cast *>(this), this->my_body ); - } - - //! The destructor - ~source_node() { delete my_body; delete my_init_body; } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - - //! Add a new successor to this node - bool register_successor( successor_type &r ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.register_successor(r); - if ( my_active ) - spawn_put(); - return true; - } - - //! Removes a successor from this node - bool remove_successor( successor_type &r ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.remove_successor(r); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - - built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); } - - void internal_add_built_successor( successor_type &r) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.internal_add_built_successor(r); - } - - void internal_delete_built_successor( successor_type &r) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.internal_delete_built_successor(r); - } - - size_t successor_count() __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - return my_successors.successor_count(); - } - - void copy_successors(successor_list_type &v) __TBB_override { - spin_mutex::scoped_lock l(my_mutex); - my_successors.copy_successors(v); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - //! Request an item from the node - bool try_get( output_type &v ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) - return false; - - if ( my_has_cached_item ) { - v = my_cached_item; - my_has_cached_item = false; - return true; - } - // we've been asked to provide an item, but we have none. enqueue a task to - // provide one. - spawn_put(); - return false; - } - - //! Reserves an item. 
- bool try_reserve( output_type &v ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) { - return false; - } - - if ( my_has_cached_item ) { - v = my_cached_item; - my_reserved = true; - return true; - } else { - return false; - } - } - - //! Release a reserved item. - /** true = item has been released and so remains in sender, dest must request or reserve future items */ - bool try_release( ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); - my_reserved = false; - if(!my_successors.empty()) - spawn_put(); - return true; - } - - //! Consumes a reserved item - bool try_consume( ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); - my_reserved = false; - my_has_cached_item = false; - if ( !my_successors.empty() ) { - spawn_put(); - } - return true; - } - - //! Activates a node that was created in the inactive state - void activate() { - spin_mutex::scoped_lock lock(my_mutex); - my_active = true; - if ( !my_successors.empty() ) - spawn_put(); - } - - template - Body copy_function_object() { - internal::source_body &body_ref = *this->my_body; - return dynamic_cast< internal::source_body_leaf & >(body_ref).get_body(); - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void extract( ) __TBB_override { - my_successors.built_successors().sender_extract(*this); // removes "my_owner" == this from each successor - my_active = init_my_active; - my_reserved = false; - if(my_has_cached_item) my_has_cached_item = false; - } -#endif - -protected: - - //! resets the source_node to its initial state - void reset_node( reset_flags f) __TBB_override { - my_active = init_my_active; - my_reserved =false; - if(my_has_cached_item) { - my_has_cached_item = false; - } - if(f & rf_clear_edges) my_successors.clear(); - if(f & rf_reset_bodies) { - internal::source_body *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - if(my_active) - this->my_graph.add_task_to_reset_list(create_put_task()); - } - -private: - spin_mutex my_mutex; - bool my_active; - bool init_my_active; - internal::source_body *my_body; - internal::source_body *my_init_body; - internal::broadcast_cache< output_type > my_successors; - bool my_reserved; - bool my_has_cached_item; - output_type my_cached_item; - - // used by apply_body_bypass, can invoke body of node. - bool try_reserve_apply_body(output_type &v) { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) { - return false; - } - if ( !my_has_cached_item ) { - tbb::internal::fgt_begin_body( my_body ); - bool r = (*my_body)(my_cached_item); - tbb::internal::fgt_end_body( my_body ); - if (r) { - my_has_cached_item = true; - } - } - if ( my_has_cached_item ) { - v = my_cached_item; - my_reserved = true; - return true; - } else { - return false; - } - } - - // when resetting, and if the source_node was created with my_active == true, then - // when we reset the node we must store a task to run the node, and spawn it only - // after the reset is complete and is_active() is again true. This is why we don't - // test for is_active() here. - task* create_put_task() { - return ( new ( task::allocate_additional_child_of( *(this->my_graph.root_task()) ) ) - internal:: source_task_bypass < source_node< output_type > >( *this ) ); - } - - //! 
Spawns a task that applies the body - void spawn_put( ) { - if(this->my_graph.is_active()) { - FLOW_SPAWN( *create_put_task()); - } - } - - friend class internal::source_task_bypass< source_node< output_type > >; - //! Applies the body. Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. - task * apply_body_bypass( ) { - output_type v; - if ( !try_reserve_apply_body(v) ) - return NULL; - - task *last_task = my_successors.try_put_task(v); - if ( last_task ) - try_consume(); - else - try_release(); - return last_task; - } -}; // class source_node - -template -struct allocate_buffer { - static const bool value = false; -}; - -template<> -struct allocate_buffer { - static const bool value = true; -}; - -//! Implements a function node that supports Input -> Output -template < typename Input, typename Output = continue_msg, typename Policy = queueing, typename Allocator=cache_aligned_allocator > -class function_node : public graph_node, public internal::function_input, public internal::function_output { -public: - typedef Input input_type; - typedef Output output_type; - typedef internal::function_input fInput_type; - typedef internal::function_input_queue input_queue_type; - typedef internal::function_output fOutput_type; - typedef typename fInput_type::predecessor_type predecessor_type; - typedef typename fOutput_type::successor_type successor_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename fInput_type::predecessor_list_type predecessor_list_type; - typedef typename fOutput_type::successor_list_type successor_list_type; -#endif - using fInput_type::my_predecessors; - - //! Constructor - // input_queue_type is allocated here, but destroyed in the function_input_base. - // TODO: pass the graph_buffer_policy to the function_input_base so it can all - // be done in one place. This would be an interface-breaking change. - template< typename Body > - function_node( graph &g, size_t concurrency, Body body ) : - graph_node(g), fInput_type(g, concurrency, body, allocate_buffer::value ? - new input_queue_type( ) : NULL ) { - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_FUNCTION_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this), this->my_body ); - } - - //! Copy constructor - function_node( const function_node& src ) : - graph_node(src.my_graph), - fInput_type(src, allocate_buffer::value ? new input_queue_type : NULL), - fOutput_type() { - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_FUNCTION_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this), this->my_body ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void extract( ) __TBB_override { - my_predecessors.built_predecessors().receiver_extract(*this); - successors().built_successors().sender_extract(*this); - } -#endif - -protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - using fInput_type::try_put_task; - - internal::broadcast_cache &successors () __TBB_override { return fOutput_type::my_successors; } - - void reset_node(reset_flags f) __TBB_override { - fInput_type::reset_function_input(f); - // TODO: use clear() instead. 
- if(f & rf_clear_edges) { - successors().clear(); - my_predecessors.clear(); - } - __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); - __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); - } - -}; // class function_node - -//! implements a function node that supports Input -> (set of outputs) -// Output is a tuple of output types. -template < typename Input, typename Output, typename Policy = queueing, typename Allocator=cache_aligned_allocator > -class multifunction_node : - public graph_node, - public internal::multifunction_input - < - Input, - typename internal::wrap_tuple_elements< - tbb::flow::tuple_size::value, // #elements in tuple - internal::multifunction_output, // wrap this around each element - Output // the tuple providing the types - >::type, - Allocator - > { -protected: - static const int N = tbb::flow::tuple_size::value; -public: - typedef Input input_type; - typedef null_type output_type; - typedef typename internal::wrap_tuple_elements::type output_ports_type; - typedef internal::multifunction_input fInput_type; - typedef internal::function_input_queue input_queue_type; -private: - typedef typename internal::multifunction_input base_type; - using fInput_type::my_predecessors; -public: - template - multifunction_node( graph &g, size_t concurrency, Body body ) : - graph_node(g), base_type(g,concurrency, body, allocate_buffer::value ? new input_queue_type : NULL) { - tbb::internal::fgt_multioutput_node_with_body( tbb::internal::FLOW_MULTIFUNCTION_NODE, - &this->my_graph, static_cast *>(this), - this->output_ports(), this->my_body ); - } - - multifunction_node( const multifunction_node &other) : - graph_node(other.my_graph), base_type(other, allocate_buffer::value ? new input_queue_type : NULL) { - tbb::internal::fgt_multioutput_node_with_body( tbb::internal::FLOW_MULTIFUNCTION_NODE, - &this->my_graph, static_cast *>(this), - this->output_ports(), this->my_body ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_multioutput_node_desc( this, name ); - } -#endif - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void extract( ) __TBB_override { - my_predecessors.built_predecessors().receiver_extract(*this); - base_type::extract(); - } -#endif - // all the guts are in multifunction_input... -protected: - void reset_node(reset_flags f) __TBB_override { base_type::reset(f); } -}; // multifunction_node - -//! split_node: accepts a tuple as input, forwards each element of the tuple to its -// successors. The node has unlimited concurrency, so it does not reject inputs. 
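A hedged sketch of the multifunction_node defined above (the typedef and names are illustrative): the body receives the input plus a tuple of output ports and may put to any subset of them.

    typedef tbb::flow::multifunction_node< int, tbb::flow::tuple<int, int> > mf_node;
    tbb::flow::graph g;
    mf_node parity_router( g, tbb::flow::unlimited,
        []( const int &v, mf_node::output_ports_type &ports ) {
            if ( v % 2 == 0 ) std::get<0>( ports ).try_put( v );  // evens to port 0
            else              std::get<1>( ports ).try_put( v );  // odds to port 1
        } );
    // Edges attach per port, e.g.:
    //   tbb::flow::make_edge( tbb::flow::output_port<0>( parity_router ), some_receiver );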
-template > -class split_node : public graph_node, public receiver { - static const int N = tbb::flow::tuple_size::value; - typedef receiver base_type; -public: - typedef TupleType input_type; - typedef Allocator allocator_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename base_type::predecessor_type predecessor_type; - typedef typename base_type::predecessor_list_type predecessor_list_type; - typedef internal::predecessor_cache predecessor_cache_type; - typedef typename predecessor_cache_type::built_predecessors_type built_predecessors_type; -#endif - - typedef typename internal::wrap_tuple_elements< - N, // #elements in tuple - internal::multifunction_output, // wrap this around each element - TupleType // the tuple providing the types - >::type output_ports_type; - - explicit split_node(graph &g) : graph_node(g) - { - tbb::internal::fgt_multioutput_node(tbb::internal::FLOW_SPLIT_NODE, &this->my_graph, - static_cast *>(this), this->output_ports()); - } - split_node( const split_node & other) : graph_node(other.my_graph), base_type(other) - { - tbb::internal::fgt_multioutput_node(tbb::internal::FLOW_SPLIT_NODE, &this->my_graph, - static_cast *>(this), this->output_ports()); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_multioutput_node_desc( this, name ); - } -#endif - - output_ports_type &output_ports() { return my_output_ports; } - -protected: - task *try_put_task(const TupleType& t) __TBB_override { - // Sending split messages in parallel is not justified, as overheads would prevail - internal::emit_element::emit_this(t, output_ports()); - - //we do not have successors here.So we just tell the task is successful. - return SUCCESSFULLY_ENQUEUED; - } - void reset_node(reset_flags f) __TBB_override { - if (f & rf_clear_edges) - internal::clear_element::clear_this(my_output_ports); - - __TBB_ASSERT(!(f & rf_clear_edges) || internal::clear_element::this_empty(my_output_ports), "split_node reset failed"); - } - void reset_receiver(reset_flags /*f*/) __TBB_override {} - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES -private: //! split_node doesn't use this "predecessors" functionality; so, we have "dummies" here; - void extract() __TBB_override {} - - //! Adds to list of predecessors added by make_edge - void internal_add_built_predecessor(predecessor_type&) __TBB_override {} - - //! removes from to list of predecessors (used by remove_edge) - void internal_delete_built_predecessor(predecessor_type&) __TBB_override {} - - size_t predecessor_count() __TBB_override { return 0; } - - void copy_predecessors(predecessor_list_type&) __TBB_override {} - - built_predecessors_type &built_predecessors() __TBB_override { return my_predessors; } - - //! dummy member - built_predecessors_type my_predessors; -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - -private: - output_ports_type my_output_ports; -}; - -//! Implements an executable node that supports continue_msg -> Output -template -class continue_node : public graph_node, public internal::continue_input, public internal::function_output { -public: - typedef continue_msg input_type; - typedef Output output_type; - typedef internal::continue_input fInput_type; - typedef internal::function_output fOutput_type; - typedef typename fInput_type::predecessor_type predecessor_type; - typedef typename fOutput_type::successor_type successor_type; - - //! 
Constructor for executable node with continue_msg -> Output - template - continue_node( graph &g, Body body ) : - graph_node(g), internal::continue_input( g, body ) { - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_CONTINUE_NODE, &this->my_graph, - static_cast *>(this), - static_cast *>(this), this->my_body ); - } - - - //! Constructor for executable node with continue_msg -> Output - template - continue_node( graph &g, int number_of_predecessors, Body body ) : - graph_node(g), internal::continue_input( g, number_of_predecessors, body ) { - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_CONTINUE_NODE, &this->my_graph, - static_cast *>(this), - static_cast *>(this), this->my_body ); - } - - //! Copy constructor - continue_node( const continue_node& src ) : - graph_node(src.my_graph), internal::continue_input(src), - internal::function_output() { - tbb::internal::fgt_node_with_body( tbb::internal::FLOW_CONTINUE_NODE, &this->my_graph, - static_cast *>(this), - static_cast *>(this), this->my_body ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void extract() __TBB_override { - fInput_type::my_built_predecessors.receiver_extract(*this); - successors().built_successors().sender_extract(*this); - } -#endif - -protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - using fInput_type::try_put_task; - internal::broadcast_cache &successors () __TBB_override { return fOutput_type::my_successors; } - - void reset_node(reset_flags f) __TBB_override { - fInput_type::reset_receiver(f); - if(f & rf_clear_edges)successors().clear(); - __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); - } -}; // continue_node - -template< typename T > -class overwrite_node : public graph_node, public receiver, public sender { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename receiver::built_predecessors_type built_predecessors_type; - typedef typename sender::built_successors_type built_successors_type; - typedef typename receiver::predecessor_list_type predecessor_list_type; - typedef typename sender::successor_list_type successor_list_type; -#endif - - explicit overwrite_node(graph &g) : graph_node(g), my_buffer_is_valid(false) { - my_successors.set_owner( this ); - tbb::internal::fgt_node( tbb::internal::FLOW_OVERWRITE_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } - - // Copy constructor; doesn't take anything from src; default won't work - overwrite_node( const overwrite_node& src ) : - graph_node(src.my_graph), receiver(), sender(), my_buffer_is_valid(false) - { - my_successors.set_owner( this ); - tbb::internal::fgt_node( tbb::internal::FLOW_OVERWRITE_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } - - ~overwrite_node() {} - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - - bool register_successor( successor_type &s ) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - if (my_buffer_is_valid && this->my_graph.is_active()) { - // We have a valid value 
that must be forwarded immediately. - if ( s.try_put( my_buffer ) || !s.register_predecessor( *this ) ) { - // We add the successor: it accepted our put or it rejected it but won't let us become a predecessor - my_successors.register_successor( s ); - } else { - // We don't add the successor: it rejected our put and we became its predecessor instead - return false; - } - } else { - // No valid value yet, just add as successor - my_successors.register_successor( s ); - } - return true; - } - - bool remove_successor( successor_type &s ) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_successors.remove_successor(s); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; } - built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); } - - void internal_add_built_successor( successor_type &s) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_successors.internal_add_built_successor(s); - } - - void internal_delete_built_successor( successor_type &s) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_successors.internal_delete_built_successor(s); - } - - size_t successor_count() __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - return my_successors.successor_count(); - } - - void copy_successors(successor_list_type &v) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_successors.copy_successors(v); - } - - void internal_add_built_predecessor( predecessor_type &p) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_built_predecessors.add_edge(p); - } - - void internal_delete_built_predecessor( predecessor_type &p) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_built_predecessors.delete_edge(p); - } - - size_t predecessor_count() __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - return my_built_predecessors.edge_count(); - } - - void copy_predecessors(predecessor_list_type &v) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - my_built_predecessors.copy_edges(v); - } - - void extract() __TBB_override { - my_buffer_is_valid = false; - built_successors().sender_extract(*this); - built_predecessors().receiver_extract(*this); - } - -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - bool try_get( input_type &v ) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - if ( my_buffer_is_valid ) { - v = my_buffer; - return true; - } - return false; - } - - bool is_valid() { - spin_mutex::scoped_lock l( my_mutex ); - return my_buffer_is_valid; - } - - void clear() { - spin_mutex::scoped_lock l( my_mutex ); - my_buffer_is_valid = false; - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - task * try_put_task( const input_type &v ) __TBB_override { - spin_mutex::scoped_lock l( my_mutex ); - return try_put_task_impl(v); - } - - task * try_put_task_impl(const input_type &v) { - my_buffer = v; - my_buffer_is_valid = true; - task * rtask = my_successors.try_put_task(v); - if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; - return rtask; - } - - spin_mutex my_mutex; - internal::broadcast_cache< input_type, null_rw_mutex > my_successors; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - internal::edge_container my_built_predecessors; -#endif - input_type my_buffer; - bool my_buffer_is_valid; - void reset_receiver(reset_flags /*f*/) __TBB_override {} 
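    // Hedged usage notes (names illustrative): the node keeps the most recent
    // value, so a successor registered late is still offered it:
    //     tbb::flow::overwrite_node<int> latest( g );
    //     latest.try_put( 1 );
    //     latest.try_put( 2 );            // overwrites: the buffer now holds 2
    //     int v = 0;
    //     bool ok = latest.try_get( v );  // ok == true, v == 2
    // write_once_node (below) differs only in rejecting puts once the buffer is valid.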
- - void reset_node( reset_flags f) __TBB_override { - my_buffer_is_valid = false; - if (f&rf_clear_edges) { - my_successors.clear(); - } - } -}; // overwrite_node - -template< typename T > -class write_once_node : public overwrite_node { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; - - //! Constructor - explicit write_once_node(graph& g) : overwrite_node(g) { - tbb::internal::fgt_node( tbb::internal::FLOW_WRITE_ONCE_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - - //! Copy constructor: call base class copy constructor - write_once_node( const write_once_node& src ) : overwrite_node(src) { - tbb::internal::fgt_node( tbb::internal::FLOW_WRITE_ONCE_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - task *try_put_task( const T &v ) __TBB_override { - spin_mutex::scoped_lock l( this->my_mutex ); - return this->my_buffer_is_valid ? NULL : this->try_put_task_impl(v); - } -}; - -//! Forwards messages of type T to all successors -template -class broadcast_node : public graph_node, public receiver, public sender { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename receiver::predecessor_list_type predecessor_list_type; - typedef typename sender::successor_list_type successor_list_type; -#endif -private: - internal::broadcast_cache my_successors; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - internal::edge_container my_built_predecessors; - spin_mutex pred_mutex; // serialize accesses on edge_container -#endif -public: - - explicit broadcast_node(graph& g) : graph_node(g) { - my_successors.set_owner( this ); - tbb::internal::fgt_node( tbb::internal::FLOW_BROADCAST_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } - - // Copy constructor - broadcast_node( const broadcast_node& src ) : - graph_node(src.my_graph), receiver(), sender() - { - my_successors.set_owner( this ); - tbb::internal::fgt_node( tbb::internal::FLOW_BROADCAST_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - - //! Adds a successor - bool register_successor( successor_type &r ) __TBB_override { - my_successors.register_successor( r ); - return true; - } - - //! 
Removes s as a successor - bool remove_successor( successor_type &r ) __TBB_override { - my_successors.remove_successor( r ); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename sender::built_successors_type built_successors_type; - - built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); } - - void internal_add_built_successor(successor_type &r) __TBB_override { - my_successors.internal_add_built_successor(r); - } - - void internal_delete_built_successor(successor_type &r) __TBB_override { - my_successors.internal_delete_built_successor(r); - } - - size_t successor_count() __TBB_override { - return my_successors.successor_count(); - } - - void copy_successors(successor_list_type &v) __TBB_override { - my_successors.copy_successors(v); - } - - typedef typename receiver::built_predecessors_type built_predecessors_type; - - built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; } - - void internal_add_built_predecessor( predecessor_type &p) __TBB_override { - spin_mutex::scoped_lock l(pred_mutex); - my_built_predecessors.add_edge(p); - } - - void internal_delete_built_predecessor( predecessor_type &p) __TBB_override { - spin_mutex::scoped_lock l(pred_mutex); - my_built_predecessors.delete_edge(p); - } - - size_t predecessor_count() __TBB_override { - spin_mutex::scoped_lock l(pred_mutex); - return my_built_predecessors.edge_count(); - } - - void copy_predecessors(predecessor_list_type &v) __TBB_override { - spin_mutex::scoped_lock l(pred_mutex); - my_built_predecessors.copy_edges(v); - } - - void extract() __TBB_override { - my_built_predecessors.receiver_extract(*this); - my_successors.built_successors().sender_extract(*this); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - -protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - //! build a task to run the successor if possible. Default is old behavior. - task *try_put_task(const T& t) __TBB_override { - task *new_task = my_successors.try_put_task(t); - if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; - return new_task; - } - - void reset_receiver(reset_flags /*f*/) __TBB_override {} - - void reset_node(reset_flags f) __TBB_override { - if (f&rf_clear_edges) { - my_successors.clear(); -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - my_built_predecessors.clear(); -#endif - } - __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); - } -}; // broadcast_node - -//! 
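To make the contrast between the two classes above concrete: write_once_node accepts only the first value it is offered (until clear()), while broadcast_node stores nothing and forwards each message to every currently connected successor. A small sketch under the same TBB assumptions:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;

    tbb::flow::write_once_node<int> once(g);
    assert(once.try_put(1));            // first value is kept
    assert(!once.try_put(2));           // later puts are rejected
    int v = 0;
    assert(once.try_get(v) && v == 1);

    tbb::flow::broadcast_node<int> fan_out(g);
    tbb::flow::overwrite_node<int> a(g), b(g);
    tbb::flow::make_edge(fan_out, a);
    tbb::flow::make_edge(fan_out, b);
    fan_out.try_put(42);                // both successors receive 42
    g.wait_for_all();
    assert(a.try_get(v) && v == 42);
    assert(b.try_get(v) && v == 42);
    return 0;
}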
Forwards messages in arbitrary order -template > -class buffer_node : public graph_node, public internal::reservable_item_buffer, public receiver, public sender { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; - typedef buffer_node class_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename receiver::predecessor_list_type predecessor_list_type; - typedef typename sender::successor_list_type successor_list_type; -#endif -protected: - typedef size_t size_type; - internal::round_robin_cache< T, null_rw_mutex > my_successors; - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - internal::edge_container my_built_predecessors; -#endif - - friend class internal::forward_task_bypass< buffer_node< T, A > >; - - enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - , add_blt_succ, del_blt_succ, - add_blt_pred, del_blt_pred, - blt_succ_cnt, blt_pred_cnt, - blt_succ_cpy, blt_pred_cpy // create vector copies of preds and succs -#endif - }; - - // implements the aggregator_operation concept - class buffer_operation : public internal::aggregated_operation< buffer_operation > { - public: - char type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - task * ltask; - union { - input_type *elem; - successor_type *r; - predecessor_type *p; - size_t cnt_val; - successor_list_type *svec; - predecessor_list_type *pvec; - }; -#else - T *elem; - task * ltask; - successor_type *r; -#endif - buffer_operation(const T& e, op_type t) : type(char(t)) - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - , ltask(NULL), elem(const_cast(&e)) -#else - , elem(const_cast(&e)) , ltask(NULL) -#endif - {} - buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} - }; - - bool forwarder_busy; - typedef internal::aggregating_functor handler_type; - friend class internal::aggregating_functor; - internal::aggregator< handler_type, buffer_operation> my_aggregator; - - virtual void handle_operations(buffer_operation *op_list) { - handle_operations_impl(op_list, this); - } - - template - void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { - __TBB_ASSERT(static_cast(derived) == this, "'this' is not a base class for derived"); - - buffer_operation *tmp = NULL; - bool try_forwarding = false; - while (op_list) { - tmp = op_list; - op_list = op_list->next; - switch (tmp->type) { - case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; - case rem_succ: internal_rem_succ(tmp); break; - case req_item: internal_pop(tmp); break; - case res_item: internal_reserve(tmp); break; - case rel_res: internal_release(tmp); try_forwarding = true; break; - case con_res: internal_consume(tmp); try_forwarding = true; break; - case put_item: try_forwarding = internal_push(tmp); break; - case try_fwd_task: internal_forward_task(tmp); break; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - // edge recording - case add_blt_succ: internal_add_built_succ(tmp); break; - case del_blt_succ: internal_del_built_succ(tmp); break; - case add_blt_pred: internal_add_built_pred(tmp); break; - case del_blt_pred: internal_del_built_pred(tmp); break; - case blt_succ_cnt: internal_succ_cnt(tmp); break; - case blt_pred_cnt: internal_pred_cnt(tmp); break; - case blt_succ_cpy: internal_copy_succs(tmp); break; - case blt_pred_cpy: internal_copy_preds(tmp); break; -#endif - } - } - - derived->order(); - - if (try_forwarding && !forwarder_busy) { - 
if(this->my_graph.is_active()) { - forwarder_busy = true; - task *new_task = new(task::allocate_additional_child_of(*(this->my_graph.root_task()))) internal:: - forward_task_bypass - < buffer_node >(*this); - // tmp should point to the last item handled by the aggregator. This is the operation - // the handling thread enqueued. So modifying that record will be okay. - tbb::task *z = tmp->ltask; - tmp->ltask = combine_tasks(z, new_task); // in case the op generated a task - } - } - } // handle_operations - - inline task *grab_forwarding_task( buffer_operation &op_data) { - return op_data.ltask; - } - - inline bool enqueue_forwarding_task(buffer_operation &op_data) { - task *ft = grab_forwarding_task(op_data); - if(ft) { - FLOW_SPAWN(*ft); - return true; - } - return false; - } - - //! This is executed by an enqueued task, the "forwarder" - virtual task *forward_task() { - buffer_operation op_data(try_fwd_task); - task *last_task = NULL; - do { - op_data.status = internal::WAIT; - op_data.ltask = NULL; - my_aggregator.execute(&op_data); - tbb::task *xtask = op_data.ltask; - last_task = combine_tasks(last_task, xtask); - } while (op_data.status ==internal::SUCCEEDED); - return last_task; - } - - //! Register successor - virtual void internal_reg_succ(buffer_operation *op) { - my_successors.register_successor(*(op->r)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - //! Remove successor - virtual void internal_rem_succ(buffer_operation *op) { - my_successors.remove_successor(*(op->r)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename sender::built_successors_type built_successors_type; - - built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); } - - virtual void internal_add_built_succ(buffer_operation *op) { - my_successors.internal_add_built_successor(*(op->r)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_del_built_succ(buffer_operation *op) { - my_successors.internal_delete_built_successor(*(op->r)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - typedef typename receiver::built_predecessors_type built_predecessors_type; - - built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; } - - virtual void internal_add_built_pred(buffer_operation *op) { - my_built_predecessors.add_edge(*(op->p)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_del_built_pred(buffer_operation *op) { - my_built_predecessors.delete_edge(*(op->p)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_succ_cnt(buffer_operation *op) { - op->cnt_val = my_successors.successor_count(); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_pred_cnt(buffer_operation *op) { - op->cnt_val = my_built_predecessors.edge_count(); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_copy_succs(buffer_operation *op) { - my_successors.copy_successors(*(op->svec)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_copy_preds(buffer_operation *op) { - my_built_predecessors.copy_edges(*(op->pvec)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - -private: - void order() {} - - bool is_item_valid() { - return 
this->my_item_valid(this->my_tail - 1); - } - - void try_put_and_add_task(task*& last_task) { - task *new_task = my_successors.try_put_task(this->back()); - if (new_task) { - last_task = combine_tasks(last_task, new_task); - this->destroy_back(); - } - } - -protected: - //! Tries to forward valid items to successors - virtual void internal_forward_task(buffer_operation *op) { - internal_forward_task_impl(op, this); - } - - template - void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { - __TBB_ASSERT(static_cast(derived) == this, "'this' is not a base class for derived"); - - if (this->my_reserved || !derived->is_item_valid()) { - __TBB_store_with_release(op->status, internal::FAILED); - this->forwarder_busy = false; - return; - } - // Try forwarding, giving each successor a chance - task * last_task = NULL; - size_type counter = my_successors.size(); - for (; counter > 0 && derived->is_item_valid(); --counter) - derived->try_put_and_add_task(last_task); - - op->ltask = last_task; // return task - if (last_task && !counter) { - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - else { - __TBB_store_with_release(op->status, internal::FAILED); - forwarder_busy = false; - } - } - - virtual bool internal_push(buffer_operation *op) { - this->push_back(*(op->elem)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - return true; - } - - virtual void internal_pop(buffer_operation *op) { - if(this->pop_back(*(op->elem))) { - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - else { - __TBB_store_with_release(op->status, internal::FAILED); - } - } - - virtual void internal_reserve(buffer_operation *op) { - if(this->reserve_front(*(op->elem))) { - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - else { - __TBB_store_with_release(op->status, internal::FAILED); - } - } - - virtual void internal_consume(buffer_operation *op) { - this->consume_front(); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - - virtual void internal_release(buffer_operation *op) { - this->release_front(); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - -public: - //! Constructor - explicit buffer_node( graph &g ) : graph_node(g), internal::reservable_item_buffer(), - forwarder_busy(false) { - my_successors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - tbb::internal::fgt_node( tbb::internal::FLOW_BUFFER_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } - - //! Copy constructor - buffer_node( const buffer_node& src ) : graph_node(src.my_graph), - internal::reservable_item_buffer(), receiver(), sender() { - forwarder_busy = false; - my_successors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - tbb::internal::fgt_node( tbb::internal::FLOW_BUFFER_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - - // - // message sender implementation - // - - //! Adds a new successor. - /** Adds successor r to the list of successors; may forward tasks. 
*/ - bool register_successor( successor_type &r ) __TBB_override { - buffer_operation op_data(reg_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void internal_add_built_successor( successor_type &r) __TBB_override { - buffer_operation op_data(add_blt_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - } - - void internal_delete_built_successor( successor_type &r) __TBB_override { - buffer_operation op_data(del_blt_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - } - - void internal_add_built_predecessor( predecessor_type &p) __TBB_override { - buffer_operation op_data(add_blt_pred); - op_data.p = &p; - my_aggregator.execute(&op_data); - } - - void internal_delete_built_predecessor( predecessor_type &p) __TBB_override { - buffer_operation op_data(del_blt_pred); - op_data.p = &p; - my_aggregator.execute(&op_data); - } - - size_t predecessor_count() __TBB_override { - buffer_operation op_data(blt_pred_cnt); - my_aggregator.execute(&op_data); - return op_data.cnt_val; - } - - size_t successor_count() __TBB_override { - buffer_operation op_data(blt_succ_cnt); - my_aggregator.execute(&op_data); - return op_data.cnt_val; - } - - void copy_predecessors( predecessor_list_type &v ) __TBB_override { - buffer_operation op_data(blt_pred_cpy); - op_data.pvec = &v; - my_aggregator.execute(&op_data); - } - - void copy_successors( successor_list_type &v ) __TBB_override { - buffer_operation op_data(blt_succ_cpy); - op_data.svec = &v; - my_aggregator.execute(&op_data); - } - -#endif - - //! Removes a successor. - /** Removes successor r from the list of successors. - It also calls r.remove_predecessor(*this) to remove this node as a predecessor. */ - bool remove_successor( successor_type &r ) __TBB_override { - r.remove_predecessor(*this); - buffer_operation op_data(rem_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - // even though this operation does not cause a forward, if we are the handler, and - // a forward is scheduled, we may be the first to reach this point after the aggregator, - // and so should check for the task. - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Request an item from the buffer_node - /** true = v contains the returned item
- false = no item has been returned */ - bool try_get( T &v ) __TBB_override { - buffer_operation op_data(req_item); - op_data.elem = &v; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return (op_data.status==internal::SUCCEEDED); - } - - //! Reserves an item. - /** false = no item can be reserved
- true = an item is reserved */ - bool try_reserve( T &v ) __TBB_override { - buffer_operation op_data(res_item); - op_data.elem = &v; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return (op_data.status==internal::SUCCEEDED); - } - - //! Release a reserved item. - /** true = item has been released and so remains in sender */ - bool try_release() __TBB_override { - buffer_operation op_data(rel_res); - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Consumes a reserved item. - /** true = item is removed from sender and reservation removed */ - bool try_consume() __TBB_override { - buffer_operation op_data(con_res); - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - //! receive an item, return a task *if possible - task *try_put_task(const T &t) __TBB_override { - buffer_operation op_data(t, put_item); - my_aggregator.execute(&op_data); - task *ft = grab_forwarding_task(op_data); - // sequencer_nodes can return failure (if an item has been previously inserted) - // We have to spawn the returned task if our own operation fails. - - if(ft && op_data.status ==internal::FAILED) { - // we haven't succeeded queueing the item, but for some reason the - // call returned a task (if another request resulted in a successful - // forward this could happen.) Queue the task and reset the pointer. - FLOW_SPAWN(*ft); ft = NULL; - } - else if(!ft && op_data.status ==internal::SUCCEEDED) { - ft = SUCCESSFULLY_ENQUEUED; - } - return ft; - } - - void reset_receiver(reset_flags /*f*/) __TBB_override { } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES -public: - void extract() __TBB_override { - my_built_predecessors.receiver_extract(*this); - my_successors.built_successors().sender_extract(*this); - } -#endif - -protected: - void reset_node( reset_flags f) __TBB_override { - internal::reservable_item_buffer::reset(); - // TODO: just clear structures - if (f&rf_clear_edges) { - my_successors.clear(); -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - my_built_predecessors.clear(); -#endif - } - forwarder_busy = false; - } -}; // buffer_node - -//! 
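The reservation protocol buffer_node exposes above (try_reserve / try_release / try_consume) is what lets reserving consumers, such as a reserving join_node, take items transactionally: a failed attempt releases the item back instead of losing it. A hand-driven sketch, same TBB assumptions:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;
    tbb::flow::buffer_node<int> buf(g);

    buf.try_put(10);
    buf.try_put(20);
    g.wait_for_all();

    int v = 0;
    assert(buf.try_reserve(v));   // reserve an item; it stays in the buffer
    assert(buf.try_release());    // give it back: nothing is lost

    assert(buf.try_reserve(v));
    assert(buf.try_consume());    // commit: the reserved item is removed

    assert(buf.try_get(v));       // exactly one item remains (arbitrary order)
    assert(!buf.try_get(v));
    return 0;
}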
Forwards messages in FIFO order -template > -class queue_node : public buffer_node { -protected: - typedef buffer_node base_type; - typedef typename base_type::size_type size_type; - typedef typename base_type::buffer_operation queue_operation; - typedef queue_node class_type; - -private: - template friend class buffer_node; - - bool is_item_valid() { - return this->my_item_valid(this->my_head); - } - - void try_put_and_add_task(task*& last_task) { - task *new_task = this->my_successors.try_put_task(this->front()); - if (new_task) { - last_task = combine_tasks(last_task, new_task); - this->destroy_front(); - } - } - -protected: - void internal_forward_task(queue_operation *op) __TBB_override { - this->internal_forward_task_impl(op, this); - } - - void internal_pop(queue_operation *op) __TBB_override { - if ( this->my_reserved || !this->my_item_valid(this->my_head)){ - __TBB_store_with_release(op->status, internal::FAILED); - } - else { - this->pop_front(*(op->elem)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - } - void internal_reserve(queue_operation *op) __TBB_override { - if (this->my_reserved || !this->my_item_valid(this->my_head)) { - __TBB_store_with_release(op->status, internal::FAILED); - } - else { - this->reserve_front(*(op->elem)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - } - void internal_consume(queue_operation *op) __TBB_override { - this->consume_front(); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - } - -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; - - //! Constructor - explicit queue_node( graph &g ) : base_type(g) { - tbb::internal::fgt_node( tbb::internal::FLOW_QUEUE_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - - //! Copy constructor - queue_node( const queue_node& src) : base_type(src) { - tbb::internal::fgt_node( tbb::internal::FLOW_QUEUE_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -protected: - void reset_node( reset_flags f) __TBB_override { - base_type::reset_node(f); - } -}; // queue_node - -//! Forwards messages in sequence order -template< typename T, typename A=cache_aligned_allocator > -class sequencer_node : public queue_node { - internal::function_body< T, size_t > *my_sequencer; - // my_sequencer should be a benign function and must be callable - // from a parallel context. Does this mean it needn't be reset? -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; - - //! Constructor - template< typename Sequencer > - sequencer_node( graph &g, const Sequencer& s ) : queue_node(g), - my_sequencer(new internal::function_body_leaf< T, size_t, Sequencer>(s) ) { - tbb::internal::fgt_node( tbb::internal::FLOW_SEQUENCER_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - - //! Copy constructor - sequencer_node( const sequencer_node& src ) : queue_node(src), - my_sequencer( src.my_sequencer->clone() ) { - tbb::internal::fgt_node( tbb::internal::FLOW_SEQUENCER_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - - //! 
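queue_node, defined just above, only changes the base buffer_node's forwarding discipline: items leave in FIFO order from the head instead of in arbitrary order. A minimal sketch, same assumptions:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;
    tbb::flow::queue_node<int> q(g);

    for (int i = 0; i < 3; ++i) q.try_put(i);
    g.wait_for_all();

    int v = -1;
    assert(q.try_get(v) && v == 0);  // first in, first out
    assert(q.try_get(v) && v == 1);
    assert(q.try_get(v) && v == 2);
    assert(!q.try_get(v));
    return 0;
}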
Destructor - ~sequencer_node() { delete my_sequencer; } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -protected: - typedef typename buffer_node::size_type size_type; - typedef typename buffer_node::buffer_operation sequencer_operation; - -private: - bool internal_push(sequencer_operation *op) __TBB_override { - size_type tag = (*my_sequencer)(*(op->elem)); -#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES - if (tag < this->my_head) { - // have already emitted a message with this tag - __TBB_store_with_release(op->status, internal::FAILED); - return false; - } -#endif - // cannot modify this->my_tail now; the buffer would be inconsistent. - size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; - - if (this->size(new_tail) > this->capacity()) { - this->grow_my_array(this->size(new_tail)); - } - this->my_tail = new_tail; - - const internal::op_stat res = this->place_item(tag, *(op->elem)) ? internal::SUCCEEDED : internal::FAILED; - __TBB_store_with_release(op->status, res); - return res ==internal::SUCCEEDED; - } -}; // sequencer_node - -//! Forwards messages in priority order -template< typename T, typename Compare = std::less, typename A=cache_aligned_allocator > -class priority_queue_node : public buffer_node { -public: - typedef T input_type; - typedef T output_type; - typedef buffer_node base_type; - typedef priority_queue_node class_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; - - //! Constructor - explicit priority_queue_node( graph &g ) : buffer_node(g), mark(0) { - tbb::internal::fgt_node( tbb::internal::FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - - //! Copy constructor - priority_queue_node( const priority_queue_node &src ) : buffer_node(src), mark(0) { - tbb::internal::fgt_node( tbb::internal::FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), - static_cast *>(this), - static_cast *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -protected: - - void reset_node( reset_flags f) __TBB_override { - mark = 0; - base_type::reset_node(f); - } - - typedef typename buffer_node::size_type size_type; - typedef typename buffer_node::item_type item_type; - typedef typename buffer_node::buffer_operation prio_operation; - - //! 
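sequencer_node, completed above, releases items strictly in the order 0, 1, 2, ... of the sequence numbers its user body assigns, holding back anything that arrives early. A sketch under the same assumptions; the msg struct and the lambda body are illustrative only:

#include "tbb/flow_graph.h"
#include <cassert>

struct msg { size_t seq; int payload; };

int main() {
    tbb::flow::graph g;
    // The body maps each item to its sequence number.
    tbb::flow::sequencer_node<msg> seq(g, [](const msg& m) -> size_t { return m.seq; });
    tbb::flow::queue_node<msg> out(g);
    tbb::flow::make_edge(seq, out);

    msg late  = {1, 20}; seq.try_put(late);   // held: sequence 0 not seen yet
    msg first = {0, 10}; seq.try_put(first);  // releases 0, then 1
    g.wait_for_all();

    msg m;
    assert(out.try_get(m) && m.seq == 0);
    assert(out.try_get(m) && m.seq == 1);
    return 0;
}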
Tries to forward valid items to successors - void internal_forward_task(prio_operation *op) __TBB_override { - this->internal_forward_task_impl(op, this); - } - - void handle_operations(prio_operation *op_list) __TBB_override { - this->handle_operations_impl(op_list, this); - } - - bool internal_push(prio_operation *op) __TBB_override { - prio_push(*(op->elem)); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - return true; - } - - void internal_pop(prio_operation *op) __TBB_override { - // if empty or already reserved, don't pop - if ( this->my_reserved == true || this->my_tail == 0 ) { - __TBB_store_with_release(op->status, internal::FAILED); - return; - } - - *(op->elem) = prio(); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - prio_pop(); - - } - - // pops the highest-priority item, saves copy - void internal_reserve(prio_operation *op) __TBB_override { - if (this->my_reserved == true || this->my_tail == 0) { - __TBB_store_with_release(op->status, internal::FAILED); - return; - } - this->my_reserved = true; - *(op->elem) = prio(); - reserved_item = *(op->elem); - __TBB_store_with_release(op->status, internal::SUCCEEDED); - prio_pop(); - } - - void internal_consume(prio_operation *op) __TBB_override { - __TBB_store_with_release(op->status, internal::SUCCEEDED); - this->my_reserved = false; - reserved_item = input_type(); - } - - void internal_release(prio_operation *op) __TBB_override { - __TBB_store_with_release(op->status, internal::SUCCEEDED); - prio_push(reserved_item); - this->my_reserved = false; - reserved_item = input_type(); - } - -private: - template friend class buffer_node; - - void order() { - if (mark < this->my_tail) heapify(); - __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); - } - - bool is_item_valid() { - return this->my_tail > 0; - } - - void try_put_and_add_task(task*& last_task) { - task * new_task = this->my_successors.try_put_task(this->prio()); - if (new_task) { - last_task = combine_tasks(last_task, new_task); - prio_pop(); - } - } - -private: - Compare compare; - size_type mark; - - input_type reserved_item; - - // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item - bool prio_use_tail() { - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); - return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); - } - - // prio_push: checks that the item will fit, expand array if necessary, put at end - void prio_push(const T &src) { - if ( this->my_tail >= this->my_array_size ) - this->grow_my_array( this->my_tail + 1 ); - (void) this->place_item(this->my_tail, src); - ++(this->my_tail); - __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); - } - - // prio_pop: deletes highest priority item from the array, and if it is item - // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail - // and mark. Assumes the array has already been tested for emptiness; no failure. 
- void prio_pop() { - if (prio_use_tail()) { - // there are newly pushed elements; last one higher than top - // copy the data - this->destroy_item(this->my_tail-1); - --(this->my_tail); - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); - return; - } - this->destroy_item(0); - if(this->my_tail > 1) { - // push the last element down heap - __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); - this->move_item(0,this->my_tail - 1); - } - --(this->my_tail); - if(mark > this->my_tail) --mark; - if (this->my_tail > 1) // don't reheap for heap of size 1 - reheap(); - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); - } - - const T& prio() { - return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); - } - - // turn array into heap - void heapify() { - if(this->my_tail == 0) { - mark = 0; - return; - } - if (!mark) mark = 1; - for (; markmy_tail; ++mark) { // for each unheaped element - size_type cur_pos = mark; - input_type to_place; - this->fetch_item(mark,to_place); - do { // push to_place up the heap - size_type parent = (cur_pos-1)>>1; - if (!compare(this->get_my_item(parent), to_place)) - break; - this->move_item(cur_pos, parent); - cur_pos = parent; - } while( cur_pos ); - (void) this->place_item(cur_pos, to_place); - } - } - - // otherwise heapified array with new root element; rearrange to heap - void reheap() { - size_type cur_pos=0, child=1; - while (child < mark) { - size_type target = child; - if (child+1get_my_item(child), - this->get_my_item(child+1))) - ++target; - // target now has the higher priority child - if (compare(this->get_my_item(target), - this->get_my_item(cur_pos))) - break; - // swap - this->swap_items(cur_pos, target); - cur_pos = target; - child = (cur_pos<<1)+1; - } - } -}; // priority_queue_node - -//! Forwards messages only if the threshold has not been reached -/** This node forwards items until its threshold is reached. - It contains no buffering. If the downstream node rejects, the - message is dropped. */ -template< typename T > -class limiter_node : public graph_node, public receiver< T >, public sender< T > { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename receiver::built_predecessors_type built_predecessors_type; - typedef typename sender::built_successors_type built_successors_type; - typedef typename receiver::predecessor_list_type predecessor_list_type; - typedef typename sender::successor_list_type successor_list_type; -#endif - //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. 
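priority_queue_node, closed just above, forwards whichever buffered item the Compare functor ranks highest; with the default std::less the largest value leaves first. A minimal sketch before the limiter_node internals below, same assumptions:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;
    tbb::flow::priority_queue_node<int> pq(g);  // default Compare: std::less<int>

    pq.try_put(3);
    pq.try_put(9);
    pq.try_put(5);
    g.wait_for_all();

    int v = 0;
    assert(pq.try_get(v) && v == 9);  // highest priority first
    assert(pq.try_get(v) && v == 5);
    assert(pq.try_get(v) && v == 3);
    return 0;
}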
- -private: - size_t my_threshold; - size_t my_count; //number of successful puts - size_t my_tries; //number of active put attempts - internal::reservable_predecessor_cache< T, spin_mutex > my_predecessors; - spin_mutex my_mutex; - internal::broadcast_cache< T > my_successors; - int init_decrement_predecessors; - - friend class internal::forward_task_bypass< limiter_node >; - - // Let decrementer call decrement_counter() - friend class internal::decrementer< limiter_node >; - - bool check_conditions() { // always called under lock - return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); - } - - // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED - task *forward_task() { - input_type v; - task *rval = NULL; - bool reserved = false; - { - spin_mutex::scoped_lock lock(my_mutex); - if ( check_conditions() ) - ++my_tries; - else - return NULL; - } - - //SUCCESS - // if we can reserve and can put, we consume the reservation - // we increment the count and decrement the tries - if ( (my_predecessors.try_reserve(v)) == true ){ - reserved=true; - if ( (rval = my_successors.try_put_task(v)) != NULL ){ - { - spin_mutex::scoped_lock lock(my_mutex); - ++my_count; - --my_tries; - my_predecessors.try_consume(); - if ( check_conditions() ) { - if ( this->my_graph.is_active() ) { - task *rtask = new ( task::allocate_additional_child_of( *(this->my_graph.root_task()) ) ) - internal::forward_task_bypass< limiter_node >( *this ); - FLOW_SPAWN (*rtask); - } - } - } - return rval; - } - } - //FAILURE - //if we can't reserve, we decrement the tries - //if we can reserve but can't put, we decrement the tries and release the reservation - { - spin_mutex::scoped_lock lock(my_mutex); - --my_tries; - if (reserved) my_predecessors.try_release(); - if ( check_conditions() ) { - if ( this->my_graph.is_active() ) { - task *rtask = new ( task::allocate_additional_child_of( *(this->my_graph.root_task()) ) ) - internal::forward_task_bypass< limiter_node >( *this ); - __TBB_ASSERT(!rval, "Have two tasks to handle"); - return rtask; - } - } - return rval; - } - } - - void forward() { - __TBB_ASSERT(false, "Should never be called"); - return; - } - - task * decrement_counter() { - { - spin_mutex::scoped_lock lock(my_mutex); - if(my_count) --my_count; - } - return forward_task(); - } - -public: - //! The internal receiver< continue_msg > that decrements the count - internal::decrementer< limiter_node > decrement; - - //! Constructor - limiter_node(graph &g, size_t threshold, int num_decrement_predecessors=0) : - graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), - init_decrement_predecessors(num_decrement_predecessors), - decrement(num_decrement_predecessors) - { - my_predecessors.set_owner(this); - my_successors.set_owner(this); - decrement.set_owner(this); - tbb::internal::fgt_node( tbb::internal::FLOW_LIMITER_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(&decrement), - static_cast *>(this) ); - } - - //! 
Copy constructor - limiter_node( const limiter_node& src ) : - graph_node(src.my_graph), receiver(), sender(), - my_threshold(src.my_threshold), my_count(0), my_tries(0), - init_decrement_predecessors(src.init_decrement_predecessors), - decrement(src.init_decrement_predecessors) - { - my_predecessors.set_owner(this); - my_successors.set_owner(this); - decrement.set_owner(this); - tbb::internal::fgt_node( tbb::internal::FLOW_LIMITER_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(&decrement), - static_cast *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - - //! Replace the current successor with this new successor - bool register_successor( successor_type &r ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - bool was_empty = my_successors.empty(); - my_successors.register_successor(r); - //spawn a forward task if this is the only successor - if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { - if ( this->my_graph.is_active() ) { - FLOW_SPAWN( (* new ( task::allocate_additional_child_of( *(this->my_graph.root_task()) ) ) - internal::forward_task_bypass < limiter_node >( *this ) ) ); - } - } - return true; - } - - //! Removes a successor from this node - /** r.remove_predecessor(*this) is also called. */ - bool remove_successor( successor_type &r ) __TBB_override { - r.remove_predecessor(*this); - my_successors.remove_successor(r); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); } - built_predecessors_type &built_predecessors() __TBB_override { return my_predecessors.built_predecessors(); } - - void internal_add_built_successor(successor_type &src) __TBB_override { - my_successors.internal_add_built_successor(src); - } - - void internal_delete_built_successor(successor_type &src) __TBB_override { - my_successors.internal_delete_built_successor(src); - } - - size_t successor_count() __TBB_override { return my_successors.successor_count(); } - - void copy_successors(successor_list_type &v) __TBB_override { - my_successors.copy_successors(v); - } - - void internal_add_built_predecessor(predecessor_type &src) __TBB_override { - my_predecessors.internal_add_built_predecessor(src); - } - - void internal_delete_built_predecessor(predecessor_type &src) __TBB_override { - my_predecessors.internal_delete_built_predecessor(src); - } - - size_t predecessor_count() __TBB_override { return my_predecessors.predecessor_count(); } - - void copy_predecessors(predecessor_list_type &v) __TBB_override { - my_predecessors.copy_predecessors(v); - } - - void extract() __TBB_override { - my_count = 0; - my_successors.built_successors().sender_extract(*this); - my_predecessors.built_predecessors().receiver_extract(*this); - decrement.built_predecessors().receiver_extract(decrement); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - //! Adds src to the list of cached predecessors. - bool register_predecessor( predecessor_type &src ) __TBB_override { - spin_mutex::scoped_lock lock(my_mutex); - my_predecessors.add( src ); - if ( my_count + my_tries < my_threshold && !my_successors.empty() && this->my_graph.is_active() ) { - FLOW_SPAWN( (* new ( task::allocate_additional_child_of( *(this->my_graph.root_task()) ) ) - internal::forward_task_bypass < limiter_node >( *this ) ) ); - } - return true; - } - - //! 
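Putting the counting logic above together: a limiter_node admits messages while my_count + my_tries stays below the threshold, and its public decrement port (a receiver<continue_msg>) frees a slot. A small sketch, same TBB assumptions:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;
    tbb::flow::limiter_node<int> lim(g, 1);  // at most one message in flight
    tbb::flow::queue_node<int> sink(g);
    tbb::flow::make_edge(lim, sink);

    assert(lim.try_put(1));                  // accepted: count reaches threshold
    assert(!lim.try_put(2));                 // rejected: limiter is full

    lim.decrement.try_put(tbb::flow::continue_msg());  // release one slot
    assert(lim.try_put(3));                  // accepted again
    g.wait_for_all();
    return 0;
}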
Removes src from the list of cached predecessors. - bool remove_predecessor( predecessor_type &src ) __TBB_override { - my_predecessors.remove( src ); - return true; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - //! Puts an item to this receiver - task *try_put_task( const T &t ) __TBB_override { - { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_count + my_tries >= my_threshold ) - return NULL; - else - ++my_tries; - } - - task * rtask = my_successors.try_put_task(t); - - if ( !rtask ) { // try_put_task failed. - spin_mutex::scoped_lock lock(my_mutex); - --my_tries; - if ( check_conditions() && this->my_graph.is_active() ) { - rtask = new ( task::allocate_additional_child_of( *(this->my_graph.root_task()) ) ) - internal::forward_task_bypass< limiter_node >( *this ); - } - } - else { - spin_mutex::scoped_lock lock(my_mutex); - ++my_count; - --my_tries; - } - return rtask; - } - - void reset_receiver(reset_flags /*f*/) __TBB_override { - __TBB_ASSERT(false,NULL); // should never be called - } - - void reset_node( reset_flags f) __TBB_override { - my_count = 0; - if(f & rf_clear_edges) { - my_predecessors.clear(); - my_successors.clear(); - } - else - { - my_predecessors.reset( ); - } - decrement.reset_receiver(f); - } -}; // limiter_node - -#include "internal/_flow_graph_join_impl.h" - -using internal::reserving_port; -using internal::queueing_port; -using internal::key_matching_port; -using internal::input_port; -using internal::tag_value; - -template class join_node; - -template -class join_node: public internal::unfolded_join_node::value, reserving_port, OutputTuple, reserving> { -private: - static const int N = tbb::flow::tuple_size::value; - typedef typename internal::unfolded_join_node unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - explicit join_node(graph &g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_RESERVING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - join_node(const join_node &other) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_RESERVING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -}; - -template -class join_node: public internal::unfolded_join_node::value, queueing_port, OutputTuple, queueing> { -private: - static const int N = tbb::flow::tuple_size::value; - typedef typename internal::unfolded_join_node unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - explicit join_node(graph &g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_QUEUEING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - join_node(const join_node &other) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_QUEUEING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } 
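The two join_node specializations above differ only in policy: reserving pulls one item from each reservable predecessor, and only once every input can supply one, while queueing buffers arrivals inside its ports. A wiring sketch, same assumptions (std::get relies on tbb::flow::tuple mapping to std::tuple on C++11 builds):

#include "tbb/flow_graph.h"
#include <cassert>
#include <tuple>

int main() {
    tbb::flow::graph g;
    tbb::flow::queue_node<int> left(g), right(g);

    typedef tbb::flow::tuple<int, int> pair_t;
    tbb::flow::join_node<pair_t, tbb::flow::reserving> join(g);
    tbb::flow::queue_node<pair_t> out(g);

    tbb::flow::make_edge(left,  tbb::flow::input_port<0>(join));
    tbb::flow::make_edge(right, tbb::flow::input_port<1>(join));
    tbb::flow::make_edge(join, out);

    left.try_put(1);   // join waits: the right input cannot be reserved yet
    right.try_put(2);  // both reservable now, so a tuple is emitted
    g.wait_for_all();

    pair_t p;
    assert(out.try_get(p));
    assert(std::get<0>(p) == 1 && std::get<1>(p) == 2);
    return 0;
}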
-#endif - -}; - -// template for key_matching join_node -// tag_matching join_node is a specialization of key_matching, and is source-compatible. -template -class join_node > : public internal::unfolded_join_node::value, - key_matching_port, OutputTuple, key_matching > { -private: - static const int N = tbb::flow::tuple_size::value; - typedef typename internal::unfolded_join_node > unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - join_node(graph &g) : unfolded_type(g) {} -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : - unfolded_type(g, b0, b1, b2, b3, b4) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#if __TBB_VARIADIC_MAX >= 6 - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : - unfolded_type(g, b0, b1, b2, b3, b4, b5) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 7 - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : - unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 8 - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 9 - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 10 - template - join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7, 
__TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif - join_node(const join_node &other) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif - -}; - -// indexer node -#include "internal/_flow_graph_indexer_impl.h" - -// TODO: Implement interface with variadic template or tuple -template class indexer_node; - -//indexer node specializations -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 1; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; - -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 2; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; - -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 3; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; - -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const 
int N = 4; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; - -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 5; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; - -#if __TBB_VARIADIC_MAX >= 6 -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 6; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; -#endif //variadic max 6 - -#if __TBB_VARIADIC_MAX >= 7 -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 7; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; -#endif //variadic max 7 - -#if 
__TBB_VARIADIC_MAX >= 8 -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 8; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; -#endif //variadic max 8 - -#if __TBB_VARIADIC_MAX >= 9 -template -class indexer_node : public internal::unfolded_indexer_node > { -private: - static const int N = 9; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; -#endif //variadic max 9 - -#if __TBB_VARIADIC_MAX >= 10 -template -class indexer_node/*default*/ : public internal::unfolded_indexer_node > { -private: - static const int N = 10; -public: - typedef tuple InputTuple; - typedef typename internal::tagged_msg output_type; - typedef typename internal::unfolded_indexer_node unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - tbb::internal::fgt_multiinput_node( tbb::internal::FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - void set_name( const char *name ) __TBB_override { - tbb::internal::fgt_node_desc( this, name ); - } -#endif -}; -#endif //variadic max 10 - -#if __TBB_PREVIEW_ASYNC_MSG -inline void internal_make_edge( internal::untyped_sender &p, internal::untyped_receiver &s ) { -#else -template< typename T > -inline void internal_make_edge( sender &p, receiver &s ) { -#endif -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - s.internal_add_built_predecessor(p); - p.internal_add_built_successor(s); -#endif - p.register_successor( s ); - tbb::internal::fgt_make_edge( &p, &s ); -} - -//! 
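indexer_node, whose per-arity specializations end above, wraps each incoming message in a tagged_msg recording which port it came from, so one downstream node can multiplex heterogeneous inputs. A sketch, same assumptions; tag() and tbb::flow::cast_to are the documented tagged_msg accessors:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;
    typedef tbb::flow::indexer_node<int, float> indexer_t;
    indexer_t idx(g);
    tbb::flow::queue_node<indexer_t::output_type> out(g);
    tbb::flow::make_edge(idx, out);

    tbb::flow::input_port<0>(idx).try_put(7);     // tagged with port 0
    tbb::flow::input_port<1>(idx).try_put(2.5f);  // tagged with port 1
    g.wait_for_all();

    indexer_t::output_type m;
    while (out.try_get(m)) {
        if (m.tag() == 0)
            assert(tbb::flow::cast_to<int>(m) == 7);
        else
            assert(tbb::flow::cast_to<float>(m) == 2.5f);
    }
    return 0;
}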
Makes an edge between a single predecessor and a single successor -template< typename T > -inline void make_edge( sender &p, receiver &s ) { - internal_make_edge( p, s ); -} - -#if __TBB_PREVIEW_ASYNC_MSG -template< typename TS, typename TR, - typename = typename tbb::internal::enable_if::value - || tbb::internal::is_same_type::value>::type> -inline void make_edge( TS &p, TR &s ) { - internal_make_edge( p, s ); -} - -template< typename T > -inline void make_edge( sender &p, receiver &s ) { - internal_make_edge( p, s ); -} - -template< typename T > -inline void make_edge( sender &p, receiver &s ) { - internal_make_edge( p, s ); -} - -#endif // __TBB_PREVIEW_ASYNC_MSG - -#if __TBB_FLOW_GRAPH_CPP11_FEATURES -//Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. -template< typename T, typename V, - typename = typename T::output_ports_type, typename = typename V::input_ports_type > -inline void make_edge( T& output, V& input) { - make_edge(get<0>(output.output_ports()), get<0>(input.input_ports())); -} - -//Makes an edge from port 0 of a multi-output predecessor to a receiver. -template< typename T, typename R, - typename = typename T::output_ports_type > -inline void make_edge( T& output, receiver& input) { - make_edge(get<0>(output.output_ports()), input); -} - -//Makes an edge from a sender to port 0 of a multi-input successor. -template< typename S, typename V, - typename = typename V::input_ports_type > -inline void make_edge( sender& output, V& input) { - make_edge(output, get<0>(input.input_ports())); -} -#endif - -#if __TBB_PREVIEW_ASYNC_MSG -inline void internal_remove_edge( internal::untyped_sender &p, internal::untyped_receiver &s ) { -#else -template< typename T > -inline void internal_remove_edge( sender &p, receiver &s ) { -#endif - p.remove_successor( s ); -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - // TODO: should we try to remove p from the predecessor list of s, in case the edge is reversed? - p.internal_delete_built_successor(s); - s.internal_delete_built_predecessor(p); -#endif - tbb::internal::fgt_remove_edge( &p, &s ); -} - -//! Removes an edge between a single predecessor and a single successor -template< typename T > -inline void remove_edge( sender &p, receiver &s ) { - internal_remove_edge( p, s ); -} - -#if __TBB_PREVIEW_ASYNC_MSG -template< typename TS, typename TR, - typename = typename tbb::internal::enable_if::value - || tbb::internal::is_same_type::value>::type> -inline void remove_edge( TS &p, TR &s ) { - internal_remove_edge( p, s ); -} - -template< typename T > -inline void remove_edge( sender &p, receiver &s ) { - internal_remove_edge( p, s ); -} - -template< typename T > -inline void remove_edge( sender &p, receiver &s ) { - internal_remove_edge( p, s ); -} -#endif // __TBB_PREVIEW_ASYNC_MSG - -#if __TBB_FLOW_GRAPH_CPP11_FEATURES -//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. -template< typename T, typename V, - typename = typename T::output_ports_type, typename = typename V::input_ports_type > -inline void remove_edge( T& output, V& input) { - remove_edge(get<0>(output.output_ports()), get<0>(input.input_ports())); -} - -//Removes an edge between port 0 of a multi-output predecessor and a receiver. -template< typename T, typename R, - typename = typename T::output_ports_type > -inline void remove_edge( T& output, receiver& input) { - remove_edge(get<0>(output.output_ports()), input); -} -//Removes an edge between a sender and port 0 of a multi-input successor. 
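make_edge and remove_edge above (with the multi-port overloads that follow) are the supported way to wire and unwire nodes; once an edge is removed, later messages simply stop reaching the old successor. A short sketch, same assumptions:

#include "tbb/flow_graph.h"
#include <cassert>

int main() {
    tbb::flow::graph g;
    tbb::flow::broadcast_node<int> src(g);
    tbb::flow::queue_node<int> q(g);

    tbb::flow::make_edge(src, q);    // connect sender to receiver
    src.try_put(1);
    g.wait_for_all();

    tbb::flow::remove_edge(src, q);  // disconnect
    src.try_put(2);                  // dropped: broadcast_node does not buffer
    g.wait_for_all();

    int v = 0;
    assert(q.try_get(v) && v == 1);
    assert(!q.try_get(v));           // the second put never arrived
    return 0;
}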
-#if __TBB_PREVIEW_ASYNC_MSG
-inline void internal_remove_edge( internal::untyped_sender &p, internal::untyped_receiver &s ) {
-#else
-template< typename T >
-inline void internal_remove_edge( sender<T> &p, receiver<T> &s ) {
-#endif
-    p.remove_successor( s );
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-    // TODO: should we try to remove p from the predecessor list of s, in case the edge is reversed?
-    p.internal_delete_built_successor(s);
-    s.internal_delete_built_predecessor(p);
-#endif
-    tbb::internal::fgt_remove_edge( &p, &s );
-}
-
-//! Removes an edge between a single predecessor and a single successor
-template< typename T >
-inline void remove_edge( sender<T> &p, receiver<T> &s ) {
-    internal_remove_edge( p, s );
-}
-
-#if __TBB_PREVIEW_ASYNC_MSG
-template< typename TS, typename TR,
-    typename = typename tbb::internal::enable_if<tbb::internal::is_same_type<TS, internal::untyped_sender>::value
-                                              || tbb::internal::is_same_type<TR, internal::untyped_receiver>::value>::type>
-inline void remove_edge( TS &p, TR &s ) {
-    internal_remove_edge( p, s );
-}
-
-template< typename T >
-inline void remove_edge( sender<T> &p, receiver<typename T::async_msg_data_type> &s ) {
-    internal_remove_edge( p, s );
-}
-
-template< typename T >
-inline void remove_edge( sender<typename T::async_msg_data_type> &p, receiver<T> &s ) {
-    internal_remove_edge( p, s );
-}
-#endif // __TBB_PREVIEW_ASYNC_MSG
-
-#if __TBB_FLOW_GRAPH_CPP11_FEATURES
-//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor.
-template< typename T, typename V,
-    typename = typename T::output_ports_type, typename = typename V::input_ports_type >
-inline void remove_edge( T& output, V& input) {
-    remove_edge(get<0>(output.output_ports()), get<0>(input.input_ports()));
-}
-
-//Removes an edge between port 0 of a multi-output predecessor and a receiver.
-template< typename T, typename R,
-    typename = typename T::output_ports_type >
-inline void remove_edge( T& output, receiver<R>& input) {
-    remove_edge(get<0>(output.output_ports()), input);
-}
-//Removes an edge between a sender and port 0 of a multi-input successor.
-template< typename S, typename V,
-    typename = typename V::input_ports_type >
-inline void remove_edge( sender<S>& output, V& input) {
-    remove_edge(output, get<0>(input.input_ports()));
-}
-#endif
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-template<typename C >
-template< typename S >
-void internal::edge_container<C>::sender_extract( S &s ) {
-    edge_list_type e = built_edges;
-    for ( typename edge_list_type::iterator i = e.begin(); i != e.end(); ++i ) {
-        remove_edge(s, **i);
-    }
-}
-
-template<typename C >
-template< typename R >
-void internal::edge_container<C>::receiver_extract( R &r ) {
-    edge_list_type e = built_edges;
-    for ( typename edge_list_type::iterator i = e.begin(); i != e.end(); ++i ) {
-        remove_edge(**i, r);
-    }
-}
-#endif  /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
-
-//! Returns a copy of the body from a function or continue node
-template< typename Body, typename Node >
-Body copy_body( Node &n ) {
-    return n.template copy_function_object<Body>();
-}
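copy_body, defined just above, returns a copy of the body object held inside a node, which is how accumulated state can be read back after wait_for_all. A sketch, with counting_body as an illustrative functor:

    #include "tbb/flow_graph.h"

    struct counting_body {
        int count = 0;
        int operator()(int v) { ++count; return v; }
    };

    int main() {
        using namespace tbb::flow;
        graph g;
        function_node<int, int> n(g, serial, counting_body());
        n.try_put(1);
        n.try_put(2);
        g.wait_for_all();
        // Retrieves a copy of the node's internal body, not the caller's original.
        counting_body b = copy_body<counting_body>(n);
        // b.count == 2 here: the node invoked its body twice.
        return 0;
    }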
-#if __TBB_FLOW_GRAPH_CPP11_FEATURES
-
-//composite_node
-template< typename InputTuple, typename OutputTuple > class composite_node;
-
-template< typename... InputTypes, typename... OutputTypes>
-class composite_node <tbb::flow::tuple<InputTypes...>, tbb::flow::tuple<OutputTypes...> > : public graph_node{
-
-public:
-    typedef tbb::flow::tuple< receiver<InputTypes>&... > input_ports_type;
-    typedef tbb::flow::tuple< sender<OutputTypes>&... > output_ports_type;
-
-private:
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    const char *my_type_name;
-#endif
-    input_ports_type *my_input_ports;
-    output_ports_type *my_output_ports;
-
-    static const size_t NUM_INPUTS = sizeof...(InputTypes);
-    static const size_t NUM_OUTPUTS = sizeof...(OutputTypes);
-
-protected:
-    void reset_node(reset_flags) __TBB_override {}
-
-public:
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    composite_node( graph &g, const char *type_name = "composite_node") : graph_node(g), my_type_name(type_name), my_input_ports(NULL), my_output_ports(NULL) {
-        tbb::internal::itt_make_task_group( tbb::internal::ITT_DOMAIN_FLOW, this, tbb::internal::FLOW_NODE, &g, tbb::internal::FLOW_GRAPH, tbb::internal::FLOW_COMPOSITE_NODE );
-        tbb::internal::fgt_multiinput_multioutput_node_desc( this, my_type_name );
-    }
-#else
-    composite_node( graph &g) : graph_node(g), my_input_ports(NULL), my_output_ports(NULL) {}
-#endif
-
-    template<typename T1, typename T2>
-    void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) {
-        __TBB_STATIC_ASSERT(NUM_INPUTS == tbb::flow::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports");
-        __TBB_STATIC_ASSERT(NUM_OUTPUTS == tbb::flow::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports");
-        my_input_ports = new input_ports_type(std::forward<T1>(input_ports_tuple));
-        my_output_ports = new output_ports_type(std::forward<T2>(output_ports_tuple));
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-        tbb::internal::fgt_internal_input_helper<T1, NUM_INPUTS>::register_port( this, input_ports_tuple);
-        tbb::internal::fgt_internal_output_helper<T2, NUM_OUTPUTS>::register_port( this, output_ports_tuple);
-#endif
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    template< typename... NodeTypes >
-    void add_visible_nodes(const NodeTypes&... n) { internal::add_nodes_impl(this, true, n...); }
-
-    template< typename... NodeTypes >
-    void add_nodes(const NodeTypes&... n) { internal::add_nodes_impl(this, false, n...); }
-#else
-    template<typename... Nodes> void add_nodes(Nodes&...) { }
-    template<typename... Nodes> void add_visible_nodes(Nodes&...) { }
-#endif
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    void set_name( const char *name ) __TBB_override {
-        tbb::internal::fgt_multiinput_multioutput_node_desc( this, name );
-    }
-#endif
-
-    input_ports_type input_ports() {
-         __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports");
-         return *my_input_ports;
-    }
-
-    output_ports_type output_ports() {
-         __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports");
-         return *my_output_ports;
-    }
-
-    virtual ~composite_node() {
-        if(my_input_ports) delete my_input_ports;
-        if(my_output_ports) delete my_output_ports;
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-    void extract() __TBB_override {
-        __TBB_ASSERT(false, "Current composite_node implementation does not support extract");
-    }
-#endif
-};  // class composite_node
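The specialization above is the general input-and-output form. A sketch of the intended use, wrapping two inner function_nodes behind one external input and one external output port (plus_squared is an illustrative name, and this assumes __TBB_FLOW_GRAPH_CPP11_FEATURES is enabled):

    #include "tbb/flow_graph.h"

    class plus_squared : public tbb::flow::composite_node<tbb::flow::tuple<int>, tbb::flow::tuple<int> > {
        typedef tbb::flow::composite_node<tbb::flow::tuple<int>, tbb::flow::tuple<int> > base_type;
        tbb::flow::function_node<int, int> add_one;
        tbb::flow::function_node<int, int> square;
    public:
        plus_squared(tbb::flow::graph &g) : base_type(g),
            add_one(g, tbb::flow::unlimited, [](int v) { return v + 1; }),
            square(g, tbb::flow::unlimited, [](int v) { return v * v; }) {
            tbb::flow::make_edge(add_one, square);
            // Expose add_one's input and square's output as the composite's ports.
            base_type::set_external_ports(base_type::input_ports_type(add_one),
                                          base_type::output_ports_type(square));
        }
    };

    int main() {
        tbb::flow::graph g;
        plus_squared node(g);
        tbb::flow::queue_node<int> sink(g);
        tbb::flow::make_edge(tbb::flow::output_port<0>(node), sink);
        tbb::flow::input_port<0>(node).try_put(3);   // (3 + 1)^2 = 16 arrives at sink
        g.wait_for_all();
        return 0;
    }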
-
-//composite_node with only input ports
-template< typename... InputTypes>
-class composite_node <tbb::flow::tuple<InputTypes...>, tbb::flow::tuple<> > : public graph_node {
-public:
-    typedef tbb::flow::tuple< receiver<InputTypes>&... > input_ports_type;
-
-private:
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    const char *my_type_name;
-#endif
-    input_ports_type *my_input_ports;
-    static const size_t NUM_INPUTS = sizeof...(InputTypes);
-
-protected:
-    void reset_node(reset_flags) __TBB_override {}
-
-public:
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    composite_node( graph &g, const char *type_name = "composite_node") : graph_node(g), my_type_name(type_name), my_input_ports(NULL) {
-        tbb::internal::itt_make_task_group( tbb::internal::ITT_DOMAIN_FLOW, this, tbb::internal::FLOW_NODE, &g, tbb::internal::FLOW_GRAPH, tbb::internal::FLOW_COMPOSITE_NODE );
-        tbb::internal::fgt_multiinput_multioutput_node_desc( this, my_type_name );
-    }
-#else
-    composite_node( graph &g) : graph_node(g), my_input_ports(NULL) {}
-#endif
-
-    template<typename T>
-    void set_external_ports(T&& input_ports_tuple) {
-        __TBB_STATIC_ASSERT(NUM_INPUTS == tbb::flow::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports");
-
-        my_input_ports = new input_ports_type(std::forward<T>(input_ports_tuple));
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-        tbb::internal::fgt_internal_input_helper<T, NUM_INPUTS>::register_port( this, std::forward<T>(input_ports_tuple));
-#endif
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    template< typename... NodeTypes >
-    void add_visible_nodes(const NodeTypes&... n) { internal::add_nodes_impl(this, true, n...); }
-
-    template< typename... NodeTypes >
-    void add_nodes( const NodeTypes&... n) { internal::add_nodes_impl(this, false, n...); }
-#else
-    template<typename... Nodes> void add_nodes(Nodes&...) {}
-    template<typename... Nodes> void add_visible_nodes(Nodes&...) {}
-#endif
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    void set_name( const char *name ) __TBB_override {
-        tbb::internal::fgt_multiinput_multioutput_node_desc( this, name );
-    }
-#endif
-
-    input_ports_type input_ports() {
-         __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports");
-         return *my_input_ports;
-    }
-
-    virtual ~composite_node() {
-        if(my_input_ports) delete my_input_ports;
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-    void extract() __TBB_override {
-        __TBB_ASSERT(false, "Current composite_node implementation does not support extract");
-    }
-#endif
-
-};  // class composite_node
-
-//composite_nodes with only output_ports
-template<typename... OutputTypes>
-class composite_node <tbb::flow::tuple<>, tbb::flow::tuple<OutputTypes...> > : public graph_node {
-public:
-    typedef tbb::flow::tuple< sender<OutputTypes>&... > output_ports_type;
-
-private:
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    const char *my_type_name;
-#endif
-    output_ports_type *my_output_ports;
-    static const size_t NUM_OUTPUTS = sizeof...(OutputTypes);
-
-protected:
-    void reset_node(reset_flags) __TBB_override {}
-
-public:
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    composite_node( graph &g, const char *type_name = "composite_node") : graph_node(g), my_type_name(type_name), my_output_ports(NULL) {
-        tbb::internal::itt_make_task_group( tbb::internal::ITT_DOMAIN_FLOW, this, tbb::internal::FLOW_NODE, &g, tbb::internal::FLOW_GRAPH, tbb::internal::FLOW_COMPOSITE_NODE );
-        tbb::internal::fgt_multiinput_multioutput_node_desc( this, my_type_name );
-    }
-#else
-    composite_node( graph &g) : graph_node(g), my_output_ports(NULL) {}
-#endif
-
-    template<typename T>
-    void set_external_ports(T&& output_ports_tuple) {
-        __TBB_STATIC_ASSERT(NUM_OUTPUTS == tbb::flow::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports");
-
-        my_output_ports = new output_ports_type(std::forward<T>(output_ports_tuple));
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-        tbb::internal::fgt_internal_output_helper<T, NUM_OUTPUTS>::register_port( this, std::forward<T>(output_ports_tuple));
-#endif
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    template<typename... NodeTypes >
-    void add_visible_nodes(const NodeTypes&... n) { internal::add_nodes_impl(this, true, n...); }
-
-    template<typename... NodeTypes >
-    void add_nodes(const NodeTypes&... n) { internal::add_nodes_impl(this, false, n...); }
-#else
-    template<typename... Nodes> void add_nodes(Nodes&...) {}
-    template<typename... Nodes> void add_visible_nodes(Nodes&...) {}
-#endif
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    void set_name( const char *name ) __TBB_override {
-        tbb::internal::fgt_multiinput_multioutput_node_desc( this, name );
-    }
-#endif
-
-    output_ports_type output_ports() {
-         __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports");
-         return *my_output_ports;
-    }
-
-    virtual ~composite_node() {
-        if(my_output_ports) delete my_output_ports;
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-    void extract() __TBB_override {
-        __TBB_ASSERT(false, "Current composite_node implementation does not support extract");
-    }
-#endif
-
-};  // class composite_node
-
-#endif // __TBB_FLOW_GRAPH_CPP11_FEATURES
-
-namespace internal {
-
-template<typename Gateway>
-class async_body_base: tbb::internal::no_assign {
-public:
-    typedef Gateway gateway_type;
-
-    async_body_base(gateway_type *gateway): my_gateway(gateway) { }
-    void set_gateway(gateway_type *gateway) {
-        my_gateway = gateway;
-    }
-
-protected:
-    gateway_type *my_gateway;
-};
-
-template<typename Input, typename Ports, typename Gateway, typename Body>
-class async_body: public async_body_base<Gateway> {
-public:
-    typedef async_body_base<Gateway> base_type;
-    typedef Gateway gateway_type;
-
-    async_body(const Body &body, gateway_type *gateway)
-        : base_type(gateway), my_body(body) { }
-
-    void operator()( const Input &v, Ports & ) {
-        my_body(v, *this->my_gateway);
-    }
-
-    Body get_body() { return my_body; }
-
-private:
-    Body my_body;
-};
-
-}
-
-//! Implements async node
-template < typename Input, typename Output, typename Policy = queueing, typename Allocator=cache_aligned_allocator<Input> >
-class async_node : public multifunction_node< Input, tuple< Output >, Policy, Allocator >, public sender< Output > {
-    typedef multifunction_node< Input, tuple< Output >, Policy, Allocator > base_type;
-    typedef typename internal::multifunction_input<Input, typename base_type::output_ports_type, Allocator> mfn_input_type;
-
-public:
-    typedef Input input_type;
-    typedef Output output_type;
-    typedef receiver<input_type> receiver_type;
-    typedef typename receiver_type::predecessor_type predecessor_type;
-    typedef typename sender<output_type>::successor_type successor_type;
-    typedef receiver_gateway<output_type> gateway_type;
-    typedef internal::async_body_base<gateway_type> async_body_base_type;
-
-private:
-    struct try_put_functor {
-        typedef internal::multifunction_output<Output> output_port_type;
-        output_port_type *port;
-        const Output *value;
-        bool result;
-        try_put_functor(output_port_type &p, const Output &v) : port(&p), value(&v), result(false) { }
-        void operator()() {
-            result = port->try_put(*value);
-        }
-    };
-
-    class receiver_gateway_impl: public receiver_gateway<Output> {
-    public:
-        receiver_gateway_impl(async_node* node): my_node(node) {}
-        void reserve_wait() __TBB_override {
-            my_node->my_graph.reserve_wait();
-            tbb::internal::fgt_async_reserve(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph);
-        }
-
-        void release_wait() __TBB_override {
-            my_node->my_graph.release_wait();
-            tbb::internal::fgt_async_commit(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph);
-        }
-
-        //! Implements gateway_type::try_put for an external activity to submit a message to FG
-        bool try_put(const Output &i) __TBB_override {
-            return my_node->try_put_impl(i);
-        }
-
-    private:
-        async_node* my_node;
-    } my_gateway;
-
-    //The substitute of 'this' for member construction, to prevent compiler warnings
-    async_node* self() { return this; }
-
-    //! Implements gateway_type::try_put for an external activity to submit a message to FG
-    bool try_put_impl(const Output &i) {
-        internal::multifunction_output<Output> &port_0 = internal::output_port<0>(*this);
-        tbb::internal::fgt_async_try_put_begin(this, &port_0);
-        __TBB_ASSERT(this->my_graph.my_task_arena && this->my_graph.my_task_arena->is_active(), NULL);
-        try_put_functor tpf(port_0, i);
-        this->my_graph.my_task_arena->execute(tpf);
-        tbb::internal::fgt_async_try_put_end(this, &port_0);
-        return tpf.result;
-    }
-
-public:
-    template<typename Body>
-    async_node( graph &g, size_t concurrency, Body body ) :
-        base_type( g, concurrency, internal::async_body<Input, typename base_type::output_ports_type, gateway_type, Body>(body, &my_gateway) ), my_gateway(self()) {
-        tbb::internal::fgt_multioutput_node<1>( tbb::internal::FLOW_ASYNC_NODE,
-                                                &this->my_graph,
-                                                static_cast<receiver<input_type> *>(this),
-                                                this->output_ports() );
-    }
-
-    async_node( const async_node &other ) : base_type(other), sender<Output>(), my_gateway(self()) {
-        static_cast<async_body_base_type*>(this->my_body->get_body_ptr())->set_gateway(&my_gateway);
-        static_cast<async_body_base_type*>(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway);
-
-        tbb::internal::fgt_multioutput_node<1>( tbb::internal::FLOW_ASYNC_NODE, &this->my_graph, static_cast<receiver<input_type> *>(this), this->output_ports() );
-    }
-
-    gateway_type& gateway() {
-        return my_gateway;
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_TRACE
-    void set_name( const char *name ) __TBB_override {
-        tbb::internal::fgt_node_desc( this, name );
-    }
-#endif
-
-    // Define sender< Output >
-
-    //! Add a new successor to this node
-    bool register_successor( successor_type &r ) __TBB_override {
-        return internal::output_port<0>(*this).register_successor(r);
-    }
-
-    //! Removes a successor from this node
-    bool remove_successor( successor_type &r ) __TBB_override {
-        return internal::output_port<0>(*this).remove_successor(r);
-    }
-
-    template<typename Body>
-    Body copy_function_object() {
-        typedef internal::multifunction_body<input_type, typename base_type::output_ports_type> mfn_body_type;
-        typedef internal::async_body<Input, typename base_type::output_ports_type, gateway_type, Body> async_body_type;
-        mfn_body_type &body_ref = *this->my_body;
-        async_body_type ab = *static_cast<async_body_type*>(dynamic_cast< internal::multifunction_body_leaf<input_type, typename base_type::output_ports_type, async_body_type> & >(body_ref).get_body_ptr());
-        return ab.get_body();
-    }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-    //! interface to record edges for traversal & deletion
-    typedef typename internal::edge_container<successor_type> built_successors_type;
-    typedef typename built_successors_type::edge_list_type successor_list_type;
-    built_successors_type &built_successors() __TBB_override {
-        return internal::output_port<0>(*this).built_successors();
-    }
-
-    void internal_add_built_successor( successor_type &r ) __TBB_override {
-        internal::output_port<0>(*this).internal_add_built_successor(r);
-    }
-
-    void internal_delete_built_successor( successor_type &r ) __TBB_override {
-        internal::output_port<0>(*this).internal_delete_built_successor(r);
-    }
-
-    void copy_successors( successor_list_type &l ) __TBB_override {
-        internal::output_port<0>(*this).copy_successors(l);
-    }
-
-    size_t successor_count() __TBB_override {
-        return internal::output_port<0>(*this).successor_count();
-    }
-#endif
-
-protected:
-
-    void reset_node( reset_flags f) __TBB_override {
-        base_type::reset_node(f);
-    }
-};
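A sketch of driving the async_node defined above from outside the graph: the body receives each input together with the node's gateway and reserves the wait, an external thread submits the result via try_put and then releases the wait, so wait_for_all does not return early (illustrative names, standalone TBB assumed):

    #include "tbb/flow_graph.h"
    #include <thread>

    int main() {
        using namespace tbb::flow;
        typedef async_node<int, int> node_t;

        graph g;
        node_t n(g, unlimited, [](const int &in, node_t::gateway_type &gw) {
            gw.reserve_wait();               // keep the graph alive for the async result
            std::thread([in, &gw] {
                gw.try_put(in * in);         // submit the result from the external thread
                gw.release_wait();           // balance the earlier reserve_wait
            }).detach();
        });
        queue_node<int> results(g);
        make_edge(n, results);

        n.try_put(7);
        g.wait_for_all();                    // blocks until release_wait is called

        int v = 0;
        if (results.try_get(v)) { /* v == 49 */ }
        return 0;
    }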
-
-#if __TBB_PREVIEW_STREAMING_NODE
-#include "internal/_flow_graph_streaming_node.h"
-#endif // __TBB_PREVIEW_STREAMING_NODE
-
-} // interface9
-
-    using interface9::reset_flags;
-    using interface9::rf_reset_protocol;
-    using interface9::rf_reset_bodies;
-    using interface9::rf_clear_edges;
-
-    using interface9::graph;
-    using interface9::graph_node;
-    using interface9::continue_msg;
-
-    using interface9::source_node;
-    using interface9::function_node;
-    using interface9::multifunction_node;
-    using interface9::split_node;
-    using interface9::internal::output_port;
-    using interface9::indexer_node;
-    using interface9::internal::tagged_msg;
-    using interface9::internal::cast_to;
-    using interface9::internal::is_a;
-    using interface9::continue_node;
-    using interface9::overwrite_node;
-    using interface9::write_once_node;
-    using interface9::broadcast_node;
-    using interface9::buffer_node;
-    using interface9::queue_node;
-    using interface9::sequencer_node;
-    using interface9::priority_queue_node;
-    using interface9::limiter_node;
-    using namespace interface9::internal::graph_policy_namespace;
-    using interface9::join_node;
-    using interface9::input_port;
-    using interface9::copy_body;
-    using interface9::make_edge;
-    using interface9::remove_edge;
-    using interface9::internal::tag_value;
-#if __TBB_FLOW_GRAPH_CPP11_FEATURES
-    using interface9::composite_node;
-#endif
-    using interface9::async_node;
-#if __TBB_PREVIEW_ASYNC_MSG
-    using interface9::async_msg;
-#endif
-#if __TBB_PREVIEW_STREAMING_NODE
-    using interface9::port_ref;
-    using interface9::streaming_node;
-#endif // __TBB_PREVIEW_STREAMING_NODE
-
-} // flow
-} // tbb
-
-#undef __TBB_PFG_RESET_ARG
-#undef __TBB_COMMA
-
-#endif // __TBB_flow_graph_H
diff --git a/lib/3rdParty/tbb/include/tbb/flow_graph_abstractions.h b/lib/3rdParty/tbb/include/tbb/flow_graph_abstractions.h
deleted file mode 100644
index f6eb3fb3..00000000
--- a/lib/3rdParty/tbb/include/tbb/flow_graph_abstractions.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_flow_graph_abstractions_H
-#define __TBB_flow_graph_abstractions_H
-
-namespace tbb {
-namespace flow {
-namespace interface9 {
-
-//! Pure virtual template classes that define interfaces for async communication
-class graph_proxy {
-public:
-    //! Inform a graph that messages may come from outside, to prevent premature graph completion
-    virtual void reserve_wait() = 0;
-
-    //! Inform a graph that a previous call to reserve_wait is no longer in effect
-    virtual void release_wait() = 0;
-
-    virtual ~graph_proxy() {}
-};
-
-template <typename Input>
-class receiver_gateway : public graph_proxy {
-public:
-    //! Type of inputing data into FG.
-    typedef Input input_type;
-
-    //! Submit signal from an asynchronous activity to FG.
-    virtual bool try_put(const input_type&) = 0;
-};
-
-} //interface9
-
-using interface9::graph_proxy;
-using interface9::receiver_gateway;
-
-} //flow
-} //tbb
-#endif
diff --git a/lib/3rdParty/tbb/include/tbb/flow_graph_opencl_node.h b/lib/3rdParty/tbb/include/tbb/flow_graph_opencl_node.h
deleted file mode 100644
index 0a98a775..00000000
--- a/lib/3rdParty/tbb/include/tbb/flow_graph_opencl_node.h
+++ /dev/null
@@ -1,1537 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_flow_graph_opencl_node_H
-#define __TBB_flow_graph_opencl_node_H
-
-#include "tbb/tbb_config.h"
-#if __TBB_PREVIEW_OPENCL_NODE
-
-#include "flow_graph.h"
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <algorithm>
-#include <map>
-#include <mutex>
-
-#ifdef __APPLE__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/cl.h>
-#endif
-
-namespace tbb {
-namespace flow {
-
-namespace interface9 {
-
-class opencl_foundation;
-class opencl_device_list;
-
-template <typename Factory>
-class opencl_buffer_impl;
-
-template <typename Factory>
-class opencl_program;
-
-class default_opencl_factory;
-
-class opencl_graph : public graph {
-public:
-    //! Constructs a graph with isolated task_group_context
-    opencl_graph() : my_opencl_foundation( NULL ) {}
-    //! Constructs a graph with an user context
-    explicit opencl_graph( task_group_context& context ) : graph( context ), my_opencl_foundation( NULL ) {}
-    //! Destroys a graph
-    ~opencl_graph();
-    //!
Available devices - const opencl_device_list& available_devices(); - default_opencl_factory& opencl_factory(); -protected: - opencl_foundation *my_opencl_foundation; - opencl_foundation &get_opencl_foundation(); - - template - friend class opencl_buffer; - template - friend class opencl_image2d; - template - friend class opencl_node; - template - friend class opencl_factory; -}; - -template -class opencl_factory; - -template -class dependency_msg; - - -inline void enforce_cl_retcode( cl_int err, std::string msg ) { - if ( err != CL_SUCCESS ) { - std::cerr << msg << "; error code: " << err << std::endl; - throw msg; - } -} - -template -T event_info( cl_event e, cl_event_info i ) { - T res; - enforce_cl_retcode( clGetEventInfo( e, i, sizeof( res ), &res, NULL ), "Failed to get OpenCL event information" ); - return res; -} - -template -T device_info( cl_device_id d, cl_device_info i ) { - T res; - enforce_cl_retcode( clGetDeviceInfo( d, i, sizeof( res ), &res, NULL ), "Failed to get OpenCL device information" ); - return res; -} -template <> -std::string device_info( cl_device_id d, cl_device_info i ) { - size_t required; - enforce_cl_retcode( clGetDeviceInfo( d, i, 0, NULL, &required ), "Failed to get OpenCL device information" ); - - char *buff = (char*)alloca( required ); - enforce_cl_retcode( clGetDeviceInfo( d, i, required, buff, NULL ), "Failed to get OpenCL device information" ); - - return buff; -} -template -T platform_info( cl_platform_id p, cl_platform_info i ) { - T res; - enforce_cl_retcode( clGetPlatformInfo( p, i, sizeof( res ), &res, NULL ), "Failed to get OpenCL platform information" ); - return res; -} -template <> -std::string platform_info( cl_platform_id p, cl_platform_info i ) { - size_t required; - enforce_cl_retcode( clGetPlatformInfo( p, i, 0, NULL, &required ), "Failed to get OpenCL platform information" ); - - char *buff = (char*)alloca( required ); - enforce_cl_retcode( clGetPlatformInfo( p, i, required, buff, NULL ), "Failed to get OpenCL platform information" ); - - return buff; -} - - -class opencl_device { -public: - typedef size_t device_id_type; - enum : device_id_type { - unknown = device_id_type( -2 ), - host = device_id_type( -1 ) - }; - - opencl_device() : my_device_id( unknown ) {} - - opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ) {} - - std::string platform_profile() const { - return platform_info( platform(), CL_PLATFORM_PROFILE ); - } - std::string platform_version() const { - return platform_info( platform(), CL_PLATFORM_VERSION ); - } - std::string platform_name() const { - return platform_info( platform(), CL_PLATFORM_NAME ); - } - std::string platform_vendor() const { - return platform_info( platform(), CL_PLATFORM_VENDOR ); - } - std::string platform_extensions() const { - return platform_info( platform(), CL_PLATFORM_EXTENSIONS ); - } - - template - void info( cl_device_info i, T &t ) const { - t = device_info( my_cl_device_id, i ); - } - std::string version() const { - // The version string format: OpenCL - return device_info( my_cl_device_id, CL_DEVICE_VERSION ); - } - int major_version() const { - int major; - std::sscanf( version().c_str(), "OpenCL %d", &major ); - return major; - } - int minor_version() const { - int major, minor; - std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor ); - return minor; - } - bool out_of_order_exec_mode_on_host_present() const { -#if CL_VERSION_2_0 - if ( major_version() >= 2 ) - return (device_info( my_cl_device_id, 
CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0; - else -#endif /* CL_VERSION_2_0 */ - return (device_info( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0; - } - bool out_of_order_exec_mode_on_device_present() const { -#if CL_VERSION_2_0 - if ( major_version() >= 2 ) - return (device_info( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0; - else -#endif /* CL_VERSION_2_0 */ - return false; - } - std::array max_work_item_sizes() const { - return device_info>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES ); - } - size_t max_work_group_size() const { - return device_info( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE ); - } - bool built_in_kernel_available( const std::string& k ) const { - const std::string semi = ";"; - // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd"). - return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos; - } - std::string built_in_kernels() const { - return device_info( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS ); - } - std::string name() const { - return device_info( my_cl_device_id, CL_DEVICE_NAME ); - } - cl_bool available() const { - return device_info( my_cl_device_id, CL_DEVICE_AVAILABLE ); - } - cl_bool compiler_available() const { - return device_info( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE ); - } - cl_bool linker_available() const { - return device_info( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE ); - } - bool extension_available( const std::string &ext ) const { - const std::string space = " "; - // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2"). 
- return (space + extensions() + space).find( space + ext + space ) != std::string::npos; - } - std::string extensions() const { - return device_info( my_cl_device_id, CL_DEVICE_EXTENSIONS ); - } - - cl_device_type type() const { - return device_info( my_cl_device_id, CL_DEVICE_TYPE ); - } - - std::string vendor() const { - return device_info( my_cl_device_id, CL_DEVICE_VENDOR ); - } - - cl_uint address_bits() const { - return device_info( my_cl_device_id, CL_DEVICE_ADDRESS_BITS ); - } - - cl_device_id device_id() const { - return my_cl_device_id; - } - - cl_command_queue command_queue() const { - return my_cl_command_queue; - } - - void set_command_queue( cl_command_queue cmd_queue ) { - my_cl_command_queue = cmd_queue; - } - -private: - opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ) {} - - cl_platform_id platform() const { - return device_info( my_cl_device_id, CL_DEVICE_PLATFORM ); - } - - device_id_type my_device_id; - cl_device_id my_cl_device_id; - cl_command_queue my_cl_command_queue; - - friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; } - - template - friend class opencl_factory; - template - friend class opencl_memory; - template - friend class opencl_program; - friend class opencl_foundation; - -#if TBB_USE_ASSERT - template - friend class opencl_buffer; -#endif -}; - -class opencl_device_list { - typedef std::vector container_type; -public: - typedef container_type::iterator iterator; - typedef container_type::const_iterator const_iterator; - typedef container_type::size_type size_type; - - opencl_device_list() {} - opencl_device_list( std::initializer_list il ) : my_container( il ) {} - - void add( opencl_device d ) { my_container.push_back( d ); } - size_type size() const { return my_container.size(); } - bool empty() const { return my_container.empty(); } - iterator begin() { return my_container.begin(); } - iterator end() { return my_container.end(); } - const_iterator begin() const { return my_container.begin(); } - const_iterator end() const { return my_container.end(); } - const_iterator cbegin() const { return my_container.cbegin(); } - const_iterator cend() const { return my_container.cend(); } -private: - container_type my_container; -}; - -class callback_base : tbb::internal::no_copy { -public: - virtual void call() = 0; - virtual ~callback_base() {} -}; - -template -class callback : public callback_base { - graph &my_graph; - Callback my_callback; - T my_data; -public: - callback( graph &g, Callback c, const T& t ) : my_graph( g ), my_callback( c ), my_data( t ) { - // Extend the graph lifetime until the callback completion. - my_graph.increment_wait_count(); - } - ~callback() { - // Release the reference to the graph. 
- my_graph.decrement_wait_count(); - } - void call() __TBB_override { - my_callback( my_data ); - } -}; - -template -class dependency_msg : public async_msg { -public: - typedef T value_type; - - dependency_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic>() ) { - my_callback_flag_ptr->store(false); - } - - explicit dependency_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared>() ) { - my_callback_flag_ptr->store(false); - } - - dependency_msg( opencl_graph &g, const T& data ) : my_data(data), my_graph(&g), my_callback_flag_ptr( std::make_shared>() ) { - my_callback_flag_ptr->store(false); - } - - dependency_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared>() ) { - my_callback_flag_ptr->store(false); - enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" ); - } - - T& data( bool wait = true ) { - if ( my_is_event && wait ) { - enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" ); - enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" ); - my_is_event = false; - } - return my_data; - } - - const T& data( bool wait = true ) const { - if ( my_is_event && wait ) { - enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" ); - enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" ); - my_is_event = false; - } - return my_data; - } - - dependency_msg( const dependency_msg &dmsg ) : async_msg(dmsg), - my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ), my_graph( dmsg.my_graph ), - my_callback_flag_ptr(dmsg.my_callback_flag_ptr) - { - if ( my_is_event ) - enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" ); - } - - dependency_msg( dependency_msg &&dmsg ) : async_msg(std::move(dmsg)), - my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event), my_graph(dmsg.my_graph), - my_callback_flag_ptr( std::move(dmsg.my_callback_flag_ptr) ) - { - dmsg.my_is_event = false; - } - - dependency_msg& operator=(const dependency_msg &dmsg) { - async_msg::operator =(dmsg); - - // Release original event - if ( my_is_event ) - enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" ); - - my_data = dmsg.my_data; - my_event = dmsg.my_event; - my_is_event = dmsg.my_is_event; - my_graph = dmsg.my_graph; - - // Retain copied event - if ( my_is_event ) - enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" ); - - my_callback_flag_ptr = dmsg.my_callback_flag_ptr; - return *this; - } - - ~dependency_msg() { - if ( my_is_event ) - enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" ); - } - - cl_event const * get_event() const { return my_is_event ? 
&my_event : NULL; } - void set_event( cl_event e ) const { - if ( my_is_event ) { - cl_command_queue cq = event_info( my_event, CL_EVENT_COMMAND_QUEUE ); - if ( cq != event_info( e, CL_EVENT_COMMAND_QUEUE ) ) - enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" ); - enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" ); - } - my_is_event = true; - my_event = e; - clRetainEvent( my_event ); - } - - void set_graph( graph &g ) { - my_graph = &g; - } - - void clear_event() const { - if ( my_is_event ) { - enforce_cl_retcode( clFlush( event_info( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" ); - enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" ); - } - my_is_event = false; - } - - template - void register_callback( Callback c ) const { - __TBB_ASSERT( my_is_event, "The OpenCL event is not set" ); - __TBB_ASSERT( my_graph, "The graph is not set" ); - enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback( *my_graph, c, my_data ) ), "Failed to set an OpenCL callback" ); - } - - operator T&() { return data(); } - operator const T&() const { return data(); } - -protected: - // Overridden in this derived class to inform that - // async calculation chain is over - void finalize() const __TBB_override { - receive_if_memory_object(*this); - if (! my_callback_flag_ptr->fetch_and_store(true)) { - dependency_msg a(*this); - if (my_is_event) { - register_callback([a](const T& t) mutable { - a.set(t); - }); - } - else { - a.set(my_data); - } - } - clear_event(); - } - -private: - static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) { - tbb::internal::suppress_unused_warning( event_command_exec_status ); - __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL ); - __TBB_ASSERT( data, NULL ); - callback_base *c = static_cast(data); - c->call(); - delete c; - } - - T my_data; - mutable cl_event my_event; - mutable bool my_is_event = false; - graph *my_graph = NULL; - - std::shared_ptr< tbb::atomic > my_callback_flag_ptr; -}; - -template -K key_from_message( const dependency_msg &dmsg ) { - using tbb::flow::key_from_message; - const T &t = dmsg.data( false ); - __TBB_STATIC_ASSERT( true, "" ); - return key_from_message( t ); -} - -template -class opencl_memory { -public: - opencl_memory() {} - opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) { - my_curr_device_id = my_factory->devices().begin()->my_device_id; - } - - ~opencl_memory() { - if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" ); - enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" ); - } - - cl_mem get_cl_mem() const { - return my_cl_mem; - } - - void* get_host_ptr() { - if ( !my_host_ptr ) { - dependency_msg d = receive( NULL ); - d.data(); - __TBB_ASSERT( d.data() == my_host_ptr, NULL ); - } - return my_host_ptr; - } - - Factory *factory() const { return my_factory; } - - dependency_msg send( opencl_device d, const cl_event *e ); - dependency_msg receive( const cl_event *e ); - virtual void map_memory( opencl_device, dependency_msg & ) = 0; -protected: - cl_mem my_cl_mem; - tbb::atomic my_curr_device_id; - void* my_host_ptr; - Factory *my_factory; - - tbb::spin_mutex my_sending_lock; - bool my_sending_event_present; - cl_event my_sending_event; -}; - 
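All OpenCL calls in this deleted header are checked through enforce_cl_retcode, defined earlier in the file. A self-contained sketch of the same error-handling pattern, counting the available platforms; it assumes an OpenCL SDK and linking against -lOpenCL, and platform_count is an illustrative name:

    #ifdef __APPLE__
    #include <OpenCL/opencl.h>
    #else
    #include <CL/cl.h>
    #endif
    #include <iostream>
    #include <string>

    // Same shape as the header's helper: report and throw on any non-CL_SUCCESS code.
    static void enforce_cl_retcode(cl_int err, const std::string &msg) {
        if (err != CL_SUCCESS) {
            std::cerr << msg << "; error code: " << err << std::endl;
            throw msg;
        }
    }

    int main() {
        cl_uint platform_count = 0;
        enforce_cl_retcode(clGetPlatformIDs(0, NULL, &platform_count),
                           "clGetPlatformIDs failed");
        std::cout << platform_count << " OpenCL platform(s) found\n";
        return 0;
    }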
-template -class opencl_buffer_impl : public opencl_memory { - size_t my_size; -public: - opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory( f ), my_size( size ) { - cl_int err; - this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err ); - enforce_cl_retcode( err, "Failed to create an OpenCL buffer" ); - } - - // The constructor for subbuffers. - opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory( f ), my_size( size ) { - cl_int err; - cl_buffer_region region = { index, size }; - this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err ); - enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" ); - } - - size_t size() const { - return my_size; - } - - void map_memory( opencl_device device, dependency_msg &dmsg ) __TBB_override { - this->my_factory->enque_map_buffer( device, *this, dmsg ); - } - -#if TBB_USE_ASSERT - template - friend class opencl_buffer; -#endif -}; - -enum access_type { - read_write, - write_only, - read_only -}; - -template -class opencl_subbuffer; - -template -class opencl_buffer { -public: - typedef cl_mem native_object_type; - typedef opencl_buffer memory_object_type; - typedef Factory opencl_factory_type; - - template using iterator = T*; - - template - iterator
access() const { - T* ptr = (T*)my_impl->get_host_ptr(); - __TBB_ASSERT( ptr, NULL ); - return iterator( ptr ); - } - - T* data() const { return &access()[0]; } - - template - iterator begin() const { return access(); } - - template - iterator end() const { return access()+my_impl->size()/sizeof(T); } - - size_t size() const { return my_impl->size()/sizeof(T); } - - T& operator[] ( ptrdiff_t k ) { return begin()[k]; } - - opencl_buffer() {} - opencl_buffer( opencl_graph &g, size_t size ); - opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared( size*sizeof(T), f ) ) {} - - cl_mem native_object() const { - return my_impl->get_cl_mem(); - } - - const opencl_buffer& memory_object() const { - return *this; - } - - void send( opencl_device device, dependency_msg &dependency ) const { - __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL ); - dependency_msg d = my_impl->send( device, dependency.get_event() ); - const cl_event *e = d.get_event(); - if ( e ) dependency.set_event( *e ); - else dependency.clear_event(); - } - void receive( const dependency_msg &dependency ) const { - __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL ); - dependency_msg d = my_impl->receive( dependency.get_event() ); - const cl_event *e = d.get_event(); - if ( e ) dependency.set_event( *e ); - else dependency.clear_event(); - } - - opencl_subbuffer subbuffer( size_t index, size_t size ) const; -private: - // The constructor for subbuffers. - opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared( m, index*sizeof(T), size*sizeof(T), f ) ) {} - - typedef opencl_buffer_impl impl_type; - - std::shared_ptr my_impl; - - friend bool operator==(const opencl_buffer &lhs, const opencl_buffer &rhs) { - return lhs.my_impl == rhs.my_impl; - } - - template - friend class opencl_factory; - template - friend class opencl_subbuffer; -}; - -template -class opencl_subbuffer : public opencl_buffer { - opencl_buffer my_owner; -public: - opencl_subbuffer() {} - opencl_subbuffer( const opencl_buffer &owner, size_t index, size_t size ) : - opencl_buffer( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {} -}; - -template -opencl_subbuffer opencl_buffer::subbuffer( size_t index, size_t size ) const { - return opencl_subbuffer( *this, index, size ); -} - - -#define is_typedef(type) \ - template \ - struct is_##type { \ - template \ - static std::true_type check( typename C::type* ); \ - template \ - static std::false_type check( ... ); \ - \ - static const bool value = decltype(check(0))::value; \ - } - -is_typedef( native_object_type ); -is_typedef( memory_object_type ); - -template -typename std::enable_if::value, typename T::native_object_type>::type get_native_object( const T &t ) { - return t.native_object(); -} - -template -typename std::enable_if::value, T>::type get_native_object( T t ) { - return t; -} - -// send_if_memory_object checks if the T type has memory_object_type and call the send method for the object. 
-template -typename std::enable_if::value>::type send_if_memory_object( opencl_device device, dependency_msg &dmsg ) { - const T &t = dmsg.data( false ); - typedef typename T::memory_object_type mem_obj_t; - mem_obj_t mem_obj = t.memory_object(); - dependency_msg d( mem_obj ); - if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() ); - mem_obj.send( device, d ); - if ( d.get_event() ) dmsg.set_event( *d.get_event() ); -} - -template -typename std::enable_if::value>::type send_if_memory_object( opencl_device device, T &t ) { - typedef typename T::memory_object_type mem_obj_t; - mem_obj_t mem_obj = t.memory_object(); - dependency_msg dmsg( mem_obj ); - mem_obj.send( device, dmsg ); -} - -template -typename std::enable_if::value>::type send_if_memory_object( opencl_device, T& ) {}; - -// receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object. -template -typename std::enable_if::value>::type receive_if_memory_object( const dependency_msg &dmsg ) { - const T &t = dmsg.data( false ); - typedef typename T::memory_object_type mem_obj_t; - mem_obj_t mem_obj = t.memory_object(); - dependency_msg d( mem_obj ); - if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() ); - mem_obj.receive( d ); - if ( d.get_event() ) dmsg.set_event( *d.get_event() ); -} - -template -typename std::enable_if::value>::type receive_if_memory_object( const T& ) {} - -class opencl_range { -public: - typedef size_t range_index_type; - typedef std::array nd_range_type; - - template , typename L = std::initializer_list, - typename = typename std::enable_if::type, opencl_range>::value>::type> - opencl_range(G&& global_work = std::initializer_list({ 0 }), L&& local_work = std::initializer_list({ 0, 0, 0 })) { - auto g_it = global_work.begin(); - auto l_it = local_work.begin(); - my_global_work_size = { size_t(-1), size_t(-1), size_t(-1) }; - // my_local_work_size is still uninitialized - for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) { - __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size"); - my_global_work_size[s] = *g_it; - my_local_work_size[s] = *l_it; - } - } - - const nd_range_type& global_range() const { return my_global_work_size; } - const nd_range_type& local_range() const { return my_local_work_size; } - -private: - nd_range_type my_global_work_size; - nd_range_type my_local_work_size; -}; - -template -class opencl_factory { -public: - template using async_msg_type = dependency_msg>; - typedef opencl_device device_type; - - class kernel : tbb::internal::no_assign { - public: - kernel( const kernel& k ) : my_factory( k.my_factory ) { - // Clone my_cl_kernel via opencl_program - size_t ret_size = 0; - - std::vector kernel_name; - for ( size_t curr_size = 32;; curr_size <<= 1 ) { - kernel_name.resize( curr_size <<= 1 ); - enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" ); - if ( ret_size < curr_size ) break; - } - - cl_program program; - enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" ); - __TBB_ASSERT( ret_size == sizeof(program), NULL ); - - my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() ); - } - - ~kernel() { - enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" ); - } - - private: - typedef opencl_factory factory_type; - - 
kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {} - - // Data - cl_kernel my_cl_kernel; - factory_type& my_factory; - - template - friend class opencl_factory; - - template - friend class opencl_program; - }; - - typedef kernel kernel_type; - - // 'range_type' enables kernel_executor with range support - // it affects expectations for enqueue_kernel(.....) interface method - typedef opencl_range range_type; - - opencl_factory( opencl_graph &g ) : my_graph( g ) {} - ~opencl_factory() { - if ( my_devices.size() ) { - for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) { - enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" ); - } - enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" ); - } - } - - bool init( const opencl_device_list &device_list ) { - tbb::spin_mutex::scoped_lock lock( my_devices_mutex ); - if ( !my_devices.size() ) { - my_devices = device_list; - return true; - } - return false; - } - - -private: - template - void enque_map_buffer( opencl_device device, opencl_buffer_impl &buffer, dependency_msg& dmsg ) { - cl_event const* e1 = dmsg.get_event(); - cl_event e2; - cl_int err; - void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(), - e1 == NULL ? 0 : 1, e1, &e2, &err ); - enforce_cl_retcode( err, "Failed to map a buffer" ); - dmsg.data( false ) = ptr; - dmsg.set_event( e2 ); - enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" ); - } - - - template - void enque_unmap_buffer( opencl_device device, opencl_memory &memory, dependency_msg& dmsg ) { - cl_event const* e1 = dmsg.get_event(); - cl_event e2; - enforce_cl_retcode( - clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ), - "Failed to unmap a buffer" ); - dmsg.set_event( e2 ); - enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" ); - } - - // --------- Kernel argument & event list helpers --------- // - template - void process_one_arg( const kernel_type& kernel, std::array&, int&, int& place, const T& t ) { - auto p = get_native_object(t); - enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" ); - } - - template - void process_one_arg( const kernel_type& kernel, std::array& events, int& num_events, int& place, const dependency_msg& msg ) { - __TBB_ASSERT((static_cast::size_type>(num_events) < events.size()), NULL); - - const cl_event * const e = msg.get_event(); - if (e != NULL) { - events[num_events++] = *e; - } - - process_one_arg( kernel, events, num_events, place, msg.data(false) ); - } - - template - void process_arg_list( const kernel_type& kernel, std::array& events, int& num_events, int& place, const T& t, const Rest&... args ) { - process_one_arg( kernel, events, num_events, place, t ); - process_arg_list( kernel, events, num_events, place, args... ); - } - - template - void process_arg_list( const kernel_type&, std::array&, int&, int& ) {} - // ------------------------------------------- // - template - void update_one_arg( cl_event, T& ) {} - - template - void update_one_arg( cl_event e, dependency_msg& msg ) { - msg.set_event( e ); - msg.set_graph( my_graph ); - } - - template - void update_arg_list( cl_event e, T& t, Rest&... args ) { - update_one_arg( e, t ); - update_arg_list( e, args... 
); - } - - void update_arg_list( cl_event ) {} - // ------------------------------------------- // -public: - template - void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) { - std::array events; - int num_events = 0; - int place = 0; - process_arg_list( kernel, events, num_events, place, args... ); - - const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() ); - - update_arg_list(e, args...); - - // Release our own reference to cl_event - enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" ); - } - - // ------------------------------------------- // - template - void send_data(opencl_device device, T& t, Rest&... args) { - send_if_memory_object( device, t ); - send_data( device, args... ); - } - - void send_data(opencl_device) {} - // ------------------------------------------- // - -private: - cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel, - const range_type& work_size, cl_uint num_events, cl_event* event_list ) { - const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } }; - const typename range_type::nd_range_type& g_size = work_size.global_range(); - const typename range_type::nd_range_type& l_size = work_size.local_range(); - cl_uint s; - for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {} - cl_event event; - enforce_cl_retcode( - clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s, - g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ), - "Failed to enqueue a kernel" ); - return event; - } - - // ------------------------------------------- // - template - bool get_event_from_one_arg( cl_event&, const T& ) { - return false; - } - - template - bool get_event_from_one_arg( cl_event& e, const dependency_msg& msg) { - cl_event const *e_ptr = msg.get_event(); - - if ( e_ptr != NULL ) { - e = *e_ptr; - return true; - } - - return false; - } - - template - bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) { - if ( get_event_from_one_arg( e, t ) ) { - return true; - } - - return get_event_from_args( e, args... ); - } - - bool get_event_from_args( cl_event& ) { - return false; - } - // ------------------------------------------- // - - struct finalize_fn : tbb::internal::no_assign { - virtual ~finalize_fn() {} - virtual void operator() () {} - }; - - template - struct finalize_fn_leaf : public finalize_fn { - Fn my_fn; - finalize_fn_leaf(Fn fn) : my_fn(fn) {} - void operator() () __TBB_override { my_fn(); } - }; - - static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) { - tbb::internal::suppress_unused_warning(event_command_exec_status); - __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL); - - finalize_fn * const fn_ptr = static_cast(data); - __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer"); - (*fn_ptr)(); - - // Function pointer was created by 'new' & this callback must be called once only - delete fn_ptr; - } -public: - template - void finalize( opencl_device device, FinalizeFn fn, Args&... args ) { - cl_event e; - - if ( get_event_from_args( e, args... 
) ) { - enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback, - new finalize_fn_leaf(fn) ), "Failed to set a callback" ); - } - - enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" ); - } - - const opencl_device_list& devices() { - std::call_once( my_once_flag, &opencl_factory::init_once, this ); - return my_devices; - } - -private: - bool is_same_context( opencl_device::device_id_type d1, opencl_device::device_id_type d2 ) { - __TBB_ASSERT( d1 != opencl_device::unknown && d2 != opencl_device::unknown, NULL ); - // Currently, factory supports only one context so if the both devices are not host it means the are in the same context. - if ( d1 != opencl_device::host && d2 != opencl_device::host ) - return true; - return d1 == d2; - } -private: - opencl_factory( const opencl_factory& ); - opencl_factory& operator=(const opencl_factory&); - - cl_context context() { - std::call_once( my_once_flag, &opencl_factory::init_once, this ); - return my_cl_context; - } - - void init_once(); - - std::once_flag my_once_flag; - opencl_device_list my_devices; - cl_context my_cl_context; - opencl_graph &my_graph; - - tbb::spin_mutex my_devices_mutex; - - template - friend class opencl_program; - template - friend class opencl_buffer_impl; - template - friend class opencl_memory; -}; - -template -dependency_msg opencl_memory::receive( const cl_event *e ) { - dependency_msg d = e ? dependency_msg( my_host_ptr, *e ) : dependency_msg( my_host_ptr ); - // Concurrent receives are prohibited so we do not worry about synchronization. - if ( my_curr_device_id.load() != opencl_device::host ) { - map_memory( *my_factory->devices().begin(), d ); - my_curr_device_id.store( opencl_device::host ); - my_host_ptr = d.data( false ); - } - // Release the sending event - if ( my_sending_event_present ) { - enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event" ); - my_sending_event_present = false; - } - return d; -} - -template -dependency_msg opencl_memory::send( opencl_device device, const cl_event *e ) { - opencl_device::device_id_type device_id = device.my_device_id; - if ( !my_factory->is_same_context( my_curr_device_id.load(), device_id ) ) { - { - tbb::spin_mutex::scoped_lock lock( my_sending_lock ); - if ( !my_factory->is_same_context( my_curr_device_id.load(), device_id ) ) { - __TBB_ASSERT( my_host_ptr, "The buffer has not been mapped" ); - dependency_msg d( my_host_ptr ); - my_factory->enque_unmap_buffer( device, *this, d ); - my_sending_event = *d.get_event(); - my_sending_event_present = true; - enforce_cl_retcode( clRetainEvent( my_sending_event ), "Failed to retain an event" ); - my_host_ptr = NULL; - my_curr_device_id.store(device_id); - } - } - __TBB_ASSERT( my_sending_event_present, NULL ); - } - - // !e means that buffer has come from the host - if ( !e && my_sending_event_present ) e = &my_sending_event; - - __TBB_ASSERT( !my_host_ptr, "The buffer has not been unmapped" ); - return e ? 
dependency_msg( NULL, *e ) : dependency_msg( NULL ); -} - -struct default_opencl_factory_device_filter { - opencl_device_list operator()( const opencl_device_list &devices ) { - opencl_device_list dl; - dl.add( *devices.begin() ); - return dl; - } -}; - -class default_opencl_factory : public opencl_factory < default_opencl_factory_device_filter > { -public: - template using async_msg_type = dependency_msg; - - default_opencl_factory( opencl_graph &g ) : opencl_factory( g ) {} -private: - default_opencl_factory( const default_opencl_factory& ); - default_opencl_factory& operator=(const default_opencl_factory&); -}; - -class opencl_foundation : tbb::internal::no_assign { - struct default_device_selector_type { - opencl_device operator()( default_opencl_factory& f ) { - __TBB_ASSERT( ! f.devices().empty(), "No available devices" ); - return *( f.devices().begin() ); - } - }; -public: - opencl_foundation(opencl_graph &g) : my_default_opencl_factory(g), my_default_device_selector() { - cl_uint num_platforms; - enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed"); - - std::vector platforms(num_platforms); - enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed"); - - cl_uint num_devices; - std::vector::iterator platforms_it = platforms.begin(); - cl_uint num_all_devices = 0; - while (platforms_it != platforms.end()) { - cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); - if (err == CL_DEVICE_NOT_FOUND) { - platforms_it = platforms.erase(platforms_it); - } else { - enforce_cl_retcode(err, "clGetDeviceIDs failed"); - num_all_devices += num_devices; - ++platforms_it; - } - } - - std::vector devices(num_all_devices); - std::vector::iterator devices_it = devices.begin(); - for (auto p = platforms.begin(); p != platforms.end(); ++p) { - enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed"); - devices_it += num_devices; - } - - for (auto d = devices.begin(); d != devices.end(); ++d) { - my_devices.add(opencl_device((*d))); - } - } - - default_opencl_factory &get_default_opencl_factory() { - return my_default_opencl_factory; - } - - const opencl_device_list &get_all_devices() { - return my_devices; - } - - default_device_selector_type get_default_device_selector() { return my_default_device_selector; } - -private: - default_opencl_factory my_default_opencl_factory; - opencl_device_list my_devices; - - const default_device_selector_type my_default_device_selector; -}; - -opencl_foundation &opencl_graph::get_opencl_foundation() { - opencl_foundation* INITIALIZATION = (opencl_foundation*)1; - if ( my_opencl_foundation <= INITIALIZATION ) { - if ( tbb::internal::as_atomic( my_opencl_foundation ).compare_and_swap( INITIALIZATION, NULL ) == 0 ) { - my_opencl_foundation = new opencl_foundation( *this ); - } - else { - tbb::internal::spin_wait_while_eq( my_opencl_foundation, INITIALIZATION ); - } - } - - __TBB_ASSERT( my_opencl_foundation > INITIALIZATION, "opencl_foundation is not initialized"); - return *my_opencl_foundation; -} - -opencl_graph::~opencl_graph() { - if ( my_opencl_foundation ) - delete my_opencl_foundation; -} - -template -void opencl_factory::init_once() { - { - tbb::spin_mutex::scoped_lock lock( my_devices_mutex ); - if ( !my_devices.size() ) - my_devices = DeviceFilter()(my_graph.get_opencl_foundation().get_all_devices()); - } - - enforce_cl_retcode( 
my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list" ); - cl_platform_id platform_id = my_devices.begin()->platform(); - for ( opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it ) - enforce_cl_retcode( it->platform() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform" ); - - std::vector cl_device_ids; - for (auto d = my_devices.begin(); d != my_devices.end(); ++d) { - cl_device_ids.push_back((*d).my_cl_device_id); - } - - cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL }; - cl_int err; - cl_context ctx = clCreateContext( context_properties, - (cl_uint)cl_device_ids.size(), - cl_device_ids.data(), - NULL, NULL, &err ); - enforce_cl_retcode( err, "Failed to create context" ); - my_cl_context = ctx; - - size_t device_counter = 0; - for ( auto d = my_devices.begin(); d != my_devices.end(); d++ ) { - (*d).my_device_id = device_counter++; - cl_int err2; - cl_command_queue cq; -#if CL_VERSION_2_0 - if ( (*d).major_version() >= 2 ) { - if ( (*d).out_of_order_exec_mode_on_host_present() ) { - cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 }; - cq = clCreateCommandQueueWithProperties( ctx, (*d).my_cl_device_id, props, &err2 ); - } else { - cl_queue_properties props[] = { 0 }; - cq = clCreateCommandQueueWithProperties( ctx, (*d).my_cl_device_id, props, &err2 ); - } - } else -#endif - { - cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0; - // Suppress "declared deprecated" warning for the next line. -#if __TBB_GCC_WARNING_SUPPRESSION_PRESENT -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif -#if _MSC_VER || __INTEL_COMPILER -#pragma warning( push ) -#if __INTEL_COMPILER -#pragma warning (disable: 1478) -#else -#pragma warning (disable: 4996) -#endif -#endif - cq = clCreateCommandQueue( ctx, (*d).my_cl_device_id, props, &err2 ); -#if _MSC_VER || __INTEL_COMPILER -#pragma warning( pop ) -#endif -#if __TBB_GCC_WARNING_SUPPRESSION_PRESENT -#pragma GCC diagnostic pop -#endif - } - enforce_cl_retcode( err2, "Failed to create command queue" ); - (*d).my_cl_command_queue = cq; - } -} - -const opencl_device_list &opencl_graph::available_devices() { - return get_opencl_foundation().get_all_devices(); -} - -default_opencl_factory &opencl_graph::opencl_factory() { - return get_opencl_foundation().get_default_opencl_factory(); -} - -template -opencl_buffer::opencl_buffer( opencl_graph &g, size_t size ) : my_impl( std::make_shared( size*sizeof(T), g.get_opencl_foundation().get_default_opencl_factory() ) ) {} - - -enum class opencl_program_type { - SOURCE, - PRECOMPILED, - SPIR -}; - -template -class opencl_program : tbb::internal::no_assign { -public: - typedef typename Factory::kernel_type kernel_type; - - opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {} - opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {} - opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {} - - opencl_program( opencl_graph& graph, opencl_program_type type, const std::string& program_name ) : opencl_program( 
graph.opencl_factory(), type, program_name ) {} - opencl_program( opencl_graph& graph, const char* program_name ) : opencl_program( graph.opencl_factory(), program_name ) {} - opencl_program( opencl_graph& graph, const std::string& program_name ) : opencl_program( graph.opencl_factory(), program_name ) {} - opencl_program( opencl_graph& graph, opencl_program_type type ) : opencl_program( graph.opencl_factory(), type ) {} - - opencl_program( const opencl_program &src ) : my_factory( src.my_factory ), my_type( src.type ), my_arg_str( src.my_arg_str ), my_cl_program( src.my_cl_program ) { - // Set my_do_once_flag to the called state. - std::call_once( my_do_once_flag, [](){} ); - } - - kernel_type get_kernel( const std::string& k ) const { - return kernel_type( get_cl_kernel(k), my_factory ); - } - -private: - opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) { - // Set my_do_once_flag to the called state. - std::call_once( my_do_once_flag, [](){} ); - } - - cl_kernel get_cl_kernel( const std::string& k ) const { - std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } ); - cl_int err; - cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err ); - enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k ); - return kernel; - } - - class file_reader { - public: - file_reader( const std::string& filepath ) { - std::ifstream file_descriptor( filepath, std::ifstream::binary ); - if ( !file_descriptor.is_open() ) { - std::string str = std::string( "Could not open file: " ) + filepath; - std::cerr << str << std::endl; - throw str; - } - file_descriptor.seekg( 0, file_descriptor.end ); - size_t length = size_t( file_descriptor.tellg() ); - file_descriptor.seekg( 0, file_descriptor.beg ); - my_content.resize( length ); - char* begin = &*my_content.begin(); - file_descriptor.read( begin, length ); - file_descriptor.close(); - } - const char* content() { return &*my_content.cbegin(); } - size_t length() { return my_content.length(); } - private: - std::string my_content; - }; - - class opencl_program_builder { - public: - typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*); - opencl_program_builder( Factory& f, const std::string& name, cl_program program, - cl_uint num_devices, cl_device_id* device_list, - const char* options, cl_callback_type callback, - void* user_data ) { - cl_int err = clBuildProgram( program, num_devices, device_list, options, - callback, user_data ); - if( err == CL_SUCCESS ) - return; - std::string str = std::string( "Failed to build program: " ) + name; - if ( err == CL_BUILD_PROGRAM_FAILURE ) { - const opencl_device_list &devices = f.devices(); - for ( auto d = devices.begin(); d != devices.end(); ++d ) { - std::cerr << "Build log for device: " << (*d).name() << std::endl; - size_t log_size; - cl_int query_err = clGetProgramBuildInfo( - program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, - &log_size ); - enforce_cl_retcode( query_err, "Failed to get build log size" ); - if( log_size ) { - std::vector output; - output.resize( log_size ); - query_err = clGetProgramBuildInfo( - program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, - output.size(), output.data(), NULL ); - enforce_cl_retcode( query_err, "Failed to get build output" ); - std::cerr << output.data() << std::endl; - } else { - std::cerr << "No build log available" << std::endl; - } - } - } - enforce_cl_retcode( err, str ); - } - }; - - class opencl_device_filter { - public: - template - opencl_device_filter( 
cl_uint& num_devices, cl_device_id* device_list, - Filter filter, const char* message ) { - for ( cl_uint i = 0; i < num_devices; ++i ) - if ( filter(device_list[i]) ) { - device_list[i--] = device_list[--num_devices]; - } - if ( !num_devices ) - enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message ); - } - }; - - void init( const std::string& ) const { - cl_uint num_devices; - enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ), - "Failed to get OpenCL context info" ); - if ( !num_devices ) - enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" ); - cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) ); - enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ), - "Failed to get OpenCL context info" ); - const char *options = NULL; - switch ( my_type ) { - case opencl_program_type::SOURCE: { - file_reader fr( my_arg_str ); - const char *s[] = { fr.content() }; - const size_t l[] = { fr.length() }; - cl_int err; - my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err ); - enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str ); - opencl_device_filter( - num_devices, device_list, - []( const opencl_device& d ) -> bool { - return !d.compiler_available() || !d.linker_available(); - }, "No one device supports building program from sources" ); - opencl_program_builder( - my_factory, my_arg_str, my_cl_program, num_devices, device_list, - options, /*callback*/ NULL, /*user data*/NULL ); - break; - } - case opencl_program_type::SPIR: - options = "-x spir"; - case opencl_program_type::PRECOMPILED: { - file_reader fr( my_arg_str ); - std::vector s( - num_devices, reinterpret_cast(fr.content()) ); - std::vector l( num_devices, fr.length() ); - std::vector bin_statuses( num_devices, -1 ); - cl_int err; - my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices, - device_list, l.data(), s.data(), - bin_statuses.data(), &err ); - if( err != CL_SUCCESS ) { - std::string statuses_str; - for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) { - statuses_str += std::to_string((*st)); - } - - enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str + - std::string( ", binary_statuses = " ) + statuses_str ); - } - opencl_program_builder( - my_factory, my_arg_str, my_cl_program, num_devices, device_list, - options, /*callback*/ NULL, /*user data*/NULL ); - break; - } - default: - __TBB_ASSERT( false, "Unsupported program type" ); - } - } - - Factory& my_factory; - opencl_program_type my_type; - std::string my_arg_str; - mutable cl_program my_cl_program; - mutable std::once_flag my_do_once_flag; - - template - friend class opencl_factory; - - template - friend class opencl_factory::kernel; -}; - -template -class opencl_node; - -template -class opencl_node< tuple, JP, Factory > : public streaming_node< tuple, JP, Factory > { - typedef streaming_node < tuple, JP, Factory > base_type; -public: - typedef typename base_type::kernel_type kernel_type; - - opencl_node( opencl_graph &g, const kernel_type& kernel ) - : base_type( g, kernel, g.get_opencl_foundation().get_default_device_selector(), g.get_opencl_foundation().get_default_opencl_factory() ) - {} - - opencl_node( opencl_graph &g, const kernel_type& kernel, Factory &f ) - : base_type( g, kernel, 
g.get_opencl_foundation().get_default_device_selector(), f ) - {} - - template - opencl_node( opencl_graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f) - : base_type( g, kernel, d, f) - {} -}; - -template -class opencl_node< tuple, JP > : public opencl_node < tuple, JP, default_opencl_factory > { - typedef opencl_node < tuple, JP, default_opencl_factory > base_type; -public: - typedef typename base_type::kernel_type kernel_type; - - opencl_node( opencl_graph &g, const kernel_type& kernel ) - : base_type( g, kernel, g.get_opencl_foundation().get_default_device_selector(), g.get_opencl_foundation().get_default_opencl_factory() ) - {} - - template - opencl_node( opencl_graph &g, const kernel_type& kernel, DeviceSelector d ) - : base_type( g, kernel, d, g.get_opencl_foundation().get_default_opencl_factory() ) - {} -}; - -template -class opencl_node< tuple > : public opencl_node < tuple, queueing, default_opencl_factory > { - typedef opencl_node < tuple, queueing, default_opencl_factory > base_type; -public: - typedef typename base_type::kernel_type kernel_type; - - opencl_node( opencl_graph &g, const kernel_type& kernel ) - : base_type( g, kernel, g.get_opencl_foundation().get_default_device_selector(), g.get_opencl_foundation().get_default_opencl_factory() ) - {} - - template - opencl_node( opencl_graph &g, const kernel_type& kernel, DeviceSelector d ) - : base_type( g, kernel, d, g.get_opencl_foundation().get_default_opencl_factory() ) - {} -}; - -} // namespace interface9 - -using interface9::opencl_graph; -using interface9::opencl_node; -using interface9::read_only; -using interface9::read_write; -using interface9::write_only; -using interface9::opencl_buffer; -using interface9::opencl_subbuffer; -using interface9::opencl_device; -using interface9::opencl_device_list; -using interface9::opencl_program; -using interface9::opencl_program_type; -using interface9::dependency_msg; -using interface9::opencl_factory; -using interface9::opencl_range; - -} // namespace flow -} // namespace tbb -#endif /* __TBB_PREVIEW_OPENCL_NODE */ - -#endif // __TBB_flow_graph_opencl_node_H diff --git a/lib/3rdParty/tbb/include/tbb/gfx_factory.h b/lib/3rdParty/tbb/include/tbb/gfx_factory.h deleted file mode 100644 index 76ca3559..00000000 --- a/lib/3rdParty/tbb/include/tbb/gfx_factory.h +++ /dev/null @@ -1,359 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_flow_graph_gfx_factory_H -#define __TBB_flow_graph_gfx_factory_H - -#include "tbb/tbb_config.h" - -#if __TBB_PREVIEW_GFX_FACTORY - -#include -#include -#include -#include - -#include -#include -#include - -namespace tbb { - -namespace flow { - -namespace interface9 { - -template -class gfx_buffer; - -namespace gfx_offload { - - typedef GfxTaskId task_id_type; - - //----------------------------------------------------------------------- - // GFX errors checkers. 
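// (Note on the checkers that follow: _GFX_offload() returns a task id, where 0
//  signals an enqueue failure, while the other _GFX_* calls return GFX_SUCCESS
//  on success -- hence the two separate checkers, check_enqueue_retcode() and
//  check_gfx_retcode(), both funneling into throw_gfx_exception().)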
- // For more debug output, set GFX_LOG_OFFLOAD=2 macro - //----------------------------------------------------------------------- - - // TODO: reconsider error handling approach. If exception is the right way - // then need to define and document a specific exception type. - inline void throw_gfx_exception() { - std::string msg = "GFX error occurred: " + std::to_string(_GFX_get_last_error()); - std::cerr << msg << std::endl; - throw msg; - } - - inline void check_enqueue_retcode(task_id_type err) { - if (err == 0) { - throw_gfx_exception(); - } - } - - inline void check_gfx_retcode(task_id_type err) { - if (err != GFX_SUCCESS) { - throw_gfx_exception(); - } - } - - //--------------------------------------------------------------------- - // GFX asynchronous offload and share API - //--------------------------------------------------------------------- - - // Sharing and unsharing data API - template - void share(DataType* p, SizeType n) { check_gfx_retcode(_GFX_share(p, sizeof(*p)*n)); } - template - void unshare(DataType* p) { check_gfx_retcode(_GFX_unshare(p)); } - - // Retrieving array pointer from shared gfx_buffer - // Other types remain the same - template - T* raw_data(gfx_buffer& buffer) { return buffer.data(); } - template - const T* raw_data(const gfx_buffer& buffer) { return buffer.data(); } - template - T& raw_data(T& data) { return data; } - template - const T& raw_data(const T& data) { return data; } - - // Kernel enqueuing on device with arguments - template - task_id_type run_kernel(F ptr, ArgType&... args) { - task_id_type id = _GFX_offload(ptr, raw_data(args)...); - - // Check if something during offload went wrong (ex: driver initialization failure) - gfx_offload::check_enqueue_retcode(id); - - return id; - } - - // Waiting for tasks completion - void wait_for_task(task_id_type id) { check_gfx_retcode(_GFX_wait(id)); } - -} // namespace gfx_offload - -template -class gfx_buffer { -public: - - typedef typename std::vector::iterator iterator; - typedef typename std::vector::const_iterator const_iterator; - - typedef std::size_t size_type; - - gfx_buffer() : my_vector_ptr(std::make_shared< std::vector >()) {} - gfx_buffer(size_type size) : my_vector_ptr(std::make_shared< std::vector >(size)) {} - - T* data() { return &(my_vector_ptr->front()); } - const T* data() const { return &(my_vector_ptr->front()); } - - size_type size() const { return my_vector_ptr->size(); } - - const_iterator cbegin() const { return my_vector_ptr->cbegin(); } - const_iterator cend() const { return my_vector_ptr->cend(); } - iterator begin() { return my_vector_ptr->begin(); } - iterator end() { return my_vector_ptr->end(); } - - T& operator[](size_type pos) { return (*my_vector_ptr)[pos]; } - const T& operator[](size_type pos) const { return (*my_vector_ptr)[pos]; } - -private: - std::shared_ptr< std::vector > my_vector_ptr; -}; - -template -class gfx_async_msg : public tbb::flow::async_msg { -public: - typedef gfx_offload::task_id_type kernel_id_type; - - gfx_async_msg() : my_task_id(0) {} - gfx_async_msg(const T& input_data) : my_data(input_data), my_task_id(0) {} - - T& data() { return my_data; } - const T& data() const { return my_data; } - - void set_task_id(kernel_id_type id) { my_task_id = id; } - kernel_id_type task_id() const { return my_task_id; } - -private: - T my_data; - kernel_id_type my_task_id; -}; - -class gfx_factory { -private: - - // Wrapper for GFX kernel which is just a function - class func_wrapper { - public: - - template - func_wrapper(F ptr) { my_ptr = 
reinterpret_cast(ptr); } - - template - void operator()(Args&&... args) {} - - operator void*() { return my_ptr; } - - private: - void* my_ptr; - }; - -public: - - // Device specific types - template using async_msg_type = gfx_async_msg; - - typedef func_wrapper kernel_type; - - // Empty device type that is needed for Factory Concept - // but is not used in gfx_factory - typedef struct {} device_type; - - typedef gfx_offload::task_id_type kernel_id_type; - - gfx_factory(tbb::flow::graph& g) : m_graph(g), current_task_id(0) {} - - // Upload data to the device - template - void send_data(device_type /*device*/, Args&... args) { - send_data_impl(args...); - } - - // Run kernel on the device - template - void send_kernel(device_type /*device*/, const kernel_type& kernel, Args&... args) { - // Get packed T data from async_msg and pass it to kernel - kernel_id_type id = gfx_offload::run_kernel(kernel, args.data()...); - - // Set id to async_msg - set_kernel_id(id, args...); - - // Extend the graph lifetime until the callback completion. - m_graph.reserve_wait(); - - // Mutex for future assignment - std::lock_guard lock(future_assignment_mutex); - - // Set callback that waits for kernel execution - callback_future = std::async(std::launch::async, &gfx_factory::callback, this, id, args...); - } - - // Finalization action after the kernel run - template - void finalize(device_type /*device*/, FinalizeFn fn, Args&... /*args*/) { - fn(); - } - - // Empty device selector. - // No way to choose a device with GFX API. - class dummy_device_selector { - public: - device_type operator()(gfx_factory& /*factory*/) { - return device_type(); - } - }; - -private: - - //--------------------------------------------------------------------- - // Callback for kernel result - //--------------------------------------------------------------------- - - template - void callback(kernel_id_type id, Args... args) { - // Waiting for specific tasks id to complete - { - std::lock_guard lock(task_wait_mutex); - if (current_task_id < id) { - gfx_offload::wait_for_task(id); - current_task_id = id; - } - } - - // Get result from device and set to async_msg (args) - receive_data(args...); - - // Data was sent to the graph, release the reference - m_graph.release_wait(); - } - - //--------------------------------------------------------------------- - // send_data() arguments processing - //--------------------------------------------------------------------- - - // GFX buffer shared data with device that will be executed on - template - void share_data(T) {} - - template - void share_data(gfx_buffer& buffer) { - gfx_offload::share(buffer.data(), buffer.size()); - } - - template - void send_arg(T) {} - - template - void send_arg(async_msg_type& msg) { - share_data(msg.data()); - } - - void send_data_impl() {} - - template - void send_data_impl(T& arg, Rest&... args) { - send_arg(arg); - send_data_impl(args...); - } - - //---------------------------------------------------------------------- - // send_kernel() arguments processing - //---------------------------------------------------------------------- - - template - void set_kernel_id_arg(kernel_id_type, T) {} - - template - void set_kernel_id_arg(kernel_id_type id, async_msg_type& msg) { - msg.set_task_id(id); - } - - void set_kernel_id(kernel_id_type) {} - - template - void set_kernel_id(kernel_id_type id, T& arg, Rest&... 
args) { - set_kernel_id_arg(id, arg); - set_kernel_id(id, args...); - } - - //----------------------------------------------------------------------- - // Arguments processing after kernel execution. - // Unsharing buffers and forwarding results to the graph - //----------------------------------------------------------------------- - - // After kernel execution the data should be unshared - template - void unshare_data(T) {} - - template - void unshare_data(gfx_buffer& buffer) { - gfx_offload::unshare(buffer.data()); - } - - template - void receive_arg(T) {} - - template - void receive_arg(async_msg_type& msg) { - unshare_data(msg.data()); - msg.set(msg.data()); - } - - void receive_data() {} - - template - void receive_data(T& arg, Rest&... args) { - receive_arg(arg); - receive_data(args...); - } - - //----------------------------------------------------------------------- - int current_task_id; - - std::future callback_future; - tbb::flow::graph& m_graph; - - std::mutex future_assignment_mutex; - std::mutex task_wait_mutex; -}; - -} // namespace interface9 - -using interface9::gfx_factory; -using interface9::gfx_buffer; - -} // namespace flow - -} // namespace tbb - -#endif // __TBB_PREVIEW_GFX_FACTORY - -#endif // __TBB_flow_graph_gfx_factory_H diff --git a/lib/3rdParty/tbb/include/tbb/global_control.h b/lib/3rdParty/tbb/include/tbb/global_control.h deleted file mode 100644 index fe742020..00000000 --- a/lib/3rdParty/tbb/include/tbb/global_control.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_global_control_H -#define __TBB_global_control_H - -#if !TBB_PREVIEW_GLOBAL_CONTROL && !__TBB_BUILD -#error Set TBB_PREVIEW_GLOBAL_CONTROL before including global_control.h -#endif - -#include "tbb_stddef.h" - -namespace tbb { -namespace interface9 { - -class global_control { -public: - enum parameter { - max_allowed_parallelism, - thread_stack_size, - parameter_max // insert new parameters above this point - }; - - global_control(parameter p, size_t value) : - my_value(value), my_next(NULL), my_param(p) { - __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); -#if __TBB_WIN8UI_SUPPORT - // For Windows Store* apps it's impossible to set stack size - if (p==thread_stack_size) - return; -#elif __TBB_x86_64 && (_WIN32 || _WIN64) - if (p==thread_stack_size) - __TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); -#endif - if (my_param==max_allowed_parallelism) - __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); - internal_create(); - } - - ~global_control() { - __TBB_ASSERT(my_param < parameter_max, "Invalid parameter. 
Probably the object was corrupted.");
-#if __TBB_WIN8UI_SUPPORT
-        // For Windows Store* apps it's impossible to set stack size
-        if (my_param==thread_stack_size)
-            return;
-#endif
-        internal_destroy();
-    }
-
-    static size_t active_value(parameter p) {
-        __TBB_ASSERT(p < parameter_max, "Invalid parameter");
-        return active_value((int)p);
-    }
-private:
-    size_t          my_value;
-    global_control *my_next;
-    parameter       my_param;
-
-    void __TBB_EXPORTED_METHOD internal_create();
-    void __TBB_EXPORTED_METHOD internal_destroy();
-    static size_t __TBB_EXPORTED_FUNC active_value(int param);
-};
-} // namespace interface9
-
-using interface9::global_control;
-
-} // tbb
-
-#endif // __TBB_global_control_H
diff --git a/lib/3rdParty/tbb/include/tbb/index.html b/lib/3rdParty/tbb/include/tbb/index.html
deleted file mode 100644
index 2eddd450..00000000
--- a/lib/3rdParty/tbb/include/tbb/index.html
+++ /dev/null
@@ -1,29 +0,0 @@
-<HTML>
-<BODY>
-
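For reference, the global_control class deleted just above is a preview feature: its own guard (#error Set TBB_PREVIEW_GLOBAL_CONTROL before including global_control.h) requires the macro seen below. A minimal usage sketch against a TBB 2017-era build; the parallel_for call is illustrative only:

    #define TBB_PREVIEW_GLOBAL_CONTROL 1
    #include "tbb/global_control.h"
    #include "tbb/parallel_for.h"

    int main() {
        // Cap TBB at four worker threads for the lifetime of this object.
        tbb::global_control limit(tbb::global_control::max_allowed_parallelism, 4);

        // Work started while `limit` is alive observes the cap.
        tbb::parallel_for(0, 1000, [](int) { /* ... */ });
        return 0;
    }

Scoping the object is the whole API: the constructor registers the setting through internal_create() and the destructor withdraws it through internal_destroy(), restoring whatever was active before.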

-<H2>Overview</H2>
-Include files for Intel&reg; Threading Building Blocks classes and functions.
-
-<P>Click <A HREF=".">here</A> to see all files in the directory.
-
-<H2>Directories</H2>
-<DL>
-<DT><A HREF="compat">compat</A>
-<DD>Include files for source level compatibility with other frameworks.
-<DT><A HREF="internal">internal</A>
-<DD>Include files with implementation details; not for direct use.
-<DT><A HREF="machine">machine</A>
-<DD>Include files for low-level architecture specific functionality; not for direct use.
-</DL>
-
-<HR>
-<A HREF="../index.html">Up to parent directory</A>
-<p></p>
-Copyright &copy; 2005-2017 Intel Corporation.  All Rights Reserved.
-<P></P>
-Intel is a registered trademark or trademark of Intel Corporation
-or its subsidiaries in the United States and other countries.
-<p></p>
-* Other names and brands may be claimed as the property of others. - - diff --git a/lib/3rdParty/tbb/include/tbb/internal/_aggregator_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_aggregator_impl.h deleted file mode 100644 index 40bbd491..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_aggregator_impl.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__aggregator_impl_H -#define __TBB__aggregator_impl_H - -#include "../atomic.h" -#if !__TBBMALLOC_BUILD -#include "../tbb_profiling.h" -#endif - -namespace tbb { -namespace interface6 { -namespace internal { - -using namespace tbb::internal; - -//! aggregated_operation base class -template -class aggregated_operation { - public: - //! Zero value means "wait" status, all other values are "user" specified values and are defined into the scope of a class which uses "status". - uintptr_t status; - - Derived *next; - aggregated_operation() : status(0), next(NULL) {} -}; - -//! Aggregator base class -/** An aggregator for collecting operations coming from multiple sources and executing - them serially on a single thread. operation_type must be derived from - aggregated_operation. The parameter handler_type is a functor that will be passed the - list of operations and is expected to handle each operation appropriately, setting the - status of each operation to non-zero.*/ -template < typename operation_type > -class aggregator_generic { -public: - aggregator_generic() : handler_busy(false) { pending_operations = NULL; } - - //! Execute an operation - /** Places an operation into the waitlist (pending_operations), and either handles the list, - or waits for the operation to complete, or returns. - The long_life_time parameter specifies the life time of the given operation object. - Operations with long_life_time == true may be accessed after execution. - A "short" life time operation (long_life_time == false) can be destroyed - during execution, and so any access to it after it was put into the waitlist, - including status check, is invalid. As a consequence, waiting for completion - of such operation causes undefined behavior. - */ - template < typename handler_type > - void execute(operation_type *op, handler_type &handle_operations, bool long_life_time = true) { - operation_type *res; - // op->status should be read before inserting the operation into the - // aggregator waitlist since it can become invalid after executing a - // handler (if the operation has 'short' life time.) - const uintptr_t status = op->status; - - // ITT note: &(op->status) tag is used to cover accesses to this op node. This - // thread has created the operation, and now releases it so that the handler - // thread may handle the associated operation w/o triggering a race condition; - // thus this tag will be acquired just before the operation is handled in the - // handle_operations functor. - call_itt_notify(releasing, &(op->status)); - // insert the operation in the queue. 
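// (The do-while below is a lock-free LIFO push, Treiber-stack style: read the
//  current head into res, link op in front of it, and publish with
//  compare_and_swap, retrying with the freshly observed head on contention.
//  res == NULL means the list was empty, so this thread becomes the handler.)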
- do { - // Tools may flag the following line as a race; it is a false positive: - // This is an atomic read; we don't provide itt_hide_load_word for atomics - op->next = res = pending_operations; // NOT A RACE - } while (pending_operations.compare_and_swap(op, res) != res); - if (!res) { // first in the list; handle the operations. - // ITT note: &pending_operations tag covers access to the handler_busy flag, - // which this waiting handler thread will try to set before entering - // handle_operations. - call_itt_notify(acquired, &pending_operations); - start_handle_operations(handle_operations); - // The operation with 'short' life time can already be destroyed. - if (long_life_time) - __TBB_ASSERT(op->status, NULL); - } - // not first; wait for op to be ready. - else if (!status) { // operation is blocking here. - __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing."); - call_itt_notify(prepare, &(op->status)); - spin_wait_while_eq(op->status, uintptr_t(0)); - itt_load_word_with_acquire(op->status); - } - } - - private: - //! An atomically updated list (aka mailbox) of pending operations - atomic pending_operations; - //! Controls thread access to handle_operations - uintptr_t handler_busy; - - //! Trigger the handling of operations when the handler is free - template < typename handler_type > - void start_handle_operations( handler_type &handle_operations ) { - operation_type *op_list; - - // ITT note: &handler_busy tag covers access to pending_operations as it is passed - // between active and waiting handlers. Below, the waiting handler waits until - // the active handler releases, and the waiting handler acquires &handler_busy as - // it becomes the active_handler. The release point is at the end of this - // function, when all operations in pending_operations have been handled by the - // owner of this aggregator. - call_itt_notify(prepare, &handler_busy); - // get the handler_busy: - // only one thread can possibly spin here at a time - spin_wait_until_eq(handler_busy, uintptr_t(0)); - call_itt_notify(acquired, &handler_busy); - // acquire fence not necessary here due to causality rule and surrounding atomics - __TBB_store_with_release(handler_busy, uintptr_t(1)); - - // ITT note: &pending_operations tag covers access to the handler_busy flag - // itself. Capturing the state of the pending_operations signifies that - // handler_busy has been set and a new active handler will now process that list's - // operations. 
- call_itt_notify(releasing, &pending_operations); - // grab pending_operations - op_list = pending_operations.fetch_and_store(NULL); - - // handle all the operations - handle_operations(op_list); - - // release the handler - itt_store_word_with_release(handler_busy, uintptr_t(0)); - } -}; - -template < typename handler_type, typename operation_type > -class aggregator : public aggregator_generic { - handler_type handle_operations; -public: - aggregator() {} - explicit aggregator(handler_type h) : handle_operations(h) {} - - void initialize_handler(handler_type h) { handle_operations = h; } - - void execute(operation_type *op) { - aggregator_generic::execute(op, handle_operations); - } -}; - -// the most-compatible friend declaration (vs, gcc, icc) is -// template friend class aggregating_functor; -template -class aggregating_functor { - aggregating_class *fi; -public: - aggregating_functor() {} - aggregating_functor(aggregating_class *fi_) : fi(fi_) {} - void operator()(operation_list* op_list) { fi->handle_operations(op_list); } -}; - -} // namespace internal -} // namespace interface6 - -namespace internal { - using interface6::internal::aggregated_operation; - using interface6::internal::aggregator_generic; - using interface6::internal::aggregator; - using interface6::internal::aggregating_functor; -} // namespace internal - -} // namespace tbb - -#endif // __TBB__aggregator_impl_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_concurrent_queue_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_concurrent_queue_impl.h deleted file mode 100644 index a99a68f6..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_concurrent_queue_impl.h +++ /dev/null @@ -1,1086 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__concurrent_queue_impl_H -#define __TBB__concurrent_queue_impl_H - -#ifndef __TBB_concurrent_queue_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "../tbb_stddef.h" -#include "../tbb_machine.h" -#include "../atomic.h" -#include "../spin_mutex.h" -#include "../cache_aligned_allocator.h" -#include "../tbb_exception.h" -#include "../tbb_profiling.h" -#include -#include __TBB_STD_SWAP_HEADER - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -namespace tbb { - -#if !__TBB_TEMPLATE_FRIENDS_BROKEN - -// forward declaration -namespace strict_ppl { -template class concurrent_queue; -} - -template class concurrent_bounded_queue; - -#endif - -//! For internal use only. -namespace strict_ppl { - -//! 
@cond INTERNAL -namespace internal { - -using namespace tbb::internal; - -typedef size_t ticket; - -template class micro_queue ; -template class micro_queue_pop_finalizer ; -template class concurrent_queue_base_v3; -template struct concurrent_queue_rep; - -//! parts of concurrent_queue_rep that do not have references to micro_queue -/** - * For internal use only. - */ -struct concurrent_queue_rep_base : no_copy { - template friend class micro_queue; - template friend class concurrent_queue_base_v3; - -protected: - //! Approximately n_queue/golden ratio - static const size_t phi = 3; - -public: - // must be power of 2 - static const size_t n_queue = 8; - - //! Prefix on a page - struct page { - page* next; - uintptr_t mask; - }; - - atomic head_counter; - char pad1[NFS_MaxLineSize-sizeof(atomic)]; - atomic tail_counter; - char pad2[NFS_MaxLineSize-sizeof(atomic)]; - - //! Always a power of 2 - size_t items_per_page; - - //! Size of an item - size_t item_size; - - //! number of invalid entries in the queue - atomic n_invalid_entries; - - char pad3[NFS_MaxLineSize-sizeof(size_t)-sizeof(size_t)-sizeof(atomic)]; -} ; - -inline bool is_valid_page(const concurrent_queue_rep_base::page* p) { - return uintptr_t(p)>1; -} - -//! Abstract class to define interface for page allocation/deallocation -/** - * For internal use only. - */ -class concurrent_queue_page_allocator -{ - template friend class micro_queue ; - template friend class micro_queue_pop_finalizer ; -protected: - virtual ~concurrent_queue_page_allocator() {} -private: - virtual concurrent_queue_rep_base::page* allocate_page() = 0; - virtual void deallocate_page( concurrent_queue_rep_base::page* p ) = 0; -} ; - -#if _MSC_VER && !defined(__INTEL_COMPILER) -// unary minus operator applied to unsigned type, result still unsigned -#pragma warning( push ) -#pragma warning( disable: 4146 ) -#endif - -//! A queue using simple locking. -/** For efficiency, this class has no constructor. - The caller is expected to zero-initialize it. */ -template -class micro_queue : no_copy { -public: - typedef void (*item_constructor_t)(T* location, const void* src); -private: - typedef concurrent_queue_rep_base::page page; - - //! Class used to ensure exception-safety of method "pop" - class destroyer: no_copy { - T& my_value; - public: - destroyer( T& value ) : my_value(value) {} - ~destroyer() {my_value.~T();} - }; - - void copy_item( page& dst, size_t dindex, const void* src, item_constructor_t construct_item ) { - construct_item( &get_ref(dst, dindex), src ); - } - - void copy_item( page& dst, size_t dindex, const page& src, size_t sindex, - item_constructor_t construct_item ) - { - T& src_item = get_ref( const_cast(src), sindex ); - construct_item( &get_ref(dst, dindex), static_cast(&src_item) ); - } - - void assign_and_destroy_item( void* dst, page& src, size_t index ) { - T& from = get_ref(src,index); - destroyer d(from); - *static_cast(dst) = tbb::internal::move( from ); - } - - void spin_wait_until_my_turn( atomic& counter, ticket k, concurrent_queue_rep_base& rb ) const ; - -public: - friend class micro_queue_pop_finalizer; - - struct padded_page: page { - //! Not defined anywhere - exists to quiet warnings. - padded_page(); - //! Not defined anywhere - exists to quiet warnings. - void operator=( const padded_page& ); - //! Must be last field. 
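// (`last` doubles as the first slot of the page's in-place item array: pages
//  are allocated oversized, sizeof(padded_page) + (items_per_page-1)*sizeof(T),
//  and get_ref() indexes items as (&padded_page::last)[index].)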
- T last; - }; - - static T& get_ref( page& p, size_t index ) { - return (&static_cast(static_cast(&p))->last)[index]; - } - - atomic head_page; - atomic head_counter; - - atomic tail_page; - atomic tail_counter; - - spin_mutex page_mutex; - - void push( const void* item, ticket k, concurrent_queue_base_v3& base, - item_constructor_t construct_item ) ; - - bool pop( void* dst, ticket k, concurrent_queue_base_v3& base ) ; - - micro_queue& assign( const micro_queue& src, concurrent_queue_base_v3& base, - item_constructor_t construct_item ) ; - - page* make_copy( concurrent_queue_base_v3& base, const page* src_page, size_t begin_in_page, - size_t end_in_page, ticket& g_index, item_constructor_t construct_item ) ; - - void invalidate_page_and_rethrow( ticket k ) ; -}; - -template -void micro_queue::spin_wait_until_my_turn( atomic& counter, ticket k, concurrent_queue_rep_base& rb ) const { - for( atomic_backoff b(true);;b.pause() ) { - ticket c = counter; - if( c==k ) return; - else if( c&1 ) { - ++rb.n_invalid_entries; - throw_exception( eid_bad_last_alloc ); - } - } -} - -template -void micro_queue::push( const void* item, ticket k, concurrent_queue_base_v3& base, - item_constructor_t construct_item ) -{ - k &= -concurrent_queue_rep_base::n_queue; - page* p = NULL; - size_t index = modulo_power_of_two( k/concurrent_queue_rep_base::n_queue, base.my_rep->items_per_page); - if( !index ) { - __TBB_TRY { - concurrent_queue_page_allocator& pa = base; - p = pa.allocate_page(); - } __TBB_CATCH (...) { - ++base.my_rep->n_invalid_entries; - invalidate_page_and_rethrow( k ); - } - p->mask = 0; - p->next = NULL; - } - - if( tail_counter != k ) spin_wait_until_my_turn( tail_counter, k, *base.my_rep ); - call_itt_notify(acquired, &tail_counter); - - if( p ) { - spin_mutex::scoped_lock lock( page_mutex ); - page* q = tail_page; - if( is_valid_page(q) ) - q->next = p; - else - head_page = p; - tail_page = p; - } else { - p = tail_page; - } - - __TBB_TRY { - copy_item( *p, index, item, construct_item ); - // If no exception was thrown, mark item as present. - itt_hide_store_word(p->mask, p->mask | uintptr_t(1)<n_invalid_entries; - call_itt_notify(releasing, &tail_counter); - tail_counter += concurrent_queue_rep_base::n_queue; - __TBB_RETHROW(); - } -} - -template -bool micro_queue::pop( void* dst, ticket k, concurrent_queue_base_v3& base ) { - k &= -concurrent_queue_rep_base::n_queue; - if( head_counter!=k ) spin_wait_until_eq( head_counter, k ); - call_itt_notify(acquired, &head_counter); - if( tail_counter==k ) spin_wait_while_eq( tail_counter, k ); - call_itt_notify(acquired, &tail_counter); - page *p = head_page; - __TBB_ASSERT( p, NULL ); - size_t index = modulo_power_of_two( k/concurrent_queue_rep_base::n_queue, base.my_rep->items_per_page ); - bool success = false; - { - micro_queue_pop_finalizer finalizer( *this, base, k+concurrent_queue_rep_base::n_queue, index==base.my_rep->items_per_page-1 ? 
p : NULL ); - if( p->mask & uintptr_t(1)<n_invalid_entries; - } - } - return success; -} - -template -micro_queue& micro_queue::assign( const micro_queue& src, concurrent_queue_base_v3& base, - item_constructor_t construct_item ) -{ - head_counter = src.head_counter; - tail_counter = src.tail_counter; - - const page* srcp = src.head_page; - if( is_valid_page(srcp) ) { - ticket g_index = head_counter; - __TBB_TRY { - size_t n_items = (tail_counter-head_counter)/concurrent_queue_rep_base::n_queue; - size_t index = modulo_power_of_two( head_counter/concurrent_queue_rep_base::n_queue, base.my_rep->items_per_page ); - size_t end_in_first_page = (index+n_itemsitems_per_page)?(index+n_items):base.my_rep->items_per_page; - - head_page = make_copy( base, srcp, index, end_in_first_page, g_index, construct_item ); - page* cur_page = head_page; - - if( srcp != src.tail_page ) { - for( srcp = srcp->next; srcp!=src.tail_page; srcp=srcp->next ) { - cur_page->next = make_copy( base, srcp, 0, base.my_rep->items_per_page, g_index, construct_item ); - cur_page = cur_page->next; - } - - __TBB_ASSERT( srcp==src.tail_page, NULL ); - size_t last_index = modulo_power_of_two( tail_counter/concurrent_queue_rep_base::n_queue, base.my_rep->items_per_page ); - if( last_index==0 ) last_index = base.my_rep->items_per_page; - - cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); - cur_page = cur_page->next; - } - tail_page = cur_page; - } __TBB_CATCH (...) { - invalidate_page_and_rethrow( g_index ); - } - } else { - head_page = tail_page = NULL; - } - return *this; -} - -template -void micro_queue::invalidate_page_and_rethrow( ticket k ) { - // Append an invalid page at address 1 so that no more pushes are allowed. - page* invalid_page = (page*)uintptr_t(1); - { - spin_mutex::scoped_lock lock( page_mutex ); - itt_store_word_with_release(tail_counter, k+concurrent_queue_rep_base::n_queue+1); - page* q = tail_page; - if( is_valid_page(q) ) - q->next = invalid_page; - else - head_page = invalid_page; - tail_page = invalid_page; - } - __TBB_RETHROW(); -} - -template -concurrent_queue_rep_base::page* micro_queue::make_copy( concurrent_queue_base_v3& base, - const concurrent_queue_rep_base::page* src_page, size_t begin_in_page, size_t end_in_page, - ticket& g_index, item_constructor_t construct_item ) -{ - concurrent_queue_page_allocator& pa = base; - page* new_page = pa.allocate_page(); - new_page->next = NULL; - new_page->mask = src_page->mask; - for( ; begin_in_page!=end_in_page; ++begin_in_page, ++g_index ) - if( new_page->mask & uintptr_t(1)< -class micro_queue_pop_finalizer: no_copy { - typedef concurrent_queue_rep_base::page page; - ticket my_ticket; - micro_queue& my_queue; - page* my_page; - concurrent_queue_page_allocator& allocator; -public: - micro_queue_pop_finalizer( micro_queue& queue, concurrent_queue_base_v3& b, ticket k, page* p ) : - my_ticket(k), my_queue(queue), my_page(p), allocator(b) - {} - ~micro_queue_pop_finalizer() ; -}; - -template -micro_queue_pop_finalizer::~micro_queue_pop_finalizer() { - page* p = my_page; - if( is_valid_page(p) ) { - spin_mutex::scoped_lock lock( my_queue.page_mutex ); - page* q = p->next; - my_queue.head_page = q; - if( !is_valid_page(q) ) { - my_queue.tail_page = NULL; - } - } - itt_store_word_with_release(my_queue.head_counter, my_ticket); - if( is_valid_page(p) ) { - allocator.deallocate_page( p ); - } -} - -#if _MSC_VER && !defined(__INTEL_COMPILER) -#pragma warning( pop ) -#endif // warning 4146 is back - -template class 
concurrent_queue_iterator_rep ; -template class concurrent_queue_iterator_base_v3; - -//! representation of concurrent_queue_base -/** - * the class inherits from concurrent_queue_rep_base and defines an array of micro_queue's - */ -template -struct concurrent_queue_rep : public concurrent_queue_rep_base { - micro_queue array[n_queue]; - - //! Map ticket to an array index - static size_t index( ticket k ) { - return k*phi%n_queue; - } - - micro_queue& choose( ticket k ) { - // The formula here approximates LRU in a cache-oblivious way. - return array[index(k)]; - } -}; - -//! base class of concurrent_queue -/** - * The class implements the interface defined by concurrent_queue_page_allocator - * and has a pointer to an instance of concurrent_queue_rep. - */ -template -class concurrent_queue_base_v3: public concurrent_queue_page_allocator { -private: - //! Internal representation - concurrent_queue_rep* my_rep; - - friend struct concurrent_queue_rep; - friend class micro_queue; - friend class concurrent_queue_iterator_rep; - friend class concurrent_queue_iterator_base_v3; - -protected: - typedef typename concurrent_queue_rep::page page; - -private: - typedef typename micro_queue::padded_page padded_page; - typedef typename micro_queue::item_constructor_t item_constructor_t; - - virtual page *allocate_page() __TBB_override { - concurrent_queue_rep& r = *my_rep; - size_t n = sizeof(padded_page) + (r.items_per_page-1)*sizeof(T); - return reinterpret_cast(allocate_block ( n )); - } - - virtual void deallocate_page( concurrent_queue_rep_base::page *p ) __TBB_override { - concurrent_queue_rep& r = *my_rep; - size_t n = sizeof(padded_page) + (r.items_per_page-1)*sizeof(T); - deallocate_block( reinterpret_cast(p), n ); - } - - //! custom allocator - virtual void *allocate_block( size_t n ) = 0; - - //! custom de-allocator - virtual void deallocate_block( void *p, size_t n ) = 0; - -protected: - concurrent_queue_base_v3(); - - virtual ~concurrent_queue_base_v3() { -#if TBB_USE_ASSERT - size_t nq = my_rep->n_queue; - for( size_t i=0; iarray[i].tail_page==NULL, "pages were not freed properly" ); -#endif /* TBB_USE_ASSERT */ - cache_aligned_allocator >().deallocate(my_rep,1); - } - - //! Enqueue item at tail of queue - void internal_push( const void* src, item_constructor_t construct_item ) { - concurrent_queue_rep& r = *my_rep; - ticket k = r.tail_counter++; - r.choose(k).push( src, k, *this, construct_item ); - } - - //! Attempt to dequeue item from queue. - /** NULL if there was no item to dequeue. */ - bool internal_try_pop( void* dst ) ; - - //! Get size of queue; result may be invalid if queue is modified concurrently - size_t internal_size() const ; - - //! check if the queue is empty; thread safe - bool internal_empty() const ; - - //! free any remaining pages - /* note that the name may be misleading, but it remains so due to a historical accident. */ - void internal_finish_clear() ; - - //! Obsolete - void internal_throw_exception() const { - throw_exception( eid_bad_alloc ); - } - - //! copy or move internal representation - void assign( const concurrent_queue_base_v3& src, item_constructor_t construct_item ) ; - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! 
swap internal representation - void internal_swap( concurrent_queue_base_v3& src ) { - std::swap( my_rep, src.my_rep ); - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ -}; - -template -concurrent_queue_base_v3::concurrent_queue_base_v3() { - const size_t item_size = sizeof(T); - my_rep = cache_aligned_allocator >().allocate(1); - __TBB_ASSERT( (size_t)my_rep % NFS_GetLineSize()==0, "alignment error" ); - __TBB_ASSERT( (size_t)&my_rep->head_counter % NFS_GetLineSize()==0, "alignment error" ); - __TBB_ASSERT( (size_t)&my_rep->tail_counter % NFS_GetLineSize()==0, "alignment error" ); - __TBB_ASSERT( (size_t)&my_rep->array % NFS_GetLineSize()==0, "alignment error" ); - memset(my_rep,0,sizeof(concurrent_queue_rep)); - my_rep->item_size = item_size; - my_rep->items_per_page = item_size<= 8 ? 32 : - item_size<= 16 ? 16 : - item_size<= 32 ? 8 : - item_size<= 64 ? 4 : - item_size<=128 ? 2 : - 1; -} - -template -bool concurrent_queue_base_v3::internal_try_pop( void* dst ) { - concurrent_queue_rep& r = *my_rep; - ticket k; - do { - k = r.head_counter; - for(;;) { - if( (ptrdiff_t)(r.tail_counter-k)<=0 ) { - // Queue is empty - return false; - } - // Queue had item with ticket k when we looked. Attempt to get that item. - ticket tk=k; -#if defined(_MSC_VER) && defined(_Wp64) - #pragma warning (push) - #pragma warning (disable: 4267) -#endif - k = r.head_counter.compare_and_swap( tk+1, tk ); -#if defined(_MSC_VER) && defined(_Wp64) - #pragma warning (pop) -#endif - if( k==tk ) - break; - // Another thread snatched the item, retry. - } - } while( !r.choose( k ).pop( dst, k, *this ) ); - return true; -} - -template -size_t concurrent_queue_base_v3::internal_size() const { - concurrent_queue_rep& r = *my_rep; - __TBB_ASSERT( sizeof(ptrdiff_t)<=sizeof(size_t), NULL ); - ticket hc = r.head_counter; - size_t nie = r.n_invalid_entries; - ticket tc = r.tail_counter; - __TBB_ASSERT( hc!=tc || !nie, NULL ); - ptrdiff_t sz = tc-hc-nie; - return sz<0 ? 0 : size_t(sz); -} - -template -bool concurrent_queue_base_v3::internal_empty() const { - concurrent_queue_rep& r = *my_rep; - ticket tc = r.tail_counter; - ticket hc = r.head_counter; - // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. - return tc==r.tail_counter && tc==hc+r.n_invalid_entries ; -} - -template -void concurrent_queue_base_v3::internal_finish_clear() { - concurrent_queue_rep& r = *my_rep; - size_t nq = r.n_queue; - for( size_t i=0; i -void concurrent_queue_base_v3::assign( const concurrent_queue_base_v3& src, - item_constructor_t construct_item ) -{ - concurrent_queue_rep& r = *my_rep; - r.items_per_page = src.my_rep->items_per_page; - - // copy concurrent_queue_rep data - r.head_counter = src.my_rep->head_counter; - r.tail_counter = src.my_rep->tail_counter; - r.n_invalid_entries = src.my_rep->n_invalid_entries; - - // copy or move micro_queues - for( size_t i = 0; i < r.n_queue; ++i ) - r.array[i].assign( src.my_rep->array[i], *this, construct_item); - - __TBB_ASSERT( r.head_counter==src.my_rep->head_counter && r.tail_counter==src.my_rep->tail_counter, - "the source concurrent queue should not be concurrently modified." 
); -} - -template class concurrent_queue_iterator; - -template -class concurrent_queue_iterator_rep: no_assign { - typedef typename micro_queue::padded_page padded_page; -public: - ticket head_counter; - const concurrent_queue_base_v3& my_queue; - typename concurrent_queue_base_v3::page* array[concurrent_queue_rep::n_queue]; - concurrent_queue_iterator_rep( const concurrent_queue_base_v3& queue ) : - head_counter(queue.my_rep->head_counter), - my_queue(queue) - { - for( size_t k=0; k::n_queue; ++k ) - array[k] = queue.my_rep->array[k].head_page; - } - - //! Set item to point to kth element. Return true if at end of queue or item is marked valid; false otherwise. - bool get_item( T*& item, size_t k ) ; -}; - -template -bool concurrent_queue_iterator_rep::get_item( T*& item, size_t k ) { - if( k==my_queue.my_rep->tail_counter ) { - item = NULL; - return true; - } else { - typename concurrent_queue_base_v3::page* p = array[concurrent_queue_rep::index(k)]; - __TBB_ASSERT(p,NULL); - size_t i = modulo_power_of_two( k/concurrent_queue_rep::n_queue, my_queue.my_rep->items_per_page ); - item = µ_queue::get_ref(*p,i); - return (p->mask & uintptr_t(1)< -class concurrent_queue_iterator_base_v3 : no_assign { - //! Represents concurrent_queue over which we are iterating. - /** NULL if one past last element in queue. */ - concurrent_queue_iterator_rep* my_rep; - - template - friend bool operator==( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ); - - template - friend bool operator!=( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ); -protected: - //! Pointer to current item - Value* my_item; - - //! Default constructor - concurrent_queue_iterator_base_v3() : my_rep(NULL), my_item(NULL) { -#if __TBB_GCC_OPTIMIZER_ORDERING_BROKEN - __TBB_compiler_fence(); -#endif - } - - //! Copy constructor - concurrent_queue_iterator_base_v3( const concurrent_queue_iterator_base_v3& i ) - : no_assign(), my_rep(NULL), my_item(NULL) { - assign(i); - } - - //! Construct iterator pointing to head of queue. - concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3& queue ) ; - - //! Assignment - void assign( const concurrent_queue_iterator_base_v3& other ) ; - - //! Advance iterator one step towards tail of queue. - void advance() ; - - //! 
Destructor - ~concurrent_queue_iterator_base_v3() { - cache_aligned_allocator >().deallocate(my_rep, 1); - my_rep = NULL; - } -}; - -template -concurrent_queue_iterator_base_v3::concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3& queue ) { - my_rep = cache_aligned_allocator >().allocate(1); - new( my_rep ) concurrent_queue_iterator_rep(queue); - size_t k = my_rep->head_counter; - if( !my_rep->get_item(my_item, k) ) advance(); -} - -template -void concurrent_queue_iterator_base_v3::assign( const concurrent_queue_iterator_base_v3& other ) { - if( my_rep!=other.my_rep ) { - if( my_rep ) { - cache_aligned_allocator >().deallocate(my_rep, 1); - my_rep = NULL; - } - if( other.my_rep ) { - my_rep = cache_aligned_allocator >().allocate(1); - new( my_rep ) concurrent_queue_iterator_rep( *other.my_rep ); - } - } - my_item = other.my_item; -} - -template -void concurrent_queue_iterator_base_v3::advance() { - __TBB_ASSERT( my_item, "attempt to increment iterator past end of queue" ); - size_t k = my_rep->head_counter; - const concurrent_queue_base_v3& queue = my_rep->my_queue; -#if TBB_USE_ASSERT - Value* tmp; - my_rep->get_item(tmp,k); - __TBB_ASSERT( my_item==tmp, NULL ); -#endif /* TBB_USE_ASSERT */ - size_t i = modulo_power_of_two( k/concurrent_queue_rep::n_queue, queue.my_rep->items_per_page ); - if( i==queue.my_rep->items_per_page-1 ) { - typename concurrent_queue_base_v3::page*& root = my_rep->array[concurrent_queue_rep::index(k)]; - root = root->next; - } - // advance k - my_rep->head_counter = ++k; - if( !my_rep->get_item(my_item, k) ) advance(); -} - -//! Similar to C++0x std::remove_cv -/** "tbb_" prefix added to avoid overload confusion with C++0x implementations. */ -template struct tbb_remove_cv {typedef T type;}; -template struct tbb_remove_cv {typedef T type;}; -template struct tbb_remove_cv {typedef T type;}; -template struct tbb_remove_cv {typedef T type;}; - -//! Meets requirements of a forward iterator for STL. -/** Value is either the T or const T type of the container. - @ingroup containers */ -template -class concurrent_queue_iterator: public concurrent_queue_iterator_base_v3::type>, - public std::iterator { -#if !__TBB_TEMPLATE_FRIENDS_BROKEN - template - friend class ::tbb::strict_ppl::concurrent_queue; -#else -public: -#endif - //! Construct iterator pointing to head of queue. - explicit concurrent_queue_iterator( const concurrent_queue_base_v3::type>& queue ) : - concurrent_queue_iterator_base_v3::type>(queue) - { - } - -public: - concurrent_queue_iterator() {} - - /** If Value==Container::value_type, then this routine is the copy constructor. - If Value==const Container::value_type, then this routine is a conversion constructor. */ - concurrent_queue_iterator( const concurrent_queue_iterator& other ) : - concurrent_queue_iterator_base_v3::type>(other) - {} - - //! Iterator assignment - concurrent_queue_iterator& operator=( const concurrent_queue_iterator& other ) { - this->assign(other); - return *this; - } - - //! Reference to current item - Value& operator*() const { - return *static_cast(this->my_item); - } - - Value* operator->() const {return &operator*();} - - //! Advance to next item in queue - concurrent_queue_iterator& operator++() { - this->advance(); - return *this; - } - - //! 
Post increment - Value* operator++(int) { - Value* result = &operator*(); - operator++(); - return result; - } -}; // concurrent_queue_iterator - - -template -bool operator==( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ) { - return i.my_item==j.my_item; -} - -template -bool operator!=( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ) { - return i.my_item!=j.my_item; -} - -} // namespace internal - -//! @endcond - -} // namespace strict_ppl - -//! @cond INTERNAL -namespace internal { - -class concurrent_queue_rep; -class concurrent_queue_iterator_rep; -class concurrent_queue_iterator_base_v3; -template class concurrent_queue_iterator; - -//! For internal use only. -/** Type-independent portion of concurrent_queue. - @ingroup containers */ -class concurrent_queue_base_v3: no_copy { -private: - //! Internal representation - concurrent_queue_rep* my_rep; - - friend class concurrent_queue_rep; - friend struct micro_queue; - friend class micro_queue_pop_finalizer; - friend class concurrent_queue_iterator_rep; - friend class concurrent_queue_iterator_base_v3; -protected: - //! Prefix on a page - struct page { - page* next; - uintptr_t mask; - }; - - //! Capacity of the queue - ptrdiff_t my_capacity; - - //! Always a power of 2 - size_t items_per_page; - - //! Size of an item - size_t item_size; - - enum copy_specifics { copy, move }; - -#if __TBB_PROTECTED_NESTED_CLASS_BROKEN -public: -#endif - template - struct padded_page: page { - //! Not defined anywhere - exists to quiet warnings. - padded_page(); - //! Not defined anywhere - exists to quiet warnings. - void operator=( const padded_page& ); - //! Must be last field. - T last; - }; - -private: - virtual void copy_item( page& dst, size_t index, const void* src ) = 0; - virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) = 0; -protected: - __TBB_EXPORTED_METHOD concurrent_queue_base_v3( size_t item_size ); - virtual __TBB_EXPORTED_METHOD ~concurrent_queue_base_v3(); - - //! Enqueue item at tail of queue using copy operation - void __TBB_EXPORTED_METHOD internal_push( const void* src ); - - //! Dequeue item from head of queue - void __TBB_EXPORTED_METHOD internal_pop( void* dst ); - - //! Abort all pending queue operations - void __TBB_EXPORTED_METHOD internal_abort(); - - //! Attempt to enqueue item onto queue using copy operation - bool __TBB_EXPORTED_METHOD internal_push_if_not_full( const void* src ); - - //! Attempt to dequeue item from queue. - /** NULL if there was no item to dequeue. */ - bool __TBB_EXPORTED_METHOD internal_pop_if_present( void* dst ); - - //! Get size of queue - ptrdiff_t __TBB_EXPORTED_METHOD internal_size() const; - - //! Check if the queue is emtpy - bool __TBB_EXPORTED_METHOD internal_empty() const; - - //! Set the queue capacity - void __TBB_EXPORTED_METHOD internal_set_capacity( ptrdiff_t capacity, size_t element_size ); - - //! custom allocator - virtual page *allocate_page() = 0; - - //! custom de-allocator - virtual void deallocate_page( page *p ) = 0; - - //! free any remaining pages - /* note that the name may be misleading, but it remains so due to a historical accident. */ - void __TBB_EXPORTED_METHOD internal_finish_clear() ; - - //! throw an exception - void __TBB_EXPORTED_METHOD internal_throw_exception() const; - - //! copy internal representation - void __TBB_EXPORTED_METHOD assign( const concurrent_queue_base_v3& src ) ; - -#if __TBB_CPP11_RVALUE_REF_PRESENT - //! 
swap queues - void internal_swap( concurrent_queue_base_v3& src ) { - std::swap( my_capacity, src.my_capacity ); - std::swap( items_per_page, src.items_per_page ); - std::swap( item_size, src.item_size ); - std::swap( my_rep, src.my_rep ); - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - //! Enqueues item at tail of queue using specified operation (copy or move) - void internal_insert_item( const void* src, copy_specifics op_type ); - - //! Attempts to enqueue at tail of queue using specified operation (copy or move) - bool internal_insert_if_not_full( const void* src, copy_specifics op_type ); - - //! Assigns one queue to another using specified operation (copy or move) - void internal_assign( const concurrent_queue_base_v3& src, copy_specifics op_type ); -private: - virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) = 0; -}; - -//! For internal use only. -/** Backward compatible modification of concurrent_queue_base_v3 - @ingroup containers */ -class concurrent_queue_base_v8: public concurrent_queue_base_v3 { -protected: - concurrent_queue_base_v8( size_t item_sz ) : concurrent_queue_base_v3( item_sz ) {} - - //! move items - void __TBB_EXPORTED_METHOD move_content( concurrent_queue_base_v8& src ) ; - - //! Attempt to enqueue item onto queue using move operation - bool __TBB_EXPORTED_METHOD internal_push_move_if_not_full( const void* src ); - - //! Enqueue item at tail of queue using move operation - void __TBB_EXPORTED_METHOD internal_push_move( const void* src ); -private: - friend struct micro_queue; - virtual void move_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) = 0; - virtual void move_item( page& dst, size_t index, const void* src ) = 0; -}; - -//! Type-independent portion of concurrent_queue_iterator. -/** @ingroup containers */ -class concurrent_queue_iterator_base_v3 { - //! concurrent_queue over which we are iterating. - /** NULL if one past last element in queue. */ - concurrent_queue_iterator_rep* my_rep; - - template - friend bool operator==( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ); - - template - friend bool operator!=( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ); - - void initialize( const concurrent_queue_base_v3& queue, size_t offset_of_data ); -protected: - //! Pointer to current item - void* my_item; - - //! Default constructor - concurrent_queue_iterator_base_v3() : my_rep(NULL), my_item(NULL) {} - - //! Copy constructor - concurrent_queue_iterator_base_v3( const concurrent_queue_iterator_base_v3& i ) : my_rep(NULL), my_item(NULL) { - assign(i); - } - - //! Obsolete entry point for constructing iterator pointing to head of queue. - /** Does not work correctly for SSE types. */ - __TBB_EXPORTED_METHOD concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3& queue ); - - //! Construct iterator pointing to head of queue. - __TBB_EXPORTED_METHOD concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3& queue, size_t offset_of_data ); - - //! Assignment - void __TBB_EXPORTED_METHOD assign( const concurrent_queue_iterator_base_v3& i ); - - //! Advance iterator one step towards tail of queue. - void __TBB_EXPORTED_METHOD advance(); - - //! Destructor - __TBB_EXPORTED_METHOD ~concurrent_queue_iterator_base_v3(); -}; - -typedef concurrent_queue_iterator_base_v3 concurrent_queue_iterator_base; - -//! Meets requirements of a forward iterator for STL. -/** Value is either the T or const T type of the container. 
- @ingroup containers */ -template -class concurrent_queue_iterator: public concurrent_queue_iterator_base, - public std::iterator { - -#if !__TBB_TEMPLATE_FRIENDS_BROKEN - template - friend class ::tbb::concurrent_bounded_queue; -#else -public: -#endif - - //! Construct iterator pointing to head of queue. - explicit concurrent_queue_iterator( const concurrent_queue_base_v3& queue ) : - concurrent_queue_iterator_base_v3(queue,__TBB_offsetof(concurrent_queue_base_v3::padded_page,last)) - { - } - -public: - concurrent_queue_iterator() {} - - /** If Value==Container::value_type, then this routine is the copy constructor. - If Value==const Container::value_type, then this routine is a conversion constructor. */ - concurrent_queue_iterator( const concurrent_queue_iterator& other ) : - concurrent_queue_iterator_base_v3(other) - {} - - //! Iterator assignment - concurrent_queue_iterator& operator=( const concurrent_queue_iterator& other ) { - assign(other); - return *this; - } - - //! Reference to current item - Value& operator*() const { - return *static_cast(my_item); - } - - Value* operator->() const {return &operator*();} - - //! Advance to next item in queue - concurrent_queue_iterator& operator++() { - advance(); - return *this; - } - - //! Post increment - Value* operator++(int) { - Value* result = &operator*(); - operator++(); - return result; - } -}; // concurrent_queue_iterator - - -template -bool operator==( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ) { - return i.my_item==j.my_item; -} - -template -bool operator!=( const concurrent_queue_iterator& i, const concurrent_queue_iterator& j ) { - return i.my_item!=j.my_item; -} - -} // namespace internal; - -//! @endcond - -} // namespace tbb - -#endif /* __TBB__concurrent_queue_impl_H */ diff --git a/lib/3rdParty/tbb/include/tbb/internal/_concurrent_unordered_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_concurrent_unordered_impl.h deleted file mode 100644 index b34bc16f..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_concurrent_unordered_impl.h +++ /dev/null @@ -1,1528 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -/* Container implementations in this header are based on PPL implementations - provided by Microsoft. */ - -#ifndef __TBB__concurrent_unordered_impl_H -#define __TBB__concurrent_unordered_impl_H -#if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) && !defined(__TBB_concurrent_hash_map_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -#include "../tbb_stddef.h" - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include -#include // Need std::pair -#include // Need std::equal_to (in ../concurrent_unordered_*.h) -#include // For tbb_hasher -#include // Need std::memset -#include __TBB_STD_SWAP_HEADER - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -#include "../atomic.h" -#include "../tbb_exception.h" -#include "../tbb_allocator.h" - -#if __TBB_INITIALIZER_LISTS_PRESENT - #include -#endif - -#include "_tbb_hash_compare_impl.h" - -namespace tbb { -namespace interface5 { -//! @cond INTERNAL -namespace internal { - -template -class split_ordered_list; -template -class concurrent_unordered_base; - -// Forward list iterators (without skipping dummy elements) -template -class flist_iterator : public std::iterator -{ - template - friend class split_ordered_list; - template - friend class concurrent_unordered_base; - template - friend class flist_iterator; - - typedef typename Solist::nodeptr_t nodeptr_t; -public: - typedef typename Solist::value_type value_type; - typedef typename Solist::difference_type difference_type; - typedef typename Solist::pointer pointer; - typedef typename Solist::reference reference; - - flist_iterator() : my_node_ptr(0) {} - flist_iterator( const flist_iterator &other ) - : my_node_ptr(other.my_node_ptr) {} - - reference operator*() const { return my_node_ptr->my_element; } - pointer operator->() const { return &**this; } - - flist_iterator& operator++() { - my_node_ptr = my_node_ptr->my_next; - return *this; - } - - flist_iterator operator++(int) { - flist_iterator tmp = *this; - ++*this; - return tmp; - } - -protected: - flist_iterator(nodeptr_t pnode) : my_node_ptr(pnode) {} - nodeptr_t get_node_ptr() const { return my_node_ptr; } - - nodeptr_t my_node_ptr; - - template - friend bool operator==( const flist_iterator &i, const flist_iterator &j ); - template - friend bool operator!=( const flist_iterator& i, const flist_iterator& j ); -}; - -template -bool operator==( const flist_iterator &i, const flist_iterator &j ) { - return i.my_node_ptr == j.my_node_ptr; -} -template -bool operator!=( const flist_iterator& i, const flist_iterator& j ) { - return i.my_node_ptr != j.my_node_ptr; -} - -// Split-order list iterators, needed to skip dummy elements -template -class solist_iterator : public flist_iterator -{ - typedef flist_iterator base_type; - typedef typename Solist::nodeptr_t nodeptr_t; - using base_type::get_node_ptr; - template - friend class split_ordered_list; - template - friend class solist_iterator; - template - friend bool operator==( const solist_iterator &i, const solist_iterator &j ); - template - friend bool operator!=( const solist_iterator& i, const solist_iterator& j ); - - const Solist *my_list_ptr; - solist_iterator(nodeptr_t pnode, const Solist *plist) : base_type(pnode), my_list_ptr(plist) {} - -public: - typedef typename Solist::value_type value_type; - typedef typename Solist::difference_type difference_type; - typedef typename Solist::pointer pointer; - typedef typename Solist::reference reference; - - solist_iterator() {} - solist_iterator(const solist_iterator &other ) - : base_type(other), my_list_ptr(other.my_list_ptr) {} - - reference operator*() const { - return this->base_type::operator*(); - } - - pointer operator->() const { - return (&**this); - } - - solist_iterator& 
operator++() { - do ++(*(base_type *)this); - while (get_node_ptr() != NULL && get_node_ptr()->is_dummy()); - - return (*this); - } - - solist_iterator operator++(int) { - solist_iterator tmp = *this; - do ++*this; - while (get_node_ptr() != NULL && get_node_ptr()->is_dummy()); - - return (tmp); - } -}; - -template -bool operator==( const solist_iterator &i, const solist_iterator &j ) { - return i.my_node_ptr == j.my_node_ptr && i.my_list_ptr == j.my_list_ptr; -} -template -bool operator!=( const solist_iterator& i, const solist_iterator& j ) { - return i.my_node_ptr != j.my_node_ptr || i.my_list_ptr != j.my_list_ptr; -} - -// Forward type and class definitions -typedef size_t sokey_t; - - -// Forward list in which elements are sorted in a split-order -template -class split_ordered_list -{ -public: - typedef split_ordered_list self_type; - typedef typename Allocator::template rebind::other allocator_type; - struct node; - typedef node *nodeptr_t; - - typedef typename allocator_type::size_type size_type; - typedef typename allocator_type::difference_type difference_type; - typedef typename allocator_type::pointer pointer; - typedef typename allocator_type::const_pointer const_pointer; - typedef typename allocator_type::reference reference; - typedef typename allocator_type::const_reference const_reference; - typedef typename allocator_type::value_type value_type; - - typedef solist_iterator const_iterator; - typedef solist_iterator iterator; - typedef flist_iterator raw_const_iterator; - typedef flist_iterator raw_iterator; - - // Node that holds the element in a split-ordered list - struct node : tbb::internal::no_assign - { - private: - // for compilers that try to generate default constructors though they are not needed. - node(); // VS 2008, 2010, 2012 - public: - // Initialize the node with the given order key - void init(sokey_t order_key) { - my_order_key = order_key; - my_next = NULL; - } - - // Return the order key (needed for hashing) - sokey_t get_order_key() const { // TODO: remove - return my_order_key; - } - - // Inserts the new element in the list in an atomic fashion - nodeptr_t atomic_set_next(nodeptr_t new_node, nodeptr_t current_node) - { - // Try to change the next pointer on the current element to a new element, only if it still points to the cached next - nodeptr_t exchange_node = tbb::internal::as_atomic(my_next).compare_and_swap(new_node, current_node); - - if (exchange_node == current_node) // TODO: why this branch? - { - // Operation succeeded, return the new node - return new_node; - } - else - { - // Operation failed, return the "interfering" node - return exchange_node; - } - } - - // Checks if this element in the list is a dummy, order enforcing node. Dummy nodes are used by buckets - // in the hash table to quickly index into the right subsection of the split-ordered list. 
-        bool is_dummy() const {
-            return (my_order_key & 0x1) == 0;
-        }
-
-
-        nodeptr_t  my_next;      // Next element in the list
-        value_type my_element;   // Element storage
-        sokey_t    my_order_key; // Order key for this element
-    };
-
-    // Allocate a new node with the given order key; used to allocate dummy nodes
-    nodeptr_t create_node(sokey_t order_key) {
-        nodeptr_t pnode = my_node_allocator.allocate(1);
-        pnode->init(order_key);
-        return (pnode);
-    }
-
-    // Allocate a new node with the given order key and value
-    template<typename Arg>
-    nodeptr_t create_node(sokey_t order_key, __TBB_FORWARDING_REF(Arg) t,
-                          /*AllowCreate=*/tbb::internal::true_type=tbb::internal::true_type()){
-        nodeptr_t pnode = my_node_allocator.allocate(1);
-
-        //TODO: use RAII scoped guard instead of explicit catch
-        __TBB_TRY {
-            new(static_cast<void*>(&pnode->my_element)) T(tbb::internal::forward<Arg>(t));
-            pnode->init(order_key);
-        } __TBB_CATCH(...) {
-            my_node_allocator.deallocate(pnode, 1);
-            __TBB_RETHROW();
-        }
-
-        return (pnode);
-    }
-
-    // A helper to avoid excessive requirements in internal_insert
-    template<typename Arg>
-    nodeptr_t create_node(sokey_t, __TBB_FORWARDING_REF(Arg),
-                          /*AllowCreate=*/tbb::internal::false_type){
-        __TBB_ASSERT(false, "This compile-time helper should never get called");
-        return nodeptr_t();
-    }
-
-    // Allocate a new node with the given parameters for constructing value
-    template<typename __TBB_PARAMETER_PACK Args>
-    nodeptr_t create_node_v( __TBB_FORWARDING_REF(Args) __TBB_PARAMETER_PACK args){
-        nodeptr_t pnode = my_node_allocator.allocate(1);
-
-        //TODO: use RAII scoped guard instead of explicit catch
-        __TBB_TRY {
-            new(static_cast<void*>(&pnode->my_element)) T(__TBB_PACK_EXPANSION(tbb::internal::forward<Args>(args)));
-        } __TBB_CATCH(...) {
-            my_node_allocator.deallocate(pnode, 1);
-            __TBB_RETHROW();
-        }
-
-        return (pnode);
-    }
-
-    split_ordered_list(allocator_type a = allocator_type())
-        : my_node_allocator(a), my_element_count(0)
-    {
-        // Immediately allocate a dummy node with order key of 0. This node
-        // will always be the head of the list.
- my_head = create_node(sokey_t(0)); - } - - ~split_ordered_list() - { - // Clear the list - clear(); - - // Remove the head element which is not cleared by clear() - nodeptr_t pnode = my_head; - my_head = NULL; - - __TBB_ASSERT(pnode != NULL && pnode->my_next == NULL, "Invalid head list node"); - - destroy_node(pnode); - } - - // Common forward list functions - - allocator_type get_allocator() const { - return (my_node_allocator); - } - - void clear() { - nodeptr_t pnext; - nodeptr_t pnode = my_head; - - __TBB_ASSERT(my_head != NULL, "Invalid head list node"); - pnext = pnode->my_next; - pnode->my_next = NULL; - pnode = pnext; - - while (pnode != NULL) - { - pnext = pnode->my_next; - destroy_node(pnode); - pnode = pnext; - } - - my_element_count = 0; - } - - // Returns a first non-dummy element in the SOL - iterator begin() { - return first_real_iterator(raw_begin()); - } - - // Returns a first non-dummy element in the SOL - const_iterator begin() const { - return first_real_iterator(raw_begin()); - } - - iterator end() { - return (iterator(0, this)); - } - - const_iterator end() const { - return (const_iterator(0, this)); - } - - const_iterator cbegin() const { - return (((const self_type *)this)->begin()); - } - - const_iterator cend() const { - return (((const self_type *)this)->end()); - } - - // Checks if the number of elements (non-dummy) is 0 - bool empty() const { - return (my_element_count == 0); - } - - // Returns the number of non-dummy elements in the list - size_type size() const { - return my_element_count; - } - - // Returns the maximum size of the list, determined by the allocator - size_type max_size() const { - return my_node_allocator.max_size(); - } - - // Swaps 'this' list with the passed in one - void swap(self_type& other) - { - if (this == &other) - { - // Nothing to do - return; - } - - std::swap(my_element_count, other.my_element_count); - std::swap(my_head, other.my_head); - } - - // Split-order list functions - - // Returns a first element in the SOL, which is always a dummy - raw_iterator raw_begin() { - return raw_iterator(my_head); - } - - // Returns a first element in the SOL, which is always a dummy - raw_const_iterator raw_begin() const { - return raw_const_iterator(my_head); - } - - raw_iterator raw_end() { - return raw_iterator(0); - } - - raw_const_iterator raw_end() const { - return raw_const_iterator(0); - } - - static sokey_t get_order_key(const raw_const_iterator& it) { - return it.get_node_ptr()->get_order_key(); - } - - static sokey_t get_safe_order_key(const raw_const_iterator& it) { - if( !it.get_node_ptr() ) return ~sokey_t(0); - return it.get_node_ptr()->get_order_key(); - } - - // Returns a public iterator version of the internal iterator. Public iterator must not - // be a dummy private iterator. - iterator get_iterator(raw_iterator it) { - __TBB_ASSERT(it.get_node_ptr() == NULL || !it.get_node_ptr()->is_dummy(), "Invalid user node (dummy)"); - return iterator(it.get_node_ptr(), this); - } - - // Returns a public iterator version of the internal iterator. Public iterator must not - // be a dummy private iterator. 
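
// The order keys returned by get_order_key() above are bit-reversed hashes.
// Reversing the bits makes every element of one bucket occupy a contiguous
// run of the single sorted list, which is the core split-ordering trick.
// A sketch of the encoding, consistent with the is_dummy() parity test
// earlier; the real code uses __TBB_ReverseBits, the portable loop below is
// only illustrative:
#include <cstddef>

typedef std::size_t sokey_t;

inline sokey_t reverse_bits(sokey_t x) {
    sokey_t r = 0;
    for (unsigned i = 0; i < sizeof(sokey_t) * 8; ++i, x >>= 1)
        r = (r << 1) | (x & 1);
    return r;
}

inline sokey_t regular_key(sokey_t hash)   { return reverse_bits(hash) | 0x1; } // odd: real element
inline sokey_t dummy_key(sokey_t bucket)   { return reverse_bits(bucket); }     // even: bucket marker
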
- const_iterator get_iterator(raw_const_iterator it) const { - __TBB_ASSERT(it.get_node_ptr() == NULL || !it.get_node_ptr()->is_dummy(), "Invalid user node (dummy)"); - return const_iterator(it.get_node_ptr(), this); - } - - // Returns a non-const version of the raw_iterator - raw_iterator get_iterator(raw_const_iterator it) { - return raw_iterator(it.get_node_ptr()); - } - - // Returns a non-const version of the iterator - static iterator get_iterator(const_iterator it) { - return iterator(it.my_node_ptr, it.my_list_ptr); - } - - // Returns a public iterator version of a first non-dummy internal iterator at or after - // the passed in internal iterator. - iterator first_real_iterator(raw_iterator it) - { - // Skip all dummy, internal only iterators - while (it != raw_end() && it.get_node_ptr()->is_dummy()) - ++it; - - return iterator(it.get_node_ptr(), this); - } - - // Returns a public iterator version of a first non-dummy internal iterator at or after - // the passed in internal iterator. - const_iterator first_real_iterator(raw_const_iterator it) const - { - // Skip all dummy, internal only iterators - while (it != raw_end() && it.get_node_ptr()->is_dummy()) - ++it; - - return const_iterator(it.get_node_ptr(), this); - } - - // Erase an element using the allocator - void destroy_node(nodeptr_t pnode) { - if (!pnode->is_dummy()) my_node_allocator.destroy(pnode); - my_node_allocator.deallocate(pnode, 1); - } - - // Try to insert a new element in the list. - // If insert fails, return the node that was inserted instead. - static nodeptr_t try_insert_atomic(nodeptr_t previous, nodeptr_t new_node, nodeptr_t current_node) { - new_node->my_next = current_node; - return previous->atomic_set_next(new_node, current_node); - } - - // Insert a new element between passed in iterators - std::pair try_insert(raw_iterator it, raw_iterator next, nodeptr_t pnode, size_type *new_count) - { - nodeptr_t inserted_node = try_insert_atomic(it.get_node_ptr(), pnode, next.get_node_ptr()); - - if (inserted_node == pnode) - { - // If the insert succeeded, check that the order is correct and increment the element count - check_range(it, next); - *new_count = tbb::internal::as_atomic(my_element_count).fetch_and_increment(); - return std::pair(iterator(pnode, this), true); - } - else - { - return std::pair(end(), false); - } - } - - // Insert a new dummy element, starting search at a parent dummy element - raw_iterator insert_dummy(raw_iterator it, sokey_t order_key) - { - raw_iterator last = raw_end(); - raw_iterator where = it; - - __TBB_ASSERT(where != last, "Invalid head node"); - - ++where; - - // Create a dummy element up front, even though it may be discarded (due to concurrent insertion) - nodeptr_t dummy_node = create_node(order_key); - - for (;;) - { - __TBB_ASSERT(it != last, "Invalid head list node"); - - // If the head iterator is at the end of the list, or past the point where this dummy - // node needs to be inserted, then try to insert it. 
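
// try_insert_atomic above is the heart of the lock-free insert: link the new
// node first, then publish it with a single compare-and-swap on the
// predecessor's next pointer. A standalone sketch with std::atomic; this is
// safe only because, as the comments note, concurrent erase is unsupported,
// so a node a racing thread is inspecting is never reclaimed:
#include <atomic>

struct node_t { std::atomic<node_t*> next; };

// Returns new_node on success, or the interfering node on failure so the
// caller can rescan from the same predecessor.
inline node_t* try_insert_atomic(node_t* prev, node_t* new_node, node_t* expected) {
    new_node->next.store(expected, std::memory_order_relaxed);
    node_t* cur = expected;
    return prev->next.compare_exchange_strong(cur, new_node) ? new_node : cur;
}
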
- if (where == last || get_order_key(where) > order_key) - { - __TBB_ASSERT(get_order_key(it) < order_key, "Invalid node order in the list"); - - // Try to insert it in the right place - nodeptr_t inserted_node = try_insert_atomic(it.get_node_ptr(), dummy_node, where.get_node_ptr()); - - if (inserted_node == dummy_node) - { - // Insertion succeeded, check the list for order violations - check_range(it, where); - return raw_iterator(dummy_node); - } - else - { - // Insertion failed: either dummy node was inserted by another thread, or - // a real element was inserted at exactly the same place as dummy node. - // Proceed with the search from the previous location where order key was - // known to be larger (note: this is legal only because there is no safe - // concurrent erase operation supported). - where = it; - ++where; - continue; - } - } - else if (get_order_key(where) == order_key) - { - // Another dummy node with the same value found, discard the new one. - destroy_node(dummy_node); - return where; - } - - // Move the iterator forward - it = where; - ++where; - } - - } - - // This erase function can handle both real and dummy nodes - void erase_node(raw_iterator previous, raw_const_iterator& where) - { - nodeptr_t pnode = (where++).get_node_ptr(); - nodeptr_t prevnode = previous.get_node_ptr(); - __TBB_ASSERT(prevnode->my_next == pnode, "Erase must take consecutive iterators"); - prevnode->my_next = pnode->my_next; - - destroy_node(pnode); - } - - // Erase the element (previous node needs to be passed because this is a forward only list) - iterator erase_node(raw_iterator previous, const_iterator where) - { - raw_const_iterator it = where; - erase_node(previous, it); - my_element_count--; - - return get_iterator(first_real_iterator(it)); - } - - // Move all elements from the passed in split-ordered list to this one - void move_all(self_type& source) - { - raw_const_iterator first = source.raw_begin(); - raw_const_iterator last = source.raw_end(); - - if (first == last) - return; - - nodeptr_t previous_node = my_head; - raw_const_iterator begin_iterator = first++; - - // Move all elements one by one, including dummy ones - for (raw_const_iterator it = first; it != last;) - { - nodeptr_t pnode = it.get_node_ptr(); - - nodeptr_t dummy_node = pnode->is_dummy() ? create_node(pnode->get_order_key()) : create_node(pnode->get_order_key(), pnode->my_element); - previous_node = try_insert_atomic(previous_node, dummy_node, NULL); - __TBB_ASSERT(previous_node != NULL, "Insertion must succeed"); - raw_const_iterator where = it++; - source.erase_node(get_iterator(begin_iterator), where); - } - check_range(); - } - - -private: - //Need to setup private fields of split_ordered_list in move constructor and assignment of concurrent_unordered_base - template - friend class concurrent_unordered_base; - - // Check the list for order violations - void check_range( raw_iterator first, raw_iterator last ) - { -#if TBB_USE_ASSERT - for (raw_iterator it = first; it != last; ++it) - { - raw_iterator next = it; - ++next; - - __TBB_ASSERT(next == raw_end() || get_order_key(next) >= get_order_key(it), "!!! 
List order inconsistency !!!"); - } -#else - tbb::internal::suppress_unused_warning(first, last); -#endif - } - void check_range() - { -#if TBB_USE_ASSERT - check_range( raw_begin(), raw_end() ); -#endif - } - - typename allocator_type::template rebind::other my_node_allocator; // allocator object for nodes - size_type my_element_count; // Total item count, not counting dummy nodes - nodeptr_t my_head; // pointer to head node -}; - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -template -class concurrent_unordered_base : public Traits -{ -protected: - // Type definitions - typedef concurrent_unordered_base self_type; - typedef typename Traits::value_type value_type; - typedef typename Traits::key_type key_type; - typedef typename Traits::hash_compare hash_compare; - typedef typename Traits::allocator_type allocator_type; - typedef typename hash_compare::hasher hasher; - typedef typename hash_compare::key_equal key_equal; - typedef typename allocator_type::pointer pointer; - typedef typename allocator_type::const_pointer const_pointer; - typedef typename allocator_type::reference reference; - typedef typename allocator_type::const_reference const_reference; - typedef typename allocator_type::size_type size_type; - typedef typename allocator_type::difference_type difference_type; - typedef split_ordered_list solist_t; - typedef typename solist_t::nodeptr_t nodeptr_t; - // Iterators that walk the entire split-order list, including dummy nodes - typedef typename solist_t::raw_iterator raw_iterator; - typedef typename solist_t::raw_const_iterator raw_const_iterator; - typedef typename solist_t::iterator iterator; // TODO: restore const iterator for unordered_sets - typedef typename solist_t::const_iterator const_iterator; - typedef iterator local_iterator; - typedef const_iterator const_local_iterator; - using Traits::my_hash_compare; - using Traits::get_key; - using Traits::allow_multimapping; - - static const size_type initial_bucket_number = 8; // Initial number of buckets -private: - typedef std::pair pairii_t; - typedef std::pair paircc_t; - - static size_type const pointers_per_table = sizeof(size_type) * 8; // One bucket segment per bit - static const size_type initial_bucket_load = 4; // Initial maximum number of elements per bucket - - struct call_internal_clear_on_exit{ - concurrent_unordered_base* my_instance; - call_internal_clear_on_exit(concurrent_unordered_base* instance) : my_instance(instance) {} - void dismiss(){ my_instance = NULL;} - ~call_internal_clear_on_exit(){ - if (my_instance){ - my_instance->internal_clear(); - } - } - }; -protected: - // Constructors/Destructors - concurrent_unordered_base(size_type n_of_buckets = initial_bucket_number, - const hash_compare& hc = hash_compare(), const allocator_type& a = allocator_type()) - : Traits(hc), my_solist(a), - my_allocator(a), my_maximum_bucket_size((float) initial_bucket_load) - { - if( n_of_buckets == 0) ++n_of_buckets; - my_number_of_buckets = size_type(1)<<__TBB_Log2((uintptr_t)n_of_buckets*2-1); // round up to power of 2 - internal_init(); - } - - concurrent_unordered_base(const concurrent_unordered_base& right, const allocator_type& a) - : Traits(right.my_hash_compare), my_solist(a), my_allocator(a) - { - internal_init(); - internal_copy(right); - } - - concurrent_unordered_base(const concurrent_unordered_base& right) - : Traits(right.my_hash_compare), my_solist(right.get_allocator()), 
my_allocator(right.get_allocator()) - { - //FIXME:exception safety seems to be broken here - internal_init(); - internal_copy(right); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - concurrent_unordered_base(concurrent_unordered_base&& right) - : Traits(right.my_hash_compare), my_solist(right.get_allocator()), my_allocator(right.get_allocator()) - { - internal_init(); - swap(right); - } - - concurrent_unordered_base(concurrent_unordered_base&& right, const allocator_type& a) - : Traits(right.my_hash_compare), my_solist(a), my_allocator(a) - { - call_internal_clear_on_exit clear_buckets_on_exception(this); - - internal_init(); - if (a == right.get_allocator()){ - this->swap(right); - }else{ - my_maximum_bucket_size = right.my_maximum_bucket_size; - my_number_of_buckets = right.my_number_of_buckets; - my_solist.my_element_count = right.my_solist.my_element_count; - - if (! right.my_solist.empty()){ - nodeptr_t previous_node = my_solist.my_head; - - // Move all elements one by one, including dummy ones - for (raw_const_iterator it = ++(right.my_solist.raw_begin()), last = right.my_solist.raw_end(); it != last; ++it) - { - const nodeptr_t pnode = it.get_node_ptr(); - nodeptr_t node; - if (pnode->is_dummy()) { - node = my_solist.create_node(pnode->get_order_key()); - size_type bucket = __TBB_ReverseBits(pnode->get_order_key()) % my_number_of_buckets; - set_bucket(bucket, node); - }else{ - node = my_solist.create_node(pnode->get_order_key(), std::move(pnode->my_element)); - } - - previous_node = my_solist.try_insert_atomic(previous_node, node, NULL); - __TBB_ASSERT(previous_node != NULL, "Insertion of node failed. Concurrent inserts in constructor ?"); - } - my_solist.check_range(); - } - } - - clear_buckets_on_exception.dismiss(); - } - -#endif // __TBB_CPP11_RVALUE_REF_PRESENT - - concurrent_unordered_base& operator=(const concurrent_unordered_base& right) { - if (this != &right) - internal_copy(right); - return (*this); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - concurrent_unordered_base& operator=(concurrent_unordered_base&& other) - { - if(this != &other){ - typedef typename tbb::internal::allocator_traits::propagate_on_container_move_assignment pocma_t; - if(pocma_t::value || this->my_allocator == other.my_allocator) { - concurrent_unordered_base trash (std::move(*this)); - swap(other); - if (pocma_t::value) { - using std::swap; - //TODO: swapping allocators here may be a problem, replace with single direction moving - swap(this->my_solist.my_node_allocator, other.my_solist.my_node_allocator); - swap(this->my_allocator, other.my_allocator); - } - } else { - concurrent_unordered_base moved_copy(std::move(other),this->my_allocator); - this->swap(moved_copy); - } - } - return *this; - } - -#endif // __TBB_CPP11_RVALUE_REF_PRESENT - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! 
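
// The move assignment above follows the standard allocator-aware pattern: if
// the allocator propagates on move assignment (POCMA) or the two allocators
// compare equal, the representation is stolen in O(1); otherwise every
// element is moved individually into memory owned by the destination's
// allocator. A generic sketch of that dispatch; Container and its two member
// functions are hypothetical stand-ins:
#include <memory>

template<typename Container, typename Alloc>
void move_assign(Container& dst, Container& src, Alloc& dst_alloc, const Alloc& src_alloc) {
    typedef std::allocator_traits<Alloc> traits;
    if (traits::propagate_on_container_move_assignment::value || dst_alloc == src_alloc) {
        dst.steal_representation(src);   // O(1): swap internal pointers (hypothetical)
        // A complete implementation also move-assigns the allocator when
        // POCMA is true, which is what the TODO in the original refers to.
    } else {
        dst.move_elements_from(src);     // O(n): per-element move (hypothetical)
    }
}
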
assignment operator from initializer_list - concurrent_unordered_base& operator=(std::initializer_list il) - { - this->clear(); - this->insert(il.begin(),il.end()); - return (*this); - } -#endif // __TBB_INITIALIZER_LISTS_PRESENT - - - ~concurrent_unordered_base() { - // Delete all node segments - internal_clear(); - } - -public: - allocator_type get_allocator() const { - return my_solist.get_allocator(); - } - - // Size and capacity function - bool empty() const { - return my_solist.empty(); - } - - size_type size() const { - return my_solist.size(); - } - - size_type max_size() const { - return my_solist.max_size(); - } - - // Iterators - iterator begin() { - return my_solist.begin(); - } - - const_iterator begin() const { - return my_solist.begin(); - } - - iterator end() { - return my_solist.end(); - } - - const_iterator end() const { - return my_solist.end(); - } - - const_iterator cbegin() const { - return my_solist.cbegin(); - } - - const_iterator cend() const { - return my_solist.cend(); - } - - // Parallel traversal support - class const_range_type : tbb::internal::no_assign { - const concurrent_unordered_base &my_table; - raw_const_iterator my_begin_node; - raw_const_iterator my_end_node; - mutable raw_const_iterator my_midpoint_node; - public: - //! Type for size of a range - typedef typename concurrent_unordered_base::size_type size_type; - typedef typename concurrent_unordered_base::value_type value_type; - typedef typename concurrent_unordered_base::reference reference; - typedef typename concurrent_unordered_base::difference_type difference_type; - typedef typename concurrent_unordered_base::const_iterator iterator; - - //! True if range is empty. - bool empty() const {return my_begin_node == my_end_node;} - - //! True if range can be partitioned into two subranges. - bool is_divisible() const { - return my_midpoint_node != my_end_node; - } - //! Split range. - const_range_type( const_range_type &r, split ) : - my_table(r.my_table), my_end_node(r.my_end_node) - { - r.my_end_node = my_begin_node = r.my_midpoint_node; - __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); - __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); - set_midpoint(); - r.set_midpoint(); - } - //! Init range with container and grainsize specified - const_range_type( const concurrent_unordered_base &a_table ) : - my_table(a_table), my_begin_node(a_table.my_solist.begin()), - my_end_node(a_table.my_solist.end()) - { - set_midpoint(); - } - iterator begin() const { return my_table.my_solist.get_iterator(my_begin_node); } - iterator end() const { return my_table.my_solist.get_iterator(my_end_node); } - //! The grain size for this range. - size_type grainsize() const { return 1; } - - //! Set my_midpoint_node to point approximately half way between my_begin_node and my_end_node. 
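
// All of the machinery above surfaces as the familiar unordered-map
// interface. A minimal usage sketch of the public container built on this
// base class:
#include <tbb/concurrent_unordered_map.h>
#include <string>
#include <cassert>

int main() {
    tbb::concurrent_unordered_map<std::string, int> table;
    table.insert(std::make_pair(std::string("alpha"), 1)); // safe to call concurrently
    table["beta"] = 2;                 // operator[] may also insert concurrently
    assert(table.find("alpha") != table.end());
    assert(table.size() == 2);
    // Erasure is NOT concurrency-safe, hence the unsafe_ spelling:
    table.unsafe_erase("beta");
    return 0;
}
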
- void set_midpoint() const { - if( my_begin_node == my_end_node ) // not divisible - my_midpoint_node = my_end_node; - else { - sokey_t begin_key = solist_t::get_safe_order_key(my_begin_node); - sokey_t end_key = solist_t::get_safe_order_key(my_end_node); - size_t mid_bucket = __TBB_ReverseBits( begin_key + (end_key-begin_key)/2 ) % my_table.my_number_of_buckets; - while ( !my_table.is_initialized(mid_bucket) ) mid_bucket = my_table.get_parent(mid_bucket); - if(__TBB_ReverseBits(mid_bucket) > begin_key) { - // found a dummy_node between begin and end - my_midpoint_node = my_table.my_solist.first_real_iterator(my_table.get_bucket( mid_bucket )); - } - else { - // didn't find a dummy node between begin and end. - my_midpoint_node = my_end_node; - } -#if TBB_USE_ASSERT - { - sokey_t mid_key = solist_t::get_safe_order_key(my_midpoint_node); - __TBB_ASSERT( begin_key < mid_key, "my_begin_node is after my_midpoint_node" ); - __TBB_ASSERT( mid_key <= end_key, "my_midpoint_node is after my_end_node" ); - } -#endif // TBB_USE_ASSERT - } - } - }; - - class range_type : public const_range_type { - public: - typedef typename concurrent_unordered_base::iterator iterator; - //! Split range. - range_type( range_type &r, split ) : const_range_type( r, split() ) {} - //! Init range with container and grainsize specified - range_type( const concurrent_unordered_base &a_table ) : const_range_type(a_table) {} - - iterator begin() const { return solist_t::get_iterator( const_range_type::begin() ); } - iterator end() const { return solist_t::get_iterator( const_range_type::end() ); } - }; - - range_type range() { - return range_type( *this ); - } - - const_range_type range() const { - return const_range_type( *this ); - } - - // Modifiers - std::pair insert(const value_type& value) { - return internal_insert(value); - } - - iterator insert(const_iterator, const value_type& value) { - // Ignore hint - return insert(value).first; - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - std::pair insert(value_type&& value) { - return internal_insert(std::move(value)); - } - - iterator insert(const_iterator, value_type&& value) { - // Ignore hint - return insert(std::move(value)).first; - } - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template - std::pair emplace(Args&&... args) { - nodeptr_t pnode = my_solist.create_node_v(tbb::internal::forward(args)...); - const sokey_t hashed_element_key = (sokey_t) my_hash_compare(get_key(pnode->my_element)); - const sokey_t order_key = split_order_key_regular(hashed_element_key); - pnode->init(order_key); - - return internal_insert(pnode->my_element, pnode); - } - - template - iterator emplace_hint(const_iterator, Args&&... args) { - // Ignore hint - return emplace(tbb::internal::forward(args)...).first; - } - -#endif // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif // __TBB_CPP11_RVALUE_REF_PRESENT - - template - void insert(Iterator first, Iterator last) { - for (Iterator it = first; it != last; ++it) - insert(*it); - } - -#if __TBB_INITIALIZER_LISTS_PRESENT - //! 
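
// The const_range_type/range_type classes above exist so the container can
// be handed directly to TBB's parallel algorithms; set_midpoint() locates a
// dummy node roughly halfway between the ends so that ranges split into
// balanced halves. A usage sketch (assumes C++11 lambdas):
#include <tbb/concurrent_unordered_map.h>
#include <tbb/parallel_for.h>
#include <atomic>

typedef tbb::concurrent_unordered_map<int, int> map_t;

long parallel_sum_values(const map_t& m) {
    std::atomic<long> total(0);
    tbb::parallel_for(m.range(), [&](const map_t::const_range_type& r) {
        long local = 0;
        for (map_t::const_iterator it = r.begin(); it != r.end(); ++it)
            local += it->second;
        total += local;                // one atomic add per subrange
    });
    return total.load();
}
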
Insert initializer list - void insert(std::initializer_list il) { - insert(il.begin(), il.end()); - } -#endif - - iterator unsafe_erase(const_iterator where) { - return internal_erase(where); - } - - iterator unsafe_erase(const_iterator first, const_iterator last) { - while (first != last) - unsafe_erase(first++); - return my_solist.get_iterator(first); - } - - size_type unsafe_erase(const key_type& key) { - pairii_t where = equal_range(key); - size_type item_count = internal_distance(where.first, where.second); - unsafe_erase(where.first, where.second); - return item_count; - } - - void swap(concurrent_unordered_base& right) { - if (this != &right) { - std::swap(my_hash_compare, right.my_hash_compare); // TODO: check what ADL meant here - my_solist.swap(right.my_solist); - internal_swap_buckets(right); - std::swap(my_number_of_buckets, right.my_number_of_buckets); - std::swap(my_maximum_bucket_size, right.my_maximum_bucket_size); - } - } - - // Observers - hasher hash_function() const { - return my_hash_compare.my_hash_object; - } - - key_equal key_eq() const { - return my_hash_compare.my_key_compare_object; - } - - void clear() { - // Clear list - my_solist.clear(); - - // Clear buckets - internal_clear(); - - // Initialize bucket 0 - __TBB_ASSERT(my_buckets[0] == NULL, NULL); - raw_iterator dummy_node = my_solist.raw_begin(); - set_bucket(0, dummy_node); - } - - // Lookup - iterator find(const key_type& key) { - return internal_find(key); - } - - const_iterator find(const key_type& key) const { - return const_cast(this)->internal_find(key); - } - - size_type count(const key_type& key) const { - if(allow_multimapping) { - paircc_t answer = equal_range(key); - size_type item_count = internal_distance(answer.first, answer.second); - return item_count; - } else { - return const_cast(this)->internal_find(key) == end()?0:1; - } - } - - std::pair equal_range(const key_type& key) { - return internal_equal_range(key); - } - - std::pair equal_range(const key_type& key) const { - return const_cast(this)->internal_equal_range(key); - } - - // Bucket interface - for debugging - size_type unsafe_bucket_count() const { - return my_number_of_buckets; - } - - size_type unsafe_max_bucket_count() const { - return segment_size(pointers_per_table-1); - } - - size_type unsafe_bucket_size(size_type bucket) { - size_type item_count = 0; - if (is_initialized(bucket)) { - raw_iterator it = get_bucket(bucket); - ++it; - for (; it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy(); ++it) - ++item_count; - } - return item_count; - } - - size_type unsafe_bucket(const key_type& key) const { - sokey_t order_key = (sokey_t) my_hash_compare(key); - size_type bucket = order_key % my_number_of_buckets; - return bucket; - } - - // If the bucket is initialized, return a first non-dummy element in it - local_iterator unsafe_begin(size_type bucket) { - if (!is_initialized(bucket)) - return end(); - - raw_iterator it = get_bucket(bucket); - return my_solist.first_real_iterator(it); - } - - // If the bucket is initialized, return a first non-dummy element in it - const_local_iterator unsafe_begin(size_type bucket) const - { - if (!is_initialized(bucket)) - return end(); - - raw_const_iterator it = get_bucket(bucket); - return my_solist.first_real_iterator(it); - } - - // @REVIEW: Takes O(n) - // Returns the iterator after the last non-dummy element in the bucket - local_iterator unsafe_end(size_type bucket) - { - if (!is_initialized(bucket)) - return end(); - - raw_iterator it = get_bucket(bucket); - - // Find the end of 
the bucket, denoted by the dummy element - do ++it; - while(it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy()); - - // Return the first real element past the end of the bucket - return my_solist.first_real_iterator(it); - } - - // @REVIEW: Takes O(n) - // Returns the iterator after the last non-dummy element in the bucket - const_local_iterator unsafe_end(size_type bucket) const - { - if (!is_initialized(bucket)) - return end(); - - raw_const_iterator it = get_bucket(bucket); - - // Find the end of the bucket, denoted by the dummy element - do ++it; - while(it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy()); - - // Return the first real element past the end of the bucket - return my_solist.first_real_iterator(it); - } - - const_local_iterator unsafe_cbegin(size_type bucket) const { - return ((const self_type *) this)->unsafe_begin(bucket); - } - - const_local_iterator unsafe_cend(size_type bucket) const { - return ((const self_type *) this)->unsafe_end(bucket); - } - - // Hash policy - float load_factor() const { - return (float) size() / (float) unsafe_bucket_count(); - } - - float max_load_factor() const { - return my_maximum_bucket_size; - } - - void max_load_factor(float newmax) { - if (newmax != newmax || newmax < 0) - tbb::internal::throw_exception(tbb::internal::eid_invalid_load_factor); - my_maximum_bucket_size = newmax; - } - - // This function is a noop, because the underlying split-ordered list - // is already sorted, so an increase in the bucket number will be - // reflected next time this bucket is touched. - void rehash(size_type buckets) { - size_type current_buckets = my_number_of_buckets; - if (current_buckets >= buckets) - return; - my_number_of_buckets = size_type(1)<<__TBB_Log2((uintptr_t)buckets*2-1); // round up to power of 2 - } - -private: - - // Initialize the hash and keep the first bucket open - void internal_init() { - // Allocate an array of segment pointers - memset(my_buckets, 0, pointers_per_table * sizeof(void *)); - - // Initialize bucket 0 - raw_iterator dummy_node = my_solist.raw_begin(); - set_bucket(0, dummy_node); - } - - void internal_clear() { - for (size_type index = 0; index < pointers_per_table; ++index) { - if (my_buckets[index] != NULL) { - size_type sz = segment_size(index); - for (size_type index2 = 0; index2 < sz; ++index2) - my_allocator.destroy(&my_buckets[index][index2]); - my_allocator.deallocate(my_buckets[index], sz); - my_buckets[index] = 0; - } - } - } - - void internal_copy(const self_type& right) { - clear(); - - my_maximum_bucket_size = right.my_maximum_bucket_size; - my_number_of_buckets = right.my_number_of_buckets; - - __TBB_TRY { - insert(right.begin(), right.end()); - my_hash_compare = right.my_hash_compare; - } __TBB_CATCH(...) { - my_solist.clear(); - __TBB_RETHROW(); - } - } - - void internal_swap_buckets(concurrent_unordered_base& right) - { - // Swap all node segments - for (size_type index = 0; index < pointers_per_table; ++index) - { - raw_iterator * iterator_pointer = my_buckets[index]; - my_buckets[index] = right.my_buckets[index]; - right.my_buckets[index] = iterator_pointer; - } - } - - //TODO: why not use std::distance? 
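
// Both the constructor earlier and rehash() above round the requested bucket
// count up to a power of two with size_type(1)<<__TBB_Log2(n*2-1). A sketch
// of why that works: Log2 yields the index of the highest set bit, and 2n-1
// has its highest bit exactly at the next power of two at or above n. The
// portable helper below stands in for __TBB_Log2:
#include <cstddef>

inline std::size_t floor_log2(std::size_t x) {  // index of highest set bit
    std::size_t r = 0;
    while (x >>= 1) ++r;
    return r;
}

inline std::size_t round_up_to_pow2(std::size_t n) {
    return std::size_t(1) << floor_log2(n * 2 - 1);  // e.g. 5 -> 8, 8 -> 8
}
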
- // Hash APIs - static size_type internal_distance(const_iterator first, const_iterator last) - { - size_type num = 0; - - for (const_iterator it = first; it != last; ++it) - ++num; - - return num; - } - - // Insert an element in the hash given its value - template - std::pair internal_insert(__TBB_FORWARDING_REF(ValueType) value, nodeptr_t pnode = NULL) - { - const key_type *pkey = &get_key(value); - sokey_t hash_key = (sokey_t) my_hash_compare(*pkey); - size_type new_count = 0; - sokey_t order_key = split_order_key_regular(hash_key); - raw_iterator previous = prepare_bucket(hash_key); - raw_iterator last = my_solist.raw_end(); - __TBB_ASSERT(previous != last, "Invalid head node"); - - // First node is a dummy node - for (raw_iterator where = previous;;) - { - ++where; - if (where == last || solist_t::get_order_key(where) > order_key || - // if multimapped, stop at the first item equal to us. - (allow_multimapping && solist_t::get_order_key(where) == order_key && - !my_hash_compare(get_key(*where), *pkey))) // TODO: fix negation - { - if (!pnode) { - pnode = my_solist.create_node(order_key, tbb::internal::forward(value), AllowCreate()); - // If the value was moved, the known reference to key might be invalid - pkey = &get_key(pnode->my_element); - } - - // Try to insert 'pnode' between 'previous' and 'where' - std::pair result = my_solist.try_insert(previous, where, pnode, &new_count); - - if (result.second) - { - // Insertion succeeded, adjust the table size, if needed - adjust_table_size(new_count, my_number_of_buckets); - return result; - } - else - { - // Insertion failed: either the same node was inserted by another thread, or - // another element was inserted at exactly the same place as this node. - // Proceed with the search from the previous location where order key was - // known to be larger (note: this is legal only because there is no safe - // concurrent erase operation supported). - where = previous; - continue; - } - } - else if (!allow_multimapping && solist_t::get_order_key(where) == order_key && - !my_hash_compare(get_key(*where), *pkey)) // TODO: fix negation - { // Element already in the list, return it - if (pnode) - my_solist.destroy_node(pnode); - return std::pair(my_solist.get_iterator(where), false); - } - // Move the iterator forward - previous = where; - } - } - - // Find the element in the split-ordered list - iterator internal_find(const key_type& key) - { - sokey_t hash_key = (sokey_t) my_hash_compare(key); - sokey_t order_key = split_order_key_regular(hash_key); - raw_iterator last = my_solist.raw_end(); - - for (raw_iterator it = prepare_bucket(hash_key); it != last; ++it) - { - if (solist_t::get_order_key(it) > order_key) - { - // If the order key is smaller than the current order key, the element - // is not in the hash. - return end(); - } - else if (solist_t::get_order_key(it) == order_key) - { - // The fact that order keys match does not mean that the element is found. - // Key function comparison has to be performed to check whether this is the - // right element. If not, keep searching while order key is the same. - if (!my_hash_compare(get_key(*it), key)) // TODO: fix negation - return my_solist.get_iterator(it); - } - } - - return end(); - } - - // Erase an element from the list. This is not a concurrency safe function. 
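
// internal_insert above combines the earlier pieces: walk the sorted list
// until the order key would be exceeded, attempt the CAS, and on failure
// rescan from the same predecessor (legal only because concurrent erase is
// unsupported). A condensed sketch of that retry loop, in the spirit of the
// try_insert_atomic sketch given earlier, with a key field added:
#include <atomic>
#include <cstddef>

struct knode { std::atomic<knode*> next; std::size_t key; };

void insert_sorted(knode* head, knode* n) {
    knode* prev = head;                   // head is a permanent dummy node
    for (;;) {
        knode* where = prev->next.load();
        if (where == nullptr || where->key > n->key) {
            n->next.store(where);
            knode* cur = where;
            if (prev->next.compare_exchange_strong(cur, n))
                return;                   // published between prev and where
            // lost the race: re-read prev->next and retry from prev
        } else {
            prev = where;                 // keep walking forward
        }
    }
}
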
- iterator internal_erase(const_iterator it) - { - sokey_t hash_key = (sokey_t) my_hash_compare(get_key(*it)); - raw_iterator previous = prepare_bucket(hash_key); - raw_iterator last = my_solist.raw_end(); - __TBB_ASSERT(previous != last, "Invalid head node"); - - // First node is a dummy node - for (raw_iterator where = previous; ; previous = where) { - ++where; - if (where == last) - return end(); - else if (my_solist.get_iterator(where) == it) - return my_solist.erase_node(previous, it); - } - } - - // Return the [begin, end) pair of iterators with the same key values. - // This operation makes sense only if mapping is many-to-one. - pairii_t internal_equal_range(const key_type& key) - { - sokey_t hash_key = (sokey_t) my_hash_compare(key); - sokey_t order_key = split_order_key_regular(hash_key); - raw_iterator end_it = my_solist.raw_end(); - - for (raw_iterator it = prepare_bucket(hash_key); it != end_it; ++it) - { - if (solist_t::get_order_key(it) > order_key) - { - // There is no element with the given key - return pairii_t(end(), end()); - } - else if (solist_t::get_order_key(it) == order_key && - !my_hash_compare(get_key(*it), key)) // TODO: fix negation; also below - { - iterator first = my_solist.get_iterator(it); - iterator last = first; - do ++last; while( allow_multimapping && last != end() && !my_hash_compare(get_key(*last), key) ); - return pairii_t(first, last); - } - } - - return pairii_t(end(), end()); - } - - // Bucket APIs - void init_bucket(size_type bucket) - { - // Bucket 0 has no parent. - __TBB_ASSERT( bucket != 0, "The first bucket must always be initialized"); - - size_type parent_bucket = get_parent(bucket); - - // All parent_bucket buckets have to be initialized before this bucket is - if (!is_initialized(parent_bucket)) - init_bucket(parent_bucket); - - raw_iterator parent = get_bucket(parent_bucket); - - // Create a dummy first node in this bucket - raw_iterator dummy_node = my_solist.insert_dummy(parent, split_order_key_dummy(bucket)); - set_bucket(bucket, dummy_node); - } - - void adjust_table_size(size_type total_elements, size_type current_size) - { - // Grow the table by a factor of 2 if possible and needed - if ( ((float) total_elements / (float) current_size) > my_maximum_bucket_size ) - { - // Double the size of the hash only if size has not changed in between loads - my_number_of_buckets.compare_and_swap(2u*current_size, current_size); - //Simple "my_number_of_buckets.compare_and_swap( current_size<<1, current_size );" does not work for VC8 - //due to overzealous compiler warnings in /Wp64 mode - } - } - - size_type get_parent(size_type bucket) const - { - // Unsets bucket's most significant turned-on bit - size_type msb = __TBB_Log2((uintptr_t)bucket); - return bucket & ~(size_type(1) << msb); - } - - - // Dynamic sized array (segments) - //! @return segment index of given index in the array - static size_type segment_index_of( size_type index ) { - return size_type( __TBB_Log2( uintptr_t(index|1) ) ); - } - - //! 
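
// get_parent above finds the bucket whose dummy node precedes this bucket's
// in the list by clearing the most significant set bit; repeated application
// always terminates at bucket 0, which is why bucket 0 is created eagerly
// and why init_bucket can recurse on parents. Arithmetic sketch:
#include <cstddef>

inline std::size_t parent_bucket(std::size_t bucket) {  // precondition: bucket != 0
    std::size_t msb = 0;
    for (std::size_t x = bucket; x >>= 1; ) ++msb;      // index of highest set bit
    return bucket & ~(std::size_t(1) << msb);
}
// parent_bucket(13) == 5, parent_bucket(5) == 1, parent_bucket(1) == 0
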
@return the first array index of given segment - static size_type segment_base( size_type k ) { - return (size_type(1)< my_number_of_buckets; // Current table size - solist_t my_solist; // List where all the elements are kept - typename allocator_type::template rebind::other my_allocator; // Allocator object for segments - float my_maximum_bucket_size; // Maximum size of the bucket - atomic my_buckets[pointers_per_table]; // The segment table -}; -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -} // namespace internal -//! @endcond -} // namespace interface5 -} // namespace tbb -#endif // __TBB__concurrent_unordered_impl_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_async_msg_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_async_msg_impl.h deleted file mode 100644 index 7d2c3aef..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_async_msg_impl.h +++ /dev/null @@ -1,221 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__flow_graph_async_msg_impl_H -#define __TBB__flow_graph_async_msg_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -// included in namespace tbb::flow::interfaceX (in flow_graph.h) - -template< typename T > class async_msg; - -namespace internal { - -template< typename T, typename = void > -struct async_helpers { - typedef async_msg async_type; - typedef T filtered_type; - - static const bool is_async_type = false; - - static const void* to_void_ptr(const T& t) { - return static_cast(&t); - } - - static void* to_void_ptr(T& t) { - return static_cast(&t); - } - - static const T& from_void_ptr(const void* p) { - return *static_cast(p); - } - - static T& from_void_ptr(void* p) { - return *static_cast(p); - } - - static task* try_put_task_wrapper_impl( receiver* const this_recv, const void *p, bool is_async ) { - if ( is_async ) { - // This (T) is NOT async and incoming 'A t' IS async - // Get data from async_msg - const async_msg& msg = async_helpers< async_msg >::from_void_ptr(p); - task* const new_task = msg.my_storage->subscribe(*this_recv); - // finalize() must be called after subscribe() because set() can be called in finalize() - // and 'this_recv' client must be subscribed by this moment - msg.finalize(); - return new_task; - } else { - // Incoming 't' is NOT async - return this_recv->try_put_task( from_void_ptr(p) ); - } - } -}; - -template< typename T > -struct async_helpers< T, typename std::enable_if< std::is_base_of, T>::value >::type > { - typedef T async_type; - typedef typename T::async_msg_data_type filtered_type; - - static const bool is_async_type = true; - - // Receiver-classes use const interfaces - static const void* to_void_ptr(const T& t) { - return static_cast( &static_cast&>(t) ); - } - - static void* to_void_ptr(T& t) { - return static_cast( &static_cast&>(t) ); - } - - // Sender-classes use non-const interfaces - static const T& from_void_ptr(const void* p) { - return *static_cast( static_cast*>(p) ); - } - - static T& from_void_ptr(void* p) { - return *static_cast( static_cast*>(p) ); - } - - // Used in receiver class - static task* try_put_task_wrapper_impl(receiver* const this_recv, const void *p, bool is_async) { - if ( is_async ) { - // Both are async - return this_recv->try_put_task( from_void_ptr(p) ); - } else { - // This (T) is async and incoming 'X t' is NOT async - // Create async_msg for X - const filtered_type& t = async_helpers::from_void_ptr(p); - const T msg(t); - return this_recv->try_put_task(msg); - } - } -}; - -template -class async_storage { -public: - typedef receiver async_storage_client; - - async_storage() { my_data_ready.store(false); } - - template - async_storage(C&& data) : my_data( std::forward(data) ) { - using namespace tbb::internal; - __TBB_STATIC_ASSERT( (is_same_type::type, typename strip::type>::value), "incoming type must be T" ); - - my_data_ready.store(true); - } - - template - bool set(C&& data) { - using namespace tbb::internal; - __TBB_STATIC_ASSERT( (is_same_type::type, typename strip::type>::value), "incoming type must be T" ); - - { - tbb::spin_mutex::scoped_lock locker(my_mutex); - - if (my_data_ready.load()) { - __TBB_ASSERT(false, "double set() call"); - return false; - } - - my_data = std::forward(data); - my_data_ready.store(true); - } - - // Thread sync is on my_data_ready flag - for (typename subscriber_list_type::iterator it = my_clients.begin(); it != my_clients.end(); ++it) { - (*it)->try_put(my_data); - } - - return true; - } - - task* subscribe(async_storage_client& client) { - if (! my_data_ready.load()) - { - tbb::spin_mutex::scoped_lock locker(my_mutex); - - if (! 
my_data_ready.load()) { -#if TBB_USE_ASSERT - for (typename subscriber_list_type::iterator it = my_clients.begin(); it != my_clients.end(); ++it) { - __TBB_ASSERT(*it != &client, "unexpected double subscription"); - } -#endif // TBB_USE_ASSERT - - // Subscribe - my_clients.push_back(&client); - return SUCCESSFULLY_ENQUEUED; - } - } - - __TBB_ASSERT(my_data_ready.load(), "data is NOT ready"); - return client.try_put_task(my_data); - } - -private: - tbb::spin_mutex my_mutex; - - tbb::atomic my_data_ready; - T my_data; - - typedef std::vector subscriber_list_type; - subscriber_list_type my_clients; -}; - -} // namespace internal - -template -class async_msg { - template< typename > friend class receiver; - template< typename, typename > friend struct internal::async_helpers; -public: - typedef T async_msg_data_type; - - async_msg() : my_storage(std::make_shared< internal::async_storage >()) {} - - async_msg(const T& t) : my_storage(std::make_shared< internal::async_storage >(t)) {} - - async_msg(T&& t) : my_storage(std::make_shared< internal::async_storage >( std::move(t) )) {} - - virtual ~async_msg() {} - - void set(const T& t) { - my_storage->set(t); - } - - void set(T&& t) { - my_storage->set( std::move(t) ); - } - -protected: - // Can be overridden in derived class to inform that - // async calculation chain is over - virtual void finalize() const {} - -private: - typedef std::shared_ptr< internal::async_storage > async_storage_ptr; - async_storage_ptr my_storage; -}; - -#endif // __TBB__flow_graph_async_msg_impl_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_impl.h deleted file mode 100644 index 92278caf..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_impl.h +++ /dev/null @@ -1,839 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__flow_graph_impl_H -#define __TBB__flow_graph_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::flow::interfaceX (in flow_graph.h) - -namespace internal { - - typedef tbb::internal::uint64_t tag_value; - - using tbb::internal::strip; - - namespace graph_policy_namespace { - - struct rejecting { }; - struct reserving { }; - struct queueing { }; - - // K == type of field used for key-matching. Each tag-matching port will be provided - // functor that, given an object accepted by the port, will return the - /// field of type K being used for matching. - template::type > > - struct key_matching { - typedef K key_type; - typedef typename strip::type base_key_type; - typedef KHash hash_compare_type; - }; - - // old tag_matching join's new specifier - typedef key_matching tag_matching; - } - -// -------------- function_body containers ---------------------- - - //! 
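
// The async_storage helper earlier in this hunk is a one-shot
// publish/subscribe cell: subscribe() registers a receiver while the value
// is not yet ready, set() stores the value once and then notifies everyone
// registered so far. A simplified std-only sketch of the same protocol; the
// original synchronizes on an atomic ready flag plus a spin mutex, a plain
// mutex is used here:
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

template<typename T>
class one_shot_cell {
    std::mutex mtx;
    bool ready;
    T value;
    std::vector<std::function<void(const T&)> > waiters;
public:
    one_shot_cell() : ready(false) {}

    void set(T v) {                       // must be called at most once
        std::vector<std::function<void(const T&)> > to_notify;
        {
            std::lock_guard<std::mutex> lock(mtx);
            value = std::move(v);
            ready = true;
            to_notify.swap(waiters);      // later subscribers see ready == true
        }
        for (auto& f : to_notify)
            f(value);                     // deliver outside the lock
    }

    void subscribe(std::function<void(const T&)> f) {
        {
            std::lock_guard<std::mutex> lock(mtx);
            if (!ready) { waiters.push_back(std::move(f)); return; }
        }
        f(value);                         // already ready: deliver immediately
    }
};
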
A functor that takes no input and generates a value of type Output - template< typename Output > - class source_body : tbb::internal::no_assign { - public: - virtual ~source_body() {} - virtual bool operator()(Output &output) = 0; - virtual source_body* clone() = 0; - }; - - //! The leaf for source_body - template< typename Output, typename Body> - class source_body_leaf : public source_body { - public: - source_body_leaf( const Body &_body ) : body(_body) { } - bool operator()(Output &output) __TBB_override { return body( output ); } - source_body_leaf* clone() __TBB_override { - return new source_body_leaf< Output, Body >(body); - } - Body get_body() { return body; } - private: - Body body; - }; - - //! A functor that takes an Input and generates an Output - template< typename Input, typename Output > - class function_body : tbb::internal::no_assign { - public: - virtual ~function_body() {} - virtual Output operator()(const Input &input) = 0; - virtual function_body* clone() = 0; - }; - - //! the leaf for function_body - template - class function_body_leaf : public function_body< Input, Output > { - public: - function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const Input &i) __TBB_override { return body(i); } - B get_body() { return body; } - function_body_leaf* clone() __TBB_override { - return new function_body_leaf< Input, Output, B >(body); - } - private: - B body; - }; - - //! the leaf for function_body specialized for Input and output of continue_msg - template - class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { - public: - function_body_leaf( const B &_body ) : body(_body) { } - continue_msg operator()( const continue_msg &i ) __TBB_override { - body(i); - return i; - } - B get_body() { return body; } - function_body_leaf* clone() __TBB_override { - return new function_body_leaf< continue_msg, continue_msg, B >(body); - } - private: - B body; - }; - - //! the leaf for function_body specialized for Output of continue_msg - template - class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { - public: - function_body_leaf( const B &_body ) : body(_body) { } - continue_msg operator()(const Input &i) __TBB_override { - body(i); - return continue_msg(); - } - B get_body() { return body; } - function_body_leaf* clone() __TBB_override { - return new function_body_leaf< Input, continue_msg, B >(body); - } - private: - B body; - }; - - //! the leaf for function_body specialized for Input of continue_msg - template - class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { - public: - function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const continue_msg &i) __TBB_override { - return body(i); - } - B get_body() { return body; } - function_body_leaf* clone() __TBB_override { - return new function_body_leaf< continue_msg, Output, B >(body); - } - private: - B body; - }; - - //! function_body that takes an Input and a set of output ports - template - class multifunction_body : tbb::internal::no_assign { - public: - virtual ~multifunction_body () {} - virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; - virtual multifunction_body* clone() = 0; - virtual void* get_body_ptr() = 0; - }; - - //! leaf for multifunction. OutputSet can be a std::tuple or a vector. 
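
The function_body / function_body_leaf pair above is a small type-erasure idiom: a node stores an abstract body with a virtual call operator plus a clone() hook, so copying a node deep-copies whatever callable it was built from. A minimal, self-contained sketch of the same pattern (names are illustrative, not the TBB originals):

#include <iostream>
#include <memory>

template <typename Input, typename Output>
class erased_body {                       // abstract body: callable + clonable
public:
    virtual ~erased_body() {}
    virtual Output operator()(const Input& input) = 0;
    virtual erased_body* clone() = 0;     // covariant in the leaf below
};

template <typename Input, typename Output, typename B>
class erased_body_leaf : public erased_body<Input, Output> {
public:
    erased_body_leaf(const B& body) : my_body(body) {}
    Output operator()(const Input& i) override { return my_body(i); }
    erased_body_leaf* clone() override { return new erased_body_leaf(my_body); }
private:
    B my_body;                            // the concrete callable, held by value
};

int main() {
    auto doubler = [](int x) { return 2 * x; };
    std::unique_ptr<erased_body<int, int> > body(
        new erased_body_leaf<int, int, decltype(doubler)>(doubler));
    std::cout << (*body)(21) << "\n";     // prints 42
    std::unique_ptr<erased_body<int, int> > copy(body->clone());
    std::cout << (*copy)(10) << "\n";     // prints 20
    return 0;
}

The covariant clone() is what lets a copied node duplicate its body without knowing the concrete callable type, which is why each *_leaf class in this header carries its own clone().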
- template - class multifunction_body_leaf : public multifunction_body { - public: - multifunction_body_leaf(const B &_body) : body(_body) { } - void operator()(const Input &input, OutputSet &oset) __TBB_override { - body(input, oset); // body may explicitly put() to one or more of oset. - } - void* get_body_ptr() __TBB_override { return &body; } - multifunction_body_leaf* clone() __TBB_override { - return new multifunction_body_leaf(body); - } - - private: - B body; - }; - -// ------ function bodies for hash_buffers and key-matching joins. - -template -class type_to_key_function_body : tbb::internal::no_assign { - public: - virtual ~type_to_key_function_body() {} - virtual Output operator()(const Input &input) = 0; // returns an Output - virtual type_to_key_function_body* clone() = 0; -}; - -// specialization for ref output -template -class type_to_key_function_body : tbb::internal::no_assign { - public: - virtual ~type_to_key_function_body() {} - virtual const Output & operator()(const Input &input) = 0; // returns a const Output& - virtual type_to_key_function_body* clone() = 0; -}; - -template -class type_to_key_function_body_leaf : public type_to_key_function_body { -public: - type_to_key_function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const Input &i) __TBB_override { return body(i); } - B get_body() { return body; } - type_to_key_function_body_leaf* clone() __TBB_override { - return new type_to_key_function_body_leaf< Input, Output, B>(body); - } -private: - B body; -}; - -template -class type_to_key_function_body_leaf : public type_to_key_function_body< Input, Output&> { -public: - type_to_key_function_body_leaf( const B &_body ) : body(_body) { } - const Output& operator()(const Input &i) __TBB_override { - return body(i); - } - B get_body() { return body; } - type_to_key_function_body_leaf* clone() __TBB_override { - return new type_to_key_function_body_leaf< Input, Output&, B>(body); - } -private: - B body; -}; - -// --------------------------- end of function_body containers ------------------------ - -// --------------------------- node task bodies --------------------------------------- - - //! A task that calls a node's forward_task function - template< typename NodeType > - class forward_task_bypass : public task { - - NodeType &my_node; - - public: - - forward_task_bypass( NodeType &n ) : my_node(n) {} - - task *execute() __TBB_override { - task * new_task = my_node.forward_task(); - if (new_task == SUCCESSFULLY_ENQUEUED) new_task = NULL; - return new_task; - } - }; - - //! A task that calls a node's apply_body_bypass function, passing in an input of type Input - // return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL - template< typename NodeType, typename Input > - class apply_body_task_bypass : public task { - - NodeType &my_node; - Input my_input; - - public: - - apply_body_task_bypass( NodeType &n, const Input &i ) : my_node(n), my_input(i) {} - - task *execute() __TBB_override { - task * next_task = my_node.apply_body_bypass( my_input ); - if(next_task == SUCCESSFULLY_ENQUEUED) next_task = NULL; - return next_task; - } - }; - - //! 
A task that calls a node's apply_body_bypass function with no input - template< typename NodeType > - class source_task_bypass : public task { - - NodeType &my_node; - - public: - - source_task_bypass( NodeType &n ) : my_node(n) {} - - task *execute() __TBB_override { - task *new_task = my_node.apply_body_bypass( ); - if(new_task == SUCCESSFULLY_ENQUEUED) return NULL; - return new_task; - } - }; - -// ------------------------ end of node task bodies ----------------------------------- - - //! An empty functor that takes an Input and returns a default constructed Output - template< typename Input, typename Output > - struct empty_body { - Output operator()( const Input & ) const { return Output(); } - }; - - //! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. - template< typename T, typename M=spin_mutex > - class node_cache { - public: - - typedef size_t size_type; - - bool empty() { - typename mutex_type::scoped_lock lock( my_mutex ); - return internal_empty(); - } - - void add( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - internal_push(n); - } - - void remove( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - for ( size_t i = internal_size(); i != 0; --i ) { - T &s = internal_pop(); - if ( &s == &n ) return; // only remove one predecessor per request - internal_push(s); - } - } - - void clear() { - while( !my_q.empty()) (void)my_q.pop(); -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - my_built_predecessors.clear(); -#endif - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef edge_container built_predecessors_type; - built_predecessors_type &built_predecessors() { return my_built_predecessors; } - - typedef typename edge_container::edge_list_type predecessor_list_type; - void internal_add_built_predecessor( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - my_built_predecessors.add_edge(n); - } - - void internal_delete_built_predecessor( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - my_built_predecessors.delete_edge(n); - } - - void copy_predecessors( predecessor_list_type &v) { - typename mutex_type::scoped_lock lock( my_mutex ); - my_built_predecessors.copy_edges(v); - } - - size_t predecessor_count() { - typename mutex_type::scoped_lock lock(my_mutex); - return (size_t)(my_built_predecessors.edge_count()); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - protected: - - typedef M mutex_type; - mutex_type my_mutex; - std::queue< T * > my_q; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - built_predecessors_type my_built_predecessors; -#endif - - // Assumes lock is held - inline bool internal_empty( ) { - return my_q.empty(); - } - - // Assumes lock is held - inline size_type internal_size( ) { - return my_q.size(); - } - - // Assumes lock is held - inline void internal_push( T &n ) { - my_q.push(&n); - } - - // Assumes lock is held - inline T &internal_pop() { - T *v = my_q.front(); - my_q.pop(); - return *v; - } - - }; - - //! 
A cache of predecessors that only supports try_get - template< typename T, typename M=spin_mutex > -#if __TBB_PREVIEW_ASYNC_MSG - // TODO: make predecessor_cache type T-independent when async_msg becomes regular feature - class predecessor_cache : public node_cache< untyped_sender, M > { -#else - class predecessor_cache : public node_cache< sender, M > { -#endif // __TBB_PREVIEW_ASYNC_MSG - public: - typedef M mutex_type; - typedef T output_type; -#if __TBB_PREVIEW_ASYNC_MSG - typedef untyped_sender predecessor_type; - typedef untyped_receiver successor_type; -#else - typedef sender predecessor_type; - typedef receiver successor_type; -#endif // __TBB_PREVIEW_ASYNC_MSG - - predecessor_cache( ) : my_owner( NULL ) { } - - void set_owner( successor_type *owner ) { my_owner = owner; } - - bool get_item( output_type &v ) { - - bool msg = false; - - do { - predecessor_type *src; - { - typename mutex_type::scoped_lock lock(this->my_mutex); - if ( this->internal_empty() ) { - break; - } - src = &this->internal_pop(); - } - - // Try to get from this sender - msg = src->try_get( v ); - - if (msg == false) { - // Relinquish ownership of the edge - if (my_owner) - src->register_successor( *my_owner ); - } else { - // Retain ownership of the edge - this->add(*src); - } - } while ( msg == false ); - return msg; - } - - // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). - void reset() { - if (my_owner) { - for(;;) { - predecessor_type *src; - { - if (this->internal_empty()) break; - src = &this->internal_pop(); - } - src->register_successor( *my_owner ); - } - } - } - - protected: - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - using node_cache< predecessor_type, M >::my_built_predecessors; -#endif - successor_type *my_owner; - }; - - //! An cache of predecessors that supports requests and reservations - // TODO: make reservable_predecessor_cache type T-independent when async_msg becomes regular feature - template< typename T, typename M=spin_mutex > - class reservable_predecessor_cache : public predecessor_cache< T, M > { - public: - typedef M mutex_type; - typedef T output_type; -#if __TBB_PREVIEW_ASYNC_MSG - typedef untyped_sender predecessor_type; - typedef untyped_receiver successor_type; -#else - typedef sender predecessor_type; - typedef receiver successor_type; -#endif // __TBB_PREVIEW_ASYNC_MSG - - reservable_predecessor_cache( ) : reserved_src(NULL) { } - - bool - try_reserve( output_type &v ) { - bool msg = false; - - do { - { - typename mutex_type::scoped_lock lock(this->my_mutex); - if ( reserved_src || this->internal_empty() ) - return false; - - reserved_src = &this->internal_pop(); - } - - // Try to get from this sender - msg = reserved_src->try_reserve( v ); - - if (msg == false) { - typename mutex_type::scoped_lock lock(this->my_mutex); - // Relinquish ownership of the edge - reserved_src->register_successor( *this->my_owner ); - reserved_src = NULL; - } else { - // Retain ownership of the edge - this->add( *reserved_src ); - } - } while ( msg == false ); - - return msg; - } - - bool - try_release( ) { - reserved_src->try_release( ); - reserved_src = NULL; - return true; - } - - bool - try_consume( ) { - reserved_src->try_consume( ); - reserved_src = NULL; - return true; - } - - void reset( ) { - reserved_src = NULL; - predecessor_cache::reset( ); - } - - void clear() { - reserved_src = NULL; - predecessor_cache::clear(); - } - - private: - predecessor_type *reserved_src; - }; - - - //! 
An abstract cache of successors - // TODO: make successor_cache type T-independent when async_msg becomes regular feature - template - class successor_cache : tbb::internal::no_copy { - protected: - - typedef M mutex_type; - mutex_type my_mutex; - -#if __TBB_PREVIEW_ASYNC_MSG - typedef untyped_receiver successor_type; - typedef untyped_receiver *pointer_type; - typedef untyped_sender owner_type; -#else - typedef receiver successor_type; - typedef receiver *pointer_type; - typedef sender owner_type; -#endif // __TBB_PREVIEW_ASYNC_MSG - typedef std::list< pointer_type > successors_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - edge_container my_built_successors; -#endif - successors_type my_successors; - - owner_type *my_owner; - - public: -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename edge_container::edge_list_type successor_list_type; - - edge_container &built_successors() { return my_built_successors; } - - void internal_add_built_successor( successor_type &r) { - typename mutex_type::scoped_lock l(my_mutex, true); - my_built_successors.add_edge( r ); - } - - void internal_delete_built_successor( successor_type &r) { - typename mutex_type::scoped_lock l(my_mutex, true); - my_built_successors.delete_edge(r); - } - - void copy_successors( successor_list_type &v) { - typename mutex_type::scoped_lock l(my_mutex, false); - my_built_successors.copy_edges(v); - } - - size_t successor_count() { - typename mutex_type::scoped_lock l(my_mutex,false); - return my_built_successors.edge_count(); - } - -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - successor_cache( ) : my_owner(NULL) {} - - void set_owner( owner_type *owner ) { my_owner = owner; } - - virtual ~successor_cache() {} - - void register_successor( successor_type &r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - my_successors.push_back( &r ); - } - - void remove_successor( successor_type &r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - for ( typename successors_type::iterator i = my_successors.begin(); - i != my_successors.end(); ++i ) { - if ( *i == & r ) { - my_successors.erase(i); - break; - } - } - } - - bool empty() { - typename mutex_type::scoped_lock l(my_mutex, false); - return my_successors.empty(); - } - - void clear() { - my_successors.clear(); -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - my_built_successors.clear(); -#endif - } - -#if !__TBB_PREVIEW_ASYNC_MSG - virtual task * try_put_task( const T &t ) = 0; -#endif // __TBB_PREVIEW_ASYNC_MSG - }; // successor_cache - - //! 
An abstract cache of successors, specialized to continue_msg - template<> - class successor_cache< continue_msg > : tbb::internal::no_copy { - protected: - - typedef spin_rw_mutex mutex_type; - mutex_type my_mutex; - -#if __TBB_PREVIEW_ASYNC_MSG - typedef untyped_receiver successor_type; - typedef untyped_receiver *pointer_type; -#else - typedef receiver successor_type; - typedef receiver *pointer_type; -#endif // __TBB_PREVIEW_ASYNC_MSG - typedef std::list< pointer_type > successors_type; - successors_type my_successors; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - edge_container my_built_successors; - typedef edge_container::edge_list_type successor_list_type; -#endif - - sender *my_owner; - - public: - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - - edge_container &built_successors() { return my_built_successors; } - - void internal_add_built_successor( successor_type &r) { - mutex_type::scoped_lock l(my_mutex, true); - my_built_successors.add_edge( r ); - } - - void internal_delete_built_successor( successor_type &r) { - mutex_type::scoped_lock l(my_mutex, true); - my_built_successors.delete_edge(r); - } - - void copy_successors( successor_list_type &v) { - mutex_type::scoped_lock l(my_mutex, false); - my_built_successors.copy_edges(v); - } - - size_t successor_count() { - mutex_type::scoped_lock l(my_mutex,false); - return my_built_successors.edge_count(); - } - -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - successor_cache( ) : my_owner(NULL) {} - - void set_owner( sender *owner ) { my_owner = owner; } - - virtual ~successor_cache() {} - - void register_successor( successor_type &r ) { - mutex_type::scoped_lock l(my_mutex, true); - my_successors.push_back( &r ); - if ( my_owner && r.is_continue_receiver() ) { - r.register_predecessor( *my_owner ); - } - } - - void remove_successor( successor_type &r ) { - mutex_type::scoped_lock l(my_mutex, true); - for ( successors_type::iterator i = my_successors.begin(); - i != my_successors.end(); ++i ) { - if ( *i == & r ) { - // TODO: Check if we need to test for continue_receiver before - // removing from r. - if ( my_owner ) - r.remove_predecessor( *my_owner ); - my_successors.erase(i); - break; - } - } - } - - bool empty() { - mutex_type::scoped_lock l(my_mutex, false); - return my_successors.empty(); - } - - void clear() { - my_successors.clear(); -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - my_built_successors.clear(); -#endif - } - -#if !__TBB_PREVIEW_ASYNC_MSG - virtual task * try_put_task( const continue_msg &t ) = 0; -#endif // __TBB_PREVIEW_ASYNC_MSG - - }; // successor_cache< continue_msg > - - //! 
A cache of successors that are broadcast to - // TODO: make broadcast_cache type T-independent when async_msg becomes regular feature - template - class broadcast_cache : public successor_cache { - typedef M mutex_type; - typedef typename successor_cache::successors_type successors_type; - - public: - - broadcast_cache( ) {} - - // as above, but call try_put_task instead, and return the last task we received (if any) -#if __TBB_PREVIEW_ASYNC_MSG - template - task * try_put_task( const X &t ) { -#else - task * try_put_task( const T &t ) __TBB_override { -#endif // __TBB_PREVIEW_ASYNC_MSG - task * last_task = NULL; - bool upgraded = true; - typename mutex_type::scoped_lock l(this->my_mutex, upgraded); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - task *new_task = (*i)->try_put_task(t); - last_task = combine_tasks(last_task, new_task); // enqueue if necessary - if(new_task) { - ++i; - } - else { // failed - if ( (*i)->register_predecessor(*this->my_owner) ) { - if (!upgraded) { - l.upgrade_to_writer(); - upgraded = true; - } - i = this->my_successors.erase(i); - } else { - ++i; - } - } - } - return last_task; - } - - }; - - //! A cache of successors that are put in a round-robin fashion - // TODO: make round_robin_cache type T-independent when async_msg becomes regular feature - template - class round_robin_cache : public successor_cache { - typedef size_t size_type; - typedef M mutex_type; - typedef typename successor_cache::successors_type successors_type; - - public: - - round_robin_cache( ) {} - - size_type size() { - typename mutex_type::scoped_lock l(this->my_mutex, false); - return this->my_successors.size(); - } - -#if __TBB_PREVIEW_ASYNC_MSG - template - task * try_put_task( const X &t ) { -#else - task *try_put_task( const T &t ) __TBB_override { -#endif // __TBB_PREVIEW_ASYNC_MSG - bool upgraded = true; - typename mutex_type::scoped_lock l(this->my_mutex, upgraded); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - task *new_task = (*i)->try_put_task(t); - if ( new_task ) { - return new_task; - } else { - if ( (*i)->register_predecessor(*this->my_owner) ) { - if (!upgraded) { - l.upgrade_to_writer(); - upgraded = true; - } - i = this->my_successors.erase(i); - } - else { - ++i; - } - } - } - return NULL; - } - }; - - template - class decrementer : public continue_receiver, tbb::internal::no_copy { - - T *my_node; - - task *execute() __TBB_override { - return my_node->decrement_counter(); - } - - public: - - typedef continue_msg input_type; - typedef continue_msg output_type; - decrementer( int number_of_predecessors = 0 ) : continue_receiver( number_of_predecessors ) { } - void set_owner( T *node ) { my_node = node; } - }; - -} - -#endif // __TBB__flow_graph_impl_H - diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_indexer_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_indexer_impl.h deleted file mode 100644 index 1fc6690c..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_indexer_impl.h +++ /dev/null @@ -1,478 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__flow_graph_indexer_impl_H -#define __TBB__flow_graph_indexer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_flow_graph_types_impl.h" - -namespace internal { - - // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of - // the form tagged_msg - // where the value of tag will indicate which result was put to the - // successor. - - template - task* do_try_put(const T &v, void *p) { - typename IndexerNodeBaseType::output_type o(K, v); - return reinterpret_cast(p)->try_put_task(&o); - } - - template - struct indexer_helper { - template - static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p) { - typedef typename tuple_element::type T; - task *(*indexer_node_put_task)(const T&, void *) = do_try_put; - tbb::flow::get(my_input).set_up(p, indexer_node_put_task); - indexer_helper::template set_indexer_node_pointer(my_input, p); - } - template - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - indexer_helper::reset_inputs(my_input, f); - tbb::flow::get(my_input).reset_receiver(f); - } -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - template - static inline void extract(InputTuple &my_input) { - indexer_helper::extract(my_input); - tbb::flow::get(my_input).extract_receiver(); - } -#endif - }; - - template - struct indexer_helper { - template - static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p) { - typedef typename tuple_element<0, TupleTypes>::type T; - task *(*indexer_node_put_task)(const T&, void *) = do_try_put; - tbb::flow::get<0>(my_input).set_up(p, indexer_node_put_task); - } - template - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - tbb::flow::get<0>(my_input).reset_receiver(f); - } -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - template - static inline void extract(InputTuple &my_input) { - tbb::flow::get<0>(my_input).extract_receiver(); - } -#endif - }; - - template - class indexer_input_port : public receiver { - private: - void* my_indexer_ptr; - typedef task* (* forward_function_ptr)(T const &, void* ); - forward_function_ptr my_try_put_task; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - spin_mutex my_pred_mutex; - typedef typename receiver::built_predecessors_type built_predecessors_type; - built_predecessors_type my_built_predecessors; -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - public: -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - indexer_input_port() : my_pred_mutex() {} - indexer_input_port( const indexer_input_port & /*other*/ ) : receiver(), my_pred_mutex() { - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - void set_up(void *p, forward_function_ptr f) { - my_indexer_ptr = p; - my_try_put_task = f; - } -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename receiver::predecessor_list_type predecessor_list_type; - typedef typename receiver::predecessor_type predecessor_type; - - built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; } - - size_t predecessor_count() __TBB_override { - spin_mutex::scoped_lock 
l(my_pred_mutex); - return my_built_predecessors.edge_count(); - } - void internal_add_built_predecessor(predecessor_type &p) __TBB_override { - spin_mutex::scoped_lock l(my_pred_mutex); - my_built_predecessors.add_edge(p); - } - void internal_delete_built_predecessor(predecessor_type &p) __TBB_override { - spin_mutex::scoped_lock l(my_pred_mutex); - my_built_predecessors.delete_edge(p); - } - void copy_predecessors( predecessor_list_type &v) __TBB_override { - spin_mutex::scoped_lock l(my_pred_mutex); - my_built_predecessors.copy_edges(v); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; - task *try_put_task(const T &v) __TBB_override { - return my_try_put_task(v, my_indexer_ptr); - } - - public: -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void reset_receiver(reset_flags f) __TBB_override { if(f&rf_clear_edges) my_built_predecessors.clear(); } -#else - void reset_receiver(reset_flags /*f*/) __TBB_override { } -#endif - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void extract_receiver() { my_built_predecessors.receiver_extract(*this); } -#endif - }; - - template - class indexer_node_FE { - public: - static const int N = tbb::flow::tuple_size::value; - typedef OutputType output_type; - typedef InputTuple input_type; - - // Some versions of Intel C++ compiler fail to generate an implicit constructor for the class which has std::tuple as a member. - indexer_node_FE() : my_inputs() {} - - input_type &input_ports() { return my_inputs; } - protected: - input_type my_inputs; - }; - - //! indexer_node_base - template - class indexer_node_base : public graph_node, public indexer_node_FE, - public sender { - protected: - using graph_node::my_graph; - public: - static const size_t N = tbb::flow::tuple_size::value; - typedef OutputType output_type; - typedef StructTypes tuple_types; - typedef typename sender::successor_type successor_type; - typedef indexer_node_FE input_ports_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename sender::built_successors_type built_successors_type; - typedef typename sender::successor_list_type successor_list_type; -#endif - - private: - // ----------- Aggregator ------------ - enum op_type { reg_succ, rem_succ, try__put_task -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - , add_blt_succ, del_blt_succ, - blt_succ_cnt, blt_succ_cpy -#endif - }; - typedef indexer_node_base class_type; - - class indexer_node_base_operation : public aggregated_operation { - public: - char type; - union { - output_type const *my_arg; - successor_type *my_succ; - task *bypass_t; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - size_t cnt_val; - successor_list_type *succv; -#endif - }; - indexer_node_base_operation(const output_type* e, op_type t) : - type(char(t)), my_arg(e) {} - indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), - my_succ(const_cast(&s)) {} - indexer_node_base_operation(op_type t) : type(char(t)) {} - }; - - typedef internal::aggregating_functor handler_type; - friend class internal::aggregating_functor; - aggregator my_aggregator; - - void handle_operations(indexer_node_base_operation* op_list) { - indexer_node_base_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - - case reg_succ: - my_successors.register_successor(*(current->my_succ)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - - case 
rem_succ: - my_successors.remove_successor(*(current->my_succ)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case try__put_task: { - current->bypass_t = my_successors.try_put_task(*(current->my_arg)); - __TBB_store_with_release(current->status, SUCCEEDED); // return of try_put_task actual return value - } - break; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - case add_blt_succ: - my_successors.internal_add_built_successor(*(current->my_succ)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case del_blt_succ: - my_successors.internal_delete_built_successor(*(current->my_succ)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case blt_succ_cnt: - current->cnt_val = my_successors.successor_count(); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case blt_succ_cpy: - my_successors.copy_successors(*(current->succv)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - } - } - } - // ---------- end aggregator ----------- - public: - indexer_node_base(graph& g) : graph_node(g), input_ports_type() { - indexer_helper::set_indexer_node_pointer(this->my_inputs, this); - my_successors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - indexer_node_base(const indexer_node_base& other) : graph_node(other.my_graph), input_ports_type(), sender() { - indexer_helper::set_indexer_node_pointer(this->my_inputs, this); - my_successors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - bool register_successor(successor_type &r) __TBB_override { - indexer_node_base_operation op_data(r, reg_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool remove_successor( successor_type &r) __TBB_override { - indexer_node_base_operation op_data(r, rem_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - task * try_put_task(output_type const *v) { // not a virtual method in this class - indexer_node_base_operation op_data(v, try__put_task); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - - built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); } - - void internal_add_built_successor( successor_type &r) __TBB_override { - indexer_node_base_operation op_data(r, add_blt_succ); - my_aggregator.execute(&op_data); - } - - void internal_delete_built_successor( successor_type &r) __TBB_override { - indexer_node_base_operation op_data(r, del_blt_succ); - my_aggregator.execute(&op_data); - } - - size_t successor_count() __TBB_override { - indexer_node_base_operation op_data(blt_succ_cnt); - my_aggregator.execute(&op_data); - return op_data.cnt_val; - } - - void copy_successors( successor_list_type &v) __TBB_override { - indexer_node_base_operation op_data(blt_succ_cpy); - op_data.succv = &v; - my_aggregator.execute(&op_data); - } - void extract() __TBB_override { - my_successors.built_successors().sender_extract(*this); - indexer_helper::extract(this->my_inputs); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - protected: - void reset_node(reset_flags f) __TBB_override { - if(f & rf_clear_edges) { - my_successors.clear(); - indexer_helper::reset_inputs(this->my_inputs,f); - } - } - - private: - broadcast_cache my_successors; - }; //indexer_node_base - - - template struct input_types; - - template - struct input_types<1, InputTuple> { - typedef typename 
tuple_element<0, InputTuple>::type first_type;
-        typedef typename internal::tagged_msg<size_t, first_type> type;
-    };
-
-    template<typename InputTuple>
-    struct input_types<2, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type> type;
-    };
-
-    template<typename InputTuple>
-    struct input_types<3, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type> type;
-    };
-
-    template<typename InputTuple>
-    struct input_types<4, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename tuple_element<3, InputTuple>::type fourth_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type, fourth_type> type;
-    };
-
-    template<typename InputTuple>
-    struct input_types<5, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename tuple_element<3, InputTuple>::type fourth_type;
-        typedef typename tuple_element<4, InputTuple>::type fifth_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type, fourth_type, fifth_type> type;
-    };
-
-    template<typename InputTuple>
-    struct input_types<6, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename tuple_element<3, InputTuple>::type fourth_type;
-        typedef typename tuple_element<4, InputTuple>::type fifth_type;
-        typedef typename tuple_element<5, InputTuple>::type sixth_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type, fourth_type, fifth_type, sixth_type> type;
-    };
-
-    template<typename InputTuple>
-    struct input_types<7, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename tuple_element<3, InputTuple>::type fourth_type;
-        typedef typename tuple_element<4, InputTuple>::type fifth_type;
-        typedef typename tuple_element<5, InputTuple>::type sixth_type;
-        typedef typename tuple_element<6, InputTuple>::type seventh_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type, fourth_type, fifth_type, sixth_type, seventh_type> type;
-    };
-
-
-    template<typename InputTuple>
-    struct input_types<8, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename tuple_element<3, InputTuple>::type fourth_type;
-        typedef typename tuple_element<4, InputTuple>::type fifth_type;
-        typedef typename tuple_element<5, InputTuple>::type sixth_type;
-        typedef typename tuple_element<6, InputTuple>::type seventh_type;
-        typedef typename tuple_element<7, InputTuple>::type eighth_type;
-        typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type, fourth_type, fifth_type, sixth_type, seventh_type, eighth_type> type;
-    };
-
-
-    template<typename InputTuple>
-    struct input_types<9, InputTuple> {
-        typedef typename tuple_element<0, InputTuple>::type first_type;
-        typedef typename tuple_element<1, InputTuple>::type second_type;
-        typedef typename tuple_element<2, InputTuple>::type third_type;
-        typedef typename tuple_element<3, InputTuple>::type fourth_type;
-        typedef typename tuple_element<4, InputTuple>::type fifth_type;
-        typedef typename
tuple_element<5, InputTuple>::type sixth_type; - typedef typename tuple_element<6, InputTuple>::type seventh_type; - typedef typename tuple_element<7, InputTuple>::type eighth_type; - typedef typename tuple_element<8, InputTuple>::type nineth_type; - typedef typename internal::tagged_msg type; - }; - - template - struct input_types<10, InputTuple> { - typedef typename tuple_element<0, InputTuple>::type first_type; - typedef typename tuple_element<1, InputTuple>::type second_type; - typedef typename tuple_element<2, InputTuple>::type third_type; - typedef typename tuple_element<3, InputTuple>::type fourth_type; - typedef typename tuple_element<4, InputTuple>::type fifth_type; - typedef typename tuple_element<5, InputTuple>::type sixth_type; - typedef typename tuple_element<6, InputTuple>::type seventh_type; - typedef typename tuple_element<7, InputTuple>::type eighth_type; - typedef typename tuple_element<8, InputTuple>::type nineth_type; - typedef typename tuple_element<9, InputTuple>::type tenth_type; - typedef typename internal::tagged_msg type; - }; - - // type generators - template - struct indexer_types : public input_types::value, OutputTuple> { - static const int N = tbb::flow::tuple_size::value; - typedef typename input_types::type output_type; - typedef typename wrap_tuple_elements::type input_ports_type; - typedef internal::indexer_node_FE indexer_FE_type; - typedef internal::indexer_node_base indexer_base_type; - }; - - template - class unfolded_indexer_node : public indexer_types::indexer_base_type { - public: - typedef typename indexer_types::input_ports_type input_ports_type; - typedef OutputTuple tuple_types; - typedef typename indexer_types::output_type output_type; - private: - typedef typename indexer_types::indexer_base_type base_type; - public: - unfolded_indexer_node(graph& g) : base_type(g) {} - unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} - }; - -} /* namespace internal */ - -#endif /* __TBB__flow_graph_indexer_impl_H */ diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_item_buffer_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_item_buffer_impl.h deleted file mode 100644 index 85d2686d..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_item_buffer_impl.h +++ /dev/null @@ -1,288 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__flow_graph_item_buffer_impl_H -#define __TBB__flow_graph_item_buffer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "tbb/internal/_flow_graph_types_impl.h" // for aligned_pair - -// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) - - //! Expandable buffer of items. The possible operations are push, pop, - //* tests for empty and so forth. No mutual exclusion is built in. - //* objects are constructed into and explicitly-destroyed. 
get_my_item gives - // a read-only reference to the item in the buffer. set_my_item may be called - // with either an empty or occupied slot. - - using internal::aligned_pair; - using internal::alignment_of; - -namespace internal { - - template > - class item_buffer { - public: - typedef T item_type; - enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; - protected: - typedef size_t size_type; - typedef typename aligned_pair::type buffer_item_type; - typedef typename A::template rebind::other allocator_type; - - buffer_item_type *my_array; - size_type my_array_size; - static const size_type initial_buffer_size = 4; - size_type my_head; - size_type my_tail; - - bool buffer_empty() const { return my_head == my_tail; } - - buffer_item_type &item(size_type i) { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].second))%alignment_of::value),NULL); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].first))%alignment_of::value), NULL); - return my_array[i & (my_array_size - 1) ]; - } - - const buffer_item_type &item(size_type i) const { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].second))%alignment_of::value), NULL); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].first))%alignment_of::value), NULL); - return my_array[i & (my_array_size-1)]; - } - - bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } - bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } - - // object management in buffer - const item_type &get_my_item(size_t i) const { - __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); - item_type *itm = (tbb::internal::punned_cast(&(item(i).first))); - return *(const item_type *)itm; - } - - // may be called with an empty slot or a slot that has already been constructed into. - void set_my_item(size_t i, const item_type &o) { - if(item(i).second != no_item) { - destroy_item(i); - } - new(&(item(i).first)) item_type(o); - item(i).second = has_item; - } - - // destructively-fetch an object from the buffer - void fetch_item(size_t i, item_type &o) { - __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); - o = get_my_item(i); // could have std::move assign semantics - destroy_item(i); - } - - // move an existing item from one slot to another. The moved-to slot must be unoccupied, - // the moved-from slot must exist and not be reserved. The after, from will be empty, - // to will be occupied but not reserved - void move_item(size_t to, size_t from) { - __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); - __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); - set_my_item(to, get_my_item(from)); // could have std::move semantics - destroy_item(from); - - } - - // put an item in an empty slot. 
Return true if successful, else false
-        bool place_item(size_t here, const item_type &me) {
-#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES
-            if(my_item_valid(here)) return false;
-#endif
-            set_my_item(here, me);
-            return true;
-        }
-
-        // could be implemented with std::move semantics
-        void swap_items(size_t i, size_t j) {
-            __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)");
-            item_type temp = get_my_item(i);
-            set_my_item(i, get_my_item(j));
-            set_my_item(j, temp);
-        }
-
-        void destroy_item(size_type i) {
-            __TBB_ASSERT(my_item_valid(i), "destruction of invalid item");
-            (tbb::internal::punned_cast<item_type *>(&(item(i).first)))->~item_type();
-            item(i).second = no_item;
-        }
-
-        // returns the front element
-        const item_type& front() const
-        {
-            __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item");
-            return get_my_item(my_head);
-        }
-
-        // returns the back element
-        const item_type& back() const
-        {
-            __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch tail non-item");
-            return get_my_item(my_tail - 1);
-        }
-
-        // the following methods are for reservation of the front of the buffer.
-        void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; }
-        void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; }
-
-        void destroy_front() { destroy_item(my_head); ++my_head; }
-        void destroy_back() { destroy_item(my_tail-1); --my_tail; }
-
-        // we have to be able to test against a new tail value without changing my_tail
-        // grow_array doesn't work if we change my_tail when the old array is too small
-        size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; }
-        size_type capacity() { return my_array_size; }
-        // sequencer_node does not use this method, so we don't
-        // need a version that passes in the new_tail value.
-        bool buffer_full() { return size() >= capacity(); }
-
-        //! Grows the internal array.
-        void grow_my_array( size_t minimum_size ) {
-            // test that we haven't made the structure inconsistent.
-            __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity");
-            size_type new_size = my_array_size ? 2*my_array_size : initial_buffer_size;
-            while( new_size < minimum_size ) new_size *= 2;
-            // allocate the larger array and mark every slot empty
-            buffer_item_type* new_array = allocator_type().allocate(new_size);
-            for( size_type i=0; i<new_size; ++i ) { new_array[i].second = no_item; }
-            // copy the valid items across, re-mapping indices against the new mask
-            for( size_type i=my_head; i<my_tail; ++i ) {
-                if( my_item_valid(i) ) {  // sequencer_node may leave holes
-                    (void) new( &(new_array[i&(new_size-1)].first) ) item_type( get_my_item(i) );
-                    new_array[i&(new_size-1)].second = item(i).second;
-                }
-            }
-            clean_up_buffer(/*reset_pointers*/false);
-            my_array = new_array;
-            my_array_size = new_size;
-        }
-
-        bool push_back(item_type &v) {
-            if(buffer_full()) {
-                grow_my_array(size() + 1);
-            }
-            set_my_item(my_tail, v);
-            ++my_tail;
-            return true;
-        }
-
-        bool pop_back(item_type &v) {
-            if (!my_item_valid(my_tail-1)) {
-                return false;
-            }
-            v = this->back();
-            destroy_back();
-            return true;
-        }
-
-        bool pop_front(item_type &v) {
-            if(!my_item_valid(my_head)) {
-                return false;
-            }
-            v = this->front();
-            destroy_front();
-            return true;
-        }
-
-        // This is used both for reset and for grow_my_array.  In the case of grow_my_array
-        // we want to retain the values of the head and tail.
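
A note on the indexing discipline used throughout item_buffer, before the clean_up_buffer routine that follows: my_array_size is always a power of two and my_head/my_tail grow monotonically, so item(i) maps an index to a slot with the cheap mask i & (my_array_size - 1) rather than a modulo; grow_my_array doubles the capacity precisely to keep that invariant. A toy ring buffer isolating just that trick (an illustrative sketch, assuming a power-of-two capacity; not the TBB code):

#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
class toy_ring {
    std::vector<T> slots;                  // size kept a power of two
    std::size_t my_head, my_tail;          // grow without wrapping; mask on access
    T& slot(std::size_t i) { return slots[i & (slots.size() - 1)]; }
public:
    explicit toy_ring(std::size_t capacity = 4)
        : slots(capacity), my_head(0), my_tail(0) {
        assert((capacity & (capacity - 1)) == 0 && "capacity must be a power of two");
    }
    bool push_back(const T& v) {
        if (my_tail - my_head == slots.size()) return false;   // full
        slot(my_tail++) = v;
        return true;
    }
    bool pop_front(T& v) {
        if (my_head == my_tail) return false;                  // empty
        v = slot(my_head++);
        return true;
    }
};

int main() {
    toy_ring<int> q;
    for (int i = 0; i < 4; ++i) assert(q.push_back(i));
    assert(!q.push_back(99));              // full: tail - head == capacity
    int v;
    assert(q.pop_front(v) && v == 0);
    assert(q.push_back(99));               // index 4 lands in freed slot 0 via the mask
    return 0;
}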
- void clean_up_buffer(bool reset_pointers) { - if (my_array) { - for( size_type i=my_head; i > - class reservable_item_buffer : public item_buffer { - protected: - using item_buffer::my_item_valid; - using item_buffer::my_head; - - public: - reservable_item_buffer() : item_buffer(), my_reserved(false) {} - void reset() {my_reserved = false; item_buffer::reset(); } - protected: - - bool reserve_front(T &v) { - if(my_reserved || !my_item_valid(this->my_head)) return false; - my_reserved = true; - // reserving the head - v = this->front(); - this->reserve_item(this->my_head); - return true; - } - - void consume_front() { - __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); - this->destroy_front(); - my_reserved = false; - } - - void release_front() { - __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); - this->release_item(this->my_head); - my_reserved = false; - } - - bool my_reserved; - }; - -} // namespace internal - -#endif // __TBB__flow_graph_item_buffer_impl_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_join_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_join_impl.h deleted file mode 100644 index 4999bef7..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_join_impl.h +++ /dev/null @@ -1,1991 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__flow_graph_join_impl_H -#define __TBB__flow_graph_join_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -namespace internal { - - struct forwarding_base { - forwarding_base(graph &g) : graph_pointer(&g) {} - virtual ~forwarding_base() {} - // decrement_port_count may create a forwarding task. If we cannot handle the task - // ourselves, ask decrement_port_count to deal with it. - virtual task * decrement_port_count(bool handle_task) = 0; - virtual void increment_port_count() = 0; - // moved here so input ports can queue tasks - graph* graph_pointer; - }; - - // specialization that lets us keep a copy of the current_key for building results. - // KeyType can be a reference type. 
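
The join_helper<N> templates just below walk a tuple of ports at compile time: the primary template handles port N-1 and recurses, and the join_helper<1> specialization terminates the recursion, so operations like reserve, consume, and release reach every port without a runtime loop. A minimal sketch of the same recursion pattern over a std::tuple (names are illustrative, not from the header):

#include <iostream>
#include <tuple>

template <int N>
struct tuple_printer {
    template <typename Tuple>
    static void apply(const Tuple& t) {
        tuple_printer<N - 1>::apply(t);            // recurse over elements 0 .. N-2
        std::cout << std::get<N - 1>(t) << ' ';    // then handle element N-1
    }
};

template <>
struct tuple_printer<1> {                          // base case: first element only
    template <typename Tuple>
    static void apply(const Tuple& t) { std::cout << std::get<0>(t) << ' '; }
};

int main() {
    std::tuple<int, double, const char*> t(1, 2.5, "three");
    tuple_printer<3>::apply(t);                    // prints: 1 2.5 three
    std::cout << '\n';
    return 0;
}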
- template - struct matching_forwarding_base :public forwarding_base { - typedef typename tbb::internal::strip::type current_key_type; - matching_forwarding_base(graph &g) : forwarding_base(g) { } - virtual task * increment_key_count(current_key_type const & /*t*/, bool /*handle_task*/) = 0; // {return NULL;} - current_key_type current_key; // so ports can refer to FE's desired items - }; - - template< int N > - struct join_helper { - - template< typename TupleType, typename PortType > - static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { - tbb::flow::get( my_input ).set_join_node_pointer(port); - join_helper::set_join_node_pointer( my_input, port ); - } - template< typename TupleType > - static inline void consume_reservations( TupleType &my_input ) { - tbb::flow::get( my_input ).consume(); - join_helper::consume_reservations( my_input ); - } - - template< typename TupleType > - static inline void release_my_reservation( TupleType &my_input ) { - tbb::flow::get( my_input ).release(); - } - - template - static inline void release_reservations( TupleType &my_input) { - join_helper::release_reservations(my_input); - release_my_reservation(my_input); - } - - template< typename InputTuple, typename OutputTuple > - static inline bool reserve( InputTuple &my_input, OutputTuple &out) { - if ( !tbb::flow::get( my_input ).reserve( tbb::flow::get( out ) ) ) return false; - if ( !join_helper::reserve( my_input, out ) ) { - release_my_reservation( my_input ); - return false; - } - return true; - } - - template - static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { - bool res = tbb::flow::get(my_input).get_item(tbb::flow::get(out) ); // may fail - return join_helper::get_my_item(my_input, out) && res; // do get on other inputs before returning - } - - template - static inline bool get_items(InputTuple &my_input, OutputTuple &out) { - return get_my_item(my_input, out); - } - - template - static inline void reset_my_port(InputTuple &my_input) { - join_helper::reset_my_port(my_input); - tbb::flow::get(my_input).reset_port(); - } - - template - static inline void reset_ports(InputTuple& my_input) { - reset_my_port(my_input); - } - - template - static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { - tbb::flow::get(my_input).set_my_key_func(tbb::flow::get(my_key_funcs)); - tbb::flow::get(my_key_funcs) = NULL; - join_helper::set_key_functors(my_input, my_key_funcs); - } - - template< typename KeyFuncTuple> - static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - if(tbb::flow::get(other_inputs).get_my_key_func()) { - tbb::flow::get(my_inputs).set_my_key_func(tbb::flow::get(other_inputs).get_my_key_func()->clone()); - } - join_helper::copy_key_functors(my_inputs, other_inputs); - } - - template - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - join_helper::reset_inputs(my_input, f); - tbb::flow::get(my_input).reset_receiver(f); - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - template - static inline void extract_inputs(InputTuple &my_input) { - join_helper::extract_inputs(my_input); - tbb::flow::get(my_input).extract_receiver(); - } -#endif - }; // join_helper - - template< > - struct join_helper<1> { - - template< typename TupleType, typename PortType > - static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { - tbb::flow::get<0>( my_input ).set_join_node_pointer(port); - } - - template< typename TupleType > - static inline void 
consume_reservations( TupleType &my_input ) { - tbb::flow::get<0>( my_input ).consume(); - } - - template< typename TupleType > - static inline void release_my_reservation( TupleType &my_input ) { - tbb::flow::get<0>( my_input ).release(); - } - - template - static inline void release_reservations( TupleType &my_input) { - release_my_reservation(my_input); - } - - template< typename InputTuple, typename OutputTuple > - static inline bool reserve( InputTuple &my_input, OutputTuple &out) { - return tbb::flow::get<0>( my_input ).reserve( tbb::flow::get<0>( out ) ); - } - - template - static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { - return tbb::flow::get<0>(my_input).get_item(tbb::flow::get<0>(out)); - } - - template - static inline bool get_items(InputTuple &my_input, OutputTuple &out) { - return get_my_item(my_input, out); - } - - template - static inline void reset_my_port(InputTuple &my_input) { - tbb::flow::get<0>(my_input).reset_port(); - } - - template - static inline void reset_ports(InputTuple& my_input) { - reset_my_port(my_input); - } - - template - static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { - tbb::flow::get<0>(my_input).set_my_key_func(tbb::flow::get<0>(my_key_funcs)); - tbb::flow::get<0>(my_key_funcs) = NULL; - } - - template< typename KeyFuncTuple> - static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - if(tbb::flow::get<0>(other_inputs).get_my_key_func()) { - tbb::flow::get<0>(my_inputs).set_my_key_func(tbb::flow::get<0>(other_inputs).get_my_key_func()->clone()); - } - } - template - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - tbb::flow::get<0>(my_input).reset_receiver(f); - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - template - static inline void extract_inputs(InputTuple &my_input) { - tbb::flow::get<0>(my_input).extract_receiver(); - } -#endif - }; // join_helper<1> - - //! 
The two-phase join port - template< typename T > - class reserving_port : public receiver { - public: - typedef T input_type; - typedef typename receiver::predecessor_type predecessor_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename receiver::predecessor_list_type predecessor_list_type; - typedef typename receiver::built_predecessors_type built_predecessors_type; -#endif - private: - // ----------- Aggregator ------------ - enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - , add_blt_pred, del_blt_pred, blt_pred_cnt, blt_pred_cpy -#endif - }; - enum op_stat {WAIT=0, SUCCEEDED, FAILED}; - typedef reserving_port class_type; - - class reserving_port_operation : public aggregated_operation { - public: - char type; - union { - T *my_arg; - predecessor_type *my_pred; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - size_t cnt_val; - predecessor_list_type *plist; -#endif - }; - reserving_port_operation(const T& e, op_type t) : - type(char(t)), my_arg(const_cast(&e)) {} - reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), - my_pred(const_cast(&s)) {} - reserving_port_operation(op_type t) : type(char(t)) {} - }; - - typedef internal::aggregating_functor handler_type; - friend class internal::aggregating_functor; - aggregator my_aggregator; - - void handle_operations(reserving_port_operation* op_list) { - reserving_port_operation *current; - bool no_predecessors; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case reg_pred: - no_predecessors = my_predecessors.empty(); - my_predecessors.add(*(current->my_pred)); - if ( no_predecessors ) { - (void) my_join->decrement_port_count(true); // may try to forward - } - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case rem_pred: - my_predecessors.remove(*(current->my_pred)); - if(my_predecessors.empty()) my_join->increment_port_count(); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case res_item: - if ( reserved ) { - __TBB_store_with_release(current->status, FAILED); - } - else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { - reserved = true; - __TBB_store_with_release(current->status, SUCCEEDED); - } else { - if ( my_predecessors.empty() ) { - my_join->increment_port_count(); - } - __TBB_store_with_release(current->status, FAILED); - } - break; - case rel_res: - reserved = false; - my_predecessors.try_release( ); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case con_res: - reserved = false; - my_predecessors.try_consume( ); - __TBB_store_with_release(current->status, SUCCEEDED); - break; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - case add_blt_pred: - my_predecessors.internal_add_built_predecessor(*(current->my_pred)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case del_blt_pred: - my_predecessors.internal_delete_built_predecessor(*(current->my_pred)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case blt_pred_cnt: - current->cnt_val = my_predecessors.predecessor_count(); - __TBB_store_with_release(current->status, SUCCEEDED); - break; - case blt_pred_cpy: - my_predecessors.copy_predecessors(*(current->plist)); - __TBB_store_with_release(current->status, SUCCEEDED); - break; -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - } - } - } - - protected: - template< typename R, typename B > friend class run_and_put_task; - template friend class internal::broadcast_cache; - template friend class internal::round_robin_cache; 
- task *try_put_task( const T & ) __TBB_override { - return NULL; - } - - public: - - //! Constructor - reserving_port() : reserved(false) { - my_join = NULL; - my_predecessors.set_owner( this ); - my_aggregator.initialize_handler(handler_type(this)); - } - - // copy constructor - reserving_port(const reserving_port& /* other */) : receiver() { - reserved = false; - my_join = NULL; - my_predecessors.set_owner( this ); - my_aggregator.initialize_handler(handler_type(this)); - } - - void set_join_node_pointer(forwarding_base *join) { - my_join = join; - } - - //! Add a predecessor - bool register_predecessor( predecessor_type &src ) __TBB_override { - reserving_port_operation op_data(src, reg_pred); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Remove a predecessor - bool remove_predecessor( predecessor_type &src ) __TBB_override { - reserving_port_operation op_data(src, rem_pred); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Reserve an item from the port - bool reserve( T &v ) { - reserving_port_operation op_data(v, res_item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Release the port - void release( ) { - reserving_port_operation op_data(rel_res); - my_aggregator.execute(&op_data); - } - - //! Complete use of the port - void consume( ) { - reserving_port_operation op_data(con_res); - my_aggregator.execute(&op_data); - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - built_predecessors_type &built_predecessors() __TBB_override { return my_predecessors.built_predecessors(); } - void internal_add_built_predecessor(predecessor_type &src) __TBB_override { - reserving_port_operation op_data(src, add_blt_pred); - my_aggregator.execute(&op_data); - } - - void internal_delete_built_predecessor(predecessor_type &src) __TBB_override { - reserving_port_operation op_data(src, del_blt_pred); - my_aggregator.execute(&op_data); - } - - size_t predecessor_count() __TBB_override { - reserving_port_operation op_data(blt_pred_cnt); - my_aggregator.execute(&op_data); - return op_data.cnt_val; - } - - void copy_predecessors(predecessor_list_type &l) __TBB_override { - reserving_port_operation op_data(blt_pred_cpy); - op_data.plist = &l; - my_aggregator.execute(&op_data); - } - - void extract_receiver() { - my_predecessors.built_predecessors().receiver_extract(*this); - } - -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - void reset_receiver( reset_flags f) __TBB_override { - if(f & rf_clear_edges) my_predecessors.clear(); - else - my_predecessors.reset(); - reserved = false; - __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); - } - - private: - forwarding_base *my_join; - reservable_predecessor_cache< T, null_mutex > my_predecessors; - bool reserved; - }; // reserving_port - - //! 
-    //! queueing join_port
-    template<typename T>
-    class queueing_port : public receiver<T>, public item_buffer<T> {
-    public:
-        typedef T input_type;
-        typedef typename receiver<input_type>::predecessor_type predecessor_type;
-        typedef queueing_port<T> class_type;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        typedef typename receiver<input_type>::built_predecessors_type built_predecessors_type;
-        typedef typename receiver<input_type>::predecessor_list_type predecessor_list_type;
-#endif
-
-        // ----------- Aggregator ------------
-    private:
-        enum op_type { get__item, res_port, try__put_task
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            , add_blt_pred, del_blt_pred, blt_pred_cnt, blt_pred_cpy
-#endif
-        };
-        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
-
-        class queueing_port_operation : public aggregated_operation<queueing_port_operation> {
-        public:
-            char type;
-            T my_val;
-            T *my_arg;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            predecessor_type *pred;
-            size_t cnt_val;
-            predecessor_list_type *plist;
-#endif
-            task * bypass_t;
-            // constructor for value parameter
-            queueing_port_operation(const T& e, op_type t) :
-                type(char(t)), my_val(e)
-                , bypass_t(NULL)
-            {}
-            // constructor for pointer parameter
-            queueing_port_operation(const T* p, op_type t) :
-                type(char(t)), my_arg(const_cast<T*>(p))
-                , bypass_t(NULL)
-            {}
-            // constructor with no parameter
-            queueing_port_operation(op_type t) : type(char(t))
-                , bypass_t(NULL)
-            {}
-        };
-
-        typedef internal::aggregating_functor<class_type, queueing_port_operation> handler_type;
-        friend class internal::aggregating_functor<class_type, queueing_port_operation>;
-        aggregator<handler_type, queueing_port_operation> my_aggregator;
-
-        void handle_operations(queueing_port_operation* op_list) {
-            queueing_port_operation *current;
-            bool was_empty;
-            while(op_list) {
-                current = op_list;
-                op_list = op_list->next;
-                switch(current->type) {
-                case try__put_task: {
-                        task *rtask = NULL;
-                        was_empty = this->buffer_empty();
-                        this->push_back(current->my_val);
-                        if (was_empty) rtask = my_join->decrement_port_count(false);
-                        else
-                            rtask = SUCCESSFULLY_ENQUEUED;
-                        current->bypass_t = rtask;
-                        __TBB_store_with_release(current->status, SUCCEEDED);
-                    }
-                    break;
-                case get__item:
-                    if(!this->buffer_empty()) {
-                        *(current->my_arg) = this->front();
-                        __TBB_store_with_release(current->status, SUCCEEDED);
-                    }
-                    else {
-                        __TBB_store_with_release(current->status, FAILED);
-                    }
-                    break;
-                case res_port:
-                    __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset");
-                    this->destroy_front();
-                    if(this->my_item_valid(this->my_head)) {
-                        (void)my_join->decrement_port_count(true);
-                    }
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-                case add_blt_pred:
-                    my_built_predecessors.add_edge(*(current->pred));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case del_blt_pred:
-                    my_built_predecessors.delete_edge(*(current->pred));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case blt_pred_cnt:
-                    current->cnt_val = my_built_predecessors.edge_count();
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case blt_pred_cpy:
-                    my_built_predecessors.copy_edges(*(current->plist));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
-                }
-            }
-        }
-        // ------------ End Aggregator ---------------
-
-    protected:
-        template< typename R, typename B > friend class run_and_put_task;
-        template<typename X, typename Y> friend class internal::broadcast_cache;
-        template<typename X, typename Y> friend class internal::round_robin_cache;
-        task *try_put_task(const T &v) __TBB_override {
-            queueing_port_operation op_data(v, try__put_task);
-            my_aggregator.execute(&op_data);
-            __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator");
-            if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED;
-            return op_data.bypass_t;
-        }
-
-    public:
-
-        //! Constructor
-        queueing_port() : item_buffer<T>() {
-            my_join = NULL;
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        //! copy constructor
-        queueing_port(const queueing_port& /* other */) : receiver<T>(), item_buffer<T>() {
-            my_join = NULL;
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        //! record parent for tallying available items
-        void set_join_node_pointer(forwarding_base *join) {
-            my_join = join;
-        }
-
-        bool get_item( T &v ) {
-            queueing_port_operation op_data(&v, get__item);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-        // reset_port is called when item is accepted by successor, but
-        // is initiated by join_node.
-        void reset_port() {
-            queueing_port_operation op_data(res_port);
-            my_aggregator.execute(&op_data);
-            return;
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; }
-
-        void internal_add_built_predecessor(predecessor_type &p) __TBB_override {
-            queueing_port_operation op_data(add_blt_pred);
-            op_data.pred = &p;
-            my_aggregator.execute(&op_data);
-        }
-
-        void internal_delete_built_predecessor(predecessor_type &p) __TBB_override {
-            queueing_port_operation op_data(del_blt_pred);
-            op_data.pred = &p;
-            my_aggregator.execute(&op_data);
-        }
-
-        size_t predecessor_count() __TBB_override {
-            queueing_port_operation op_data(blt_pred_cnt);
-            my_aggregator.execute(&op_data);
-            return op_data.cnt_val;
-        }
-
-        void copy_predecessors(predecessor_list_type &l) __TBB_override {
-            queueing_port_operation op_data(blt_pred_cpy);
-            op_data.plist = &l;
-            my_aggregator.execute(&op_data);
-        }
-
-        void extract_receiver() {
-            item_buffer<T>::reset();
-            my_built_predecessors.receiver_extract(*this);
-        }
-#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
-
-        void reset_receiver(reset_flags f) __TBB_override {
-            tbb::internal::suppress_unused_warning(f);
-            item_buffer<T>::reset();
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            if (f & rf_clear_edges)
-                my_built_predecessors.clear();
-#endif
-        }
-
-    private:
-        forwarding_base *my_join;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        edge_container<predecessor_type> my_built_predecessors;
-#endif
-    };  // queueing_port
-
-#include "_flow_graph_tagged_buffer_impl.h"
-
-    template<typename K>
-    struct count_element {
-        K my_key;
-        size_t my_value;
-    };
-
-    // method to access the key in the counting table
-    // the ref has already been removed from K
-    template< typename K >
-    struct key_to_count_functor {
-        typedef count_element<K> table_item_type;
-        const K& operator()(const table_item_type& v) { return v.my_key; }
-    };
-
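Editor's note: the queueing_port above buffers every pushed item in FIFO order, so a join with this policy never rejects input; a tuple is forwarded whenever all ports are non-empty. A minimal sketch of that behavior through the public API (not part of the diff; values are illustrative):

    #include "tbb/flow_graph.h"
    #include <iostream>

    int main() {
        using namespace tbb::flow;
        graph g;
        join_node< tuple<int, int>, queueing > j(g);   // queueing is also the default policy
        function_node< tuple<int, int>, continue_msg > sink(g, serial,
            [](const tuple<int, int> &t) {
                std::cout << get<0>(t) + get<1>(t) << std::endl;
                return continue_msg();
            });
        make_edge(j, sink);
        // Ports are receivers, so they accept pushes directly.
        input_port<0>(j).try_put(10);
        input_port<0>(j).try_put(20);
        input_port<1>(j).try_put(1);   // completes tuple (10, 1)
        input_port<1>(j).try_put(2);   // completes tuple (20, 2)
        g.wait_for_all();
        return 0;
    }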
-    // the ports can have only one template parameter.  We wrap the types needed in
-    // a traits type
-    template< class TraitsType >
-    class key_matching_port :
-        public receiver<typename TraitsType::T>,
-        public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK,
-                typename TraitsType::KHash > {
-    public:
-        typedef TraitsType traits;
-        typedef key_matching_port<traits> class_type;
-        typedef typename TraitsType::T input_type;
-        typedef typename TraitsType::K key_type;
-        typedef typename tbb::internal::strip<key_type>::type noref_key_type;
-        typedef typename receiver<input_type>::predecessor_type predecessor_type;
-        typedef typename TraitsType::TtoK type_to_key_func_type;
-        typedef typename TraitsType::KHash hash_compare_type;
-        typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        typedef typename receiver<input_type>::built_predecessors_type built_predecessors_type;
-        typedef typename receiver<input_type>::predecessor_list_type predecessor_list_type;
-#endif
-    private:
-// ----------- Aggregator ------------
-    private:
-        enum op_type { try__put, get__item, res_port
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            , add_blt_pred, del_blt_pred, blt_pred_cnt, blt_pred_cpy
-#endif
-        };
-        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
-
-        class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> {
-        public:
-            char type;
-            input_type my_val;
-            input_type *my_arg;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            predecessor_type *pred;
-            size_t cnt_val;
-            predecessor_list_type *plist;
-#endif
-            // constructor for value parameter
-            key_matching_port_operation(const input_type& e, op_type t) :
-                type(char(t)), my_val(e) {}
-            // constructor for pointer parameter
-            key_matching_port_operation(const input_type* p, op_type t) :
-                type(char(t)), my_arg(const_cast<input_type*>(p)) {}
-            // constructor with no parameter
-            key_matching_port_operation(op_type t) : type(char(t)) {}
-        };
-
-        typedef internal::aggregating_functor<class_type, key_matching_port_operation> handler_type;
-        friend class internal::aggregating_functor<class_type, key_matching_port_operation>;
-        aggregator<handler_type, key_matching_port_operation> my_aggregator;
-
-        void handle_operations(key_matching_port_operation* op_list) {
-            key_matching_port_operation *current;
-            while(op_list) {
-                current = op_list;
-                op_list = op_list->next;
-                switch(current->type) {
-                case try__put: {
-                        bool was_inserted = this->insert_with_key(current->my_val);
-                        // return failure if a duplicate insertion occurs
-                        __TBB_store_with_release(current->status, was_inserted ? SUCCEEDED : FAILED);
-                    }
-                    break;
-                case get__item:
-                    // use current_key from FE for item
-                    if(!this->find_with_key(my_join->current_key, *(current->my_arg))) {
-                        __TBB_ASSERT(false, "Failed to find item corresponding to current_key.");
-                    }
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case res_port:
-                    // use current_key from FE for item
-                    this->delete_with_key(my_join->current_key);
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-                case add_blt_pred:
-                    my_built_predecessors.add_edge(*(current->pred));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case del_blt_pred:
-                    my_built_predecessors.delete_edge(*(current->pred));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case blt_pred_cnt:
-                    current->cnt_val = my_built_predecessors.edge_count();
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case blt_pred_cpy:
-                    my_built_predecessors.copy_edges(*(current->plist));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-#endif
-                }
-            }
-        }
-// ------------ End Aggregator ---------------
-    protected:
-        template< typename R, typename B > friend class run_and_put_task;
-        template<typename X, typename Y> friend class internal::broadcast_cache;
-        template<typename X, typename Y> friend class internal::round_robin_cache;
-        task *try_put_task(const input_type& v) __TBB_override {
-            key_matching_port_operation op_data(v, try__put);
-            task *rtask = NULL;
-            my_aggregator.execute(&op_data);
-            if(op_data.status == SUCCEEDED) {
-                rtask = my_join->increment_key_count((*(this->get_key_func()))(v), false);  // may spawn
-                // rtask has to reflect the return status of the try_put
-                if(!rtask) rtask = SUCCESSFULLY_ENQUEUED;
-            }
-            return rtask;
-        }
-
-    public:
-
-        key_matching_port() : receiver<input_type>(), buffer_type() {
-            my_join = NULL;
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        // copy constructor
-        key_matching_port(const key_matching_port& /*other*/) : receiver<input_type>(), buffer_type() {
-            my_join = NULL;
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        ~key_matching_port() { }
-
-        void set_join_node_pointer(forwarding_base *join) {
-            my_join = dynamic_cast<matching_forwarding_base<key_type>*>(join);
-        }
-
-        void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); }
-
-        type_to_key_func_type* get_my_key_func() { return this->get_key_func(); }
-
-        bool get_item( input_type &v ) {
-            // aggregator uses current_key from FE for Key
-            key_matching_port_operation op_data(&v, get__item);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; }
-
-        void internal_add_built_predecessor(predecessor_type &p) __TBB_override {
-            key_matching_port_operation op_data(add_blt_pred);
-            op_data.pred = &p;
-            my_aggregator.execute(&op_data);
-        }
-
-        void internal_delete_built_predecessor(predecessor_type &p) __TBB_override {
-            key_matching_port_operation op_data(del_blt_pred);
-            op_data.pred = &p;
-            my_aggregator.execute(&op_data);
-        }
-
-        size_t predecessor_count() __TBB_override {
-            key_matching_port_operation op_data(blt_pred_cnt);
-            my_aggregator.execute(&op_data);
-            return op_data.cnt_val;
-        }
-
-        void copy_predecessors(predecessor_list_type &l) __TBB_override {
-            key_matching_port_operation op_data(blt_pred_cpy);
-            op_data.plist = &l;
-            my_aggregator.execute(&op_data);
-        }
-#endif
-
-        // reset_port is called when item is accepted by successor, but
-        // is initiated by join_node.
-        void reset_port() {
-            key_matching_port_operation op_data(res_port);
-            my_aggregator.execute(&op_data);
-            return;
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        void extract_receiver() {
-            buffer_type::reset();
-            my_built_predecessors.receiver_extract(*this);
-        }
-#endif
-        void reset_receiver(reset_flags f ) __TBB_override {
-            tbb::internal::suppress_unused_warning(f);
-            buffer_type::reset();
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            if (f & rf_clear_edges)
-                my_built_predecessors.clear();
-#endif
-        }
-
-    private:
-        // my_join forwarding base used to count number of inputs that
-        // received key.
-        matching_forwarding_base<key_type> *my_join;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        edge_container<predecessor_type> my_built_predecessors;
-#endif
-    };  // key_matching_port
-
-    using namespace graph_policy_namespace;
-
-    template<typename JP, typename InputTuple, typename OutputTuple>
-    class join_node_base;
-
-    //! join_node_FE : implements input port policy
-    template<typename JP, typename InputTuple, typename OutputTuple>
-    class join_node_FE;
-
-    template<typename InputTuple, typename OutputTuple>
-    class join_node_FE<reserving, InputTuple, OutputTuple> : public forwarding_base {
-    public:
-        static const int N = tbb::flow::tuple_size<OutputTuple>::value;
-        typedef OutputTuple output_type;
-        typedef InputTuple input_type;
-        typedef join_node_base<reserving, InputTuple, OutputTuple> base_node_type; // for forwarding
-
-        join_node_FE(graph &g) : forwarding_base(g), my_node(NULL) {
-            ports_with_no_inputs = N;
-            join_helper<N>::set_join_node_pointer(my_inputs, this);
-        }
-
-        join_node_FE(const join_node_FE& other) : forwarding_base(*(other.forwarding_base::graph_pointer)), my_node(NULL) {
-            ports_with_no_inputs = N;
-            join_helper<N>::set_join_node_pointer(my_inputs, this);
-        }
-
-        void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; }
-
-        void increment_port_count() __TBB_override {
-            ++ports_with_no_inputs;
-        }
-
-        // if all input_ports have predecessors, spawn forward to try and consume tuples
-        task * decrement_port_count(bool handle_task) __TBB_override {
-            if(ports_with_no_inputs.fetch_and_decrement() == 1) {
-                if(this->graph_pointer->is_active()) {
-                    task *rtask = new ( task::allocate_additional_child_of( *(this->graph_pointer->root_task()) ) )
-                        forward_task_bypass<base_node_type>(*my_node);
-                    if(!handle_task) return rtask;
-                    FLOW_SPAWN(*rtask);
-                }
-            }
-            return NULL;
-        }
-
-        input_type &input_ports() { return my_inputs; }
-
-    protected:
-
-        void reset( reset_flags f) {
-            // called outside of parallel contexts
-            ports_with_no_inputs = N;
-            join_helper<N>::reset_inputs(my_inputs, f);
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        void extract( ) {
-            // called outside of parallel contexts
-            ports_with_no_inputs = N;
-            join_helper<N>::extract_inputs(my_inputs);
-        }
-#endif
-
-        // all methods on input ports should be called under mutual exclusion from join_node_base.
-
-        bool tuple_build_may_succeed() {
-            return !ports_with_no_inputs;
-        }
-
-        bool try_to_make_tuple(output_type &out) {
-            if(ports_with_no_inputs) return false;
-            return join_helper<N>::reserve(my_inputs, out);
-        }
-
-        void tuple_accepted() {
-            join_helper<N>::consume_reservations(my_inputs);
-        }
-        void tuple_rejected() {
-            join_helper<N>::release_reservations(my_inputs);
-        }
-
-        input_type my_inputs;
-        base_node_type *my_node;
-        atomic<size_t> ports_with_no_inputs;
-    };  // join_node_FE<reserving, ... >
-
-    template<typename InputTuple, typename OutputTuple>
-    class join_node_FE<queueing, InputTuple, OutputTuple> : public forwarding_base {
-    public:
-        static const int N = tbb::flow::tuple_size<OutputTuple>::value;
-        typedef OutputTuple output_type;
-        typedef InputTuple input_type;
-        typedef join_node_base<queueing, InputTuple, OutputTuple> base_node_type; // for forwarding
-
-        join_node_FE(graph &g) : forwarding_base(g), my_node(NULL) {
-            ports_with_no_items = N;
-            join_helper<N>::set_join_node_pointer(my_inputs, this);
-        }
-
-        join_node_FE(const join_node_FE& other) : forwarding_base(*(other.forwarding_base::graph_pointer)), my_node(NULL) {
-            ports_with_no_items = N;
-            join_helper<N>::set_join_node_pointer(my_inputs, this);
-        }
-
-        // needed for forwarding
-        void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; }
-
-        void reset_port_count() {
-            ports_with_no_items = N;
-        }
-
-        // if all input_ports have items, spawn forward to try and consume tuples
-        task * decrement_port_count(bool handle_task) __TBB_override
-        {
-            if(ports_with_no_items.fetch_and_decrement() == 1) {
-                if(this->graph_pointer->is_active()) {
-                    task *rtask = new ( task::allocate_additional_child_of( *(this->graph_pointer->root_task()) ) )
-                        forward_task_bypass<base_node_type>(*my_node);
-                    if(!handle_task) return rtask;
-                    FLOW_SPAWN( *rtask);
-                }
-            }
-            return NULL;
-        }
-
-        void increment_port_count() __TBB_override { __TBB_ASSERT(false, NULL); }  // should never be called
-
-        input_type &input_ports() { return my_inputs; }
-
-    protected:
-
-        void reset( reset_flags f) {
-            reset_port_count();
-            join_helper<N>::reset_inputs(my_inputs, f );
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        void extract() {
-            reset_port_count();
-            join_helper<N>::extract_inputs(my_inputs);
-        }
-#endif
-        // all methods on input ports should be called under mutual exclusion from join_node_base.
-
-        bool tuple_build_may_succeed() {
-            return !ports_with_no_items;
-        }
-
-        bool try_to_make_tuple(output_type &out) {
-            if(ports_with_no_items) return false;
-            return join_helper<N>::get_items(my_inputs, out);
-        }
-
-        void tuple_accepted() {
-            reset_port_count();
-            join_helper<N>::reset_ports(my_inputs);
-        }
-        void tuple_rejected() {
-            // nothing to do.
-        }
-
-        input_type my_inputs;
-        base_node_type *my_node;
-        atomic<size_t> ports_with_no_items;
-    };  // join_node_FE<queueing, ... >
-
-    // key_matching join front-end.
-    template<typename InputTuple, typename OutputTuple, typename K, typename KHash>
-    class join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> : public matching_forwarding_base<K>,
-             // buffer of key value counts
-             public hash_buffer<   // typedefed below to key_to_count_buffer_type
-                 typename tbb::internal::strip<K>::type&,        // force ref type on K
-                 count_element<typename tbb::internal::strip<K>::type>,
-                 internal::type_to_key_function_body<
-                     count_element<typename tbb::internal::strip<K>::type>,
-                     typename tbb::internal::strip<K>::type& >,
-                 KHash >,
-             // buffer of output items
-             public item_buffer<OutputTuple> {
-    public:
-        static const int N = tbb::flow::tuple_size<OutputTuple>::value;
-        typedef OutputTuple output_type;
-        typedef InputTuple input_type;
-        typedef K key_type;
-        typedef typename tbb::internal::strip<K>::type unref_key_type;
-        typedef KHash key_hash_compare;
-        // must use K without ref.
-        typedef count_element<unref_key_type> count_element_type;
-        // method that lets us refer to the key of this type.
-        typedef key_to_count_functor<unref_key_type> key_to_count_func;
-        typedef internal::type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type;
-        typedef internal::type_to_key_function_body_leaf<count_element_type, unref_key_type&, key_to_count_func> TtoK_function_body_leaf_type;
-        // this is the type of the special table that keeps track of the number of discrete
-        // elements corresponding to each key that we've seen.
-        typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare >
-                 key_to_count_buffer_type;
-        typedef item_buffer<output_type> output_buffer_type;
-        typedef join_node_base<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> base_node_type; // for forwarding
-        typedef matching_forwarding_base<key_type> forwarding_base_type;
-
-// ----------- Aggregator ------------
-        // the aggregator is only needed to serialize the access to the hash table.
-        // and the output_buffer_type base class
-    private:
-        enum op_type { res_count, inc_count, may_succeed, try_make };
-        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
-        typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type;
-
-        class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> {
-        public:
-            char type;
-            unref_key_type my_val;
-            output_type* my_output;
-            task *bypass_t;
-            bool enqueue_task;
-            // constructor for value parameter
-            key_matching_FE_operation(const unref_key_type& e , bool q_task , op_type t) : type(char(t)), my_val(e),
-                 my_output(NULL), bypass_t(NULL), enqueue_task(q_task) {}
-            key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(NULL),
-                 enqueue_task(true) {}
-            // constructor with no parameter
-            key_matching_FE_operation(op_type t) : type(char(t)), my_output(NULL), bypass_t(NULL), enqueue_task(true) {}
-        };
-
-        typedef internal::aggregating_functor<class_type, key_matching_FE_operation> handler_type;
-        friend class internal::aggregating_functor<class_type, key_matching_FE_operation>;
-        aggregator<handler_type, key_matching_FE_operation> my_aggregator;
-
-        // called from aggregator, so serialized
-        // returns a task pointer if the a task would have been enqueued but we asked that
-        // it be returned.  Otherwise returns NULL.
-        task * fill_output_buffer(unref_key_type &t, bool should_enqueue, bool handle_task) {
-            output_type l_out;
-            task *rtask = NULL;
-            bool do_fwd = should_enqueue && this->buffer_empty() && this->graph_pointer->is_active();
-            this->current_key = t;
-            this->delete_with_key(this->current_key);   // remove the key
-            if(join_helper<N>::get_items(my_inputs, l_out)) {  //  <== call back
-                this->push_back(l_out);
-                if(do_fwd) {  // we enqueue if receiving an item from predecessor, not if successor asks for item
-                    rtask = new ( task::allocate_additional_child_of( *(this->graph_pointer->root_task()) ) )
-                        forward_task_bypass<base_node_type>(*my_node);
-                    if(handle_task) {
-                        FLOW_SPAWN(*rtask);
-                        rtask = NULL;
-                    }
-                    do_fwd = false;
-                }
-                // retire the input values
-                join_helper<N>::reset_ports(my_inputs);  //  <== call back
-            }
-            else {
-                __TBB_ASSERT(false, "should have had something to push");
-            }
-            return rtask;
-        }
-
-        void handle_operations(key_matching_FE_operation* op_list) {
-            key_matching_FE_operation *current;
-            while(op_list) {
-                current = op_list;
-                op_list = op_list->next;
-                switch(current->type) {
-                case res_count:  // called from BE
-                    {
-                        this->destroy_front();
-                        __TBB_store_with_release(current->status, SUCCEEDED);
-                    }
-                    break;
-                case inc_count: {  // called from input ports
-                        count_element_type *p = 0;
-                        unref_key_type &t = current->my_val;
-                        bool do_enqueue = current->enqueue_task;
-                        if(!(this->find_ref_with_key(t,p))) {
-                            count_element_type ev;
-                            ev.my_key = t;
-                            ev.my_value = 0;
-                            this->insert_with_key(ev);
-                            if(!(this->find_ref_with_key(t,p))) {
-                                __TBB_ASSERT(false, "should find key after inserting it");
-                            }
-                        }
-                        if(++(p->my_value) == size_t(N)) {
-                            task *rtask = fill_output_buffer(t, true, do_enqueue);
-                            __TBB_ASSERT(!rtask || !do_enqueue, "task should not be returned");
-                            current->bypass_t = rtask;
-                        }
-                    }
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case may_succeed:  // called from BE
-                    __TBB_store_with_release(current->status, this->buffer_empty() ? FAILED : SUCCEEDED);
-                    break;
-                case try_make:  // called from BE
-                    if(this->buffer_empty()) {
-                        __TBB_store_with_release(current->status, FAILED);
-                    }
-                    else {
-                        *(current->my_output) = this->front();
-                        __TBB_store_with_release(current->status, SUCCEEDED);
-                    }
-                    break;
-                }
-            }
-        }
-// ------------ End Aggregator ---------------
-
-    public:
-        template<typename FunctionTuple>
-        join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(NULL) {
-            join_helper<N>::set_join_node_pointer(my_inputs, this);
-            join_helper<N>::set_key_functors(my_inputs, TtoK_funcs);
-            my_aggregator.initialize_handler(handler_type(this));
-            TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func());
-            this->set_key_func(cfb);
-        }
-
-        join_node_FE(const join_node_FE& other) : forwarding_base_type(*(other.forwarding_base_type::graph_pointer)), key_to_count_buffer_type(),
-                output_buffer_type() {
-            my_node = NULL;
-            join_helper<N>::set_join_node_pointer(my_inputs, this);
-            join_helper<N>::copy_key_functors(my_inputs, const_cast<input_type &>(other.my_inputs));
-            my_aggregator.initialize_handler(handler_type(this));
-            TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func());
-            this->set_key_func(cfb);
-        }
-
-        // needed for forwarding
-        void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; }
-
-        void reset_port_count() {  // called from BE
-            key_matching_FE_operation op_data(res_count);
-            my_aggregator.execute(&op_data);
-            return;
-        }
-
-        // if all input_ports have items, spawn forward to try and consume tuples
-        // return a task if we are asked and did create one.
-        task *increment_key_count(unref_key_type const & t, bool handle_task) __TBB_override {  // called from input_ports
-            key_matching_FE_operation op_data(t, handle_task, inc_count);
-            my_aggregator.execute(&op_data);
-            return op_data.bypass_t;
-        }
-
-        task *decrement_port_count(bool /*handle_task*/) __TBB_override { __TBB_ASSERT(false, NULL); return NULL; }
-
-        void increment_port_count() __TBB_override { __TBB_ASSERT(false, NULL); }  // should never be called
-
-        input_type &input_ports() { return my_inputs; }
-
-    protected:
-
-        void reset( reset_flags f ) {
-            // called outside of parallel contexts
-            join_helper<N>::reset_inputs(my_inputs, f);
-
-            key_to_count_buffer_type::reset();
-            output_buffer_type::reset();
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        void extract() {
-            // called outside of parallel contexts
-            join_helper<N>::extract_inputs(my_inputs);
-            key_to_count_buffer_type::reset();  // have to reset the tag counts
-            output_buffer_type::reset();  // also the queue of outputs
-            // my_node->current_tag = NO_TAG;
-        }
-#endif
-        // all methods on input ports should be called under mutual exclusion from join_node_base.
-
-        bool tuple_build_may_succeed() {  // called from back-end
-            key_matching_FE_operation op_data(may_succeed);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-        // cannot lock while calling back to input_ports.  current_key will only be set
-        // and reset under the aggregator, so it will remain consistent.
-        bool try_to_make_tuple(output_type &out) {
-            key_matching_FE_operation op_data(&out,try_make);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-        void tuple_accepted() {
-            reset_port_count();  // reset current_key after ports reset.
-        }
-
-        void tuple_rejected() {
-            // nothing to do.
-        }
-
-        input_type my_inputs;  // input ports
-        base_node_type *my_node;
-    };  // join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple>
-
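Editor's note: the key-matching front end above counts, per key, how many ports hold a matching item and builds a tuple once the count reaches N. From the public API this corresponds to constructing the join with one key-extraction body per input port. A minimal sketch under that assumption; Left and Right are hypothetical message types, not from the diff.

    #include "tbb/flow_graph.h"
    #include <iostream>
    #include <string>

    struct Left  { int id; std::string text; };
    struct Right { int id; double value; };

    int main() {
        using namespace tbb::flow;
        graph g;
        // One key-extraction body per input port; a tuple forms when every
        // port holds an item with the same key.
        join_node< tuple<Left, Right>, key_matching<int> > j(g,
            [](const Left  &l) { return l.id; },
            [](const Right &r) { return r.id; });
        function_node< tuple<Left, Right>, continue_msg > sink(g, serial,
            [](const tuple<Left, Right> &t) {
                std::cout << get<0>(t).text << " = " << get<1>(t).value << std::endl;
                return continue_msg();
            });
        make_edge(j, sink);
        input_port<0>(j).try_put(Left{ 7, "seven" });
        input_port<1>(j).try_put(Right{ 7, 7.0 });   // key 7 now present on both ports
        g.wait_for_all();
        return 0;
    }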
-    //! join_node_base
-    template<typename JP, typename InputTuple, typename OutputTuple>
-    class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>,
-                           public sender<OutputTuple> {
-    protected:
-        using graph_node::my_graph;
-    public:
-        typedef OutputTuple output_type;
-
-        typedef typename sender<output_type>::successor_type successor_type;
-        typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type;
-        using input_ports_type::tuple_build_may_succeed;
-        using input_ports_type::try_to_make_tuple;
-        using input_ports_type::tuple_accepted;
-        using input_ports_type::tuple_rejected;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        typedef typename sender<output_type>::built_successors_type built_successors_type;
-        typedef typename sender<output_type>::successor_list_type successor_list_type;
-#endif
-
-    private:
-        // ----------- Aggregator ------------
-        enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-            , add_blt_succ, del_blt_succ, blt_succ_cnt, blt_succ_cpy
-#endif
-        };
-        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
-        typedef join_node_base<JP,InputTuple,OutputTuple> class_type;
-
-        class join_node_base_operation : public aggregated_operation<join_node_base_operation> {
-        public:
-            char type;
-            union {
-                output_type *my_arg;
-                successor_type *my_succ;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-                size_t cnt_val;
-                successor_list_type *slist;
-#endif
-            };
-            task *bypass_t;
-            join_node_base_operation(const output_type& e, op_type t) : type(char(t)),
-                my_arg(const_cast<output_type*>(&e)), bypass_t(NULL) {}
-            join_node_base_operation(const successor_type &s, op_type t) : type(char(t)),
-                my_succ(const_cast<successor_type *>(&s)), bypass_t(NULL) {}
-            join_node_base_operation(op_type t) : type(char(t)), bypass_t(NULL) {}
-        };
-
-        typedef internal::aggregating_functor<class_type, join_node_base_operation> handler_type;
-        friend class internal::aggregating_functor<class_type, join_node_base_operation>;
-        bool forwarder_busy;
-        aggregator<handler_type, join_node_base_operation> my_aggregator;
-
-        void handle_operations(join_node_base_operation* op_list) {
-            join_node_base_operation *current;
-            while(op_list) {
-                current = op_list;
-                op_list = op_list->next;
-                switch(current->type) {
-                case reg_succ: {
-                        my_successors.register_successor(*(current->my_succ));
-                        if(tuple_build_may_succeed() && !forwarder_busy && this->graph_node::my_graph.is_active()) {
-                            task *rtask = new ( task::allocate_additional_child_of(*(this->graph_node::my_graph.root_task())) )
-                                    forward_task_bypass< join_node_base<JP,InputTuple,OutputTuple> >(*this);
-                            FLOW_SPAWN(*rtask);
-                            forwarder_busy = true;
-                        }
-                        __TBB_store_with_release(current->status, SUCCEEDED);
-                    }
-                    break;
-                case rem_succ:
-                    my_successors.remove_successor(*(current->my_succ));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case try__get:
-                    if(tuple_build_may_succeed()) {
-                        if(try_to_make_tuple(*(current->my_arg))) {
-                            tuple_accepted();
-                            __TBB_store_with_release(current->status, SUCCEEDED);
-                        }
-                        else __TBB_store_with_release(current->status, FAILED);
-                    }
-                    else __TBB_store_with_release(current->status, FAILED);
-                    break;
-                case do_fwrd_bypass: {
-                        bool build_succeeded;
-                        task *last_task = NULL;
-                        output_type out;
-                        if(tuple_build_may_succeed()) {  // checks output queue of FE
-                            do {
-                                build_succeeded = try_to_make_tuple(out);  // fetch front_end of queue
-                                if(build_succeeded) {
-                                    task *new_task = my_successors.try_put_task(out);
-                                    last_task = combine_tasks(last_task, new_task);
-                                    if(new_task) {
-                                        tuple_accepted();
-                                    }
-                                    else {
-                                        tuple_rejected();
-                                        build_succeeded = false;
-                                    }
-                                }
-                            } while(build_succeeded);
-                        }
-                        current->bypass_t = last_task;
-                        __TBB_store_with_release(current->status, SUCCEEDED);
-                        forwarder_busy = false;
-                    }
-                    break;
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-                case add_blt_succ:
-                    my_successors.internal_add_built_successor(*(current->my_succ));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case del_blt_succ:
-                    my_successors.internal_delete_built_successor(*(current->my_succ));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case blt_succ_cnt:
-                    current->cnt_val = my_successors.successor_count();
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-                case blt_succ_cpy:
-                    my_successors.copy_successors(*(current->slist));
-                    __TBB_store_with_release(current->status, SUCCEEDED);
-                    break;
-#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
-                }
-            }
-        }
-        // ---------- end aggregator -----------
-    public:
-        join_node_base(graph &g) : graph_node(g), input_ports_type(g), forwarder_busy(false) {
-            my_successors.set_owner(this);
-            input_ports_type::set_my_node(this);
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        join_node_base(const join_node_base& other) :
-            graph_node(other.graph_node::my_graph), input_ports_type(other),
-            sender<OutputTuple>(), forwarder_busy(false), my_successors() {
-            my_successors.set_owner(this);
-            input_ports_type::set_my_node(this);
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        template<typename FunctionTuple>
-        join_node_base(graph &g, FunctionTuple f) : graph_node(g), input_ports_type(g, f), forwarder_busy(false) {
-            my_successors.set_owner(this);
-            input_ports_type::set_my_node(this);
-            my_aggregator.initialize_handler(handler_type(this));
-        }
-
-        bool register_successor(successor_type &r) __TBB_override {
-            join_node_base_operation op_data(r, reg_succ);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-        bool remove_successor( successor_type &r) __TBB_override {
-            join_node_base_operation op_data(r, rem_succ);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-        bool try_get( output_type &v) __TBB_override {
-            join_node_base_operation op_data(v, try__get);
-            my_aggregator.execute(&op_data);
-            return op_data.status == SUCCEEDED;
-        }
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); }
-
-        void internal_add_built_successor( successor_type &r) __TBB_override {
-            join_node_base_operation op_data(r, add_blt_succ);
-            my_aggregator.execute(&op_data);
-        }
-
-        void internal_delete_built_successor( successor_type &r) __TBB_override {
-            join_node_base_operation op_data(r, del_blt_succ);
-            my_aggregator.execute(&op_data);
-        }
-
-        size_t successor_count() __TBB_override {
-            join_node_base_operation op_data(blt_succ_cnt);
-            my_aggregator.execute(&op_data);
-            return op_data.cnt_val;
-        }
-
-        void copy_successors(successor_list_type &l) __TBB_override {
-            join_node_base_operation op_data(blt_succ_cpy);
-            op_data.slist = &l;
-            my_aggregator.execute(&op_data);
-        }
-#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
-
-#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
-        void extract() __TBB_override {
-            input_ports_type::extract();
-            my_successors.built_successors().sender_extract(*this);
-        }
-#endif
-
-    protected:
-
-        void reset_node(reset_flags f) __TBB_override {
-            input_ports_type::reset(f);
-            if(f & rf_clear_edges) my_successors.clear();
-        }
-
-    private:
-        broadcast_cache<output_type, null_rw_mutex> my_successors;
-
-        friend class forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> >;
-        task *forward_task() {
-            join_node_base_operation op_data(do_fwrd_bypass);
-            my_aggregator.execute(&op_data);
-            return op_data.bypass_t;
-        }
-
-    };  // join_node_base
-
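Editor's note: join_node_base makes the whole join a sender of the output tuple, so besides the push path handled by do_fwrd_bypass above, a tuple can also be pulled explicitly with try_get (the try__get aggregator operation). A minimal sketch, not part of the diff:

    #include "tbb/flow_graph.h"
    #include <iostream>

    int main() {
        using namespace tbb::flow;
        graph g;
        join_node< tuple<int, int> > j(g);   // no successor: completed tuples stay in the node
        input_port<0>(j).try_put(3);
        input_port<1>(j).try_put(4);
        g.wait_for_all();
        tuple<int, int> out;
        if (j.try_get(out))                  // serviced by the aggregator's try__get case
            std::cout << get<0>(out) << ", " << get<1>(out) << std::endl;
        return 0;
    }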
-    // join base class type generator
-    template<int N, template<class> class PT, typename OutputTuple, typename JP>
-    struct join_base {
-        typedef typename internal::join_node_base<JP, typename wrap_tuple_elements<N,PT,OutputTuple>::type, OutputTuple> type;
-    };
-
-    template<int N, typename OutputTuple, typename K, typename KHash>
-    struct join_base<N, key_matching_port, OutputTuple, key_matching<K,KHash> > {
-        typedef key_matching<K, KHash> key_traits_type;
-        typedef K key_type;
-        typedef KHash key_hash_compare;
-        typedef typename internal::join_node_base< key_traits_type,
-                // ports type
-                typename wrap_key_tuple_elements<N,key_matching_port,key_traits_type,OutputTuple>::type,
-                OutputTuple > type;
-    };
-
-    //! unfolded_join_node : passes input_ports_type to join_node_base.  We build the input port type
-    //  using tuple_element.  The class PT is the port type (reserving_port, queueing_port, key_matching_port)
-    //  and should match the typename.
-
-    template<int M, template<class> class PT, typename OutputTuple, typename JP>
-    class unfolded_join_node : public join_base<M,PT,OutputTuple,JP>::type {
-    public:
-        typedef typename wrap_tuple_elements<M, PT, OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<JP, input_ports_type, output_type > base_type;
-    public:
-        unfolded_join_node(graph &g) : base_type(g) {}
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-    template <typename K, typename T>
-    struct key_from_message_body {
-        K operator()(const T& t) const {
-            using tbb::flow::key_from_message;
-            return key_from_message<K>(t);
-        }
-    };
-    // Adds const to reference type
-    template <typename K, typename T>
-    struct key_from_message_body<K&,T> {
-        const K& operator()(const T& t) const {
-            using tbb::flow::key_from_message;
-            return key_from_message<const K&>(t);
-        }
-    };
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-    // key_matching unfolded_join_node.  This must be a separate specialization because the constructors
-    // differ.
-
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<2,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-    public:
-        typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 2, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<3,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-    public:
-        typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 3, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<4,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-    public:
-        typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 4, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<5,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-        typedef typename tbb::flow::tuple_element<4, OutputTuple>::type T4;
-    public:
-        typedef typename wrap_key_tuple_elements<5,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename internal::type_to_key_function_body<T4, K> *f4_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()),
-                    new internal::type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3),
-                    new internal::type_to_key_function_body_leaf<T4, K, Body4>(body4)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 5, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-
-#if __TBB_VARIADIC_MAX >= 6
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<6,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-        typedef typename tbb::flow::tuple_element<4, OutputTuple>::type T4;
-        typedef typename tbb::flow::tuple_element<5, OutputTuple>::type T5;
-    public:
-        typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename internal::type_to_key_function_body<T4, K> *f4_p;
-        typedef typename internal::type_to_key_function_body<T5, K> *f5_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()),
-                    new internal::type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()),
-                    new internal::type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename Body5>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5)
-                : base_type(g, func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3),
-                    new internal::type_to_key_function_body_leaf<T4, K, Body4>(body4),
-                    new internal::type_to_key_function_body_leaf<T5, K, Body5>(body5)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 6, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-#endif
-
-#if __TBB_VARIADIC_MAX >= 7
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<7,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-        typedef typename tbb::flow::tuple_element<4, OutputTuple>::type T4;
-        typedef typename tbb::flow::tuple_element<5, OutputTuple>::type T5;
-        typedef typename tbb::flow::tuple_element<6, OutputTuple>::type T6;
-    public:
-        typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename internal::type_to_key_function_body<T4, K> *f4_p;
-        typedef typename internal::type_to_key_function_body<T5, K> *f5_p;
-        typedef typename internal::type_to_key_function_body<T6, K> *f6_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()),
-                    new internal::type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()),
-                    new internal::type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()),
-                    new internal::type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename Body5, typename Body6>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4,
-                Body5 body5, Body6 body6) : base_type(g, func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3),
-                    new internal::type_to_key_function_body_leaf<T4, K, Body4>(body4),
-                    new internal::type_to_key_function_body_leaf<T5, K, Body5>(body5),
-                    new internal::type_to_key_function_body_leaf<T6, K, Body6>(body6)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 7, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-#endif
-
-#if __TBB_VARIADIC_MAX >= 8
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<8,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-        typedef typename tbb::flow::tuple_element<4, OutputTuple>::type T4;
-        typedef typename tbb::flow::tuple_element<5, OutputTuple>::type T5;
-        typedef typename tbb::flow::tuple_element<6, OutputTuple>::type T6;
-        typedef typename tbb::flow::tuple_element<7, OutputTuple>::type T7;
-    public:
-        typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename internal::type_to_key_function_body<T4, K> *f4_p;
-        typedef typename internal::type_to_key_function_body<T5, K> *f5_p;
-        typedef typename internal::type_to_key_function_body<T6, K> *f6_p;
-        typedef typename internal::type_to_key_function_body<T7, K> *f7_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()),
-                    new internal::type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()),
-                    new internal::type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()),
-                    new internal::type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()),
-                    new internal::type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4,
-                typename Body5, typename Body6, typename Body7>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4,
-                Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3),
-                    new internal::type_to_key_function_body_leaf<T4, K, Body4>(body4),
-                    new internal::type_to_key_function_body_leaf<T5, K, Body5>(body5),
-                    new internal::type_to_key_function_body_leaf<T6, K, Body6>(body6),
-                    new internal::type_to_key_function_body_leaf<T7, K, Body7>(body7)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 8, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-#endif
-
-#if __TBB_VARIADIC_MAX >= 9
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<9,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-        typedef typename tbb::flow::tuple_element<4, OutputTuple>::type T4;
-        typedef typename tbb::flow::tuple_element<5, OutputTuple>::type T5;
-        typedef typename tbb::flow::tuple_element<6, OutputTuple>::type T6;
-        typedef typename tbb::flow::tuple_element<7, OutputTuple>::type T7;
-        typedef typename tbb::flow::tuple_element<8, OutputTuple>::type T8;
-    public:
-        typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename internal::type_to_key_function_body<T4, K> *f4_p;
-        typedef typename internal::type_to_key_function_body<T5, K> *f5_p;
-        typedef typename internal::type_to_key_function_body<T6, K> *f6_p;
-        typedef typename internal::type_to_key_function_body<T7, K> *f7_p;
-        typedef typename internal::type_to_key_function_body<T8, K> *f8_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()),
-                    new internal::type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()),
-                    new internal::type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()),
-                    new internal::type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()),
-                    new internal::type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()),
-                    new internal::type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4,
-                typename Body5, typename Body6, typename Body7, typename Body8>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4,
-                Body5 body5, Body6 body6, Body7 body7, Body8 body8) : base_type(g, func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3),
-                    new internal::type_to_key_function_body_leaf<T4, K, Body4>(body4),
-                    new internal::type_to_key_function_body_leaf<T5, K, Body5>(body5),
-                    new internal::type_to_key_function_body_leaf<T6, K, Body6>(body6),
-                    new internal::type_to_key_function_body_leaf<T7, K, Body7>(body7),
-                    new internal::type_to_key_function_body_leaf<T8, K, Body8>(body8)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 9, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-#endif
-
-#if __TBB_VARIADIC_MAX >= 10
-    template<typename OutputTuple, typename K, typename KHash>
-    class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching<K,KHash> > : public
-            join_base<10,key_matching_port,OutputTuple,key_matching<K,KHash> >::type {
-        typedef typename tbb::flow::tuple_element<0, OutputTuple>::type T0;
-        typedef typename tbb::flow::tuple_element<1, OutputTuple>::type T1;
-        typedef typename tbb::flow::tuple_element<2, OutputTuple>::type T2;
-        typedef typename tbb::flow::tuple_element<3, OutputTuple>::type T3;
-        typedef typename tbb::flow::tuple_element<4, OutputTuple>::type T4;
-        typedef typename tbb::flow::tuple_element<5, OutputTuple>::type T5;
-        typedef typename tbb::flow::tuple_element<6, OutputTuple>::type T6;
-        typedef typename tbb::flow::tuple_element<7, OutputTuple>::type T7;
-        typedef typename tbb::flow::tuple_element<8, OutputTuple>::type T8;
-        typedef typename tbb::flow::tuple_element<9, OutputTuple>::type T9;
-    public:
-        typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type;
-        typedef OutputTuple output_type;
-    private:
-        typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type;
-        typedef typename internal::type_to_key_function_body<T0, K> *f0_p;
-        typedef typename internal::type_to_key_function_body<T1, K> *f1_p;
-        typedef typename internal::type_to_key_function_body<T2, K> *f2_p;
-        typedef typename internal::type_to_key_function_body<T3, K> *f3_p;
-        typedef typename internal::type_to_key_function_body<T4, K> *f4_p;
-        typedef typename internal::type_to_key_function_body<T5, K> *f5_p;
-        typedef typename internal::type_to_key_function_body<T6, K> *f6_p;
-        typedef typename internal::type_to_key_function_body<T7, K> *f7_p;
-        typedef typename internal::type_to_key_function_body<T8, K> *f8_p;
-        typedef typename internal::type_to_key_function_body<T9, K> *f9_p;
-        typedef typename tbb::flow::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type;
-    public:
-#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING
-        unfolded_join_node(graph &g) : base_type(g,
-                func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()),
-                    new internal::type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()),
-                    new internal::type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()),
-                    new internal::type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()),
-                    new internal::type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()),
-                    new internal::type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()),
-                    new internal::type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()),
-                    new internal::type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()),
-                    new internal::type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()),
-                    new internal::type_to_key_function_body_leaf<T9, K, key_from_message_body<K,T9> >(key_from_message_body<K,T9>())
-                    ) ) {
-        }
-#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */
-        template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4,
-                typename Body5, typename Body6, typename Body7, typename Body8, typename Body9>
-        unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4,
-                Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type(
-                    new internal::type_to_key_function_body_leaf<T0, K, Body0>(body0),
-                    new internal::type_to_key_function_body_leaf<T1, K, Body1>(body1),
-                    new internal::type_to_key_function_body_leaf<T2, K, Body2>(body2),
-                    new internal::type_to_key_function_body_leaf<T3, K, Body3>(body3),
-                    new internal::type_to_key_function_body_leaf<T4, K, Body4>(body4),
-                    new internal::type_to_key_function_body_leaf<T5, K, Body5>(body5),
-                    new internal::type_to_key_function_body_leaf<T6, K, Body6>(body6),
-                    new internal::type_to_key_function_body_leaf<T7, K, Body7>(body7),
-                    new internal::type_to_key_function_body_leaf<T8, K, Body8>(body8),
-                    new internal::type_to_key_function_body_leaf<T9, K, Body9>(body9)
-                    ) ) {
-            __TBB_STATIC_ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 10, "wrong number of body initializers");
-        }
-        unfolded_join_node(const unfolded_join_node &other) : base_type(other) {}
-    };
-#endif
-
-    //! templated function to refer to input ports of the join node
-    template<size_t N, typename JNT>
-    typename tbb::flow::tuple_element<N, typename JNT::output_type>::type &input_port(JNT &jn) {
-        return tbb::flow::get<N>(jn.input_ports());
-    }
-
-}
-#endif // __TBB__flow_graph_join_impl_H
-
Constructor for function_input_base - function_input_base( graph &g, size_t max_concurrency, input_queue_type *q = NULL) - : my_graph_ptr(&g), my_max_concurrency(max_concurrency), my_concurrency(0), - my_queue(q), forwarder_busy(false) { - my_predecessors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - //! Copy constructor - function_input_base( const function_input_base& src, input_queue_type *q = NULL) : - receiver(), tbb::internal::no_assign(), - my_graph_ptr(src.my_graph_ptr), my_max_concurrency(src.my_max_concurrency), - my_concurrency(0), my_queue(q), forwarder_busy(false) - { - my_predecessors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - //! Destructor - // The queue is allocated by the constructor for {multi}function_node. - // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. - // This would be an interface-breaking change. - virtual ~function_input_base() { - if ( my_queue ) delete my_queue; - } - - //! Put to the node, returning a task if available - task * try_put_task( const input_type &t ) __TBB_override { - if ( my_max_concurrency == 0 ) { - return create_body_task( t ); - } else { - operation_type op_data(t, tryput_bypass); - my_aggregator.execute(&op_data); - if(op_data.status == internal::SUCCEEDED) { - return op_data.bypass_t; - } - return NULL; - } - } - - //! Adds src to the list of cached predecessors. - bool register_predecessor( predecessor_type &src ) __TBB_override { - operation_type op_data(reg_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - return true; - } - - //! Removes src from the list of cached predecessors. - bool remove_predecessor( predecessor_type &src ) __TBB_override { - operation_type op_data(rem_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - //! Adds to list of predecessors added by make_edge - void internal_add_built_predecessor( predecessor_type &src) __TBB_override { - operation_type op_data(add_blt_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - } - - //! 
removes from to list of predecessors (used by remove_edge) - void internal_delete_built_predecessor( predecessor_type &src) __TBB_override { - operation_type op_data(del_blt_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - } - - size_t predecessor_count() __TBB_override { - operation_type op_data(blt_pred_cnt); - my_aggregator.execute(&op_data); - return op_data.cnt_val; - } - - void copy_predecessors(predecessor_list_type &v) __TBB_override { - operation_type op_data(blt_pred_cpy); - op_data.predv = &v; - my_aggregator.execute(&op_data); - } - - built_predecessors_type &built_predecessors() __TBB_override { - return my_predecessors.built_predecessors(); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - protected: - - void reset_function_input_base( reset_flags f) { - my_concurrency = 0; - if(my_queue) { - my_queue->reset(); - } - reset_receiver(f); - forwarder_busy = false; - } - - graph* my_graph_ptr; - const size_t my_max_concurrency; - size_t my_concurrency; - input_queue_type *my_queue; - predecessor_cache my_predecessors; - - void reset_receiver( reset_flags f) __TBB_override { - if( f & rf_clear_edges) my_predecessors.clear(); - else - my_predecessors.reset(); - __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); - } - - private: - - friend class apply_body_task_bypass< class_type, input_type >; - friend class forward_task_bypass< class_type >; - - class operation_type : public aggregated_operation< operation_type > { - public: - char type; - union { - input_type *elem; - predecessor_type *r; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - size_t cnt_val; - predecessor_list_type *predv; -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - }; - tbb::task *bypass_t; - operation_type(const input_type& e, op_type t) : - type(char(t)), elem(const_cast(&e)) {} - operation_type(op_type t) : type(char(t)), r(NULL) {} - }; - - bool forwarder_busy; - typedef internal::aggregating_functor handler_type; - friend class internal::aggregating_functor; - aggregator< handler_type, operation_type > my_aggregator; - - task* create_and_spawn_task(bool spawn) { - task* new_task = NULL; - if(my_queue) { - if(!my_queue->empty()) { - ++my_concurrency; - new_task = create_body_task(my_queue->front()); - - my_queue->pop(); - } - } - else { - input_type i; - if(my_predecessors.get_item(i)) { - ++my_concurrency; - new_task = create_body_task(i); - } - } - //! 
Spawns a task that applies a body - // task == NULL => g.reset(), which shouldn't occur in concurrent context - if(spawn && new_task) { - FLOW_SPAWN(*new_task); - new_task = SUCCESSFULLY_ENQUEUED; - } - - return new_task; - } - void handle_operations(operation_type *op_list) { - operation_type *tmp; - while (op_list) { - tmp = op_list; - op_list = op_list->next; - switch (tmp->type) { - case reg_pred: - my_predecessors.add(*(tmp->r)); - __TBB_store_with_release(tmp->status, SUCCEEDED); - if (!forwarder_busy) { - forwarder_busy = true; - spawn_forward_task(); - } - break; - case rem_pred: - my_predecessors.remove(*(tmp->r)); - __TBB_store_with_release(tmp->status, SUCCEEDED); - break; - case app_body: - __TBB_ASSERT(my_max_concurrency != 0, NULL); - --my_concurrency; - __TBB_store_with_release(tmp->status, SUCCEEDED); - if (my_concurrencybypass_t = NULL; - __TBB_ASSERT(my_max_concurrency != 0, NULL); - --my_concurrency; - if(my_concurrencybypass_t = create_and_spawn_task(/*spawn=*/false); - - __TBB_store_with_release(tmp->status, SUCCEEDED); - } - break; - case tryput_bypass: internal_try_put_task(tmp); break; - case try_fwd: internal_forward(tmp); break; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - case add_blt_pred: { - my_predecessors.internal_add_built_predecessor(*(tmp->r)); - __TBB_store_with_release(tmp->status, SUCCEEDED); - } - break; - case del_blt_pred: - my_predecessors.internal_delete_built_predecessor(*(tmp->r)); - __TBB_store_with_release(tmp->status, SUCCEEDED); - break; - case blt_pred_cnt: - tmp->cnt_val = my_predecessors.predecessor_count(); - __TBB_store_with_release(tmp->status, SUCCEEDED); - break; - case blt_pred_cpy: - my_predecessors.copy_predecessors( *(tmp->predv) ); - __TBB_store_with_release(tmp->status, SUCCEEDED); - break; -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - } - } - } - - //! Put to the node, but return the task instead of enqueueing it - void internal_try_put_task(operation_type *op) { - __TBB_ASSERT(my_max_concurrency != 0, NULL); - if (my_concurrency < my_max_concurrency) { - ++my_concurrency; - task * new_task = create_body_task(*(op->elem)); - op->bypass_t = new_task; - __TBB_store_with_release(op->status, SUCCEEDED); - } else if ( my_queue && my_queue->push(*(op->elem)) ) { - op->bypass_t = SUCCESSFULLY_ENQUEUED; - __TBB_store_with_release(op->status, SUCCEEDED); - } else { - op->bypass_t = NULL; - __TBB_store_with_release(op->status, FAILED); - } - } - - //! Tries to spawn bodies if available and if concurrency allows - void internal_forward(operation_type *op) { - op->bypass_t = NULL; - if (my_concurrency < my_max_concurrency || !my_max_concurrency) - op->bypass_t = create_and_spawn_task(/*spawn=*/false); - if(op->bypass_t) - __TBB_store_with_release(op->status, SUCCEEDED); - else { - forwarder_busy = false; - __TBB_store_with_release(op->status, FAILED); - } - } - - //! Applies the body to the provided input - // then decides if more work is available - task * apply_body_bypass( input_type &i ) { - task * new_task = static_cast(this)->apply_body_impl_bypass(i); - if ( my_max_concurrency != 0 ) { - operation_type op_data(app_body_bypass); // tries to pop an item or get_item, enqueues another apply_body - my_aggregator.execute(&op_data); - tbb::task *ttask = op_data.bypass_t; - new_task = combine_tasks(new_task, ttask); - } - return new_task; - } - - //! allocates a task to apply a body - inline task * create_body_task( const input_type &input ) { - - return (my_graph_ptr->is_active()) ? 
- new(task::allocate_additional_child_of(*(my_graph_ptr->root_task()))) - apply_body_task_bypass < class_type, input_type >(*this, input) : - NULL; - } - - //! This is executed by an enqueued task, the "forwarder" - task *forward_task() { - operation_type op_data(try_fwd); - task *rval = NULL; - do { - op_data.status = WAIT; - my_aggregator.execute(&op_data); - if(op_data.status == SUCCEEDED) { - tbb::task *ttask = op_data.bypass_t; - rval = combine_tasks(rval, ttask); - } - } while (op_data.status == SUCCEEDED); - return rval; - } - - inline task *create_forward_task() { - return (my_graph_ptr->is_active()) ? - new(task::allocate_additional_child_of(*(my_graph_ptr->root_task()))) forward_task_bypass< class_type >(*this) : - NULL; - } - - //! Spawns a task that calls forward() - inline void spawn_forward_task() { - task* tp = create_forward_task(); - if(tp) { - FLOW_SPAWN(*tp); - } - } - }; // function_input_base - - //! Implements methods for a function node that takes a type Input as input and sends - // a type Output to its successors. - template< typename Input, typename Output, typename A> - class function_input : public function_input_base > { - public: - typedef Input input_type; - typedef Output output_type; - typedef function_body function_body_type; - typedef function_input my_class; - typedef function_input_base base_type; - typedef function_input_queue input_queue_type; - - // constructor - template - function_input( graph &g, size_t max_concurrency, Body& body, input_queue_type *q = NULL ) : - base_type(g, max_concurrency, q), - my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ), - my_init_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { - } - - //! Copy constructor - function_input( const function_input& src, input_queue_type *q = NULL ) : - base_type(src, q), - my_body( src.my_init_body->clone() ), - my_init_body(src.my_init_body->clone() ) { - } - - ~function_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - function_body_type &body_ref = *this->my_body; - return dynamic_cast< internal::function_body_leaf & >(body_ref).get_body(); - } - - task * apply_body_impl_bypass( const input_type &i) { -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - // There is an extra copied needed to capture the - // body execution without the try_put - tbb::internal::fgt_begin_body( my_body ); - output_type v = (*my_body)(i); - tbb::internal::fgt_end_body( my_body ); - task * new_task = successors().try_put_task( v ); -#else - task * new_task = successors().try_put_task( (*my_body)(i) ); -#endif - return new_task; - } - - protected: - - void reset_function_input(reset_flags f) { - base_type::reset_function_input_base(f); - if(f & rf_reset_bodies) { - function_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - - function_body_type *my_body; - function_body_type *my_init_body; - virtual broadcast_cache &successors() = 0; - - }; // function_input - - - // helper templates to clear the successor edges of the output ports of an multifunction_node - template struct clear_element { - template static void clear_this(P &p) { - (void)tbb::flow::get(p).successors().clear(); - clear_element::clear_this(p); - } - template static bool this_empty(P &p) { - if(tbb::flow::get(p).successors().empty()) - return clear_element::this_empty(p); - return false; - } - }; - - template<> struct clear_element<1> { - template static void clear_this(P &p) { - 
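// [sketch] handle_operations above (and forward_task's drain loop) realize
// TBB's aggregator pattern: every state change on the node is encoded as an
// operation record and handled by one thread at a time, so no field needs
// its own lock. The shape of that dispatch, reduced to a mutex-based toy
// (the real aggregator chains records into a lock-free list instead):
#include <mutex>

enum toy_op_type { toy_add, toy_remove, toy_count };

class toy_aggregated_node {
    std::mutex mtx_;
    int items_;
public:
    struct op_record { toy_op_type type; int result; };

    toy_aggregated_node() : items_(0) {}

    void execute( op_record& op ) {
        std::lock_guard<std::mutex> lock(mtx_);   // serialization point
        switch ( op.type ) {                      // cf. the switch in handle_operations
            case toy_add:    ++items_;            break;
            case toy_remove: --items_;            break;
            case toy_count:  op.result = items_;  break;
        }
    }
};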
(void)tbb::flow::get<0>(p).successors().clear(); - } - template static bool this_empty(P &p) { - return tbb::flow::get<0>(p).successors().empty(); - } - }; - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - // helper templates to extract the output ports of an multifunction_node from graph - template struct extract_element { - template static void extract_this(P &p) { - (void)tbb::flow::get(p).successors().built_successors().sender_extract(tbb::flow::get(p)); - extract_element::extract_this(p); - } - }; - - template<> struct extract_element<1> { - template static void extract_this(P &p) { - (void)tbb::flow::get<0>(p).successors().built_successors().sender_extract(tbb::flow::get<0>(p)); - } - }; -#endif - - //! Implements methods for a function node that takes a type Input as input - // and has a tuple of output ports specified. - template< typename Input, typename OutputPortSet, typename A> - class multifunction_input : public function_input_base > { - public: - static const int N = tbb::flow::tuple_size::value; - typedef Input input_type; - typedef OutputPortSet output_ports_type; - typedef multifunction_body multifunction_body_type; - typedef multifunction_input my_class; - typedef function_input_base base_type; - typedef function_input_queue input_queue_type; - - // constructor - template - multifunction_input( - graph &g, - size_t max_concurrency, - Body& body, - input_queue_type *q = NULL ) : - base_type(g, max_concurrency, q), - my_body( new internal::multifunction_body_leaf(body) ), - my_init_body( new internal::multifunction_body_leaf(body) ) { - } - - //! Copy constructor - multifunction_input( const multifunction_input& src, input_queue_type *q = NULL ) : - base_type(src, q), - my_body( src.my_init_body->clone() ), - my_init_body(src.my_init_body->clone() ) { - } - - ~multifunction_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - multifunction_body_type &body_ref = *this->my_body; - return *static_cast(dynamic_cast< internal::multifunction_body_leaf & >(body_ref).get_body_ptr()); - } - - // for multifunction nodes we do not have a single successor as such. So we just tell - // the task we were successful. 
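// [sketch] multifunction_input above carries a tuple of output ports rather
// than a single successor set; the public multifunction_node hands that
// tuple to the body so one input can be routed to any subset of outputs.
// The even/odd splitter below is illustrative.
#include "tbb/flow_graph.h"
#include <tuple>

inline void multifunction_sketch() {
    using namespace tbb::flow;
    graph g;
    typedef multifunction_node< int, tuple<int, int> > splitter_t;
    splitter_t splitter( g, unlimited,
        []( const int& v, splitter_t::output_ports_type& ports ) {
            if ( v % 2 == 0 ) std::get<0>(ports).try_put(v);  // evens -> port 0
            else              std::get<1>(ports).try_put(v);  // odds  -> port 1
        } );
    queue_node<int> evens(g), odds(g);
    make_edge( output_port<0>(splitter), evens );
    make_edge( output_port<1>(splitter), odds );
    for ( int i = 0; i < 10; ++i ) splitter.try_put(i);
    g.wait_for_all();
}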
- task * apply_body_impl_bypass( const input_type &i) { - tbb::internal::fgt_begin_body( my_body ); - (*my_body)(i, my_output_ports); - tbb::internal::fgt_end_body( my_body ); - task * new_task = SUCCESSFULLY_ENQUEUED; - return new_task; - } - - output_ports_type &output_ports(){ return my_output_ports; } - - protected: -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - void extract() { - extract_element::extract_this(my_output_ports); - } -#endif - - void reset(reset_flags f) { - base_type::reset_function_input_base(f); - if(f & rf_clear_edges)clear_element::clear_this(my_output_ports); - if(f & rf_reset_bodies) { - multifunction_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - __TBB_ASSERT(!(f & rf_clear_edges) || clear_element::this_empty(my_output_ports), "multifunction_node reset failed"); - } - - multifunction_body_type *my_body; - multifunction_body_type *my_init_body; - output_ports_type my_output_ports; - - }; // multifunction_input - - // template to refer to an output port of a multifunction_node - template - typename tbb::flow::tuple_element::type &output_port(MOP &op) { - return tbb::flow::get(op.output_ports()); - } - -// helper structs for split_node - template - struct emit_element { - template - static void emit_this(const T &t, P &p) { - (void)tbb::flow::get(p).try_put(tbb::flow::get(t)); - emit_element::emit_this(t,p); - } - }; - - template<> - struct emit_element<1> { - template - static void emit_this(const T &t, P &p) { - (void)tbb::flow::get<0>(p).try_put(tbb::flow::get<0>(t)); - } - }; - - //! Implements methods for an executable node that takes continue_msg as input - template< typename Output > - class continue_input : public continue_receiver { - public: - - //! The input type of this receiver - typedef continue_msg input_type; - - //! The output type of this receiver - typedef Output output_type; - typedef function_body function_body_type; - - template< typename Body > - continue_input( graph &g, Body& body ) - : my_graph_ptr(&g), - my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ), - my_init_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { } - - template< typename Body > - continue_input( graph &g, int number_of_predecessors, Body& body ) - : continue_receiver( number_of_predecessors ), my_graph_ptr(&g), - my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ), - my_init_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) - { } - - continue_input( const continue_input& src ) : continue_receiver(src), - my_graph_ptr(src.my_graph_ptr), - my_body( src.my_init_body->clone() ), - my_init_body( src.my_init_body->clone() ) {} - - ~continue_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - function_body_type &body_ref = *my_body; - return dynamic_cast< internal::function_body_leaf & >(body_ref).get_body(); - } - - void reset_receiver( reset_flags f) __TBB_override { - continue_receiver::reset_receiver(f); - if(f & rf_reset_bodies) { - function_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - - protected: - - graph* my_graph_ptr; - function_body_type *my_body; - function_body_type *my_init_body; - - virtual broadcast_cache &successors() = 0; - - friend class apply_body_task_bypass< continue_input< Output >, continue_msg >; - - //! 
Applies the body to the provided input - task *apply_body_bypass( input_type ) { -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - // There is an extra copied needed to capture the - // body execution without the try_put - tbb::internal::fgt_begin_body( my_body ); - output_type v = (*my_body)( continue_msg() ); - tbb::internal::fgt_end_body( my_body ); - return successors().try_put_task( v ); -#else - return successors().try_put_task( (*my_body)( continue_msg() ) ); -#endif - } - - //! Spawns a task that applies the body - task *execute( ) __TBB_override { - return (my_graph_ptr->is_active()) ? - new ( task::allocate_additional_child_of( *(my_graph_ptr->root_task()) ) ) - apply_body_task_bypass< continue_input< Output >, continue_msg >( *this, continue_msg() ) : - NULL; - } - - }; // continue_input - - //! Implements methods for both executable and function nodes that puts Output to its successors - template< typename Output > - class function_output : public sender { - public: - - template friend struct clear_element; - typedef Output output_type; - typedef typename sender::successor_type successor_type; - typedef broadcast_cache broadcast_cache_type; -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - typedef typename sender::built_successors_type built_successors_type; - typedef typename sender::successor_list_type successor_list_type; -#endif - - function_output() { my_successors.set_owner(this); } - function_output(const function_output & /*other*/) : sender() { - my_successors.set_owner(this); - } - - //! Adds a new successor to this node - bool register_successor( successor_type &r ) __TBB_override { - successors().register_successor( r ); - return true; - } - - //! Removes a successor from this node - bool remove_successor( successor_type &r ) __TBB_override { - successors().remove_successor( r ); - return true; - } - -#if TBB_PREVIEW_FLOW_GRAPH_FEATURES - built_successors_type &built_successors() __TBB_override { return successors().built_successors(); } - - - void internal_add_built_successor( successor_type &r) __TBB_override { - successors().internal_add_built_successor( r ); - } - - void internal_delete_built_successor( successor_type &r) __TBB_override { - successors().internal_delete_built_successor( r ); - } - - size_t successor_count() __TBB_override { - return successors().successor_count(); - } - - void copy_successors( successor_list_type &v) __TBB_override { - successors().copy_successors(v); - } -#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */ - - // for multifunction_node. The function_body that implements - // the node will have an input and an output tuple of ports. To put - // an item to a successor, the body should - // - // get(output_ports).try_put(output_value); - // - // if task pointer is returned will always spawn and return true, else - // return value will be bool returned from successors.try_put. 
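// [sketch] continue_input above counts continue_msg signals from its
// predecessors and fires the body once all have arrived, which is what the
// public continue_node builds on for dependency graphs. The stages below
// are illustrative no-ops.
#include "tbb/flow_graph.h"

inline void continue_node_sketch() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<continue_msg> start(g);
    continue_node<continue_msg> a(g, []( continue_msg ) { /* stage A */ return continue_msg(); });
    continue_node<continue_msg> b(g, []( continue_msg ) { /* stage B */ return continue_msg(); });
    continue_node<continue_msg> c(g, []( continue_msg ) { /* after A and B */ return continue_msg(); });
    make_edge(start, a);
    make_edge(start, b);
    make_edge(a, c);   // c's predecessor count is tracked by make_edge,
    make_edge(b, c);   // so c runs only after both a and b signal it
    start.try_put(continue_msg());
    g.wait_for_all();
}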
- task *try_put_task(const output_type &i) { // not a virtual method in this class - return my_successors.try_put_task(i); - } - - broadcast_cache_type &successors() { return my_successors; } - protected: - broadcast_cache_type my_successors; - - }; // function_output - - template< typename Output > - class multifunction_output : public function_output { - public: - typedef Output output_type; - typedef function_output base_type; - using base_type::my_successors; - - multifunction_output() : base_type() {my_successors.set_owner(this);} - multifunction_output( const multifunction_output &/*other*/) : base_type() { my_successors.set_owner(this); } - - bool try_put(const output_type &i) { - task *res = my_successors.try_put_task(i); - if(!res) return false; - if(res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res); - return true; - } - }; // multifunction_output - -//composite_node -#if TBB_PREVIEW_FLOW_GRAPH_TRACE && __TBB_FLOW_GRAPH_CPP11_FEATURES - template - void add_nodes_impl(CompositeType*, bool) {} - - template< typename CompositeType, typename NodeType1, typename... NodeTypes > - void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { - void *addr = const_cast(&n1); - - if(visible) - tbb::internal::itt_relation_add( tbb::internal::ITT_DOMAIN_FLOW, c_node, tbb::internal::FLOW_NODE, tbb::internal::__itt_relation_is_parent_of, addr, tbb::internal::FLOW_NODE ); - else - tbb::internal::itt_relation_add( tbb::internal::ITT_DOMAIN_FLOW, addr, tbb::internal::FLOW_NODE, tbb::internal::__itt_relation_is_child_of, c_node, tbb::internal::FLOW_NODE ); - add_nodes_impl(c_node, visible, n...); - } -#endif - -} // internal - -#endif // __TBB__flow_graph_node_impl_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_streaming_node.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_streaming_node.h deleted file mode 100644 index 493f76e8..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_streaming_node.h +++ /dev/null @@ -1,745 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_flow_graph_streaming_H -#define __TBB_flow_graph_streaming_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#if __TBB_PREVIEW_STREAMING_NODE - -// Included in namespace tbb::flow::interfaceX (in flow_graph.h) - -namespace internal { - -template -struct port_ref_impl { - // "+1" since the port_ref range is a closed interval (includes its endpoints). - static const int size = N2 - N1 + 1; -}; - -} // internal - -// The purpose of the port_ref_impl is the pretty syntax: the deduction of a compile-time constant is processed from the return type. -// So it is possible to use this helper without parentheses, e.g. "port_ref<0>". 
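// [sketch] port_ref above is a pure tag type: when one appears in a kernel
// argument list, the convert_and_call machinery below substitutes the live
// values of the referenced ports before invoking the kernel. A single-port
// toy version of that substitution, outside TBB:
#include <tuple>

template <int N> struct port_tag {};

// Ordinary arguments pass through unchanged...
template <typename Tuple, typename T>
const T& resolve( const Tuple&, const T& arg ) { return arg; }

// ...while a tag is swapped for the matching tuple element.
template <typename Tuple, int N>
auto resolve( const Tuple& ports, port_tag<N> ) -> decltype( std::get<N>(ports) ) {
    return std::get<N>(ports);
}

inline double resolve_sketch() {
    std::tuple<int, double> ports(7, 3.14);
    return resolve(ports, 42) + resolve(ports, port_tag<1>());  // 42 + 3.14
}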
-template -internal::port_ref_impl port_ref() { - return internal::port_ref_impl(); -}; - -namespace internal { - -template -struct num_arguments { - static const int value = 1; -}; - -template -struct num_arguments(*)()> { - static const int value = port_ref_impl::size; -}; - -template -struct num_arguments> { - static const int value = port_ref_impl::size; -}; - -template -void ignore_return_values( Args&&... ) {} - -template -T or_return_values( T&& t ) { return t; } -template -T or_return_values( T&& t, Rest&&... rest ) { - return t | or_return_values( std::forward(rest)... ); -} - -template -struct key_from_policy { - typedef size_t type; - typedef std::false_type is_key_matching; -}; - -template -struct key_from_policy< key_matching > { - typedef Key type; - typedef std::true_type is_key_matching; -}; - -template -struct key_from_policy< key_matching > { - typedef const Key &type; - typedef std::true_type is_key_matching; -}; - -template -class streaming_device_with_key { - Device my_device; - typename std::decay::type my_key; -public: - // TODO: investigate why default constructor is required - streaming_device_with_key() {} - streaming_device_with_key( const Device& d, Key k ) : my_device( d ), my_key( k ) {} - Key key() const { return my_key; } - const Device& device() const { return my_device; } -}; - -// --------- Kernel argument helpers --------- // -template -struct is_port_ref_impl { - typedef std::false_type type; -}; - -template -struct is_port_ref_impl< port_ref_impl > { - typedef std::true_type type; -}; - -template -struct is_port_ref_impl< port_ref_impl( * )() > { - typedef std::true_type type; -}; - -template -struct is_port_ref { - typedef typename is_port_ref_impl< typename tbb::internal::strip::type >::type type; -}; - -template -struct convert_and_call_impl; - -template -struct convert_and_call_impl { - static const size_t my_delta = 1; // Index 0 contains device - - template - static void doit(F& f, Tuple& t, A1& a1, Args1&... args1, Args2&... args2) { - convert_and_call_impl::doit_impl(typename is_port_ref::type(), f, t, a1, args1..., args2...); - } - template - static void doit_impl(std::false_type, F& f, Tuple& t, A1& a1, Args1&... args1, Args2&... args2) { - convert_and_call_impl::doit(f, t, args1..., args2..., a1); - } - template - static void doit_impl(std::true_type x, F& f, Tuple& t, port_ref_impl, Args1&... args1, Args2&... args2) { - convert_and_call_impl, Args1...>::doit_impl(x, f, t, port_ref(), args1..., - args2..., std::get(t)); - } - template - static void doit_impl(std::true_type, F& f, Tuple& t, port_ref_impl, Args1&... args1, Args2&... args2) { - convert_and_call_impl::doit(f, t, args1..., args2..., std::get(t)); - } - - template - static void doit_impl(std::true_type x, F& f, Tuple& t, port_ref_impl(* fn)(), Args1&... args1, Args2&... args2) { - doit_impl(x, f, t, fn(), args1..., args2...); - } - template - static void doit_impl(std::true_type x, F& f, Tuple& t, port_ref_impl(* fn)(), Args1&... args1, Args2&... args2) { - doit_impl(x, f, t, fn(), args1..., args2...); - } -}; - -template <> -struct convert_and_call_impl<> { - template - static void doit(F& f, Tuple&, Args2&... args2) { - f(args2...); - } -}; -// ------------------------------------------- // - -template -struct streaming_node_traits { - // Do not use 'using' instead of 'struct' because Microsoft Visual C++ 12.0 fails to compile. 
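// [sketch] key_from_policy above selects the join key type: with
// key_matching the user's key is used, otherwise a size_t "epoch" counter
// stands in, so the Nth message on every port matches the Nth on the
// others (cf. get_key in device_selector further down). The counter idiom
// in isolation:
#include <cstddef>

struct epoch_key {
    std::size_t next;
    epoch_key() : next(0) {}
    std::size_t operator()() { return next++; }  // Nth call -> key N
};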
- template - struct async_msg_type { - typedef typename StreamFactory::template async_msg_type type; - }; - - typedef tuple< typename async_msg_type::type... > input_tuple; - typedef input_tuple output_tuple; - typedef tuple< streaming_device_with_key< typename StreamFactory::device_type, typename key_from_policy::type >, - typename async_msg_type::type... > kernel_input_tuple; - - // indexer_node parameters pack expansion workaround for VS2013 for streaming_node - typedef indexer_node< typename async_msg_type::type... > indexer_node_type; -}; - -// Default empty implementation -template -class kernel_executor_helper { - typedef typename StreamFactory::device_type device_type; - typedef typename StreamFactory::kernel_type kernel_type; - typedef KernelInputTuple kernel_input_tuple; -protected: - template - void enqueue_kernel_impl( kernel_input_tuple&, StreamFactory& factory, device_type device, const kernel_type& kernel, Args&... args ) const { - factory.send_kernel( device, kernel, args... ); - } -}; - -// Implementation for StreamFactory supporting range -template -class kernel_executor_helper::type > { - typedef typename StreamFactory::device_type device_type; - typedef typename StreamFactory::kernel_type kernel_type; - typedef KernelInputTuple kernel_input_tuple; - - typedef typename StreamFactory::range_type range_type; - - // Container for randge. It can contain either port references or real range. - struct range_wrapper { - virtual range_type get_range( const kernel_input_tuple &ip ) const = 0; - virtual range_wrapper *clone() const = 0; - virtual ~range_wrapper() {} - }; - - struct range_value : public range_wrapper { - range_value( const range_type& value ) : my_value(value) {} - - range_value( range_type&& value ) : my_value(std::move(value)) {} - - range_type get_range( const kernel_input_tuple & ) const __TBB_override { - return my_value; - } - - range_wrapper *clone() const __TBB_override { - return new range_value(my_value); - } - private: - range_type my_value; - }; - - template - struct range_mapper : public range_wrapper { - range_mapper() {} - - range_type get_range( const kernel_input_tuple &ip ) const __TBB_override { - // "+1" since get<0>(ip) is StreamFactory::device. - return get(ip).data(false); - } - - range_wrapper *clone() const __TBB_override { - return new range_mapper; - } - }; - -protected: - template - void enqueue_kernel_impl( kernel_input_tuple& ip, StreamFactory& factory, device_type device, const kernel_type& kernel, Args&... args ) const { - __TBB_ASSERT(my_range_wrapper, "Range is not set. Call set_range() before running streaming_node."); - factory.send_kernel( device, kernel, my_range_wrapper->get_range(ip), args... ); - } - -public: - kernel_executor_helper() : my_range_wrapper(NULL) {} - - kernel_executor_helper(const kernel_executor_helper& executor) : my_range_wrapper(executor.my_range_wrapper ? 
executor.my_range_wrapper->clone() : NULL) {} - - kernel_executor_helper(kernel_executor_helper&& executor) : my_range_wrapper(executor.my_range_wrapper) { - // Set moving holder mappers to NULL to prevent double deallocation - executor.my_range_wrapper = NULL; - } - - ~kernel_executor_helper() { - if (my_range_wrapper) delete my_range_wrapper; - } - - void set_range(const range_type& work_size) { - my_range_wrapper = new range_value(work_size); - } - - void set_range(range_type&& work_size) { - my_range_wrapper = new range_value(std::move(work_size)); - } - - template - void set_range(port_ref_impl) { - my_range_wrapper = new range_mapper; - } - - template - void set_range(port_ref_impl(*)()) { - my_range_wrapper = new range_mapper; - } - -private: - range_wrapper* my_range_wrapper; -}; - -} // internal - -/* -/---------------------------------------- streaming_node ------------------------------------\ -| | -| /--------------\ /----------------------\ /-----------\ /----------------------\ | -| | | | (device_with_key) O---O | | | | -| | | | | | | | | | -O---O indexer_node O---O device_selector_node O---O join_node O---O kernel_node O---O -| | | | (multifunction_node) | | | | (multifunction_node) | | -O---O | | O---O | | O---O -| \--------------/ \----------------------/ \-----------/ \----------------------/ | -| | -\--------------------------------------------------------------------------------------------/ -*/ -template -class streaming_node; - -template -class streaming_node< tuple, JP, StreamFactory > - : public composite_node < typename internal::streaming_node_traits::input_tuple, - typename internal::streaming_node_traits::output_tuple > - , public internal::kernel_executor_helper< StreamFactory, typename internal::streaming_node_traits::kernel_input_tuple > -{ - typedef typename internal::streaming_node_traits::input_tuple input_tuple; - typedef typename internal::streaming_node_traits::output_tuple output_tuple; - typedef typename internal::key_from_policy::type key_type; -protected: - typedef typename StreamFactory::device_type device_type; - typedef typename StreamFactory::kernel_type kernel_type; -private: - typedef internal::streaming_device_with_key device_with_key_type; - typedef composite_node base_type; - static const size_t NUM_INPUTS = tuple_size::value; - static const size_t NUM_OUTPUTS = tuple_size::value; - - typedef typename internal::make_sequence::type input_sequence; - typedef typename internal::make_sequence::type output_sequence; - - typedef typename internal::streaming_node_traits::indexer_node_type indexer_node_type; - typedef typename indexer_node_type::output_type indexer_node_output_type; - typedef typename internal::streaming_node_traits::kernel_input_tuple kernel_input_tuple; - typedef multifunction_node device_selector_node; - typedef multifunction_node kernel_multifunction_node; - - template - typename base_type::input_ports_type get_input_ports( internal::sequence ) { - return std::tie( internal::input_port( my_indexer_node )... ); - } - - template - typename base_type::output_ports_type get_output_ports( internal::sequence ) { - return std::tie( internal::output_port( my_kernel_node )... 
); - } - - typename base_type::input_ports_type get_input_ports() { - return get_input_ports( input_sequence() ); - } - - typename base_type::output_ports_type get_output_ports() { - return get_output_ports( output_sequence() ); - } - - template - int make_Nth_edge() { - make_edge( internal::output_port( my_device_selector_node ), internal::input_port( my_join_node ) ); - return 0; - } - - template - void make_edges( internal::sequence ) { - make_edge( my_indexer_node, my_device_selector_node ); - make_edge( my_device_selector_node, my_join_node ); - internal::ignore_return_values( make_Nth_edge()... ); - make_edge( my_join_node, my_kernel_node ); - } - - void make_edges() { - make_edges( input_sequence() ); - } - - class device_selector_base { - public: - virtual void operator()( const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) = 0; - virtual device_selector_base *clone( streaming_node &n ) const = 0; - virtual ~device_selector_base() {} - }; - - template - class device_selector : public device_selector_base, tbb::internal::no_assign { - public: - device_selector( UserFunctor uf, streaming_node &n, StreamFactory &f ) - : my_dispatch_funcs( create_dispatch_funcs( input_sequence() ) ) - , my_user_functor( uf ), my_node(n), my_factory( f ) - { - my_port_epoches.fill( 0 ); - } - - void operator()( const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) __TBB_override { - (this->*my_dispatch_funcs[ v.tag() ])( my_port_epoches[ v.tag() ], v, op ); - __TBB_ASSERT( (tbb::internal::is_same_type::is_key_matching, std::false_type>::value) - || my_port_epoches[v.tag()] == 0, "Epoch is changed when key matching is requested" ); - } - - device_selector_base *clone( streaming_node &n ) const __TBB_override { - return new device_selector( my_user_functor, n, my_factory ); - } - private: - typedef void(device_selector::*send_and_put_fn_type)(size_t &, const indexer_node_output_type &, typename device_selector_node::output_ports_type &); - typedef std::array < send_and_put_fn_type, NUM_INPUTS > dispatch_funcs_type; - - template - static dispatch_funcs_type create_dispatch_funcs( internal::sequence ) { - dispatch_funcs_type dispatch = { { &device_selector::send_and_put_impl... 
} }; - return dispatch; - } - - template - key_type get_key( std::false_type, const T &, size_t &epoch ) { - __TBB_STATIC_ASSERT( (tbb::internal::is_same_type::value), "" ); - return epoch++; - } - - template - key_type get_key( std::true_type, const T &t, size_t &/*epoch*/ ) { - using tbb::flow::key_from_message; - return key_from_message( t ); - } - - template - void send_and_put_impl( size_t &epoch, const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) { - typedef typename tuple_element::type::output_type elem_type; - elem_type e = internal::cast_to( v ); - device_type device = get_device( get_key( typename internal::key_from_policy::is_key_matching(), e, epoch ), get<0>( op ) ); - my_factory.send_data( device, e ); - get( op ).try_put( e ); - } - - template< typename DevicePort > - device_type get_device( key_type key, DevicePort& dp ) { - typename std::unordered_map::type, epoch_desc>::iterator it = my_devices.find( key ); - if ( it == my_devices.end() ) { - device_type d = my_user_functor( my_factory ); - std::tie( it, std::ignore ) = my_devices.insert( std::make_pair( key, d ) ); - bool res = dp.try_put( device_with_key_type( d, key ) ); - __TBB_ASSERT_EX( res, NULL ); - my_node.notify_new_device( d ); - } - epoch_desc &e = it->second; - device_type d = e.my_device; - if ( ++e.my_request_number == NUM_INPUTS ) my_devices.erase( it ); - return d; - } - - struct epoch_desc { - epoch_desc(device_type d ) : my_device( d ), my_request_number( 0 ) {} - device_type my_device; - size_t my_request_number; - }; - - std::unordered_map::type, epoch_desc> my_devices; - std::array my_port_epoches; - dispatch_funcs_type my_dispatch_funcs; - UserFunctor my_user_functor; - streaming_node &my_node; - StreamFactory &my_factory; - }; - - class device_selector_body { - public: - device_selector_body( device_selector_base *d ) : my_device_selector( d ) {} - - void operator()( const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) { - (*my_device_selector)(v, op); - } - private: - device_selector_base *my_device_selector; - }; - - class args_storage_base : tbb::internal::no_copy { - public: - typedef typename kernel_multifunction_node::output_ports_type output_ports_type; - - virtual void enqueue( kernel_input_tuple &ip, output_ports_type &op, const streaming_node &n ) = 0; - virtual void send( device_type d ) = 0; - virtual args_storage_base *clone() const = 0; - virtual ~args_storage_base () {} - - protected: - args_storage_base( const kernel_type& kernel, StreamFactory &f ) - : my_kernel( kernel ), my_factory( f ) - {} - - args_storage_base( const args_storage_base &k ) - : my_kernel( k.my_kernel ), my_factory( k.my_factory ) - {} - - const kernel_type my_kernel; - StreamFactory &my_factory; - }; - - template - class args_storage : public args_storage_base { - typedef typename args_storage_base::output_ports_type output_ports_type; - - // ---------- Update events helpers ---------- // - template - bool do_try_put( const kernel_input_tuple& ip, output_ports_type &op ) const { - const auto& t = get( ip ); - auto &port = get( op ); - return port.try_put( t ); - } - - template - bool do_try_put( const kernel_input_tuple& ip, output_ports_type &op, internal::sequence ) const { - return internal::or_return_values( do_try_put( ip, op )... 
); - } - - // ------------------------------------------- // - class run_kernel_func : tbb::internal::no_assign { - public: - run_kernel_func( kernel_input_tuple &ip, const streaming_node &node, const args_storage& storage ) - : my_kernel_func( ip, node, storage, get<0>(ip).device() ) {} - - // It is immpossible to use Args... because a function pointer cannot be casted to a function reference implicitly. - // Allow the compiler to deduce types for function pointers automatically. - template - void operator()( FnArgs&... args ) { - internal::convert_and_call_impl::doit( my_kernel_func, my_kernel_func.my_ip, args... ); - } - private: - struct kernel_func : tbb::internal::no_copy { - kernel_input_tuple &my_ip; - const streaming_node &my_node; - const args_storage& my_storage; - device_type my_device; - - kernel_func( kernel_input_tuple &ip, const streaming_node &node, const args_storage& storage, device_type device ) - : my_ip( ip ), my_node( node ), my_storage( storage ), my_device( device ) - {} - - template - void operator()( FnArgs&... args ) { - my_node.enqueue_kernel( my_ip, my_storage.my_factory, my_device, my_storage.my_kernel, args... ); - } - } my_kernel_func; - }; - - template - class run_finalize_func : tbb::internal::no_assign { - public: - run_finalize_func( kernel_input_tuple &ip, StreamFactory &factory, FinalizeFn fn ) - : my_ip( ip ), my_finalize_func( factory, get<0>(ip).device(), fn ) {} - - // It is immpossible to use Args... because a function pointer cannot be casted to a function reference implicitly. - // Allow the compiler to deduce types for function pointers automatically. - template - void operator()( FnArgs&... args ) { - internal::convert_and_call_impl::doit( my_finalize_func, my_ip, args... ); - } - private: - kernel_input_tuple &my_ip; - - struct finalize_func : tbb::internal::no_assign { - StreamFactory &my_factory; - device_type my_device; - FinalizeFn my_fn; - - finalize_func( StreamFactory &factory, device_type device, FinalizeFn fn ) - : my_factory(factory), my_device(device), my_fn(fn) {} - - template - void operator()( FnArgs&... args ) { - my_factory.finalize( my_device, my_fn, args... ); - } - } my_finalize_func; - }; - - template - static run_finalize_func make_run_finalize_func( kernel_input_tuple &ip, StreamFactory &factory, FinalizeFn fn ) { - return run_finalize_func( ip, factory, fn ); - } - - class send_func : tbb::internal::no_assign { - public: - send_func( StreamFactory &factory, device_type d ) - : my_factory(factory), my_device( d ) {} - - template - void operator()( FnArgs&... args ) { - my_factory.send_data( my_device, args... ); - } - private: - StreamFactory &my_factory; - device_type my_device; - }; - - public: - args_storage( const kernel_type& kernel, StreamFactory &f, Args&&... args ) - : args_storage_base( kernel, f ) - , my_args_pack( std::forward(args)... ) - {} - - args_storage( const args_storage &k ) : args_storage_base( k ), my_args_pack( k.my_args_pack ) {} - - args_storage( const args_storage_base &k, Args&&... args ) : args_storage_base( k ), my_args_pack( std::forward(args)... 
) {} - - void enqueue( kernel_input_tuple &ip, output_ports_type &op, const streaming_node &n ) __TBB_override { - // Make const qualified args_pack (from non-const) - const args_pack_type& const_args_pack = my_args_pack; - // factory.enqure_kernel() gets - // - 'ip' tuple elements by reference and updates it (and 'ip') with dependencies - // - arguments (from my_args_pack) by const-reference via const_args_pack - tbb::internal::call( run_kernel_func( ip, n, *this ), const_args_pack ); - - if (! do_try_put( ip, op, input_sequence() ) ) { - graph& g = n.my_graph; - // No one message was passed to successors so set a callback to extend the graph lifetime until the kernel completion. - g.increment_wait_count(); - - // factory.finalize() gets - // - 'ip' tuple elements by reference, so 'ip' might be changed - // - arguments (from my_args_pack) by const-reference via const_args_pack - tbb::internal::call( make_run_finalize_func(ip, this->my_factory, [&g] { - g.decrement_wait_count(); - }), const_args_pack ); - } - } - - void send( device_type d ) __TBB_override { - // factory.send() gets arguments by reference and updates these arguments with dependencies - // (it gets but usually ignores port_ref-s) - tbb::internal::call( send_func( this->my_factory, d ), my_args_pack ); - } - - args_storage_base *clone() const __TBB_override { - // Create new args_storage with copying constructor. - return new args_storage( *this ); - } - - private: - typedef tbb::internal::stored_pack args_pack_type; - args_pack_type my_args_pack; - }; - - // Body for kernel_multifunction_node. - class kernel_body : tbb::internal::no_assign { - public: - kernel_body( const streaming_node &node ) : my_node( node ) {} - - void operator()( kernel_input_tuple ip, typename args_storage_base::output_ports_type &op ) { - __TBB_ASSERT( (my_node.my_args_storage != NULL), "No arguments storage" ); - // 'ip' is passed by value to create local copy for updating inside enqueue_kernel() - my_node.my_args_storage->enqueue( ip, op, my_node ); - } - private: - const streaming_node &my_node; - }; - - template ::type > - struct wrap_to_async { - typedef T type; // Keep port_ref as it is - }; - - template - struct wrap_to_async { - typedef typename StreamFactory::template async_msg_type< typename tbb::internal::strip::type > type; - }; - - template - args_storage_base *make_args_storage(const args_storage_base& storage, Args&&... args) const { - // In this variadic template convert all simple types 'T' into 'async_msg_type' - return new args_storage(storage, std::forward(args)...); - } - - void notify_new_device( device_type d ) { - my_args_storage->send( d ); - } - - template - void enqueue_kernel( kernel_input_tuple& ip, StreamFactory& factory, device_type device, const kernel_type& kernel, Args&... args ) const { - this->enqueue_kernel_impl( ip, factory, device, kernel, args... ); - } - -public: - template - streaming_node( graph &g, const kernel_type& kernel, DeviceSelector d, StreamFactory &f ) - : base_type( g ) - , my_indexer_node( g ) - , my_device_selector( new device_selector( d, *this, f ) ) - , my_device_selector_node( g, serial, device_selector_body( my_device_selector ) ) - , my_join_node( g ) - , my_kernel_node( g, serial, kernel_body( *this ) ) - // By default, streaming_node maps all its ports to the kernel arguments on a one-to-one basis. 
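// [sketch] args_storage above freezes the kernel arguments at set_args()
// time and replays them for every invocation via tbb::internal::call over a
// stored_pack. The replay is the standard "expand a tuple into a call"
// trick; a standalone version using C++14 facilities (TBB ships its own
// index-sequence shim to do the same in C++11):
#include <tuple>
#include <utility>

template <typename F, typename Tuple, std::size_t... I>
void call_with_pack_impl( F& f, const Tuple& t, std::index_sequence<I...> ) {
    f( std::get<I>(t)... );                 // expand stored args into the call
}

template <typename F, typename... Args>
void call_with_pack( F& f, const std::tuple<Args...>& pack ) {
    call_with_pack_impl( f, pack, std::index_sequence_for<Args...>() );
}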
- , my_args_storage( make_args_storage( args_storage<>(kernel, f), port_ref<0, NUM_INPUTS - 1>() ) ) - { - base_type::set_external_ports( get_input_ports(), get_output_ports() ); - make_edges(); - } - - streaming_node( const streaming_node &node ) - : base_type( node.my_graph ) - , my_indexer_node( node.my_indexer_node ) - , my_device_selector( node.my_device_selector->clone( *this ) ) - , my_device_selector_node( node.my_graph, serial, device_selector_body( my_device_selector ) ) - , my_join_node( node.my_join_node ) - , my_kernel_node( node.my_graph, serial, kernel_body( *this ) ) - , my_args_storage( node.my_args_storage->clone() ) - { - base_type::set_external_ports( get_input_ports(), get_output_ports() ); - make_edges(); - } - - streaming_node( streaming_node &&node ) - : base_type( node.my_graph ) - , my_indexer_node( std::move( node.my_indexer_node ) ) - , my_device_selector( node.my_device_selector->clone(*this) ) - , my_device_selector_node( node.my_graph, serial, device_selector_body( my_device_selector ) ) - , my_join_node( std::move( node.my_join_node ) ) - , my_kernel_node( node.my_graph, serial, kernel_body( *this ) ) - , my_args_storage( node.my_args_storage ) - { - base_type::set_external_ports( get_input_ports(), get_output_ports() ); - make_edges(); - // Set moving node mappers to NULL to prevent double deallocation. - node.my_args_storage = NULL; - } - - ~streaming_node() { - if ( my_args_storage ) delete my_args_storage; - if ( my_device_selector ) delete my_device_selector; - } - - template - void set_args( Args&&... args ) { - // Copy the base class of args_storage and create new storage for "Args...". - args_storage_base * const new_args_storage = make_args_storage( *my_args_storage, typename wrap_to_async::type(std::forward(args))...); - delete my_args_storage; - my_args_storage = new_args_storage; - } - -protected: - void reset_node( reset_flags = rf_reset_protocol ) __TBB_override { __TBB_ASSERT( false, "Not implemented yet" ); } - -private: - indexer_node_type my_indexer_node; - device_selector_base *my_device_selector; - device_selector_node my_device_selector_node; - join_node my_join_node; - kernel_multifunction_node my_kernel_node; - - args_storage_base *my_args_storage; -}; - -#endif // __TBB_PREVIEW_STREAMING_NODE -#endif // __TBB_flow_graph_streaming_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_tagged_buffer_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_tagged_buffer_impl.h deleted file mode 100644 index 46755fe0..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_tagged_buffer_impl.h +++ /dev/null @@ -1,253 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -// a hash table buffer that can expand, and can support as many deletions as -// additions, list-based, with elements of list held in array (for destruction -// management), multiplicative hashing (like ets). No synchronization built-in. 
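// [sketch] The free-list discipline described above, in isolation: the
// element array is threaded into a singly linked list once, and erase
// pushes nodes back instead of deallocating (cf. set_up_free_list and
// delete_with_key below). Types are illustrative.
#include <cstddef>

struct toy_node { int value; toy_node* next; };

inline toy_node* build_free_list( toy_node* arr, std::size_t n ) {
    if ( n == 0 ) return 0;
    for ( std::size_t i = 0; i + 1 < n; ++i ) arr[i].next = &arr[i + 1];
    arr[n - 1].next = 0;                    // terminate the chain
    return &arr[0];
}

inline toy_node* pop_free( toy_node*& free_list ) {
    toy_node* n = free_list;                // caller grows the table first,
    free_list = free_list->next;            // so the list is never empty here
    return n;
}

inline void push_free( toy_node*& free_list, toy_node* n ) {
    n->next = free_list;
    free_list = n;
}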
-// - -#ifndef __TBB__flow_graph_hash_buffer_impl_H -#define __TBB__flow_graph_hash_buffer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::flow::interfaceX::internal - -// elements in the table are a simple list; we need pointer to next element to -// traverse the chain -template -struct buffer_element_type { - // the second parameter below is void * because we can't forward-declare the type - // itself, so we just reinterpret_cast below. - typedef typename aligned_pair::type type; -}; - -template - < - typename Key, // type of key within ValueType - typename ValueType, - typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType - typename HashCompare, // has hash and equal - typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair::type > - > -class hash_buffer : public HashCompare { -public: - static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table - typedef ValueType value_type; - typedef typename buffer_element_type< value_type >::type element_type; - typedef value_type *pointer_type; - typedef element_type *list_array_type; // array we manage manually - typedef list_array_type *pointer_array_type; - typedef typename Allocator::template rebind::other pointer_array_allocator_type; - typedef typename Allocator::template rebind::other elements_array_allocator; - typedef typename tbb::internal::strip::type Knoref; - -private: - ValueToKey *my_key; - size_t my_size; - size_t nelements; - pointer_array_type pointer_array; // pointer_array[my_size] - list_array_type elements_array; // elements_array[my_size / 2] - element_type* free_list; - - size_t mask() { return my_size - 1; } - - void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { - for(size_t i=0; i < sz - 1; ++i ) { // construct free list - la[i].second = &(la[i+1]); - } - la[sz-1].second = NULL; - *p_free_list = (element_type *)&(la[0]); - } - - // cleanup for exceptions - struct DoCleanup { - pointer_array_type *my_pa; - list_array_type *my_elements; - size_t my_size; - - DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : - my_pa(&pa), my_elements(&my_els), my_size(sz) { } - ~DoCleanup() { - if(my_pa) { - size_t dont_care = 0; - internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); - } - } - }; - - // exception-safety requires we do all the potentially-throwing operations first - void grow_array() { - size_t new_size = my_size*2; - size_t new_nelements = nelements; // internal_free_buffer zeroes this - list_array_type new_elements_array = NULL; - pointer_array_type new_pointer_array = NULL; - list_array_type new_free_list = NULL; - { - DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); - new_elements_array = elements_array_allocator().allocate(my_size); - new_pointer_array = pointer_array_allocator_type().allocate(new_size); - for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; - set_up_free_list(&new_free_list, new_elements_array, my_size ); - - for(size_t i=0; i < my_size; ++i) { - for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { - value_type *ov = reinterpret_cast(&(op->first)); - // could have std::move semantics - internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov); - } - } - my_cleanup.my_pa = NULL; - my_cleanup.my_elements = NULL; - } - - internal_free_buffer(pointer_array, elements_array, 
my_size, nelements); - free_list = new_free_list; - pointer_array = new_pointer_array; - elements_array = new_elements_array; - my_size = new_size; - nelements = new_nelements; - } - - // v should have perfect forwarding if std::move implemented. - // we use this method to move elements in grow_array, so can't use class fields - void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, - const value_type &v) { - size_t l_mask = p_sz-1; - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - size_t h = this->hash((*my_key)(v)) & l_mask; - __TBB_ASSERT(p_free_list, "Error: free list not set up."); - element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); - (void) new(&(my_elem->first)) value_type(v); - my_elem->second = p_pointer_array[h]; - p_pointer_array[h] = my_elem; - } - - void internal_initialize_buffer() { - pointer_array = pointer_array_allocator_type().allocate(my_size); - for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; - elements_array = elements_array_allocator().allocate(my_size / 2); - set_up_free_list(&free_list, elements_array, my_size / 2); - } - - // made static so an enclosed class can use to properly dispose of the internals - static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { - if(pa) { - for(size_t i = 0; i < sz; ++i ) { - element_type *p_next; - for( element_type *p = pa[i]; p; p = p_next) { - p_next = (element_type *)p->second; - internal::punned_cast(&(p->first))->~value_type(); - } - } - pointer_array_allocator_type().deallocate(pa, sz); - pa = NULL; - } - // Separate test (if allocation of pa throws, el may be allocated. - // but no elements will be constructed.) - if(el) { - elements_array_allocator().deallocate(el, sz / 2); - el = NULL; - } - sz = INITIAL_SIZE; - ne = 0; - } - -public: - hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { - internal_initialize_buffer(); - } - - ~hash_buffer() { - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - if(my_key) delete my_key; - } - - void reset() { - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - internal_initialize_buffer(); - } - - // Take ownership of func object allocated with new. - // This method is only used internally, so can't be misused by user. - void set_key_func(ValueToKey *vtk) { my_key = vtk; } - // pointer is used to clone() - ValueToKey* get_key_func() { return my_key; } - - bool insert_with_key(const value_type &v) { - pointer_type p = NULL; - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(find_ref_with_key((*my_key)(v), p)) { - p->~value_type(); - (void) new(p) value_type(v); // copy-construct into the space - return false; - } - ++nelements; - if(nelements*2 > my_size) grow_array(); - internal_insert_with_key(pointer_array, my_size, free_list, v); - return true; - } - - // returns true and sets v to array element if found, else returns false. 
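// [sketch] insert_with_key above grows once nelements*2 > my_size, holding
// the load factor of the chained table under 0.5, and my_size stays a power
// of two so mask() can replace a modulo. Both rules in isolation:
#include <cstddef>

inline bool needs_growth( std::size_t elements, std::size_t buckets ) {
    return elements * 2 > buckets;          // cf. insert_with_key
}

inline std::size_t bucket_of( std::size_t hash, std::size_t buckets ) {
    return hash & (buckets - 1);            // cf. mask(); power-of-two sizes only
}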
- bool find_ref_with_key(const Knoref& k, pointer_type &v) { - size_t i = this->hash(k) & mask(); - for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { - pointer_type pv = reinterpret_cast(&(p->first)); - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal((*my_key)(*pv), k)) { - v = pv; - return true; - } - } - return false; - } - - bool find_with_key( const Knoref& k, value_type &v) { - value_type *p; - if(find_ref_with_key(k, p)) { - v = *p; - return true; - } - else - return false; - } - - void delete_with_key(const Knoref& k) { - size_t h = this->hash(k) & mask(); - element_type* prev = NULL; - for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { - value_type *vp = reinterpret_cast(&(p->first)); - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal((*my_key)(*vp), k)) { - vp->~value_type(); - if(prev) prev->second = p->second; - else pointer_array[h] = (element_type *)(p->second); - p->second = free_list; - free_list = p; - --nelements; - return; - } - } - __TBB_ASSERT(false, "key not found for delete"); - } -}; -#endif // __TBB__flow_graph_hash_buffer_impl_H diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_trace_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_trace_impl.h deleted file mode 100644 index b798888d..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_trace_impl.h +++ /dev/null @@ -1,238 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef _FGT_GRAPH_TRACE_IMPL_H -#define _FGT_GRAPH_TRACE_IMPL_H - -#include "../tbb_profiling.h" - -namespace tbb { - namespace internal { - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE - -static inline void fgt_internal_create_input_port( void *node, void *p, string_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); -} - -static inline void fgt_internal_create_output_port( void *node, void *p, string_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); -} - -template -void register_input_port(void *node, tbb::flow::receiver* port, string_index name_index) { - //TODO: Make fgt_internal_create_input_port a function template? 
- fgt_internal_create_input_port( node, port, name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_input_helper { - static void register_port( void *node, PortsTuple &ports ) { - register_input_port( node, &(tbb::flow::get(ports)), static_cast(FLOW_INPUT_PORT_0 + N - 1) ); - fgt_internal_input_helper::register_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_input_helper { - static void register_port( void *node, PortsTuple &ports ) { - register_input_port( node, &(tbb::flow::get<0>(ports)), FLOW_INPUT_PORT_0 ); - } -}; - -template -void register_output_port(void *node, tbb::flow::sender* port, string_index name_index) { - //TODO: Make fgt_internal_create_output_port a function template? - fgt_internal_create_output_port( node, static_cast(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_output_helper { - static void register_port( void *node, PortsTuple &ports ) { - register_output_port( node, &(tbb::flow::get(ports)), static_cast(FLOW_OUTPUT_PORT_0 + N - 1) ); - fgt_internal_output_helper::register_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_output_helper { - static void register_port( void *node, PortsTuple &ports ) { - register_output_port( node, &(tbb::flow::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); - } -}; - -template< typename NodeType > -void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { - void *addr = (void *)( static_cast< tbb::flow::receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -template< typename NodeType > -void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { - void *addr = const_cast(node); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -template< typename NodeType > -static inline void fgt_node_desc( const NodeType *node, const char *desc ) { - void *addr = (void *)( static_cast< tbb::flow::sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -static inline void fgt_graph_desc( void *g, const char *desc ) { - itt_metadata_str_add( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); -} - -static inline void fgt_body( void *node, void *body ) { - itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node( string_index t, void *g, void *input_port, PortsTuple &ports ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); - fgt_internal_output_helper::register_port( input_port, ports ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node_with_body( string_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); - fgt_internal_output_helper::register_port( input_port, ports ); - fgt_body( input_port, body ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multiinput_node( string_index t, void *g, PortsTuple &ports, void *output_port) { - 
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 ); - fgt_internal_input_helper::register_port( output_port, ports ); -} - -static inline void fgt_node( string_index t, void *g, void *output_port ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 ); -} - -static inline void fgt_node_with_body( string_index t, void *g, void *output_port, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 ); - fgt_body( output_port, body ); -} - - -static inline void fgt_node( string_index t, void *g, void *input_port, void *output_port ) { - fgt_node( t, g, output_port ); - fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); -} - -static inline void fgt_node_with_body( string_index t, void *g, void *input_port, void *output_port, void *body ) { - fgt_node_with_body( t, g, output_port, body ); - fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); -} - - -static inline void fgt_node( string_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { - fgt_node( t, g, input_port, output_port ); - fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); -} - -static inline void fgt_make_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); -} - -static inline void fgt_remove_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); -} - -static inline void fgt_graph( void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); -} - -static inline void fgt_begin_body( void *body ) { - itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); -} - -static inline void fgt_end_body( void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); -} - -static inline void fgt_async_try_put_begin( void *node, void *port ) { - itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); -} - -static inline void fgt_async_try_put_end( void *, void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); -} - -static inline void fgt_async_reserve( void *node, void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); -} - -static inline void fgt_async_commit( void *node, void *graph ) { - itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); -} - -#else // TBB_PREVIEW_FLOW_GRAPH_TRACE - -static inline void fgt_graph( void * /*g*/ ) { } - -template< typename NodeType > -static inline void fgt_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } - -template< typename NodeType > -static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } - -static inline void fgt_graph_desc( void * /*g*/, const char * /*desc*/ ) { } - -static inline void fgt_body( void * /*node*/, void * /*body*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } - -template< int N, typename PortsTuple 
> -static inline void fgt_multioutput_node_with_body( string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multiinput_node( string_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } - -static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*output_port*/ ) { } -static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } -static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } - -static inline void fgt_node_with_body( string_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } -static inline void fgt_node_with_body( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } - -static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } -static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } - -static inline void fgt_begin_body( void * /*body*/ ) { } -static inline void fgt_end_body( void * /*body*/) { } -static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } -static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } -static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } -static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } - -#endif // TBB_PREVIEW_FLOW_GRAPH_TRACE - - } // namespace internal -} // namespace tbb - -#endif diff --git a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_types_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_types_impl.h deleted file mode 100644 index 73b5f547..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_flow_graph_types_impl.h +++ /dev/null @@ -1,713 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__flow_graph_types_impl_H -#define __TBB__flow_graph_types_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::flow::interfaceX - -namespace internal { - - // the change to key_matching (adding a K and KHash template parameter, making it a class) - // means we have to pass this data to the key_matching_port. All the ports have only one - // template parameter, so we have to wrap the following types in a trait: - // - // . K == key_type - // . KHash == hash and compare for Key - // . TtoK == function_body that given an object of T, returns its K - // . T == type accepted by port, and stored in the hash table - // - // The port will have an additional parameter on node construction, which is a function_body - // that accepts a const T& and returns a K which is the field in T which is its K. 
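The trait-bundling idiom described in the comment above, packing the key type, its hash, and the value-to-key functor into one struct so that single-parameter port templates can receive all of them at once, looks roughly like this in isolation. Names are illustrative, and `std::function` stands in for TBB's `type_to_key_function_body`:

```cpp
#include <functional>
#include <string>
#include <utility>

// One trait struct carries every type a key-matching port needs.
template <typename K, typename T, typename KHash = std::hash<K>>
struct key_trait {
    using key_type   = K;                          // K     == key type
    using value_type = T;                          // T     == stored message
    using hasher     = KHash;                      // KHash == hash for K
    using key_of     = std::function<K(const T&)>; // TtoK  == value-to-key functor
};

// A port-like template with a single parameter, as the comment requires.
template <typename Trait>
class matching_port {
    typename Trait::key_of to_key_;
public:
    explicit matching_port(typename Trait::key_of f) : to_key_(std::move(f)) {}
    typename Trait::key_type key_for(const typename Trait::value_type& v) const {
        return to_key_(v);
    }
};

struct order { int id; std::string payload; };

int main() {
    matching_port<key_trait<int, order>> port{[](const order& o) { return o.id; }};
    return port.key_for(order{42, "x"}) == 42 ? 0 : 1;
}
```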
- template - struct KeyTrait { - typedef Kp K; - typedef Tp T; - typedef internal::type_to_key_function_body TtoK; - typedef KHashp KHash; - }; - -// wrap each element of a tuple in a template, and make a tuple of the result. - template class PT, typename TypeTuple> - struct wrap_tuple_elements; - - // A wrapper that generates the traits needed for each port of a key-matching join, - // and the type of the tuple of input ports. - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements; - - template class PT, typename TypeTuple> - struct wrap_tuple_elements<1, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<1, PT, KeyTraits, TypeTuple > { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef typename tbb::flow::tuple< PT > type; - }; - - template class PT, typename TypeTuple> - struct wrap_tuple_elements<2, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<2, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef typename tbb::flow::tuple< PT, PT > type; - }; - - template class PT, typename TypeTuple> - struct wrap_tuple_elements<3, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<3, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef typename tbb::flow::tuple< PT, PT, PT > type; - }; - - template class PT, typename TypeTuple> - struct wrap_tuple_elements<4, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<4, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef typename tbb::flow::tuple< PT, PT, PT, - PT > type; - }; - - template class PT, typename TypeTuple> - struct wrap_tuple_elements<5, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<5, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef KeyTrait::type> KeyTrait4; - typedef typename tbb::flow::tuple< PT, PT, PT, - PT, PT > type; - }; - -#if __TBB_VARIADIC_MAX >= 6 - template class PT, typename TypeTuple> - struct wrap_tuple_elements<6, PT, TypeTuple> { - typedef 
typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<6, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef KeyTrait::type> KeyTrait4; - typedef KeyTrait::type> KeyTrait5; - typedef typename tbb::flow::tuple< PT, PT, PT, PT, - PT, PT > type; - }; -#endif - -#if __TBB_VARIADIC_MAX >= 7 - template class PT, typename TypeTuple> - struct wrap_tuple_elements<7, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<7, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef KeyTrait::type> KeyTrait4; - typedef KeyTrait::type> KeyTrait5; - typedef KeyTrait::type> KeyTrait6; - typedef typename tbb::flow::tuple< PT, PT, PT, PT, - PT, PT, PT > type; - }; -#endif - -#if __TBB_VARIADIC_MAX >= 8 - template class PT, typename TypeTuple> - struct wrap_tuple_elements<8, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<8, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef KeyTrait::type> KeyTrait4; - typedef KeyTrait::type> KeyTrait5; - typedef KeyTrait::type> KeyTrait6; - typedef KeyTrait::type> KeyTrait7; - typedef typename tbb::flow::tuple< PT, PT, PT, PT, - PT, PT, PT, PT > type; - }; -#endif - -#if __TBB_VARIADIC_MAX >= 9 - template class PT, typename TypeTuple> - struct wrap_tuple_elements<9, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<9, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef KeyTrait::type> KeyTrait4; - typedef KeyTrait::type> KeyTrait5; - typedef KeyTrait::type> KeyTrait6; - typedef KeyTrait::type> KeyTrait7; - typedef KeyTrait::type> KeyTrait8; - typedef typename tbb::flow::tuple< PT, PT, PT, PT, - PT, PT, PT, PT, PT > type; - }; -#endif - -#if __TBB_VARIADIC_MAX >= 10 - template class PT, typename TypeTuple> - struct wrap_tuple_elements<10, PT, TypeTuple> { - typedef typename tbb::flow::tuple< - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - PT::type>, - 
PT::type>, - PT::type>, - PT::type>, - PT::type> > - type; - }; - - template class PT, typename KeyTraits, typename TypeTuple> - struct wrap_key_tuple_elements<10, PT, KeyTraits, TypeTuple> { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef KeyTrait::type> KeyTrait0; - typedef KeyTrait::type> KeyTrait1; - typedef KeyTrait::type> KeyTrait2; - typedef KeyTrait::type> KeyTrait3; - typedef KeyTrait::type> KeyTrait4; - typedef KeyTrait::type> KeyTrait5; - typedef KeyTrait::type> KeyTrait6; - typedef KeyTrait::type> KeyTrait7; - typedef KeyTrait::type> KeyTrait8; - typedef KeyTrait::type> KeyTrait9; - typedef typename tbb::flow::tuple< PT, PT, PT, PT, - PT, PT, PT, PT, PT, - PT > type; - }; -#endif - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - template< int... S > class sequence {}; - - template< int N, int... S > - struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; - - template< int... S > - struct make_sequence < 0, S... > { - typedef sequence type; - }; -#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */ - -#if __TBB_INITIALIZER_LISTS_PRESENT - // Until C++14 std::initializer_list does not guarantee life time of contained objects. - template - class initializer_list_wrapper { - public: - typedef T value_type; - typedef const T& reference; - typedef const T& const_reference; - typedef size_t size_type; - - typedef T* iterator; - typedef const T* const_iterator; - - initializer_list_wrapper( std::initializer_list il ) __TBB_NOEXCEPT( true ) : my_begin( static_cast(malloc( il.size()*sizeof( T ) )) ) { - iterator dst = my_begin; - for ( typename std::initializer_list::const_iterator src = il.begin(); src != il.end(); ++src ) - new (dst++) T( *src ); - my_end = dst; - } - - initializer_list_wrapper( const initializer_list_wrapper& ilw ) __TBB_NOEXCEPT( true ) : my_begin( static_cast(malloc( ilw.size()*sizeof( T ) )) ) { - iterator dst = my_begin; - for ( typename std::initializer_list::const_iterator src = ilw.begin(); src != ilw.end(); ++src ) - new (dst++) T( *src ); - my_end = dst; - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - initializer_list_wrapper( initializer_list_wrapper&& ilw ) __TBB_NOEXCEPT( true ) : my_begin( ilw.my_begin ), my_end( ilw.my_end ) { - ilw.my_begin = ilw.my_end = NULL; - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - - ~initializer_list_wrapper() { - if ( my_begin ) - free( my_begin ); - } - - const_iterator begin() const __TBB_NOEXCEPT(true) { return my_begin; } - const_iterator end() const __TBB_NOEXCEPT(true) { return my_end; } - size_t size() const __TBB_NOEXCEPT(true) { return (size_t)(my_end - my_begin); } - - private: - iterator my_begin; - iterator my_end; - }; -#endif /* __TBB_INITIALIZER_LISTS_PRESENT */ - -//! type mimicking std::pair but with trailing fill to ensure each element of an array -//* will have the correct alignment - template - struct type_plus_align { - char first[sizeof(T1)]; - T2 second; - char fill1[REM]; - }; - - template - struct type_plus_align { - char first[sizeof(T1)]; - T2 second; - }; - - template struct alignment_of { - typedef struct { char t; U padded; } test_alignment; - static const size_t value = sizeof(test_alignment) - sizeof(U); - }; - - // T1, T2 are actual types stored. The space defined for T1 in the type returned - // is a char array of the correct size. Type T2 should be trivially-constructible, - // T1 must be explicitly managed. 
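The `alignment_of` shown above is the classic pre-C++11 probe: in `struct { char t; U padded; }` the compiler must insert padding after the `char` so that `padded` is aligned, making the struct exactly one alignment unit larger than `U` on common ABIs. A self-contained check of the trick against C++11 `alignof`:

```cpp
#include <cstddef>
#include <iostream>

// Pre-C++11 alignment probe: sizeof(test) exceeds sizeof(U) by exactly the
// padding the compiler must insert after the leading char, i.e. U's alignment.
template <typename U>
struct alignment_probe {
    struct test { char t; U padded; };
    static const std::size_t value = sizeof(test) - sizeof(U);
};

int main() {
    std::cout << alignment_probe<int>::value    << " == " << alignof(int)    << '\n';
    std::cout << alignment_probe<double>::value << " == " << alignof(double) << '\n';
}
```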
- template - struct aligned_pair { - static const size_t t1_align = alignment_of::value; - static const size_t t2_align = alignment_of::value; - typedef type_plus_align just_pair; - static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; - static const size_t extra_bytes = sizeof(just_pair) % max_align; - static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; - public: - typedef type_plus_align type; - }; // aligned_pair - -// support for variant type -// type we use when we're not storing a value -struct default_constructed { }; - -// type which contains another type, tests for what type is contained, and references to it. -// internal::Wrapper -// void CopyTo( void *newSpace) : builds a Wrapper copy of itself in newSpace - -// struct to allow us to copy and test the type of objects -struct WrapperBase { - virtual ~WrapperBase() {} - virtual void CopyTo(void* /*newSpace*/) const { } -}; - -// Wrapper contains a T, with the ability to test what T is. The Wrapper can be -// constructed from a T, can be copy-constructed from another Wrapper, and can be -// examined via value(), but not modified. -template -struct Wrapper: public WrapperBase { - typedef T value_type; - typedef T* pointer_type; -private: - T value_space; -public: - const value_type &value() const { return value_space; } - -private: - Wrapper(); - - // on exception will ensure the Wrapper will contain only a trivially-constructed object - struct _unwind_space { - pointer_type space; - _unwind_space(pointer_type p) : space(p) {} - ~_unwind_space() { - if(space) (void) new (space) Wrapper(default_constructed()); - } - }; -public: - explicit Wrapper( const T& other ) : value_space(other) { } - explicit Wrapper(const Wrapper& other) : value_space(other.value_space) { } - - void CopyTo(void* newSpace) const __TBB_override { - _unwind_space guard((pointer_type)newSpace); - (void) new(newSpace) Wrapper(value_space); - guard.space = NULL; - } - ~Wrapper() { } -}; - -// specialization for array objects -template -struct Wrapper : public WrapperBase { - typedef T value_type; - typedef T* pointer_type; - // space must be untyped. - typedef T ArrayType[N]; -private: - // The space is not of type T[N] because when copy-constructing, it would be - // default-initialized and then copied to in some fashion, resulting in two - // constructions and one destruction per element. If the type is char[ ], we - // placement new into each element, resulting in one construction per element. - static const size_t space_size = sizeof(ArrayType) / sizeof(char); - char value_space[space_size]; - - - // on exception will ensure the already-built objects will be destructed - // (the value_space is a char array, so it is already trivially-destructible.) 
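The `_unwind_class` guard that follows implements rollback for placement-constructing an array: if a copy constructor throws midway, the elements already built must be destroyed in reverse order. A minimal standalone version of that pattern (`construct_array` is a hypothetical helper name):

```cpp
#include <cstddef>
#include <new>

// Placement-construct N copies into raw storage; on a throw, destroy the
// completed prefix in reverse order, then re-throw. This is the same
// obligation the _unwind_class guard below discharges in its destructor.
template <typename T, std::size_t N>
void construct_array(void* raw, const T& proto) {
    T* base = static_cast<T*>(raw);
    std::size_t built = 0;
    try {
        for (; built < N; ++built)
            new (base + built) T(proto);       // each copy may throw
    } catch (...) {
        while (built)
            base[--built].~T();                // unwind what was built
        throw;
    }
}
```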
- struct _unwind_class { - pointer_type space; - int already_built; - _unwind_class(pointer_type p) : space(p), already_built(0) {} - ~_unwind_class() { - if(space) { - for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); - (void) new(space) Wrapper(default_constructed()); - } - } - }; -public: - const ArrayType &value() const { - char *vp = const_cast(value_space); - return reinterpret_cast(*vp); - } - -private: - Wrapper(); -public: - // have to explicitly construct because other decays to a const value_type* - explicit Wrapper(const ArrayType& other) { - _unwind_class guard((pointer_type)value_space); - pointer_type vp = reinterpret_cast(&value_space); - for(size_t i = 0; i < N; ++i ) { - (void) new(vp++) value_type(other[i]); - ++(guard.already_built); - } - guard.space = NULL; - } - explicit Wrapper(const Wrapper& other) : WrapperBase() { - // we have to do the heavy lifting to copy contents - _unwind_class guard((pointer_type)value_space); - pointer_type dp = reinterpret_cast(value_space); - pointer_type sp = reinterpret_cast(const_cast(other.value_space)); - for(size_t i = 0; i < N; ++i, ++dp, ++sp) { - (void) new(dp) value_type(*sp); - ++(guard.already_built); - } - guard.space = NULL; - } - - void CopyTo(void* newSpace) const __TBB_override { - (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor - } - - ~Wrapper() { - // have to destroy explicitly in reverse order - pointer_type vp = reinterpret_cast(&value_space); - for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); - } -}; - -// given a tuple, return the type of the element that has the maximum alignment requirement. -// Given a tuple and that type, return the number of elements of the object with the max -// alignment requirement that is at least as big as the largest object in the tuple. - -template struct pick_one; -template struct pick_one { typedef T1 type; }; -template struct pick_one { typedef T2 type; }; - -template< template class Selector, typename T1, typename T2 > -struct pick_max { - typedef typename pick_one< (Selector::value > Selector::value), T1, T2 >::type type; -}; - -template struct size_of { static const int value = sizeof(T); }; - -template< size_t N, class Tuple, template class Selector > struct pick_tuple_max { - typedef typename pick_tuple_max::type LeftMaxType; - typedef typename tbb::flow::tuple_element::type ThisType; - typedef typename pick_max::type type; -}; - -template< class Tuple, template class Selector > struct pick_tuple_max<0, Tuple, Selector> { - typedef typename tbb::flow::tuple_element<0, Tuple>::type type; -}; - -// is the specified type included in a tuple? -template -struct is_element_of { - typedef typename tbb::flow::tuple_element::type T_i; - static const bool value = tbb::internal::is_same_type::value || is_element_of::value; -}; - -template -struct is_element_of { - typedef typename tbb::flow::tuple_element<0, Tuple>::type T_i; - static const bool value = tbb::internal::is_same_type::value; -}; - -// allow the construction of types that are listed tuple. If a disallowed type -// construction is written, a method involving this type is created. The -// type has no definition, so a syntax error is generated. 
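The declared-but-undefined template trick described above turns a disallowed construction into a hard compile error, but only when that branch is actually instantiated. A reduced sketch with illustrative names (TBB's version dispatches on tuple membership; in modern C++ a `static_assert` would serve the same purpose):

```cpp
#include <new>

// Referencing a member of a declared-but-never-defined class template fails
// at the exact point of instantiation, so only disallowed types are rejected.
template <typename T>
struct ERROR_type_not_allowed;             // intentionally never defined

template <bool Allowed, typename T>
struct construct_if {
    static void apply(void* space, const T& x) { new (space) T(x); }
};

template <typename T>
struct construct_if<false, T> {
    static void apply(void*, const T& x) { ERROR_type_not_allowed<T>::fail(x); }
};

int main() {
    alignas(int) unsigned char buf[sizeof(int)];
    construct_if<true, int>::apply(buf, 7);     // compiles and runs
    // construct_if<false, int>::apply(buf, 7); // would fail to compile
}
```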
-template struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; - -template struct do_if; -template -struct do_if { - static void construct(void *mySpace, const T& x) { - (void) new(mySpace) Wrapper(x); - } -}; -template -struct do_if { - static void construct(void * /*mySpace*/, const T& x) { - // This method is instantiated when the type T does not match any of the - // element types in the Tuple in variant. - ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple::bad_type(x); - } -}; - -// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in -// Wrapper, and how big Wrapper is. -// -// the object can only be tested for type, and a read-only reference can be fetched by cast_to(). - -using tbb::internal::punned_cast; -struct tagged_null_type {}; -template -class tagged_msg { - typedef tbb::flow::tuple= 6 - , T5 - #endif - #if __TBB_VARIADIC_MAX >= 7 - , T6 - #endif - #if __TBB_VARIADIC_MAX >= 8 - , T7 - #endif - #if __TBB_VARIADIC_MAX >= 9 - , T8 - #endif - #if __TBB_VARIADIC_MAX >= 10 - , T9 - #endif - > Tuple; - -private: - class variant { - static const size_t N = tbb::flow::tuple_size::value; - typedef typename pick_tuple_max::type AlignType; - typedef typename pick_tuple_max::type MaxSizeType; - static const size_t MaxNBytes = (sizeof(Wrapper)+sizeof(AlignType)-1); - static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); - typedef typename tbb::aligned_space SpaceType; - SpaceType my_space; - static const size_t MaxSize = sizeof(SpaceType); - - public: - variant() { (void) new(&my_space) Wrapper(default_constructed()); } - - template - variant( const T& x ) { - do_if::value>::construct(&my_space,x); - } - - variant(const variant& other) { - const WrapperBase * h = punned_cast(&(other.my_space)); - h->CopyTo(&my_space); - } - - // assignment must destroy and re-create the Wrapper type, as there is no way - // to create a Wrapper-to-Wrapper assign even if we find they agree in type. 
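The destroy-then-clone assignment noted above is the standard move for type-erased storage: the target's dynamic type is unknown at compile time, so assignment runs the virtual destructor in place and asks the source to placement-copy itself through a virtual `CopyTo`. A stripped-down sketch with one payload type for brevity (the real `tagged_msg` sizes its buffer for the largest element of the tuple):

```cpp
#include <new>

struct base {
    virtual ~base() {}
    virtual void copy_to(void* space) const = 0;
};

template <typename T>
struct box : base {
    T value;
    explicit box(const T& v) : value(v) {}
    void copy_to(void* space) const override { new (space) box(value); }
};

class erased {
    alignas(box<double>) unsigned char space_[sizeof(box<double>)];
    base*       get()       { return reinterpret_cast<base*>(space_); }
    const base* get() const { return reinterpret_cast<const base*>(space_); }
public:
    explicit erased(double d) { new (space_) box<double>(d); }
    erased(const erased& rhs) { rhs.get()->copy_to(space_); }
    erased& operator=(const erased& rhs) {
        if (this != &rhs) {
            get()->~base();             // destroy current contents...
            rhs.get()->copy_to(space_); // ...then clone rhs in place
        }
        return *this;
    }
    ~erased() { get()->~base(); }
};
```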
- void operator=( const variant& rhs ) { - if(&rhs != this) { - WrapperBase *h = punned_cast(&my_space); - h->~WrapperBase(); - const WrapperBase *ch = punned_cast(&(rhs.my_space)); - ch->CopyTo(&my_space); - } - } - - template - const U& variant_cast_to() const { - const Wrapper *h = dynamic_cast*>(punned_cast(&my_space)); - if(!h) { - tbb::internal::throw_exception(tbb::internal::eid_bad_tagged_msg_cast); - } - return h->value(); - } - template - bool variant_is_a() const { return dynamic_cast*>(punned_cast(&my_space)) != NULL; } - - bool variant_is_default_constructed() const {return variant_is_a();} - - ~variant() { - WrapperBase *h = punned_cast(&my_space); - h->~WrapperBase(); - } - }; //class variant - - TagType my_tag; - variant my_msg; - -public: - tagged_msg(): my_tag(TagType(~0)), my_msg(){} - - template - tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} - - #if __TBB_CONST_REF_TO_ARRAY_TEMPLATE_PARAM_BROKEN - template - tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} - #endif - - void set_tag(TagType const &index) {my_tag = index;} - TagType tag() const {return my_tag;} - - template - const V& cast_to() const {return my_msg.template variant_cast_to();} - - template - bool is_a() const {return my_msg.template variant_is_a();} - - bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} -}; //class tagged_msg - -// template to simplify cast and test for tagged_msg in template contexts -template -const V& cast_to(T const &t) { return t.template cast_to(); } - -template -bool is_a(T const &t) { return t.template is_a(); } - -enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; - -} // namespace internal - -#endif /* __TBB__flow_graph_types_impl_H */ diff --git a/lib/3rdParty/tbb/include/tbb/internal/_mutex_padding.h b/lib/3rdParty/tbb/include/tbb/internal/_mutex_padding.h deleted file mode 100644 index 6c1d9b59..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_mutex_padding.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_mutex_padding_H -#define __TBB_mutex_padding_H - -// wrapper for padding mutexes to be alone on a cache line, without requiring they be allocated -// from a pool. Because we allow them to be defined anywhere they must be two cache lines in size. - - -namespace tbb { -namespace interface7 { -namespace internal { - -static const size_t cache_line_size = 64; - -// Pad a mutex to occupy a number of full cache lines sufficient to avoid false sharing -// with other data; space overhead is up to 2*cache_line_size-1. 
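Both padding strategies in one sketch: the C++11 `alignas` form, and the portable form this header uses, which over-allocates roughly two cache lines of raw bytes and rounds `this` up to the next line boundary exactly as the `impl()` accessor below does. This assumes `sizeof(std::mutex)` fits in one 64-byte line; names are illustrative.

```cpp
#include <cstdint>
#include <mutex>
#include <new>

constexpr std::size_t cache_line = 64;      // the header's cache_line_size

// C++11 way: let the compiler place each lock on its own line.
struct alignas(cache_line) aligned_lock { std::mutex m; };

// The header's allocator-agnostic way: round `this` up to a line boundary
// inside over-allocated raw storage, so the mutex never shares a line.
class padded_lock {
    unsigned char raw_[2 * cache_line];     // enough slack for any placement
    std::mutex* impl() {
        std::uintptr_t p = reinterpret_cast<std::uintptr_t>(raw_);
        return reinterpret_cast<std::mutex*>((p | (cache_line - 1)) + 1);
    }
public:
    padded_lock()  { new (impl()) std::mutex; }
    ~padded_lock() { impl()->~mutex(); }
    padded_lock(const padded_lock&) = delete;
    padded_lock& operator=(const padded_lock&) = delete;
    void lock()   { impl()->lock(); }
    void unlock() { impl()->unlock(); }
};
```

An array of such locks (one per shard of a concurrent container, say) keeps each lock word on its own line, so contention on one shard does not evict its neighbors' cache lines.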
-template class padded_mutex; - -template -class padded_mutex : tbb::internal::mutex_copy_deprecated_and_disabled { - typedef long pad_type; - pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)]; - - Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);} - -public: - static const bool is_rw_mutex = Mutex::is_rw_mutex; - static const bool is_recursive_mutex = Mutex::is_recursive_mutex; - static const bool is_fair_mutex = Mutex::is_fair_mutex; - - padded_mutex() { new(impl()) Mutex(); } - ~padded_mutex() { impl()->~Mutex(); } - - //! Represents acquisition of a mutex. - class scoped_lock : tbb::internal::no_copy { - typename Mutex::scoped_lock my_scoped_lock; - public: - scoped_lock() : my_scoped_lock() {} - scoped_lock( padded_mutex& m ) : my_scoped_lock(*m.impl()) { } - ~scoped_lock() { } - - void acquire( padded_mutex& m ) { my_scoped_lock.acquire(*m.impl()); } - bool try_acquire( padded_mutex& m ) { return my_scoped_lock.try_acquire(*m.impl()); } - void release() { my_scoped_lock.release(); } - }; -}; - -template -class padded_mutex : tbb::internal::mutex_copy_deprecated_and_disabled { - typedef long pad_type; - pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)]; - - Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);} - -public: - static const bool is_rw_mutex = Mutex::is_rw_mutex; - static const bool is_recursive_mutex = Mutex::is_recursive_mutex; - static const bool is_fair_mutex = Mutex::is_fair_mutex; - - padded_mutex() { new(impl()) Mutex(); } - ~padded_mutex() { impl()->~Mutex(); } - - //! Represents acquisition of a mutex. - class scoped_lock : tbb::internal::no_copy { - typename Mutex::scoped_lock my_scoped_lock; - public: - scoped_lock() : my_scoped_lock() {} - scoped_lock( padded_mutex& m, bool write = true ) : my_scoped_lock(*m.impl(),write) { } - ~scoped_lock() { } - - void acquire( padded_mutex& m, bool write = true ) { my_scoped_lock.acquire(*m.impl(),write); } - bool try_acquire( padded_mutex& m, bool write = true ) { return my_scoped_lock.try_acquire(*m.impl(),write); } - bool upgrade_to_writer() { return my_scoped_lock.upgrade_to_writer(); } - bool downgrade_to_reader() { return my_scoped_lock.downgrade_to_reader(); } - void release() { my_scoped_lock.release(); } - }; -}; - -} // namespace internal -} // namespace interface7 -} // namespace tbb - -#endif /* __TBB_mutex_padding_H */ diff --git a/lib/3rdParty/tbb/include/tbb/internal/_range_iterator.h b/lib/3rdParty/tbb/include/tbb/internal/_range_iterator.h deleted file mode 100644 index 5ebc42e8..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_range_iterator.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-
-
-
-
-*/
-
-#ifndef __TBB_range_iterator_H
-#define __TBB_range_iterator_H
-
-#include "../tbb_stddef.h"
-
-#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
-    #include <iterator>
-#endif
-
-namespace tbb {
-    // iterators to first and last elements of container
-    namespace internal {
-
-#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
-        using std::begin;
-        using std::end;
-        template<typename Container>
-        auto first(Container& c)-> decltype(begin(c)) {return begin(c);}
-
-        template<typename Container>
-        auto first(const Container& c)-> decltype(begin(c)) {return begin(c);}
-
-        template<typename Container>
-        auto last(Container& c)-> decltype(begin(c)) {return end(c);}
-
-        template<typename Container>
-        auto last(const Container& c)-> decltype(begin(c)) {return end(c);}
-#else
-        template<typename Container>
-        typename Container::iterator first(Container& c) {return c.begin();}
-
-        template<typename Container>
-        typename Container::const_iterator first(const Container& c) {return c.begin();}
-
-        template<typename Container>
-        typename Container::iterator last(Container& c) {return c.end();}
-
-        template<typename Container>
-        typename Container::const_iterator last(const Container& c) {return c.end();}
-#endif
-
-        template<typename T, size_t size>
-        T* first(T (&arr) [size]) {return arr;}
-
-        template<typename T, size_t size>
-        T* last(T (&arr) [size]) {return arr + size;}
-    } //namespace internal
-} //namespace tbb
-
-#endif // __TBB_range_iterator_H
diff --git a/lib/3rdParty/tbb/include/tbb/internal/_tbb_hash_compare_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_tbb_hash_compare_impl.h
deleted file mode 100644
index 6381e2dc..00000000
--- a/lib/3rdParty/tbb/include/tbb/internal/_tbb_hash_compare_impl.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-// must be included outside namespaces.
-#ifndef __TBB_tbb_hash_compare_impl_H
-#define __TBB_tbb_hash_compare_impl_H
-
-#include <string>
-
-namespace tbb {
-namespace interface5 {
-namespace internal {
-
-// Template class for hash compare
-template<typename Key, typename Hasher, typename Key_equality>
-class hash_compare
-{
-public:
-    typedef Hasher hasher;
-    typedef Key_equality key_equal;
-
-    hash_compare() {}
-
-    hash_compare(Hasher a_hasher) : my_hash_object(a_hasher) {}
-
-    hash_compare(Hasher a_hasher, Key_equality a_keyeq) : my_hash_object(a_hasher), my_key_compare_object(a_keyeq) {}
-
-    size_t operator()(const Key& key) const {
-        return ((size_t)my_hash_object(key));
-    }
-
-    bool operator()(const Key& key1, const Key& key2) const {
-        // TODO: get rid of the result invertion
-        return (!my_key_compare_object(key1, key2));
-    }
-
-    Hasher my_hash_object;                 // The hash object
-    Key_equality my_key_compare_object;    // The equality comparator object
-};
-
-//! Hash multiplier
-static const size_t hash_multiplier = tbb::internal::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value;
-
-} // namespace internal
-
-//! Hasher functions
-template<typename T>
-inline size_t tbb_hasher( const T& t ) {
-    return static_cast<size_t>( t ) * internal::hash_multiplier;
-}
-template<typename P>
-inline size_t tbb_hasher( P* ptr ) {
-    size_t const h = reinterpret_cast<size_t>( ptr );
-    return (h >> 3) ^ h;
-}
-template<typename E, typename S, typename A>
-inline size_t tbb_hasher( const std::basic_string<E,S,A>& s ) {
-    size_t h = 0;
-    for( const E* c = s.c_str(); *c; ++c )
-        h = static_cast<size_t>(*c) ^ (h * internal::hash_multiplier);
-    return h;
-}
-template<typename F, typename S>
-inline size_t tbb_hasher( const std::pair<F,S>& p ) {
-    return tbb_hasher(p.first) ^ tbb_hasher(p.second);
-}
-
-} // namespace interface5
-using interface5::tbb_hasher;
-
-// Template class for hash compare
-template<typename Key>
-class tbb_hash
-{
-public:
-    tbb_hash() {}
-
-    size_t operator()(const Key& key) const
-    {
-        return tbb_hasher(key);
-    }
-};
-
-//! hash_compare that is default argument for concurrent_hash_map
-template<typename Key>
-struct tbb_hash_compare {
-    static size_t hash( const Key& a ) { return tbb_hasher(a); }
-    static bool equal( const Key& a, const Key& b ) { return a == b; }
-};
-
-} // namespace tbb
-#endif /* __TBB_tbb_hash_compare_impl_H */
diff --git a/lib/3rdParty/tbb/include/tbb/internal/_tbb_strings.h b/lib/3rdParty/tbb/include/tbb/internal/_tbb_strings.h
deleted file mode 100644
index 1aa532f3..00000000
--- a/lib/3rdParty/tbb/include/tbb/internal/_tbb_strings.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
- - - - -*/ - -TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") -TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") -TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") -TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") -TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") -TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") -TBB_STRING_RESOURCE(FLOW_OR_NODE, "or_node") //no longer in use, kept for backward compatibility -TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") -TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") -TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") -TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") -TBB_STRING_RESOURCE(FLOW_SOURCE_NODE, "source_node") -TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") -TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") -TBB_STRING_RESOURCE(FLOW_BODY, "body") -TBB_STRING_RESOURCE(FLOW_GRAPH, "graph") -TBB_STRING_RESOURCE(FLOW_NODE, "node") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") -TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") -TBB_STRING_RESOURCE(FLOW_NULL, "null") -TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") -TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") -TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") diff --git a/lib/3rdParty/tbb/include/tbb/internal/_tbb_windef.h b/lib/3rdParty/tbb/include/tbb/internal/_tbb_windef.h deleted file mode 100644 index e798dee4..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_tbb_windef.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_tbb_windef_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif /* __TBB_tbb_windef_H */ - -// Check that the target Windows version has all API calls requried for TBB. -// Do not increase the version in condition beyond 0x0500 without prior discussion! -#if defined(_WIN32_WINNT) && _WIN32_WINNT<0x0501 -#error TBB is unable to run on old Windows versions; _WIN32_WINNT must be 0x0501 or greater. -#endif - -#if !defined(_MT) -#error TBB requires linkage with multithreaded C/C++ runtime library. \ - Choose multithreaded DLL runtime in project settings, or use /MD[d] compiler switch. -#endif - -// Workaround for the problem with MVSC headers failing to define namespace std -namespace std { - using ::size_t; using ::ptrdiff_t; -} - -#define __TBB_STRING_AUX(x) #x -#define __TBB_STRING(x) __TBB_STRING_AUX(x) - -// Default setting of TBB_USE_DEBUG -#ifdef TBB_USE_DEBUG -# if TBB_USE_DEBUG -# if !defined(_DEBUG) -# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MDd if compiling with TBB_USE_DEBUG!=0") -# endif -# else -# if defined(_DEBUG) -# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MD if compiling with TBB_USE_DEBUG==0") -# endif -# endif -#endif - -#if (__TBB_BUILD || __TBBMALLOC_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) -#define __TBB_NO_IMPLICIT_LINKAGE 1 -#endif - -#if _MSC_VER - #if !__TBB_NO_IMPLICIT_LINKAGE - #ifdef __TBB_LIB_NAME - #pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME)) - #else - #ifdef _DEBUG - #pragma comment(lib, "tbb_debug.lib") - #else - #pragma comment(lib, "tbb.lib") - #endif - #endif - #endif -#endif diff --git a/lib/3rdParty/tbb/include/tbb/internal/_template_helpers.h b/lib/3rdParty/tbb/include/tbb/internal/_template_helpers.h deleted file mode 100644 index 1e0abbe8..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_template_helpers.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_template_helpers_H -#define __TBB_template_helpers_H - -#include -#include - -namespace tbb { namespace internal { - -//! Enables one or the other code branches -template struct enable_if {}; -template struct enable_if { typedef T type; }; - -//! Strips its template type argument from cv- and ref-qualifiers -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -//! 
Specialization for function pointers -template struct strip { typedef T(*type)(); }; -#if __TBB_CPP11_RVALUE_REF_PRESENT -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -template struct strip { typedef T type; }; -#endif -//! Specialization for arrays converts to a corresponding pointer -template struct strip { typedef T* type; }; -template struct strip { typedef const T* type; }; -template struct strip { typedef volatile T* type; }; -template struct strip { typedef const volatile T* type; }; - -//! Detects whether two given types are the same -template struct is_same_type { static const bool value = false; }; -template struct is_same_type { static const bool value = true; }; - -template struct is_ref { static const bool value = false; }; -template struct is_ref { static const bool value = true; }; - -#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -//! std::void_t internal implementation (to avoid GCC < 4.7 "template aliases" absence) -template struct void_t { typedef void type; }; -#endif - -#if __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT - -//! Allows to store a function parameter pack as a variable and later pass it to another function -template< typename... Types > -struct stored_pack; - -template<> -struct stored_pack<> -{ - typedef stored_pack<> pack_type; - stored_pack() {} - - // Friend front-end functions - template< typename F, typename Pack > friend void call( F&& f, Pack&& p ); - template< typename Ret, typename F, typename Pack > friend Ret call_and_return( F&& f, Pack&& p ); - -protected: - // Ideally, ref-qualified non-static methods would be used, - // but that would greatly reduce the set of compilers where it works. - template< typename Ret, typename F, typename... Preceding > - static Ret call( F&& f, const pack_type& /*pack*/, Preceding&&... params ) { - return std::forward(f)( std::forward(params)... ); - } - template< typename Ret, typename F, typename... Preceding > - static Ret call( F&& f, pack_type&& /*pack*/, Preceding&&... params ) { - return std::forward(f)( std::forward(params)... ); - } -}; - -template< typename T, typename... Types > -struct stored_pack : stored_pack -{ - typedef stored_pack pack_type; - typedef stored_pack pack_remainder; - // Since lifetime of original values is out of control, copies should be made. - // Thus references should be stripped away from the deduced type. - typename strip::type leftmost_value; - - // Here rvalue references act in the same way as forwarding references, - // as long as class template parameters were deduced via forwarding references. - stored_pack( T&& t, Types&&... types ) - : pack_remainder(std::forward(types)...), leftmost_value(std::forward(t)) {} - - // Friend front-end functions - template< typename F, typename Pack > friend void call( F&& f, Pack&& p ); - template< typename Ret, typename F, typename Pack > friend Ret call_and_return( F&& f, Pack&& p ); - -protected: - template< typename Ret, typename F, typename... Preceding > - static Ret call( F&& f, pack_type& pack, Preceding&&... params ) { - return pack_remainder::template call( - std::forward(f), static_cast(pack), - std::forward(params)... , pack.leftmost_value - ); - } - template< typename Ret, typename F, typename... Preceding > - static Ret call( F&& f, const pack_type& pack, Preceding&&... params ) { - return pack_remainder::template call( - std::forward(f), static_cast(pack), - std::forward(params)... 
, pack.leftmost_value - ); - } - template< typename Ret, typename F, typename... Preceding > - static Ret call( F&& f, pack_type&& pack, Preceding&&... params ) { - return pack_remainder::template call( - std::forward(f), static_cast(pack), - std::forward(params)... , std::move(pack.leftmost_value) - ); - } -}; - -//! Calls the given function with arguments taken from a stored_pack -template< typename F, typename Pack > -void call( F&& f, Pack&& p ) { - strip::type::template call( std::forward(f), std::forward(p) ); -} - -template< typename Ret, typename F, typename Pack > -Ret call_and_return( F&& f, Pack&& p ) { - return strip::type::template call( std::forward(f), std::forward(p) ); -} - -template< typename... Types > -stored_pack save_pack( Types&&... types ) { - return stored_pack( std::forward(types)... ); -} - -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */ -} } // namespace internal, namespace tbb - -#endif /* __TBB_template_helpers_H */ diff --git a/lib/3rdParty/tbb/include/tbb/internal/_x86_eliding_mutex_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_x86_eliding_mutex_impl.h deleted file mode 100644 index ef5f9223..00000000 --- a/lib/3rdParty/tbb/include/tbb/internal/_x86_eliding_mutex_impl.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB__x86_eliding_mutex_impl_H -#define __TBB__x86_eliding_mutex_impl_H - -#ifndef __TBB_spin_mutex_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#if ( __TBB_x86_32 || __TBB_x86_64 ) - -namespace tbb { -namespace interface7 { -namespace internal { - -template -class padded_mutex; - -//! An eliding lock that occupies a single byte. -/** A x86_eliding_mutex is an HLE-enabled spin mutex. It is recommended to - put the mutex on a cache line that is not shared by the data it protects. - It should be used for locking short critical sections where the lock is - contended but the data it protects are not. If zero-initialized, the - mutex is considered unheld. - @ingroup synchronization */ -class x86_eliding_mutex : tbb::internal::mutex_copy_deprecated_and_disabled { - //! 0 if lock is released, 1 if lock is acquired. - __TBB_atomic_flag flag; - - friend class padded_mutex; - -public: - //! Construct unacquired lock. - /** Equivalent to zero-initialization of *this. */ - x86_eliding_mutex() : flag(0) {} - -// bug in gcc 3.x.x causes syntax error in spite of the friend declaration above. -// Make the scoped_lock public in that case. -#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000 -#else - // by default we will not provide the scoped_lock interface. The user - // should use the padded version of the mutex. scoped_lock is used in - // padded_mutex template. -private: -#endif - // scoped_lock in padded_mutex<> is the interface to use. - //! Represents acquisition of a mutex. - class scoped_lock : tbb::internal::no_copy { - private: - //! 
diff --git a/lib/3rdParty/tbb/include/tbb/internal/_x86_eliding_mutex_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_x86_eliding_mutex_impl.h
deleted file mode 100644
index ef5f9223..00000000
--- a/lib/3rdParty/tbb/include/tbb/internal/_x86_eliding_mutex_impl.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB__x86_eliding_mutex_impl_H
-#define __TBB__x86_eliding_mutex_impl_H
-
-#ifndef __TBB_spin_mutex_H
-#error Do not #include this internal file directly; use public TBB headers instead.
-#endif
-
-#if ( __TBB_x86_32 || __TBB_x86_64 )
-
-namespace tbb {
-namespace interface7 {
-namespace internal {
-
-template<typename Mutex, bool is_rw>
-class padded_mutex;
-
-//! An eliding lock that occupies a single byte.
-/** A x86_eliding_mutex is an HLE-enabled spin mutex. It is recommended to
-    put the mutex on a cache line that is not shared by the data it protects.
-    It should be used for locking short critical sections where the lock is
-    contended but the data it protects are not.  If zero-initialized, the
-    mutex is considered unheld.
-    @ingroup synchronization */
-class x86_eliding_mutex : tbb::internal::mutex_copy_deprecated_and_disabled {
-    //! 0 if lock is released, 1 if lock is acquired.
-    __TBB_atomic_flag flag;
-
-    friend class padded_mutex<x86_eliding_mutex, false>;
-
-public:
-    //! Construct unacquired lock.
-    /** Equivalent to zero-initialization of *this. */
-    x86_eliding_mutex() : flag(0) {}
-
-// bug in gcc 3.x.x causes syntax error in spite of the friend declaration above.
-// Make the scoped_lock public in that case.
-#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
-#else
-    // by default we will not provide the scoped_lock interface.  The user
-    // should use the padded version of the mutex.  scoped_lock is used in
-    // padded_mutex template.
-private:
-#endif
-    // scoped_lock in padded_mutex<> is the interface to use.
-    //! Represents acquisition of a mutex.
-    class scoped_lock : tbb::internal::no_copy {
-    private:
-        //! Points to currently held mutex, or NULL if no lock is held.
-        x86_eliding_mutex* my_mutex;
-
-    public:
-        //! Construct without acquiring a mutex.
-        scoped_lock() : my_mutex(NULL) {}
-
-        //! Construct and acquire lock on a mutex.
-        scoped_lock( x86_eliding_mutex& m ) : my_mutex(NULL) { acquire(m); }
-
-        //! Acquire lock.
-        void acquire( x86_eliding_mutex& m ) {
-            __TBB_ASSERT( !my_mutex, "already holding a lock" );
-
-            my_mutex=&m;
-            my_mutex->lock();
-        }
-
-        //! Try acquiring lock (non-blocking)
-        /** Return true if lock acquired; false otherwise. */
-        bool try_acquire( x86_eliding_mutex& m ) {
-            __TBB_ASSERT( !my_mutex, "already holding a lock" );
-
-            bool result = m.try_lock();
-            if( result ) {
-                my_mutex = &m;
-            }
-            return result;
-        }
-
-        //! Release lock
-        void release() {
-            __TBB_ASSERT( my_mutex, "release on scoped_lock that is not holding a lock" );
-
-            my_mutex->unlock();
-            my_mutex = NULL;
-        }
-
-        //! Destroy lock.  If holding a lock, releases the lock first.
-        ~scoped_lock() {
-            if( my_mutex ) {
-                release();
-            }
-        }
-    };
-#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
-#else
-public:
-#endif /* __TBB_USE_X86_ELIDING_MUTEX */
-
-    // Mutex traits
-    static const bool is_rw_mutex = false;
-    static const bool is_recursive_mutex = false;
-    static const bool is_fair_mutex = false;
-
-    // ISO C++0x compatibility methods
-
-    //! Acquire lock
-    void lock() {
-        __TBB_LockByteElided(flag);
-    }
-
-    //! Try acquiring lock (non-blocking)
-    /** Return true if lock acquired; false otherwise. */
-    bool try_lock() {
-        return __TBB_TryLockByteElided(flag);
-    }
-
-    //! Release lock
-    void unlock() {
-        __TBB_UnlockByteElided( flag );
-    }
-}; // end of x86_eliding_mutex
-
-} // namespace internal
-} // namespace interface7
-} // namespace tbb
-
-#endif /* ( __TBB_x86_32 || __TBB_x86_64 ) */
-
-#endif /* __TBB__x86_eliding_mutex_impl_H */
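For reference, the `x86_eliding_mutex` removed above relies on Intel HLE: the `__TBB_LockByteElided`/`__TBB_UnlockByteElided` calls bottom out in the XACQUIRE/XRELEASE byte prefixes defined in `gcc_itsx.h` further down in this patch. A rough equivalent can be written with GCC's documented HLE memory-order hints (`__ATOMIC_HLE_ACQUIRE`/`__ATOMIC_HLE_RELEASE`, available on x86 with GCC 4.8+ and TSX-capable targets); this is a sketch of the technique under those assumptions, not TBB's implementation:

```cpp
#include <cstdint>
#include <immintrin.h> // _mm_pause

class hle_spin_mutex {
    volatile uint8_t flag = 0; // 0 = free, 1 = held; zero-init means unheld
public:
    void lock() {
        // xchg with an XACQUIRE hint: enters an elided (transactional) section
        while (__atomic_exchange_n(&flag, uint8_t(1),
                                   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)) {
            // spin politely; 'pause' also aborts a mis-speculated elision
            while (flag) _mm_pause();
        }
    }
    void unlock() {
        // store with an XRELEASE hint: commits the elided section
        __atomic_store_n(&flag, uint8_t(0),
                         __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
    }
};
```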
diff --git a/lib/3rdParty/tbb/include/tbb/internal/_x86_rtm_rw_mutex_impl.h b/lib/3rdParty/tbb/include/tbb/internal/_x86_rtm_rw_mutex_impl.h
deleted file mode 100644
index b08c2331..00000000
--- a/lib/3rdParty/tbb/include/tbb/internal/_x86_rtm_rw_mutex_impl.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB__x86_rtm_rw_mutex_impl_H
-#define __TBB__x86_rtm_rw_mutex_impl_H
-
-#ifndef __TBB_spin_rw_mutex_H
-#error Do not #include this internal file directly; use public TBB headers instead.
-#endif
-
-#if __TBB_TSX_AVAILABLE
-
-#include "../tbb_stddef.h"
-#include "../tbb_machine.h"
-#include "../tbb_profiling.h"
-#include "../spin_rw_mutex.h"
-
-namespace tbb {
-namespace interface8 {
-namespace internal {
-
-enum RTM_type {
-    RTM_not_in_mutex,
-    RTM_transacting_reader,
-    RTM_transacting_writer,
-    RTM_real_reader,
-    RTM_real_writer
-};
-
-static const unsigned long speculation_granularity = 64;
-
-//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and
-// writer-preference
-/** @ingroup synchronization */
-class x86_rtm_rw_mutex: private spin_rw_mutex {
-#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
-// bug in gcc 3.x.x causes syntax error in spite of the friend declaration below.
-// Make the scoped_lock public in that case.
-public:
-#else
-private:
-#endif
-    friend class interface7::internal::padded_mutex<x86_rtm_rw_mutex, true>;
-    class scoped_lock;   // should be private
-    friend class scoped_lock;
-private:
-    //! @cond INTERNAL
-
-    //! Internal construct unacquired mutex.
-    void __TBB_EXPORTED_METHOD internal_construct();
-
-    //! Internal acquire write lock.
-    // only_speculate == true if we're doing a try_lock, else false.
-    void __TBB_EXPORTED_METHOD internal_acquire_writer(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
-
-    //! Internal acquire read lock.
-    // only_speculate == true if we're doing a try_lock, else false.
-    void __TBB_EXPORTED_METHOD internal_acquire_reader(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
-
-    //! Internal upgrade reader to become a writer.
-    bool __TBB_EXPORTED_METHOD internal_upgrade( x86_rtm_rw_mutex::scoped_lock& );
-
-    //! Out of line code for downgrading a writer to a reader.
-    bool __TBB_EXPORTED_METHOD internal_downgrade( x86_rtm_rw_mutex::scoped_lock& );
-
-    //! Internal try_acquire write lock.
-    bool __TBB_EXPORTED_METHOD internal_try_acquire_writer( x86_rtm_rw_mutex::scoped_lock& );
-
-    //! Internal release lock.
-    void __TBB_EXPORTED_METHOD internal_release( x86_rtm_rw_mutex::scoped_lock& );
-
-    static x86_rtm_rw_mutex* internal_get_mutex( const spin_rw_mutex::scoped_lock& lock )
-    {
-        return static_cast<x86_rtm_rw_mutex*>( lock.internal_get_mutex() );
-    }
-    static void internal_set_mutex( spin_rw_mutex::scoped_lock& lock, spin_rw_mutex* mtx )
-    {
-        lock.internal_set_mutex( mtx );
-    }
-    //! @endcond
-public:
-    //! Construct unacquired mutex.
-    x86_rtm_rw_mutex() {
-        w_flag = false;
-#if TBB_USE_THREADING_TOOLS
-        internal_construct();
-#endif
-    }
-
-#if TBB_USE_ASSERT
-    //! Empty destructor.
-    ~x86_rtm_rw_mutex() {}
-#endif /* TBB_USE_ASSERT */
-
-    // Mutex traits
-    static const bool is_rw_mutex = true;
-    static const bool is_recursive_mutex = false;
-    static const bool is_fair_mutex = false;
-
-#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
-#else
-    // by default we will not provide the scoped_lock interface.  The user
-    // should use the padded version of the mutex.  scoped_lock is used in
-    // padded_mutex template.
-private:
-#endif
-    //! The scoped locking pattern
-    /** It helps to avoid the common problem of forgetting to release lock.
-        It also nicely provides the "node" for queuing locks. */
-    // Speculation-enabled scoped lock for spin_rw_mutex
-    // The idea is to be able to reuse the acquire/release methods of spin_rw_mutex
-    // and its scoped lock wherever possible.  The only way to use a speculative lock is to use
-    // a scoped_lock. (because transaction_state must be local)
-
-    class scoped_lock : tbb::internal::no_copy {
-        friend class x86_rtm_rw_mutex;
-        spin_rw_mutex::scoped_lock my_scoped_lock;
-
-        RTM_type transaction_state;
-
-    public:
-        //! Construct lock that has not acquired a mutex.
-        /** Equivalent to zero-initialization of *this. */
-        scoped_lock() : my_scoped_lock(), transaction_state(RTM_not_in_mutex) {
-        }
-
-        //! Acquire lock on given mutex.
-        scoped_lock( x86_rtm_rw_mutex& m, bool write = true ) : my_scoped_lock(),
-                transaction_state(RTM_not_in_mutex) {
-            acquire(m, write);
-        }
-
-        //! Release lock (if lock is held).
-        ~scoped_lock() {
-            if(transaction_state != RTM_not_in_mutex) release();
-        }
-
-        //! Acquire lock on given mutex.
-        void acquire( x86_rtm_rw_mutex& m, bool write = true ) {
-            if( write ) m.internal_acquire_writer(*this);
-            else        m.internal_acquire_reader(*this);
-        }
-
-        //! Release lock
-        void release() {
-            x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
-            __TBB_ASSERT( mutex, "lock is not acquired" );
-            __TBB_ASSERT( transaction_state!=RTM_not_in_mutex, "lock is not acquired" );
-            return mutex->internal_release(*this);
-        }
-
-        //! Upgrade reader to become a writer.
-        /** Returns whether the upgrade happened without releasing and re-acquiring the lock */
-        bool upgrade_to_writer() {
-            x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
-            __TBB_ASSERT( mutex, "lock is not acquired" );
-            __TBB_ASSERT( transaction_state==RTM_transacting_reader || transaction_state==RTM_real_reader, "Invalid state for upgrade" );
-            return mutex->internal_upgrade(*this);
-        }
-
-        //! Downgrade writer to become a reader.
-        /** Returns whether the downgrade happened without releasing and re-acquiring the lock */
-        bool downgrade_to_reader() {
-            x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
-            __TBB_ASSERT( mutex, "lock is not acquired" );
-            __TBB_ASSERT( transaction_state==RTM_transacting_writer || transaction_state==RTM_real_writer, "Invalid state for downgrade" );
-            return mutex->internal_downgrade(*this);
-        }
-
-        //! Attempt to acquire mutex.
-        /** returns true if successful.  */
-        bool try_acquire( x86_rtm_rw_mutex& m, bool write = true ) {
-#if TBB_USE_ASSERT
-            x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
-            __TBB_ASSERT( !mutex, "lock is already acquired" );
-#endif
-            // have to assign m to our mutex.
-            // cannot set the mutex, because try_acquire in spin_rw_mutex depends on it being NULL.
-            if(write) return m.internal_try_acquire_writer(*this);
-            // speculatively acquire the lock. If this fails, do try_acquire on the spin_rw_mutex.
-            m.internal_acquire_reader(*this, /*only_speculate=*/true);
-            if(transaction_state == RTM_transacting_reader) return true;
-            if( my_scoped_lock.try_acquire(m, false)) {
-                transaction_state = RTM_real_reader;
-                return true;
-            }
-            return false;
-        }
-
-    }; // class x86_rtm_rw_mutex::scoped_lock
-
-    // ISO C++0x compatibility methods not provided because we cannot maintain
-    // state about whether a thread is in a transaction.
-
-private:
-    char pad[speculation_granularity-sizeof(spin_rw_mutex)]; // padding
-
-    // If true, writer holds the spin_rw_mutex.
-    tbb::atomic<bool> w_flag;  // want this on a separate cache line
-
-}; // x86_rtm_rw_mutex
-
-} // namespace internal
-} // namespace interface8
-} // namespace tbb
-
-#endif  /* __TBB_TSX_AVAILABLE */
-#endif /* __TBB__x86_rtm_rw_mutex_impl_H */
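The `x86_rtm_rw_mutex` removed above speculates first (the `RTM_transacting_*` states) and falls back to really taking the underlying `spin_rw_mutex` on abort, with the `w_flag` byte deliberately read inside the transaction so a real writer aborts all speculating readers. A condensed sketch of that elision pattern using the documented RTM intrinsics from `<immintrin.h>` (compile with `-mrtm`); `elided_critical_section` and `lock_is_held` are illustrative names, and the retry/backoff policy of the real implementation is omitted:

```cpp
#include <immintrin.h>
#include <mutex>

template <typename Mutex, typename F>
void elided_critical_section(Mutex& m, volatile bool& lock_is_held, F body) {
    if (_xbegin() == _XBEGIN_STARTED) {   // speculative path
        if (lock_is_held)                 // flag is now in our read set:
            _xabort(0xff);                // abort if a real owner exists
        body();
        _xend();                          // commit the transaction
        return;
    }
    // Fallback path: aborted transactions resume here with a status code.
    std::lock_guard<Mutex> guard(m);
    lock_is_held = true;                  // aborts concurrent speculators
    body();
    lock_is_held = false;
}
```

The `0xff` abort code mirrors the "lock already held" code used by `__TBB_machine_transaction_conflict_abort()` in `gcc_itsx.h` below.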
diff --git a/lib/3rdParty/tbb/include/tbb/machine/gcc_armv7.h b/lib/3rdParty/tbb/include/tbb/machine/gcc_armv7.h
deleted file mode 100644
index 642c14fe..00000000
--- a/lib/3rdParty/tbb/include/tbb/machine/gcc_armv7.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -/* - Platform isolation layer for the ARMv7-a architecture. -*/ - -#ifndef __TBB_machine_H -#error Do not include this file directly; include tbb_machine.h instead -#endif - -//TODO: is ARMv7 is the only version ever to support? -#if !(__ARM_ARCH_7A__) -#error compilation requires an ARMv7-a architecture. -#endif - -#include -#include - -#define __TBB_WORDSIZE 4 - -// Traditionally ARM is little-endian. -// Note that, since only the layout of aligned 32-bit words is of interest, -// any apparent PDP-endianness of 32-bit words at half-word alignment or -// any little-endian ordering of big-endian 32-bit words in 64-bit quantities -// may be disregarded for this setting. -#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG -#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE -#elif defined(__BYTE_ORDER__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED -#else - #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT -#endif - - -#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") -#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory") -#define __TBB_control_consistency_helper() __TBB_full_memory_fence() -#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence() -#define __TBB_release_consistency_helper() __TBB_full_memory_fence() - -//-------------------------------------------------- -// Compare and swap -//-------------------------------------------------- - -/** - * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr - * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand - * @param value value to assign *ptr to if *ptr==comparand - * @param comparand value to compare with *ptr - * @return value originally in memory at ptr, regardless of success -*/ -static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ) -{ - int32_t oldval, res; - - __TBB_full_memory_fence(); - - do { - __asm__ __volatile__( - "ldrex %1, [%3]\n" - "mov %0, #0\n" - "cmp %1, %4\n" - "it eq\n" - "strexeq %0, %5, [%3]\n" - : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr) - : "r" ((volatile int32_t *)ptr), "Ir" (comparand), "r" (value) - : "cc"); - } while (res); - - __TBB_full_memory_fence(); - - return oldval; -} - -/** - * Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr - * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand - * @param value value to assign *ptr to if *ptr==comparand - * @param comparand value to compare with *ptr - * @return value originally in memory at ptr, regardless of success - */ -static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ) -{ - int64_t oldval; - int32_t res; - - __TBB_full_memory_fence(); - - do { - __asm__ __volatile__( - "mov %0, #0\n" - "ldrexd %1, %H1, [%3]\n" - "cmp %1, %4\n" - "it eq\n" - "cmpeq %H1, %H4\n" - "it eq\n" - "strexdeq %0, %5, 
%H5, [%3]" - : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr) - : "r" ((volatile int64_t *)ptr), "r" (comparand), "r" (value) - : "cc"); - } while (res); - - __TBB_full_memory_fence(); - - return oldval; -} - -static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend) -{ - unsigned long tmp; - int32_t result, tmp2; - - __TBB_full_memory_fence(); - - __asm__ __volatile__( -"1: ldrex %0, [%4]\n" -" add %3, %0, %5\n" -" strex %1, %3, [%4]\n" -" cmp %1, #0\n" -" bne 1b\n" - : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2) - : "r" ((volatile int32_t *)ptr), "Ir" (addend) - : "cc"); - - __TBB_full_memory_fence(); - - return result; -} - -static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend) -{ - unsigned long tmp; - int64_t result, tmp2; - - __TBB_full_memory_fence(); - - __asm__ __volatile__( -"1: ldrexd %0, %H0, [%4]\n" -" adds %3, %0, %5\n" -" adc %H3, %H0, %H5\n" -" strexd %1, %3, %H3, [%4]\n" -" cmp %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2) - : "r" ((volatile int64_t *)ptr), "r" (addend) - : "cc"); - - - __TBB_full_memory_fence(); - - return result; -} - -inline void __TBB_machine_pause (int32_t delay ) -{ - while(delay>0) - { - __TBB_compiler_fence(); - delay--; - } -} - -namespace tbb { -namespace internal { - template - struct machine_load_store_relaxed { - static inline T load ( const volatile T& location ) { - const T value = location; - - /* - * An extra memory barrier is required for errata #761319 - * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a - */ - __TBB_acquire_consistency_helper(); - return value; - } - - static inline void store ( volatile T& location, T value ) { - location = value; - } - }; -}} // namespaces internal, tbb - -// Machine specific atomic operations - -#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) -#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) -#define __TBB_Pause(V) __TBB_machine_pause(V) - -// Use generics for some things -#define __TBB_USE_GENERIC_PART_WORD_CAS 1 -#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 -#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1 -#define __TBB_USE_GENERIC_FETCH_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 diff --git a/lib/3rdParty/tbb/include/tbb/machine/gcc_generic.h b/lib/3rdParty/tbb/include/tbb/machine/gcc_generic.h deleted file mode 100644 index 5fc2a901..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/gcc_generic.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_generic_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -#define __TBB_machine_gcc_generic_H - -#include -#include - -#define __TBB_WORDSIZE __SIZEOF_POINTER__ - -#if __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN - #define __TBB_64BIT_ATOMICS 0 -#endif - -/** FPU control setting not available for non-Intel architectures on Android **/ -#if __ANDROID__ && __TBB_generic_arch - #define __TBB_CPU_CTL_ENV_PRESENT 0 -#endif - -// __BYTE_ORDER__ is used in accordance with http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html, -// but __BIG_ENDIAN__ or __LITTLE_ENDIAN__ may be more commonly found instead. -#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG -#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE -#elif defined(__BYTE_ORDER__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED -#else - #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT -#endif - -#if __TBB_GCC_VERSION < 40700 -// Use __sync_* builtins - -/** As this generic implementation has absolutely no information about underlying - hardware, its performance most likely will be sub-optimal because of full memory - fence usages where a more lightweight synchronization means (or none at all) - could suffice. Thus if you use this header to enable TBB on a new platform, - consider forking it and relaxing below helpers as appropriate. **/ -#define __TBB_acquire_consistency_helper() __sync_synchronize() -#define __TBB_release_consistency_helper() __sync_synchronize() -#define __TBB_full_memory_fence() __sync_synchronize() -#define __TBB_control_consistency_helper() __sync_synchronize() - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \ -inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ - return __sync_val_compare_and_swap(reinterpret_cast(ptr),comparand,value); \ -} \ -inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ - return __sync_fetch_and_add(reinterpret_cast(ptr),value); \ -} - -#define __TBB_USE_GENERIC_FETCH_STORE 1 - -#else -// __TBB_GCC_VERSION >= 40700; use __atomic_* builtins available since gcc 4.7 - -#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") -// Acquire and release fence intrinsics in GCC might miss compiler fence. -// Adding it at both sides of an intrinsic, as we do not know what reordering can be made. 
-#define __TBB_acquire_consistency_helper() __TBB_compiler_fence(); __atomic_thread_fence(__ATOMIC_ACQUIRE); __TBB_compiler_fence() -#define __TBB_release_consistency_helper() __TBB_compiler_fence(); __atomic_thread_fence(__ATOMIC_RELEASE); __TBB_compiler_fence() -#define __TBB_full_memory_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST) -#define __TBB_control_consistency_helper() __TBB_acquire_consistency_helper() - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \ -inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ - (void)__atomic_compare_exchange_n(reinterpret_cast(ptr), &comparand, value, \ - false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ - return comparand; \ -} \ -inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ - return __atomic_fetch_add(reinterpret_cast(ptr), value, __ATOMIC_SEQ_CST); \ -} \ -inline T __TBB_machine_fetchstore##S( volatile void *ptr, T value ) { \ - return __atomic_exchange_n(reinterpret_cast(ptr), value, __ATOMIC_SEQ_CST); \ -} - -#endif // __TBB_GCC_VERSION < 40700 - -__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t) -__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t) -__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t) -__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t) - -#undef __TBB_MACHINE_DEFINE_ATOMICS - -namespace tbb{ namespace internal { namespace gcc_builtins { - inline int clz(unsigned int x){ return __builtin_clz(x);}; - inline int clz(unsigned long int x){ return __builtin_clzl(x);}; - inline int clz(unsigned long long int x){ return __builtin_clzll(x);}; -}}} -//gcc __builtin_clz builtin count _number_ of leading zeroes -static inline intptr_t __TBB_machine_lg( uintptr_t x ) { - return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ; -} - - -typedef unsigned char __TBB_Flag; -typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; - -#if __TBB_GCC_VERSION < 40700 -// Use __sync_* builtins - -static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { - __sync_fetch_and_or(reinterpret_cast(ptr),addend); -} - -static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) { - __sync_fetch_and_and(reinterpret_cast(ptr),addend); -} - -inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) { - return __sync_lock_test_and_set(&flag,1)==0; -} - -inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) { - __sync_lock_release(&flag); -} - -#else -// __TBB_GCC_VERSION >= 40700; use __atomic_* builtins available since gcc 4.7 - -static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { - __atomic_fetch_or(reinterpret_cast(ptr),addend,__ATOMIC_SEQ_CST); -} - -static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) { - __atomic_fetch_and(reinterpret_cast(ptr),addend,__ATOMIC_SEQ_CST); -} - -inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) { - return !__atomic_test_and_set(&flag,__ATOMIC_ACQUIRE); -} - -inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) { - __atomic_clear(&flag,__ATOMIC_RELEASE); -} - -#endif // __TBB_GCC_VERSION < 40700 - -// Machine specific atomic operations -#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) -#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) - -#define __TBB_TryLockByte __TBB_machine_try_lock_byte -#define __TBB_UnlockByte __TBB_machine_unlock_byte - -// Definition of other functions -#define __TBB_Log2(V) __TBB_machine_lg(V) - -// TODO: implement with __atomic_* builtins where available -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define 
__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -#if __TBB_WORDSIZE==4 - #define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 -#endif - -#if __TBB_x86_32 || __TBB_x86_64 -#include "gcc_itsx.h" -#endif diff --git a/lib/3rdParty/tbb/include/tbb/machine/gcc_ia32_common.h b/lib/3rdParty/tbb/include/tbb/machine/gcc_ia32_common.h deleted file mode 100644 index f5efc375..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/gcc_ia32_common.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_machine_gcc_ia32_common_H -#define __TBB_machine_gcc_ia32_common_H - -//TODO: Add a higher-level function, e.g. tbb::internal::log2(), into tbb_stddef.h, which -//uses __TBB_Log2 and contains the assert and remove the assert from here and all other -//platform-specific headers. -//TODO: Check if use of gcc intrinsic gives a better chance for cross call optimizations -template -static inline intptr_t __TBB_machine_lg( T x ) { - __TBB_ASSERT(x>0, "The logarithm of a non-positive value is undefined."); - uintptr_t j, i = x; - __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); - return j; -} -#define __TBB_Log2(V) __TBB_machine_lg(V) - -#ifndef __TBB_Pause -//TODO: check if raising a ratio of pause instructions to loop control instructions -//(via e.g. loop unrolling) gives any benefit for HT. E.g, the current implementation -//does about 2 CPU-consuming instructions for every pause instruction. Perhaps for -//high pause counts it should use an unrolled loop to raise the ratio, and thus free -//up more integer cycles for the other hyperthread. On the other hand, if the loop is -//unrolled too far, it won't fit in the core's loop cache, and thus take away -//instruction decode slots from the other hyperthread. 
- -//TODO: check if use of gcc __builtin_ia32_pause intrinsic gives a "some how" better performing code -static inline void __TBB_machine_pause( int32_t delay ) { - for (int32_t i = 0; i < delay; i++) { - __asm__ __volatile__("pause;"); - } - return; -} -#define __TBB_Pause(V) __TBB_machine_pause(V) -#endif /* !__TBB_Pause */ - -namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } } -static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() { -#if __INTEL_COMPILER - return _rdtsc(); -#else - tbb::internal::uint32_t hi, lo; - __asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo)); - return (tbb::internal::machine_tsc_t( hi ) << 32) | lo; -#endif -} -#define __TBB_time_stamp() __TBB_machine_time_stamp() - -// API to retrieve/update FPU control setting -#ifndef __TBB_CPU_CTL_ENV_PRESENT -#define __TBB_CPU_CTL_ENV_PRESENT 1 -namespace tbb { -namespace internal { -class cpu_ctl_env { -private: - int mxcsr; - short x87cw; - static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ -public: - bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; } - void get_env() { - #if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN - cpu_ctl_env loc_ctl; - __asm__ __volatile__ ( - "stmxcsr %0\n\t" - "fstcw %1" - : "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw) - ); - *this = loc_ctl; - #else - __asm__ __volatile__ ( - "stmxcsr %0\n\t" - "fstcw %1" - : "=m"(mxcsr), "=m"(x87cw) - ); - #endif - mxcsr &= MXCSR_CONTROL_MASK; - } - void set_env() const { - __asm__ __volatile__ ( - "ldmxcsr %0\n\t" - "fldcw %1" - : : "m"(mxcsr), "m"(x87cw) - ); - } -}; -} // namespace internal -} // namespace tbb -#endif /* !__TBB_CPU_CTL_ENV_PRESENT */ - -#include "gcc_itsx.h" - -#endif /* __TBB_machine_gcc_ia32_common_H */ diff --git a/lib/3rdParty/tbb/include/tbb/machine/gcc_itsx.h b/lib/3rdParty/tbb/include/tbb/machine/gcc_itsx.h deleted file mode 100644 index caa35441..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/gcc_itsx.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_itsx_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -#define __TBB_machine_gcc_itsx_H - -#define __TBB_OP_XACQUIRE 0xF2 -#define __TBB_OP_XRELEASE 0xF3 -#define __TBB_OP_LOCK 0xF0 - -#define __TBB_STRINGIZE_INTERNAL(arg) #arg -#define __TBB_STRINGIZE(arg) __TBB_STRINGIZE_INTERNAL(arg) - -#ifdef __TBB_x86_64 -#define __TBB_r_out "=r" -#else -#define __TBB_r_out "=q" -#endif - -inline static uint8_t __TBB_machine_try_lock_elided( volatile uint8_t* lk ) -{ - uint8_t value = 1; - __asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XACQUIRE)"; lock; xchgb %0, %1;" - : __TBB_r_out(value), "=m"(*lk) : "0"(value), "m"(*lk) : "memory" ); - return uint8_t(value^1); -} - -inline static void __TBB_machine_try_lock_elided_cancel() -{ - // 'pause' instruction aborts HLE/RTM transactions - __asm__ volatile ("pause\n" : : : "memory" ); -} - -inline static void __TBB_machine_unlock_elided( volatile uint8_t* lk ) -{ - __asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XRELEASE)"; movb $0, %0" - : "=m"(*lk) : "m"(*lk) : "memory" ); -} - -#if __TBB_TSX_INTRINSICS_PRESENT -#include - -#define __TBB_machine_is_in_transaction _xtest -#define __TBB_machine_begin_transaction _xbegin -#define __TBB_machine_end_transaction _xend -#define __TBB_machine_transaction_conflict_abort() _xabort(0xff) - -#else - -/*! - * Check if the instruction is executed in a transaction or not - */ -inline static bool __TBB_machine_is_in_transaction() -{ - int8_t res = 0; -#if __TBB_x86_32 - __asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n" - "setz %0" : "=q"(res) : : "memory" ); -#else - __asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n" - "setz %0" : "=r"(res) : : "memory" ); -#endif - return res==0; -} - -/*! - * Enter speculative execution mode. - * @return -1 on success - * abort cause ( or 0 ) on abort - */ -inline static uint32_t __TBB_machine_begin_transaction() -{ - uint32_t res = ~uint32_t(0); // success value - __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" // XBEGIN - " .long 2f-1b-6\n" // 2f-1b == difference in addresses of start - // of XBEGIN and the MOVL - // 2f - 1b - 6 == that difference minus the size of the - // XBEGIN instruction. This is the abort offset to - // 2: below. - " jmp 3f\n" // success (leave -1 in res) - "2: movl %%eax,%0\n" // store failure code in res - "3:" - :"=r"(res):"0"(res):"memory","%eax"); - return res; -} - -/*! - * Attempt to commit/end transaction - */ -inline static void __TBB_machine_end_transaction() -{ - __asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD5" :::"memory"); // XEND -} - -/* - * aborts with code 0xFF (lock already held) - */ -inline static void __TBB_machine_transaction_conflict_abort() -{ - __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte 0xFF" :::"memory"); -} - -#endif /* __TBB_TSX_INTRINSICS_PRESENT */ diff --git a/lib/3rdParty/tbb/include/tbb/machine/ibm_aix51.h b/lib/3rdParty/tbb/include/tbb/machine/ibm_aix51.h deleted file mode 100644 index a905b4e1..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/ibm_aix51.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -// TODO: revise by comparing with mac_ppc.h - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_ibm_aix51_H - -#define __TBB_WORDSIZE 8 -#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG // assumption based on operating system - -#include -#include -#include - -extern "C" { -int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand); -int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand); -void __TBB_machine_flush (); -void __TBB_machine_lwsync (); -void __TBB_machine_isync (); -} - -// Mapping of old entry point names retained for the sake of backward binary compatibility -#define __TBB_machine_cmpswp4 __TBB_machine_cas_32 -#define __TBB_machine_cmpswp8 __TBB_machine_cas_64 - -#define __TBB_Yield() sched_yield() - -#define __TBB_USE_GENERIC_PART_WORD_CAS 1 -#define __TBB_USE_GENERIC_FETCH_ADD 1 -#define __TBB_USE_GENERIC_FETCH_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -#if __GNUC__ - #define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory") - #define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory") - #define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory") - #define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory") -#else - // IBM C++ Compiler does not support inline assembly - // TODO: Since XL 9.0 or earlier GCC syntax is supported. Replace with more - // lightweight implementation (like in mac_ppc.h) - #define __TBB_control_consistency_helper() __TBB_machine_isync () - #define __TBB_acquire_consistency_helper() __TBB_machine_lwsync () - #define __TBB_release_consistency_helper() __TBB_machine_lwsync () - #define __TBB_full_memory_fence() __TBB_machine_flush () -#endif diff --git a/lib/3rdParty/tbb/include/tbb/machine/icc_generic.h b/lib/3rdParty/tbb/include/tbb/machine/icc_generic.h deleted file mode 100644 index 04863000..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/icc_generic.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_icc_generic_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#if ! __TBB_ICC_BUILTIN_ATOMICS_PRESENT - #error "Intel C++ Compiler of at least 12.0 version is needed to use ICC intrinsics port" -#endif - -#define __TBB_machine_icc_generic_H - -//ICC mimics the "native" target compiler -#if _MSC_VER - #include "msvc_ia32_common.h" -#else - #include "gcc_ia32_common.h" -#endif - -//TODO: Make __TBB_WORDSIZE macro optional for ICC intrinsics port. 
-//As compiler intrinsics are used for all the operations it is possible to do. - -#if __TBB_x86_32 - #define __TBB_WORDSIZE 4 -#else - #define __TBB_WORDSIZE 8 -#endif -#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE - -//__TBB_compiler_fence() defined just in case, as it seems not to be used on its own anywhere else -#ifndef __TBB_compiler_fence -#if _MSC_VER - //TODO: any way to use same intrinsics on windows and linux? - #pragma intrinsic(_ReadWriteBarrier) - #define __TBB_compiler_fence() _ReadWriteBarrier() -#else - #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") -#endif -#endif - -#ifndef __TBB_full_memory_fence -#if _MSC_VER - //TODO: any way to use same intrinsics on windows and linux? - #pragma intrinsic(_mm_mfence) - #define __TBB_full_memory_fence() _mm_mfence() -#else - #define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory") -#endif -#endif - -#ifndef __TBB_control_consistency_helper -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#endif - -namespace tbb { namespace internal { -//TODO: is there any way to reuse definition of memory_order enum from ICC instead of copy paste. -//however it seems unlikely that ICC will silently change exact enum values, as they are defined -//in the ISO exactly like this. -//TODO: add test that exact values of the enum are same as in the ISO C++11 -typedef enum memory_order { - memory_order_relaxed, memory_order_consume, memory_order_acquire, - memory_order_release, memory_order_acq_rel, memory_order_seq_cst -} memory_order; - -namespace icc_intrinsics_port { - template - T convert_argument(T value){ - return value; - } - //The overload below is needed to have explicit conversion of pointer to void* in argument list. - //compiler bug? - //TODO: add according broken macro and recheck with ICC 13.0 if the overload is still needed - template - void* convert_argument(T* value){ - return (void*)value; - } -} -//TODO: code below is a bit repetitive, consider simplifying it -template -struct machine_load_store { - static T load_with_acquire ( const volatile T& location ) { - return __atomic_load_explicit(&location, memory_order_acquire); - } - static void store_with_release ( volatile T &location, T value ) { - __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release); - } -}; - -template -struct machine_load_store_relaxed { - static inline T load ( const T& location ) { - return __atomic_load_explicit(&location, memory_order_relaxed); - } - static inline void store ( T& location, T value ) { - __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed); - } -}; - -template -struct machine_load_store_seq_cst { - static T load ( const volatile T& location ) { - return __atomic_load_explicit(&location, memory_order_seq_cst); - } - - static void store ( volatile T &location, T value ) { - __atomic_store_explicit(&location, value, memory_order_seq_cst); - } -}; - -}} // namespace tbb::internal - -namespace tbb{ namespace internal { namespace icc_intrinsics_port{ - typedef enum memory_order_map { - relaxed = memory_order_relaxed, - acquire = memory_order_acquire, - release = memory_order_release, - full_fence= memory_order_seq_cst - } memory_order_map; -}}}// namespace tbb::internal - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,M) \ -inline T __TBB_machine_cmpswp##S##M( volatile void *ptr, T value, T comparand ) { \ - __atomic_compare_exchange_strong_explicit( \ - (T*)ptr \ - ,&comparand \ - ,value \ - , 
tbb::internal::icc_intrinsics_port::M \ - , tbb::internal::icc_intrinsics_port::M); \ - return comparand; \ -} \ - \ -inline T __TBB_machine_fetchstore##S##M(volatile void *ptr, T value) { \ - return __atomic_exchange_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \ -} \ - \ -inline T __TBB_machine_fetchadd##S##M(volatile void *ptr, T value) { \ - return __atomic_fetch_add_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \ -} \ - -__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, full_fence) -__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, acquire) -__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, release) -__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, relaxed) - -__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, full_fence) -__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, acquire) -__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, release) -__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, relaxed) - -__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence) -__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, acquire) -__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, release) -__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, relaxed) - -__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, full_fence) -__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, acquire) -__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, release) -__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, relaxed) - - -#undef __TBB_MACHINE_DEFINE_ATOMICS - -#define __TBB_USE_FENCED_ATOMICS 1 - -namespace tbb { namespace internal { -#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN -__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence) -__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence) - -__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(acquire) -__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(release) - -__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(relaxed) -__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(relaxed) - -template -struct machine_load_store { - static T load_with_acquire ( const volatile T& location ) { - if( tbb::internal::is_aligned(&location,8)) { - return __atomic_load_explicit(&location, memory_order_acquire); - } else { - return __TBB_machine_generic_load8acquire(&location); - } - } - static void store_with_release ( volatile T &location, T value ) { - if( tbb::internal::is_aligned(&location,8)) { - __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release); - } else { - return __TBB_machine_generic_store8release(&location,value); - } - } -}; - -template -struct machine_load_store_relaxed { - static T load( const volatile T& location ) { - if( tbb::internal::is_aligned(&location,8)) { - return __atomic_load_explicit(&location, memory_order_relaxed); - } else { - return __TBB_machine_generic_load8relaxed(&location); - } - } - static void store( volatile T &location, T value ) { - if( tbb::internal::is_aligned(&location,8)) { - __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed); - } else { - return __TBB_machine_generic_store8relaxed(&location,value); - } - } -}; - -template -struct machine_load_store_seq_cst { - static T load ( const volatile T& location ) { - if( tbb::internal::is_aligned(&location,8)) { - return __atomic_load_explicit(&location, memory_order_seq_cst); - } else { - return __TBB_machine_generic_load8full_fence(&location); - } - - } - - static void store ( volatile T &location, T value ) { - if( 
tbb::internal::is_aligned(&location,8)) { - __atomic_store_explicit(&location, value, memory_order_seq_cst); - } else { - return __TBB_machine_generic_store8full_fence(&location,value); - } - - } -}; - -#endif -}} // namespace tbb::internal -template -inline void __TBB_machine_OR( T *operand, T addend ) { - __atomic_fetch_or_explicit(operand, addend, tbb::internal::memory_order_seq_cst); -} - -template -inline void __TBB_machine_AND( T *operand, T addend ) { - __atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst); -} - diff --git a/lib/3rdParty/tbb/include/tbb/machine/linux_common.h b/lib/3rdParty/tbb/include/tbb/machine/linux_common.h deleted file mode 100644 index 4d2d355b..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/linux_common.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_machine_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include -#define __TBB_Yield() sched_yield() - -#include -/* Futex definitions */ -#include - -#if defined(SYS_futex) - -#define __TBB_USE_FUTEX 1 -#include -#include -// Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE. - -#ifdef FUTEX_WAIT -#define __TBB_FUTEX_WAIT FUTEX_WAIT -#else -#define __TBB_FUTEX_WAIT 0 -#endif - -#ifdef FUTEX_WAKE -#define __TBB_FUTEX_WAKE FUTEX_WAKE -#else -#define __TBB_FUTEX_WAKE 1 -#endif - -#ifndef __TBB_ASSERT -#error machine specific headers must be included after tbb_stddef.h -#endif - -namespace tbb { - -namespace internal { - -inline int futex_wait( void *futex, int comparand ) { - int r = syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 ); -#if TBB_USE_ASSERT - int e = errno; - __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." ); -#endif /* TBB_USE_ASSERT */ - return r; -} - -inline int futex_wakeup_one( void *futex ) { - int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 ); - __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" ); - return r; -} - -inline int futex_wakeup_all( void *futex ) { - int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 ); - __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" ); - return r; -} - -} /* namespace internal */ - -} /* namespace tbb */ - -#endif /* SYS_futex */ diff --git a/lib/3rdParty/tbb/include/tbb/machine/linux_ia32.h b/lib/3rdParty/tbb/include/tbb/machine/linux_ia32.h deleted file mode 100644 index 932d343d..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/linux_ia32.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_linux_ia32_H - -#include -#include "gcc_ia32_common.h" - -#define __TBB_WORDSIZE 4 -#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE - -#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() -#define __TBB_release_consistency_helper() __TBB_compiler_fence() -#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory") - -#if __TBB_ICC_ASM_VOLATILE_BROKEN -#define __TBB_VOLATILE -#else -#define __TBB_VOLATILE volatile -#endif - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R) \ -static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \ -{ \ - T result; \ - \ - __asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \ - : "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \ - : "q"(value), "0"(comparand), "m"(*(__TBB_VOLATILE T*)ptr) \ - : "memory"); \ - return result; \ -} \ - \ -static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \ -{ \ - T result; \ - __asm__ __volatile__("lock\nxadd" X " %0,%1" \ - : R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \ - : "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr) \ - : "memory"); \ - return result; \ -} \ - \ -static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \ -{ \ - T result; \ - __asm__ __volatile__("lock\nxchg" X " %0,%1" \ - : R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \ - : "0"(value), "m"(*(__TBB_VOLATILE T*)ptr) \ - : "memory"); \ - return result; \ -} \ - -__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q") -__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r") -__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r") - -#if __INTEL_COMPILER -#pragma warning( push ) -// reference to EBX in a function requiring stack alignment -#pragma warning( disable: 998 ) -#endif - -#if __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN -#define __TBB_IA32_CAS8_NOINLINE __attribute__ ((noinline)) -#else -#define __TBB_IA32_CAS8_NOINLINE -#endif - -static inline __TBB_IA32_CAS8_NOINLINE int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) { -//TODO: remove the extra part of condition once __TBB_GCC_BUILTIN_ATOMICS_PRESENT is lowered to gcc version 4.1.2 -#if (__TBB_GCC_BUILTIN_ATOMICS_PRESENT || (__TBB_GCC_VERSION >= 40102)) && !__TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN - return __sync_val_compare_and_swap( reinterpret_cast(ptr), comparand, value ); -#else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */ - //TODO: look like ICC 13.0 has some issues with this code, investigate it more deeply - int64_t result; - union { - int64_t i64; - int32_t i32[2]; - }; - i64 = value; -#if __PIC__ - /* compiling position-independent code */ - // EBX register preserved for compliance with position-independent code rules on IA32 - int32_t tmp; - __asm__ __volatile__ ( - "movl %%ebx,%2\n\t" - "movl %5,%%ebx\n\t" -#if __GNUC__==3 - "lock\n\t cmpxchg8b %1\n\t" -#else - "lock\n\t cmpxchg8b 
(%3)\n\t" -#endif - "movl %2,%%ebx" - : "=A"(result) - , "=m"(*(__TBB_VOLATILE int64_t *)ptr) - , "=m"(tmp) -#if __GNUC__==3 - : "m"(*(__TBB_VOLATILE int64_t *)ptr) -#else - : "SD"(ptr) -#endif - , "0"(comparand) - , "m"(i32[0]), "c"(i32[1]) - : "memory" -#if __INTEL_COMPILER - ,"ebx" -#endif - ); -#else /* !__PIC__ */ - __asm__ __volatile__ ( - "lock\n\t cmpxchg8b %1\n\t" - : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr) - : "m"(*(__TBB_VOLATILE int64_t *)ptr) - , "0"(comparand) - , "b"(i32[0]), "c"(i32[1]) - : "memory" - ); -#endif /* __PIC__ */ - return result; -#endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */ -} - -#undef __TBB_IA32_CAS8_NOINLINE - -#if __INTEL_COMPILER -#pragma warning( pop ) -#endif // warning 998 is back - -static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) { - __asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory"); -} - -static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) { - __asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory"); -} - -//TODO: Check if it possible and profitable for IA-32 architecture on (Linux* and Windows*) -//to use of 64-bit load/store via floating point registers together with full fence -//for sequentially consistent load/store, instead of CAS. - -#if __clang__ -#define __TBB_fildq "fildll" -#define __TBB_fistpq "fistpll" -#else -#define __TBB_fildq "fildq" -#define __TBB_fistpq "fistpq" -#endif - -static inline int64_t __TBB_machine_aligned_load8 (const volatile void *ptr) { - __TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_load8 should be used with 8 byte aligned locations only \n"); - int64_t result; - __asm__ __volatile__ ( __TBB_fildq " %1\n\t" - __TBB_fistpq " %0" : "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" ); - return result; -} - -static inline void __TBB_machine_aligned_store8 (volatile void *ptr, int64_t value ) { - __TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_store8 should be used with 8 byte aligned locations only \n"); - // Aligned store - __asm__ __volatile__ ( __TBB_fildq " %1\n\t" - __TBB_fistpq " %0" : "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" ); -} - -static inline int64_t __TBB_machine_load8 (const volatile void *ptr) { -#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN - if( tbb::internal::is_aligned(ptr,8)) { -#endif - return __TBB_machine_aligned_load8(ptr); -#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN - } else { - // Unaligned load - return __TBB_machine_cmpswp8(const_cast(ptr),0,0); - } -#endif -} - -//! 
Handles misaligned 8-byte store -/** Defined in tbb_misc.cpp */ -extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value ); -extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr ); - -static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) { -#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN - if( tbb::internal::is_aligned(ptr,8)) { -#endif - __TBB_machine_aligned_store8(ptr,value); -#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN - } else { - // Unaligned store -#if TBB_USE_PERFORMANCE_WARNINGS - __TBB_machine_store8_slow_perf_warning(ptr); -#endif /* TBB_USE_PERFORMANCE_WARNINGS */ - __TBB_machine_store8_slow(ptr,value); - } -#endif -} - -// Machine specific atomic operations -#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) -#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) - -#define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1 -#define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1 -#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - diff --git a/lib/3rdParty/tbb/include/tbb/machine/linux_ia64.h b/lib/3rdParty/tbb/include/tbb/machine/linux_ia64.h deleted file mode 100644 index f4772288..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/linux_ia64.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_linux_ia64_H - -#include -#include - -#define __TBB_WORDSIZE 8 -#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE - -#if __INTEL_COMPILER - #define __TBB_compiler_fence() - #define __TBB_control_consistency_helper() __TBB_compiler_fence() - #define __TBB_acquire_consistency_helper() - #define __TBB_release_consistency_helper() - #define __TBB_full_memory_fence() __mf() -#else - #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") - #define __TBB_control_consistency_helper() __TBB_compiler_fence() - // Even though GCC imbues volatile loads with acquire semantics, it sometimes moves - // loads over the acquire fence. The following helpers stop such incorrect code motion. - #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() - #define __TBB_release_consistency_helper() __TBB_compiler_fence() - #define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory") -#endif /* !__INTEL_COMPILER */ - -// Most of the functions will be in a .s file -// TODO: revise dynamic_link, memory pools and etc. if the library dependency is removed. 
- -extern "C" { - int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend); - int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend); - int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend); - - int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend); - int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend); - int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend); - - int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value); - int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend); - int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend); - - int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value); - int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend); - int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend); - - int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value); - int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value); - int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value); - - int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value); - int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value); - int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value); - - int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value); - int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value); - int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value); - - int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value); - int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value); - int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value); - - int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand); - int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand); - int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand); - - int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand); - int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand); - int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand); - - int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand); - int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand); - int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand); - - int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand); - int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand); - int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand); - - int64_t __TBB_machine_lg(uint64_t value); - void __TBB_machine_pause(int32_t delay); - bool __TBB_machine_trylockbyte( volatile unsigned char &ptr ); - int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr ); - - //! Retrieves the current RSE backing store pointer. IA64 specific. 
- void* __TBB_get_bsp(); - - int32_t __TBB_machine_load1_relaxed(const void *ptr); - int32_t __TBB_machine_load2_relaxed(const void *ptr); - int32_t __TBB_machine_load4_relaxed(const void *ptr); - int64_t __TBB_machine_load8_relaxed(const void *ptr); - - void __TBB_machine_store1_relaxed(void *ptr, int32_t value); - void __TBB_machine_store2_relaxed(void *ptr, int32_t value); - void __TBB_machine_store4_relaxed(void *ptr, int32_t value); - void __TBB_machine_store8_relaxed(void *ptr, int64_t value); -} // extern "C" - -// Mapping old entry points to the names corresponding to the new full_fence identifier. -#define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_full_fence -#define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_full_fence -#define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_full_fence -#define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_full_fence -#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence -#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence -#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence -#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence -#define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full_fence -#define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full_fence -#define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full_fence -#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full_fence - -// Mapping relaxed operations to the entry points implementing them. -/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot - actually have completely relaxed RMW operation here. 
**/ -#define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire -#define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire -#define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire -#define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire -#define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquire -#define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquire -#define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquire -#define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquire -#define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire -#define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire -#define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire -#define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \ - template <typename T> \ - struct machine_load_store_relaxed<T,S> { \ - static inline T load ( const T& location ) { \ - return (T)__TBB_machine_load##S##_relaxed(&location); \ - } \ - static inline void store ( T& location, T value ) { \ - __TBB_machine_store##S##_relaxed(&location, (V)value); \ - } \ - } - -namespace tbb { -namespace internal { - __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t); - __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t); - __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t); - __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t); -}} // namespaces internal, tbb - -#undef __TBB_MACHINE_DEFINE_ATOMICS - -#define __TBB_USE_FENCED_ATOMICS 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -// Definition of Lock functions -#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) -#define __TBB_LockByte(P) __TBB_machine_lockbyte(P) - -// Definition of other utility functions -#define __TBB_Pause(V) __TBB_machine_pause(V) -#define __TBB_Log2(V) __TBB_machine_lg(V) diff --git a/lib/3rdParty/tbb/include/tbb/machine/linux_intel64.h b/lib/3rdParty/tbb/include/tbb/machine/linux_intel64.h deleted file mode 100644 index 02153c2a..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/linux_intel64.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H) -#error Do not #include this internal file directly; use public TBB headers instead.
-#endif - -#define __TBB_machine_linux_intel64_H - -#include <stdint.h> -#include "gcc_ia32_common.h" - -#define __TBB_WORDSIZE 8 -#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE - -#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() -#define __TBB_release_consistency_helper() __TBB_compiler_fence() - -#ifndef __TBB_full_memory_fence -#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory") -#endif - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X) \ -static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \ -{ \ - T result; \ - \ - __asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \ - : "=a"(result), "=m"(*(volatile T*)ptr) \ - : "q"(value), "0"(comparand), "m"(*(volatile T*)ptr) \ - : "memory"); \ - return result; \ -} \ - \ -static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \ -{ \ - T result; \ - __asm__ __volatile__("lock\nxadd" X " %0,%1" \ - : "=r"(result),"=m"(*(volatile T*)ptr) \ - : "0"(addend), "m"(*(volatile T*)ptr) \ - : "memory"); \ - return result; \ -} \ - \ -static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \ -{ \ - T result; \ - __asm__ __volatile__("lock\nxchg" X " %0,%1" \ - : "=r"(result),"=m"(*(volatile T*)ptr) \ - : "0"(value), "m"(*(volatile T*)ptr) \ - : "memory"); \ - return result; \ -} \ - -__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"") -__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"") -__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"") -__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q") - -#undef __TBB_MACHINE_DEFINE_ATOMICS - -static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) { - __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory"); -} - -static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) { - __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory"); -} - -#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) -#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) - -#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - diff --git a/lib/3rdParty/tbb/include/tbb/machine/mac_ppc.h b/lib/3rdParty/tbb/include/tbb/machine/mac_ppc.h deleted file mode 100644 index 13f387b4..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/mac_ppc.h +++ /dev/null @@ -1,313 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_gcc_power_H - -#include <stdint.h> -#include <unistd.h> - -// TODO: rename to gcc_power.h?
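An aside, not part of the diff: the PowerPC header deleted below spells out every atomic as a lwarx/stwcx. (load-reserved/store-conditional) retry loop in assembly. A weak compare-exchange loop in portable C++11 compiles down to exactly this pattern on LL/SC machines; weak CAS may fail spuriously, just as a reservation may be lost, which is why both versions retry. A minimal sketch, with a helper name of my own choosing:

#include <atomic>
#include <cstdint>

inline int32_t fetch_or_acq_rel(std::atomic<int32_t>& a, int32_t mask) {
    int32_t old = a.load(std::memory_order_relaxed);
    // compare_exchange_weak may fail spuriously (the LL/SC reservation was
    // lost); on failure it refreshes 'old' with the current value, so retry.
    while (!a.compare_exchange_weak(old, old | mask,
                                    std::memory_order_acq_rel,
                                    std::memory_order_relaxed)) {
    }
    return old; // the value in memory before the OR, like TBB's __TBB_AtomicOR operands
}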
-// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL). -// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided. - -#if __powerpc64__ || __ppc64__ - // IBM XL documents __powerpc64__ (and __PPC64__). - // Apple documents __ppc64__ (with __ppc__ only on 32-bit). - #define __TBB_WORDSIZE 8 -#else - #define __TBB_WORDSIZE 4 -#endif - -// Traditionally Power Architecture is big-endian. -// Little-endian could be just an address manipulation (compatibility with TBB not verified), -// or normal little-endian (on more recent systems). Embedded PowerPC systems may support -// page-specific endianness, but then one endianness must be hidden from TBB so that it still sees only one. -#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG -#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE -#elif defined(__BYTE_ORDER__) - #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED -#else - #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT -#endif - -// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware: -#if __TBB_WORDSIZE==8 - // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds. - #define __TBB_64BIT_ATOMICS 1 -#elif __bgp__ - // Do not change the following definition, because this is known 32-bit hardware. - #define __TBB_64BIT_ATOMICS 0 -#else - // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0. - // You must make certain that the program will only use them on actual 64-bit hardware - // (which typically means that the entire program is only executed on such hardware), - // because their implementation involves machine instructions that are illegal elsewhere. - // The setting can be chosen independently per compilation unit, - // which also means that TBB itself does not need to be rebuilt. - // Alternatively (but only for the current architecture and TBB version), - // override the default as a predefined macro when invoking the compiler. - #ifndef __TBB_64BIT_ATOMICS - #define __TBB_64BIT_ATOMICS 0 - #endif -#endif - -inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand ) -{ - int32_t result; - - __asm__ __volatile__("sync\n" - "0:\n\t" - "lwarx %[res],0,%[ptr]\n\t" /* load w/ reservation */ - "cmpw %[res],%[cmp]\n\t" /* compare against comparand */ - "bne- 1f\n\t" /* exit if not same */ - "stwcx. %[val],0,%[ptr]\n\t" /* store new value */ - "bne- 0b\n" /* retry if reservation lost */ - "1:\n\t" /* the exit */ - "isync" - : [res]"=&r"(result) - , "+m"(* (int32_t*) ptr) /* redundant with "memory" */ - : [ptr]"r"(ptr) - , [val]"r"(value) - , [cmp]"r"(comparand) - : "memory" /* compiler full fence */ - , "cr0" /* clobbered by cmp and/or stwcx. */ - ); - return result; -} - -#if __TBB_WORDSIZE==8 - -inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) -{ - int64_t result; - __asm__ __volatile__("sync\n" - "0:\n\t" - "ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */ - "cmpd %[res],%[cmp]\n\t" /* compare against comparand */ - "bne- 1f\n\t" /* exit if not same */ - "stdcx. 
%[val],0,%[ptr]\n\t" /* store new value */ - "bne- 0b\n" /* retry if reservation lost */ - "1:\n\t" /* the exit */ - "isync" - : [res]"=&r"(result) - , "+m"(* (int64_t*) ptr) /* redundant with "memory" */ - : [ptr]"r"(ptr) - , [val]"r"(value) - , [cmp]"r"(comparand) - : "memory" /* compiler full fence */ - , "cr0" /* clobbered by cmp and/or stdcx. */ - ); - return result; -} - -#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */ - -inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) -{ - int64_t result; - int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers - __asm__ __volatile__("sync\n\t" - "ld %[val],%[valm]\n\t" - "ld %[cmp],%[cmpm]\n" - "0:\n\t" - "ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */ - "cmpd %[res],%[cmp]\n\t" /* compare against comparand */ - "bne- 1f\n\t" /* exit if not same */ - "stdcx. %[val],0,%[ptr]\n\t" /* store new value */ - "bne- 0b\n" /* retry if reservation lost */ - "1:\n\t" /* the exit */ - "std %[res],%[resm]\n\t" - "isync" - : [resm]"=m"(result) - , [res] "=&r"( result_register) - , [val] "=&r"( value_register) - , [cmp] "=&r"(comparand_register) - , "+m"(* (int64_t*) ptr) /* redundant with "memory" */ - : [ptr] "r"(ptr) - , [valm]"m"(value) - , [cmpm]"m"(comparand) - : "memory" /* compiler full fence */ - , "cr0" /* clobbered by cmpd and/or stdcx. */ - ); - return result; -} - -#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ - -#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \ - template \ - struct machine_load_store { \ - static inline T load_with_acquire(const volatile T& location) { \ - T result; \ - __asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \ - "0:\n\t" \ - cmpx " %[res],%[res]\n\t" \ - "bne- 0b\n\t" \ - "isync" \ - : [res]"=r"(result) \ - : [ptr]"b"(&location) /* cannot use register 0 here */ \ - , "m"(location) /* redundant with "memory" */ \ - : "memory" /* compiler acquire fence */ \ - , "cr0" /* clobbered by cmpw/cmpd */); \ - return result; \ - } \ - static inline void store_with_release(volatile T &location, T value) { \ - __asm__ __volatile__("lwsync\n\t" \ - stx " %[val],0(%[ptr])" \ - : "=m"(location) /* redundant with "memory" */ \ - : [ptr]"b"(&location) /* cannot use register 0 here */ \ - , [val]"r"(value) \ - : "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \ - } \ - }; \ - \ - template \ - struct machine_load_store_relaxed { \ - static inline T load (const __TBB_atomic T& location) { \ - T result; \ - __asm__ __volatile__(ldx " %[res],0(%[ptr])" \ - : [res]"=r"(result) \ - : [ptr]"b"(&location) /* cannot use register 0 here */ \ - , "m"(location) \ - ); /*(no compiler fence)*/ /*(cr0 not affected)*/ \ - return result; \ - } \ - static inline void store (__TBB_atomic T &location, T value) { \ - __asm__ __volatile__(stx " %[val],0(%[ptr])" \ - : "=m"(location) \ - : [ptr]"b"(&location) /* cannot use register 0 here */ \ - , [val]"r"(value) \ - ); /*(no compiler fence)*/ /*(cr0 not affected)*/ \ - } \ - }; - -namespace tbb { -namespace internal { - __TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw") - __TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw") - __TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw") - -#if __TBB_WORDSIZE==8 - - __TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd") - -#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */ - - template - struct machine_load_store { - static inline T load_with_acquire(const volatile T& location) { - T result; - T result_register; // dummy variable to 
allocate a register - __asm__ __volatile__("ld %[res],0(%[ptr])\n\t" - "std %[res],%[resm]\n" - "0:\n\t" - "cmpd %[res],%[res]\n\t" - "bne- 0b\n\t" - "isync" - : [resm]"=m"(result) - , [res]"=&r"(result_register) - : [ptr]"b"(&location) /* cannot use register 0 here */ - , "m"(location) /* redundant with "memory" */ - : "memory" /* compiler acquire fence */ - , "cr0" /* clobbered by cmpd */); - return result; - } - - static inline void store_with_release(volatile T &location, T value) { - T value_register; // dummy variable to allocate a register - __asm__ __volatile__("lwsync\n\t" - "ld %[val],%[valm]\n\t" - "std %[val],0(%[ptr])" - : "=m"(location) /* redundant with "memory" */ - , [val]"=&r"(value_register) - : [ptr]"b"(&location) /* cannot use register 0 here */ - , [valm]"m"(value) - : "memory"/*compiler release fence*/ /*(cr0 not affected)*/); - } - }; - - struct machine_load_store_relaxed { - static inline T load (const volatile T& location) { - T result; - T result_register; // dummy variable to allocate a register - __asm__ __volatile__("ld %[res],0(%[ptr])\n\t" - "std %[res],%[resm]" - : [resm]"=m"(result) - , [res]"=&r"(result_register) - : [ptr]"b"(&location) /* cannot use register 0 here */ - , "m"(location) - ); /*(no compiler fence)*/ /*(cr0 not affected)*/ - return result; - } - - static inline void store (volatile T &location, T value) { - T value_register; // dummy variable to allocate a register - __asm__ __volatile__("ld %[val],%[valm]\n\t" - "std %[val],0(%[ptr])" - : "=m"(location) - , [val]"=&r"(value_register) - : [ptr]"b"(&location) /* cannot use register 0 here */ - , [valm]"m"(value) - ); /*(no compiler fence)*/ /*(cr0 not affected)*/ - } - }; - #define __TBB_machine_load_store_relaxed_8 - -#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ - -}} // namespaces internal, tbb - -#undef __TBB_MACHINE_DEFINE_LOAD_STORE - -#define __TBB_USE_GENERIC_PART_WORD_CAS 1 -#define __TBB_USE_GENERIC_FETCH_ADD 1 -#define __TBB_USE_GENERIC_FETCH_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory") -#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory") - -static inline intptr_t __TBB_machine_lg( uintptr_t x ) { - __TBB_ASSERT(x, "__TBB_Log2(0) undefined"); - // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0 -#if __TBB_WORDSIZE==8 - __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); - return 63-static_cast(x); -#else - __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); - return 31-static_cast(x); -#endif -} -#define __TBB_Log2(V) __TBB_machine_lg(V) - -// Assumes implicit alignment for any 32-bit value -typedef uint32_t __TBB_Flag; -#define __TBB_Flag __TBB_Flag - -inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) { - return __TBB_machine_cmpswp4(&flag,1,0)==0; -} -#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) diff --git a/lib/3rdParty/tbb/include/tbb/machine/macos_common.h b/lib/3rdParty/tbb/include/tbb/machine/macos_common.h deleted file mode 100644 index 119ad979..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/macos_common.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_macos_common_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_macos_common_H - -#include <sched.h> -#define __TBB_Yield() sched_yield() - -// __TBB_HardwareConcurrency - -#include <sys/types.h> -#include <sys/sysctl.h> - -static inline int __TBB_macos_available_cpu() { - int name[2] = {CTL_HW, HW_AVAILCPU}; - int ncpu; - size_t size = sizeof(ncpu); - sysctl( name, 2, &ncpu, &size, NULL, 0 ); - return ncpu; -} - -#define __TBB_HardwareConcurrency() __TBB_macos_available_cpu() - -#ifndef __TBB_full_memory_fence - // TBB has not recognized the architecture (none of the architecture abstraction - // headers was included). - #define __TBB_UnknownArchitecture 1 -#endif - -#if __TBB_UnknownArchitecture -// Implementation of atomic operations based on OS provided primitives -#include <libkern/OSAtomic.h> - -static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t value, int64_t comparand) -{ - __TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for macOS* atomics"); - int64_t* address = (int64_t*)ptr; - while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){ -#if __TBB_WORDSIZE==8 - int64_t snapshot = *address; -#else - int64_t snapshot = OSAtomicAdd64( 0, address ); -#endif - if( snapshot!=comparand ) return snapshot; - } - return comparand; -} - -#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX - -#endif /* __TBB_UnknownArchitecture */ - -#if __TBB_UnknownArchitecture - -#ifndef __TBB_WORDSIZE -#define __TBB_WORDSIZE __SIZEOF_POINTER__ -#endif - -#ifdef __TBB_ENDIANNESS - // Already determined based on hardware architecture. -#elif __BIG_ENDIAN__ - #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG -#elif __LITTLE_ENDIAN__ - #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE -#else - #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED -#endif - -/** As this generic implementation has absolutely no information about underlying - hardware, its performance most likely will be sub-optimal because of full memory - fence usages where a more lightweight synchronization means (or none at all) - could suffice. Thus if you use this header to enable TBB on a new platform, - consider forking it and relaxing below helpers as appropriate.
**/ -#define __TBB_control_consistency_helper() OSMemoryBarrier() -#define __TBB_acquire_consistency_helper() OSMemoryBarrier() -#define __TBB_release_consistency_helper() OSMemoryBarrier() -#define __TBB_full_memory_fence() OSMemoryBarrier() - -static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand) -{ - __TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for macOS atomics"); - int32_t* address = (int32_t*)ptr; - while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){ - int32_t snapshot = *address; - if( snapshot!=comparand ) return snapshot; - } - return comparand; -} - -static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend) -{ - __TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for macOS atomics"); - return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend; -} - -static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend) -{ - __TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for macOS atomics"); - return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend; -} - -#define __TBB_USE_GENERIC_PART_WORD_CAS 1 -#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 -#define __TBB_USE_GENERIC_FETCH_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#if __TBB_WORDSIZE == 4 - #define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 -#endif -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -#endif /* __TBB_UnknownArchitecture */ diff --git a/lib/3rdParty/tbb/include/tbb/machine/mic_common.h b/lib/3rdParty/tbb/include/tbb/machine/mic_common.h deleted file mode 100644 index 8765d39f..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/mic_common.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_mic_common_H -#define __TBB_mic_common_H - -#ifndef __TBB_machine_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#if ! __TBB_DEFINE_MIC - #error mic_common.h should be included only when building for Intel(R) Many Integrated Core Architecture -#endif - -#ifndef __TBB_PREFETCHING -#define __TBB_PREFETCHING 1 -#endif -#if __TBB_PREFETCHING -#include -#define __TBB_cl_prefetch(p) _mm_prefetch((const char*)p, _MM_HINT_T1) -#define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1) -#endif - -/** Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/ -#define __TBB_full_memory_fence() __asm__ __volatile__("lock; addl $0,(%%rsp)":::"memory") -#define __TBB_Pause(x) _mm_delay_32(16*(x)) -#define __TBB_STEALING_PAUSE 1500/16 -#include -#define __TBB_Yield() sched_yield() - -/** Specifics **/ -#define __TBB_STEALING_ABORT_ON_CONTENTION 1 -#define __TBB_YIELD2P 1 -#define __TBB_HOARD_NONLOCAL_TASKS 1 - -#if ! 
( __FreeBSD__ || __linux__ ) - #error Intel(R) Many Integrated Core Compiler does not define __FreeBSD__ or __linux__ anymore. Check for the __TBB_XXX_BROKEN defined under __FreeBSD__ or __linux__. -#endif /* ! ( __FreeBSD__ || __linux__ ) */ - -#endif /* __TBB_mic_common_H */ diff --git a/lib/3rdParty/tbb/include/tbb/machine/msvc_armv7.h b/lib/3rdParty/tbb/include/tbb/machine/msvc_armv7.h deleted file mode 100644 index 40d22020..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/msvc_armv7.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_msvc_armv7_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_msvc_armv7_H - -#include -#include - -#define __TBB_WORDSIZE 4 - -#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED - -#if defined(TBB_WIN32_USE_CL_BUILTINS) -// We can test this on _M_IX86 -#pragma intrinsic(_ReadWriteBarrier) -#pragma intrinsic(_mm_mfence) -#define __TBB_compiler_fence() _ReadWriteBarrier() -#define __TBB_full_memory_fence() _mm_mfence() -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() -#define __TBB_release_consistency_helper() __TBB_compiler_fence() -#else -//Now __dmb(_ARM_BARRIER_SY) is used for both compiler and memory fences -//This might be changed later after testing -#define __TBB_compiler_fence() __dmb(_ARM_BARRIER_SY) -#define __TBB_full_memory_fence() __dmb(_ARM_BARRIER_SY) -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence() -#define __TBB_release_consistency_helper() __TBB_full_memory_fence() -#endif - -//-------------------------------------------------- -// Compare and swap -//-------------------------------------------------- - -/** - * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr - * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand - * @param value value to assign *ptr to if *ptr==comparand - * @param comparand value to compare with *ptr - * @return value originally in memory at ptr, regardless of success -*/ - -#define __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(S,T,F) \ -inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ - return _InterlockedCompareExchange##F(reinterpret_cast(ptr),value,comparand); \ -} \ - -#define __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(S,T,F) \ -inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ - return _InterlockedExchangeAdd##F(reinterpret_cast(ptr),value); \ -} \ - -__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(1,char,8) -__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(2,short,16) -__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(4,long,) -__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(8,__int64,64) -__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(4,long,) -#if defined(TBB_WIN32_USE_CL_BUILTINS) -// No _InterlockedExchangeAdd64 
intrinsic on _M_IX86 -#define __TBB_64BIT_ATOMICS 0 -#else -__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(8,__int64,64) -#endif - -inline void __TBB_machine_pause (int32_t delay ) -{ - while(delay>0) - { - __TBB_compiler_fence(); - delay--; - } -} - -// API to retrieve/update FPU control setting -#define __TBB_CPU_CTL_ENV_PRESENT 1 - -namespace tbb { -namespace internal { - -template -struct machine_load_store_relaxed { - static inline T load ( const volatile T& location ) { - const T value = location; - - /* - * An extra memory barrier is required for errata #761319 - * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a - */ - __TBB_acquire_consistency_helper(); - return value; - } - - static inline void store ( volatile T& location, T value ) { - location = value; - } -}; - -class cpu_ctl_env { -private: - unsigned int my_ctl; -public: - bool operator!=( const cpu_ctl_env& ctl ) const { return my_ctl != ctl.my_ctl; } - void get_env() { my_ctl = _control87(0, 0); } - void set_env() const { _control87( my_ctl, ~0U ); } -}; - -} // namespace internal -} // namespaces tbb - -// Machine specific atomic operations -#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) -#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) -#define __TBB_Pause(V) __TBB_machine_pause(V) - -// Use generics for some things -#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 -#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1 -#define __TBB_USE_GENERIC_FETCH_STORE 1 -#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -#if defined(TBB_WIN32_USE_CL_BUILTINS) -#if !__TBB_WIN8UI_SUPPORT -extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); -#define __TBB_Yield() SwitchToThread() -#else -#include -#define __TBB_Yield() std::this_thread::yield() -#endif -#else -#define __TBB_Yield() __yield() -#endif - -// Machine specific atomic operations -#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) -#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) - -template -inline void __TBB_machine_OR( T1 *operand, T2 addend ) { - _InterlockedOr((long volatile *)operand, (long)addend); -} - -template -inline void __TBB_machine_AND( T1 *operand, T2 addend ) { - _InterlockedAnd((long volatile *)operand, (long)addend); -} - diff --git a/lib/3rdParty/tbb/include/tbb/machine/msvc_ia32_common.h b/lib/3rdParty/tbb/include/tbb/machine/msvc_ia32_common.h deleted file mode 100644 index 8b4814bc..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/msvc_ia32_common.h +++ /dev/null @@ -1,280 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -#define __TBB_machine_msvc_ia32_common_H - -#include - -//TODO: consider moving this macro to tbb_config.h and using where MSVC asm is used -#if !_M_X64 || __INTEL_COMPILER - #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1 -#else - //MSVC in x64 mode does not accept inline assembler - #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0 - #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)" -#endif - -#if _M_X64 - #define __TBB_r(reg_name) r##reg_name - #define __TBB_W(name) name##64 - namespace tbb { namespace internal { namespace msvc_intrinsics { - typedef __int64 word; - }}} -#else - #define __TBB_r(reg_name) e##reg_name - #define __TBB_W(name) name - namespace tbb { namespace internal { namespace msvc_intrinsics { - typedef long word; - }}} -#endif - -#if _MSC_VER>=1600 && (!__INTEL_COMPILER || __INTEL_COMPILER>=1310) - // S is the operand size in bytes, B is the suffix for intrinsics for that size - #define __TBB_MACHINE_DEFINE_ATOMICS(S,B,T,U) \ - __pragma(intrinsic( _InterlockedCompareExchange##B )) \ - static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \ - return _InterlockedCompareExchange##B ( (T*)ptr, value, comparand ); \ - } \ - __pragma(intrinsic( _InterlockedExchangeAdd##B )) \ - static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \ - return _InterlockedExchangeAdd##B ( (T*)ptr, addend ); \ - } \ - __pragma(intrinsic( _InterlockedExchange##B )) \ - static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \ - return _InterlockedExchange##B ( (T*)ptr, value ); \ - } - - // Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64 - __TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8) - __TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16) - __TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32) - - #if __TBB_WORDSIZE==8 - __TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64) - #endif - - #undef __TBB_MACHINE_DEFINE_ATOMICS - #define __TBB_ATOMIC_PRIMITIVES_DEFINED -#endif /*_MSC_VER>=1600*/ - -#if _MSC_VER>=1300 || __INTEL_COMPILER>=1100 - #pragma intrinsic(_ReadWriteBarrier) - #pragma intrinsic(_mm_mfence) - #define __TBB_compiler_fence() _ReadWriteBarrier() - #define __TBB_full_memory_fence() _mm_mfence() -#elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE - #define __TBB_compiler_fence() __asm { __asm nop } - #define __TBB_full_memory_fence() __asm { __asm mfence } -#else - #error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it -#endif - -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() -#define __TBB_release_consistency_helper() __TBB_compiler_fence() - -#if (_MSC_VER>=1300) || (__INTEL_COMPILER) - #pragma intrinsic(_mm_pause) - namespace tbb { namespace internal { namespace msvc_intrinsics { - static inline void pause (uintptr_t delay ) { - for (;delay>0; --delay ) - _mm_pause(); - } - }}} - #define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V) - #define __TBB_SINGLE_PAUSE _mm_pause() -#else - #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE - #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG - #endif - namespace tbb { namespace internal { namespace msvc_inline_asm - static inline void pause (uintptr_t delay ) { - _asm - { - mov __TBB_r(ax), delay - __TBB_L1: - pause - add __TBB_r(ax), -1 - jne __TBB_L1 - } - return; - } - }}} - #define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V) - #define __TBB_SINGLE_PAUSE __asm pause -#endif - -#if 
(_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200) -// MSVC did not have this intrinsic prior to VC8. -// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic. - #pragma intrinsic(__TBB_W(_BitScanReverse)) - namespace tbb { namespace internal { namespace msvc_intrinsics { - static inline uintptr_t lg_bsr( uintptr_t i ){ - unsigned long j; - __TBB_W(_BitScanReverse)( &j, i ); - return j; - } - }}} - #define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V) -#else - #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE - #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG - #endif - namespace tbb { namespace internal { namespace msvc_inline_asm { - static inline uintptr_t lg_bsr( uintptr_t i ){ - uintptr_t j; - __asm - { - bsr __TBB_r(ax), i - mov j, __TBB_r(ax) - } - return j; - } - }}} - #define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V) -#endif - -#if _MSC_VER>=1400 - #pragma intrinsic(__TBB_W(_InterlockedOr)) - #pragma intrinsic(__TBB_W(_InterlockedAnd)) - namespace tbb { namespace internal { namespace msvc_intrinsics { - static inline void lock_or( volatile void *operand, intptr_t addend ){ - __TBB_W(_InterlockedOr)((volatile word*)operand, addend); - } - static inline void lock_and( volatile void *operand, intptr_t addend ){ - __TBB_W(_InterlockedAnd)((volatile word*)operand, addend); - } - }}} - #define __TBB_AtomicOR(P,V) tbb::internal::msvc_intrinsics::lock_or(P,V) - #define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V) -#else - #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE - #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG - #endif - namespace tbb { namespace internal { namespace msvc_inline_asm { - static inline void lock_or( volatile void *operand, __int32 addend ) { - __asm - { - mov eax, addend - mov edx, [operand] - lock or [edx], eax - } - } - static inline void lock_and( volatile void *operand, __int32 addend ) { - __asm - { - mov eax, addend - mov edx, [operand] - lock and [edx], eax - } - } - }}} - #define __TBB_AtomicOR(P,V) tbb::internal::msvc_inline_asm::lock_or(P,V) - #define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V) -#endif - -#pragma intrinsic(__rdtsc) -namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } } -static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() { - return __rdtsc(); -} -#define __TBB_time_stamp() __TBB_machine_time_stamp() - -// API to retrieve/update FPU control setting -#define __TBB_CPU_CTL_ENV_PRESENT 1 - -namespace tbb { namespace internal { class cpu_ctl_env; } } -#if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE - inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) { - __asm { - __asm mov __TBB_r(ax), ctl - __asm stmxcsr [__TBB_r(ax)] - __asm fstcw [__TBB_r(ax)+4] - } - } - inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) { - __asm { - __asm mov __TBB_r(ax), ctl - __asm ldmxcsr [__TBB_r(ax)] - __asm fldcw [__TBB_r(ax)+4] - } - } -#else - extern "C" { - void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ); - void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ); - } -#endif - -namespace tbb { -namespace internal { -class cpu_ctl_env { -private: - int mxcsr; - short x87cw; - static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ -public: - bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; } - void get_env() { - __TBB_get_cpu_ctl_env( this ); - mxcsr &= MXCSR_CONTROL_MASK; - } - void 
set_env() const { __TBB_set_cpu_ctl_env( this ); } -}; -} // namespace internal -} // namespace tbb - -#if !__TBB_WIN8UI_SUPPORT -extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); -#define __TBB_Yield() SwitchToThread() -#else -#include -#define __TBB_Yield() std::this_thread::yield() -#endif - -#undef __TBB_r -#undef __TBB_W -#undef __TBB_word - -extern "C" { - __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr); - void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr); - - // 'pause' instruction aborts HLE/RTM transactions - inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; } - -#if __TBB_TSX_INTRINSICS_PRESENT - #define __TBB_machine_is_in_transaction _xtest - #define __TBB_machine_begin_transaction _xbegin - #define __TBB_machine_end_transaction _xend - // The value (0xFF) below comes from the - // Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free - #define __TBB_machine_transaction_conflict_abort() _xabort(0xFF) -#else - __int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction(); - unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction(); - void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction(); - void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort(); -#endif /* __TBB_TSX_INTRINSICS_PRESENT */ -} diff --git a/lib/3rdParty/tbb/include/tbb/machine/sunos_sparc.h b/lib/3rdParty/tbb/include/tbb/machine/sunos_sparc.h deleted file mode 100644 index 9119f402..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/sunos_sparc.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_sunos_sparc_H - -#include -#include - -#define __TBB_WORDSIZE 8 -// Big endian is assumed for SPARC. -// While hardware may support page-specific bi-endianness, only big endian pages may be exposed to TBB -#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG - -/** To those working on SPARC hardware. Consider relaxing acquire and release - consistency helpers to no-op (as this port covers TSO mode only). 
**/ -#define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory") -#define __TBB_control_consistency_helper() __TBB_compiler_fence() -#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() -#define __TBB_release_consistency_helper() __TBB_compiler_fence() -#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory") - -//-------------------------------------------------- -// Compare and swap -//-------------------------------------------------- - -/** - * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr - * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand - * @param value value to assign *ptr to if *ptr==comparand - * @param comparand value to compare with *ptr - * @return value originally in memory at ptr, regardless of success -*/ -static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ){ - int32_t result; - __asm__ __volatile__( - "cas\t[%5],%4,%1" - : "=m"(*(int32_t *)ptr), "=r"(result) - : "m"(*(int32_t *)ptr), "1"(value), "r"(comparand), "r"(ptr) - : "memory"); - return result; -} - -/** - * Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr - * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand - * @param value value to assign *ptr to if *ptr==comparand - * @param comparand value to compare with *ptr - * @return value originally in memory at ptr, regardless of success - */ -static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ){ - int64_t result; - __asm__ __volatile__( - "casx\t[%5],%4,%1" - : "=m"(*(int64_t *)ptr), "=r"(result) - : "m"(*(int64_t *)ptr), "1"(value), "r"(comparand), "r"(ptr) - : "memory"); - return result; -} - -//--------------------------------------------------- -// Fetch and add -//--------------------------------------------------- - -/** - * Atomic fetch and add for 32 bit values, in this case implemented by continuously checking success of atomicity - * @param ptr pointer to value to add addend to - * @param addend value to add to *ptr - * @return value at ptr before addend was added - */ -static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){ - int32_t result; - __asm__ __volatile__ ( - "0:\t add\t %3, %4, %0\n" // do addition - "\t cas\t [%2], %3, %0\n" // cas to store result in memory - "\t cmp\t %3, %0\n" // check if value from memory is original - "\t bne,a,pn\t %%icc, 0b\n" // if not try again - "\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added - : "=&r"(result), "=m"(*(int32_t *)ptr) - : "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr) - : "ccr", "memory"); - return result; -} - -/** - * Atomic fetch and add for 64 bit values, in this case implemented by continuously checking success of atomicity - * @param ptr pointer to value to add addend to - * @param addend value to add to *ptr - * @return value at ptr before addend was added - */ -static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){ - int64_t result; - __asm__ __volatile__ ( - "0:\t add\t %3, %4, %0\n" // do addition - "\t casx\t [%2], %3, %0\n" // cas to store result in memory - "\t cmp\t %3, %0\n" // check if value from memory is original - "\t bne,a,pn\t %%xcc, 0b\n" // if not try again - "\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added - : "=&r"(result),
"=m"(*(int64_t *)ptr) - : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr) - : "ccr", "memory"); - return result; -} - -//-------------------------------------------------------- -// Logarithm (base two, integer) -//-------------------------------------------------------- - -static inline int64_t __TBB_machine_lg( uint64_t x ) { - __TBB_ASSERT(x, "__TBB_Log2(0) undefined"); - uint64_t count; - // one hot encode - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - x |= (x >> 32); - // count 1's - __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); - return count-1; -} - -//-------------------------------------------------------- - -static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) { - __asm__ __volatile__ ( - "0:\t or\t %2, %3, %%g1\n" // do operation - "\t casx\t [%1], %2, %%g1\n" // cas to store result in memory - "\t cmp\t %2, %%g1\n" // check if value from memory is original - "\t bne,a,pn\t %%xcc, 0b\n" // if not try again - "\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added - : "=m"(*(int64_t *)ptr) - : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr) - : "ccr", "g1", "memory"); -} - -static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) { - __asm__ __volatile__ ( - "0:\t and\t %2, %3, %%g1\n" // do operation - "\t casx\t [%1], %2, %%g1\n" // cas to store result in memory - "\t cmp\t %2, %%g1\n" // check if value from memory is original - "\t bne,a,pn\t %%xcc, 0b\n" // if not try again - "\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added - : "=m"(*(int64_t *)ptr) - : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr) - : "ccr", "g1", "memory"); -} - - -static inline void __TBB_machine_pause( int32_t delay ) { - // do nothing, inlined, doesn't matter -} - -// put 0xff in memory location, return memory value, -// generic trylockbyte puts 0x01, however this is fine -// because all that matters is that 0 is unlocked -static inline bool __TBB_machine_trylockbyte(unsigned char &flag){ - unsigned char result; - __asm__ __volatile__ ( - "ldstub\t [%2], %0\n" - : "=r"(result), "=m"(flag) - : "r"(&flag), "m"(flag) - : "memory"); - return result == 0; -} - -#define __TBB_USE_GENERIC_PART_WORD_CAS 1 -#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 -#define __TBB_USE_GENERIC_FETCH_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - -#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) -#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) - -// Definition of other functions -#define __TBB_Pause(V) __TBB_machine_pause(V) -#define __TBB_Log2(V) __TBB_machine_lg(V) - -#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) diff --git a/lib/3rdParty/tbb/include/tbb/machine/windows_api.h b/lib/3rdParty/tbb/include/tbb/machine/windows_api.h deleted file mode 100644 index d362abc5..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/windows_api.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_machine_windows_api_H -#define __TBB_machine_windows_api_H - -#if _WIN32 || _WIN64 - -#include - -#if _WIN32_WINNT < 0x0600 -// The following Windows API function is declared explicitly; -// otherwise it fails to compile by VS2005. -#if !defined(WINBASEAPI) || (_WIN32_WINNT < 0x0501 && _MSC_VER == 1400) -#define __TBB_WINBASEAPI extern "C" -#else -#define __TBB_WINBASEAPI WINBASEAPI -#endif -__TBB_WINBASEAPI BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION ); -__TBB_WINBASEAPI BOOL WINAPI InitializeCriticalSectionAndSpinCount( LPCRITICAL_SECTION, DWORD ); -// Overloading WINBASEAPI macro and using local functions missing in Windows XP/2003 -#define InitializeCriticalSectionEx inlineInitializeCriticalSectionEx -#define CreateSemaphoreEx inlineCreateSemaphoreEx -#define CreateEventEx inlineCreateEventEx -inline BOOL WINAPI inlineInitializeCriticalSectionEx( LPCRITICAL_SECTION lpCriticalSection, DWORD dwSpinCount, DWORD ) -{ - return InitializeCriticalSectionAndSpinCount( lpCriticalSection, dwSpinCount ); -} -inline HANDLE WINAPI inlineCreateSemaphoreEx( LPSECURITY_ATTRIBUTES lpSemaphoreAttributes, LONG lInitialCount, LONG lMaximumCount, LPCTSTR lpName, DWORD, DWORD ) -{ - return CreateSemaphore( lpSemaphoreAttributes, lInitialCount, lMaximumCount, lpName ); -} -inline HANDLE WINAPI inlineCreateEventEx( LPSECURITY_ATTRIBUTES lpEventAttributes, LPCTSTR lpName, DWORD dwFlags, DWORD ) -{ - BOOL manual_reset = dwFlags&0x00000001 ? TRUE : FALSE; // CREATE_EVENT_MANUAL_RESET - BOOL initial_set = dwFlags&0x00000002 ? TRUE : FALSE; // CREATE_EVENT_INITIAL_SET - return CreateEvent( lpEventAttributes, manual_reset, initial_set, lpName ); -} -#endif - -#if defined(RTL_SRWLOCK_INIT) -#ifndef __TBB_USE_SRWLOCK -// TODO: turn it on when bug 1952 will be fixed -#define __TBB_USE_SRWLOCK 0 -#endif -#endif - -#else -#error tbb/machine/windows_api.h should only be used for Windows based platforms -#endif // _WIN32 || _WIN64 - -#endif // __TBB_machine_windows_api_H diff --git a/lib/3rdParty/tbb/include/tbb/machine/windows_ia32.h b/lib/3rdParty/tbb/include/tbb/machine/windows_ia32.h deleted file mode 100644 index 8db0d2b8..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/windows_ia32.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -#define __TBB_machine_windows_ia32_H - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Workaround for overzealous compiler warnings in /Wp64 mode - #pragma warning (push) - #pragma warning (disable: 4244 4267) -#endif - -#include "msvc_ia32_common.h" - -#define __TBB_WORDSIZE 4 -#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE - -extern "C" { - __int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ); - __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ); - __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ); - void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value ); - __int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr); -} - -#ifndef __TBB_ATOMIC_PRIMITIVES_DEFINED - -#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \ -static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \ - T result; \ - volatile T *p = (T *)ptr; \ - __asm \ - { \ - __asm mov edx, p \ - __asm mov C , value \ - __asm mov A , comparand \ - __asm lock cmpxchg [edx], C \ - __asm mov result, A \ - } \ - return result; \ -} \ -\ -static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \ - T result; \ - volatile T *p = (T *)ptr; \ - __asm \ - { \ - __asm mov edx, p \ - __asm mov A, addend \ - __asm lock xadd [edx], A \ - __asm mov result, A \ - } \ - return result; \ -}\ -\ -static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \ - T result; \ - volatile T *p = (T *)ptr; \ - __asm \ - { \ - __asm mov edx, p \ - __asm mov A, value \ - __asm lock xchg [edx], A \ - __asm mov result, A \ - } \ - return result; \ -} - - -__TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl) -__TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx) -__TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx) - -#undef __TBB_MACHINE_DEFINE_ATOMICS - -#endif /*__TBB_ATOMIC_PRIMITIVES_DEFINED*/ - -//TODO: Check if it possible and profitable for IA-32 architecture on (Linux and Windows) -//to use of 64-bit load/store via floating point registers together with full fence -//for sequentially consistent load/store, instead of CAS. -#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 - - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warnings 4244, 4267 are back diff --git a/lib/3rdParty/tbb/include/tbb/machine/windows_intel64.h b/lib/3rdParty/tbb/include/tbb/machine/windows_intel64.h deleted file mode 100644 index 86abd6ad..00000000 --- a/lib/3rdParty/tbb/include/tbb/machine/windows_intel64.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - - - -*/ - -#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#define __TBB_machine_windows_intel64_H - -#define __TBB_WORDSIZE 8 -#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE - -#include "msvc_ia32_common.h" - -#ifndef __TBB_ATOMIC_PRIMITIVES_DEFINED - -#include -#pragma intrinsic(_InterlockedCompareExchange,_InterlockedExchangeAdd,_InterlockedExchange) -#pragma intrinsic(_InterlockedCompareExchange64,_InterlockedExchangeAdd64,_InterlockedExchange64) - -// ATTENTION: if you ever change argument types in machine-specific primitives, -// please take care of atomic_word<> specializations in tbb/atomic.h -extern "C" { - __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand ); - __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend ); - __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value ); - __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand ); - __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend ); - __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value ); -} - -inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) { - return _InterlockedCompareExchange( (long*)ptr, value, comparand ); -} -inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) { - return _InterlockedExchangeAdd( (long*)ptr, addend ); -} -inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) { - return _InterlockedExchange( (long*)ptr, value ); -} - -inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) { - return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand ); -} -inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) { - return _InterlockedExchangeAdd64( (__int64*)ptr, addend ); -} -inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) { - return _InterlockedExchange64( (__int64*)ptr, value ); -} - -#endif /*__TBB_ATOMIC_PRIMITIVES_DEFINED*/ - -#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 -#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 -#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 diff --git a/lib/3rdParty/tbb/include/tbb/memory_pool.h b/lib/3rdParty/tbb/include/tbb/memory_pool.h deleted file mode 100644 index b3bba397..00000000 --- a/lib/3rdParty/tbb/include/tbb/memory_pool.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - - - -*/ - -#ifndef __TBB_memory_pool_H -#define __TBB_memory_pool_H - -#if !TBB_PREVIEW_MEMORY_POOL -#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h -#endif -/** @file */ - -#include "scalable_allocator.h" -#include // std::bad_alloc -#include // std::runtime_error, std::invalid_argument -// required in C++03 to construct std::runtime_error and std::invalid_argument -#include -#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC -#include // std::forward -#endif - -#if __TBB_EXTRA_DEBUG -#define __TBBMALLOC_ASSERT ASSERT -#else -#define __TBBMALLOC_ASSERT(a,b) ((void)0) -#endif - -namespace tbb { -namespace interface6 { -//! @cond INTERNAL -namespace internal { - -//! Base of thread-safe pool allocator for variable-size requests -class pool_base : tbb::internal::no_copy { - // Pool interface is separate from standard allocator classes because it has - // to maintain internal state, no copy or assignment. Move and swap are possible. -public: - //! Reset pool to reuse its memory (free all objects at once) - void recycle() { rml::pool_reset(my_pool); } - - //! The "malloc" analogue to allocate block of memory of size bytes - void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } - - //! The "free" analogue to discard a previously allocated piece of memory. - void free(void* ptr) { rml::pool_free(my_pool, ptr); } - - //! The "realloc" analogue complementing pool_malloc. - // Enables some low-level optimization possibilities - void *realloc(void* ptr, size_t size) { - return rml::pool_realloc(my_pool, ptr, size); - } - -protected: - //! destroy pool - must be called in a child class - void destroy() { rml::pool_destroy(my_pool); } - - rml::MemoryPool *my_pool; -}; - -} // namespace internal -//! @endcond - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Workaround for erroneous "unreferenced parameter" warning in method destroy. - #pragma warning (push) - #pragma warning (disable: 4100) -#endif - -//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 -/** @ingroup memory_allocation */ -template -class memory_pool_allocator { -protected: - typedef P pool_type; - pool_type *my_pool; - template - friend class memory_pool_allocator; - template - friend bool operator==( const memory_pool_allocator& a, const memory_pool_allocator& b); - template - friend bool operator!=( const memory_pool_allocator& a, const memory_pool_allocator& b); -public: - typedef typename tbb::internal::allocator_type::value_type value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - template struct rebind { - typedef memory_pool_allocator other; - }; - - explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - template - memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - //! Allocate space for n objects. - pointer allocate( size_type n, const void* /*hint*/ = 0) { - pointer p = static_cast( my_pool->malloc( n*sizeof(value_type) ) ); - if (!p) - tbb::internal::throw_exception(std::bad_alloc()); - return p; - } - //! Free previously allocated block of memory. - void deallocate( pointer p, size_type ) { - my_pool->free(p); - } - //! 
Largest value for which method allocate might succeed. - size_type max_size() const throw() { - size_type max = static_cast(-1) / sizeof (value_type); - return (max > 0 ? max : 1); - } - //! Copy-construct value at location pointed to by p. -#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC - template - void construct(U *p, Args&&... args) - { ::new((void *)p) U(std::forward(args)...); } -#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC -#if __TBB_CPP11_RVALUE_REF_PRESENT - void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));} -#endif - void construct( pointer p, const value_type& value ) { ::new((void*)(p)) value_type(value); } -#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC - - //! Destroy value at location pointed to by p. - void destroy( pointer p ) { p->~value_type(); } - -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4100 is back - -//! Analogous to std::allocator, as defined in ISO C++ Standard, Section 20.4.1 -/** @ingroup memory_allocation */ -template -class memory_pool_allocator { -public: - typedef P pool_type; - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; - template struct rebind { - typedef memory_pool_allocator other; - }; - - explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - template - memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - -protected: - pool_type *my_pool; - template - friend class memory_pool_allocator; - template - friend bool operator==( const memory_pool_allocator& a, const memory_pool_allocator& b); - template - friend bool operator!=( const memory_pool_allocator& a, const memory_pool_allocator& b); -}; - -template -inline bool operator==( const memory_pool_allocator& a, const memory_pool_allocator& b) {return a.my_pool==b.my_pool;} - -template -inline bool operator!=( const memory_pool_allocator& a, const memory_pool_allocator& b) {return a.my_pool!=b.my_pool;} - - -//! Thread-safe growable pool allocator for variable-size requests -template -class memory_pool : public internal::pool_base { - Alloc my_alloc; // TODO: base-class optimization - static void *allocate_request(intptr_t pool_id, size_t & bytes); - static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); - -public: - //! construct pool with underlying allocator - explicit memory_pool(const Alloc &src = Alloc()); - - //! destroy pool - ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter - -}; - -class fixed_pool : public internal::pool_base { - void *my_buffer; - size_t my_size; - inline static void *allocate_request(intptr_t pool_id, size_t & bytes); - -public: - //! construct pool with underlying allocator - inline fixed_pool(void *buf, size_t size); - //! 
destroy pool - ~fixed_pool() { destroy(); } -}; - -//////////////// Implementation /////////////// - -template -memory_pool::memory_pool(const Alloc &src) : my_alloc(src) { - rml::MemPoolPolicy args(allocate_request, deallocate_request, - sizeof(typename Alloc::value_type)); - rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); - if (res!=rml::POOL_OK) - tbb::internal::throw_exception(std::runtime_error("Can't create pool")); -} -template -void *memory_pool::allocate_request(intptr_t pool_id, size_t & bytes) { - memory_pool &self = *reinterpret_cast*>(pool_id); - const size_t unit_size = sizeof(typename Alloc::value_type); - __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); - void *ptr; - __TBB_TRY { ptr = self.my_alloc.allocate( bytes/unit_size ); } - __TBB_CATCH(...) { return 0; } - return ptr; -} -#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED - // Workaround for erroneous "unreachable code" warning in the template below. - // Specific for VC++ 17-18 compiler - #pragma warning (push) - #pragma warning (disable: 4702) -#endif -template -int memory_pool::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { - memory_pool &self = *reinterpret_cast*>(pool_id); - const size_t unit_size = sizeof(typename Alloc::value_type); - __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); - self.my_alloc.deallocate( static_cast(raw_ptr), raw_bytes/unit_size ); - return 0; -} -#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED - #pragma warning (pop) -#endif -inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { - if (!buf || !size) - // TODO: improve support for mode with exceptions disabled - tbb::internal::throw_exception(std::invalid_argument("Zero in parameter is invalid")); - rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); - rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); - if (res!=rml::POOL_OK) - tbb::internal::throw_exception(std::runtime_error("Can't create pool")); -} -inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { - fixed_pool &self = *reinterpret_cast(pool_id); - __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); - bytes = self.my_size; - self.my_size = 0; // remember that buffer has been used - return self.my_buffer; -} - -} //namespace interface6 -using interface6::memory_pool_allocator; -using interface6::memory_pool; -using interface6::fixed_pool; -} //namespace tbb - -#undef __TBBMALLOC_ASSERT -#endif// __TBB_memory_pool_H diff --git a/lib/3rdParty/tbb/include/tbb/mutex.h b/lib/3rdParty/tbb/include/tbb/mutex.h deleted file mode 100644 index e40b4cd0..00000000 --- a/lib/3rdParty/tbb/include/tbb/mutex.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
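
Before moving on to the mutex headers, a quick orientation on what the deleted pool classes were for. A minimal usage sketch (this was a preview feature, gated by `TBB_PREVIEW_MEMORY_POOL`):

```cpp
// Minimal sketch: growable and fixed pools, plus the STL-style allocator.
#define TBB_PREVIEW_MEMORY_POOL 1
#include "tbb/memory_pool.h"
#include <memory>
#include <vector>

int main() {
    tbb::memory_pool<std::allocator<char>> pool;   // growable, thread-safe
    void* p = pool.malloc(256);                    // the "malloc" analogue
    pool.free(p);                                  // the "free" analogue

    // Standard containers can draw their memory from the pool.
    tbb::memory_pool_allocator<int> alloc(pool);
    std::vector<int, tbb::memory_pool_allocator<int>> v(alloc);
    v.push_back(42);

    // fixed_pool hands out memory from a single user-supplied buffer.
    char buf[1024];
    tbb::fixed_pool fpool(buf, sizeof(buf));
    void* q = fpool.malloc(64);
    fpool.free(q);
}
```
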
- - - - -*/ - -#ifndef __TBB_mutex_H -#define __TBB_mutex_H - -#if _WIN32||_WIN64 -#include "machine/windows_api.h" -#else -#include -#endif /* _WIN32||_WIN64 */ - -#include -#include "aligned_space.h" -#include "tbb_stddef.h" -#include "tbb_profiling.h" - -namespace tbb { - -//! Wrapper around the platform's native lock. -/** @ingroup synchronization */ -class mutex : internal::mutex_copy_deprecated_and_disabled { -public: - //! Construct unacquired mutex. - mutex() { -#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS - internal_construct(); -#else - #if _WIN32||_WIN64 - InitializeCriticalSectionEx(&impl, 4000, 0); - #else - int error_code = pthread_mutex_init(&impl,NULL); - if( error_code ) - tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_init failed"); - #endif /* _WIN32||_WIN64*/ -#endif /* TBB_USE_ASSERT */ - }; - - ~mutex() { -#if TBB_USE_ASSERT - internal_destroy(); -#else - #if _WIN32||_WIN64 - DeleteCriticalSection(&impl); - #else - pthread_mutex_destroy(&impl); - - #endif /* _WIN32||_WIN64 */ -#endif /* TBB_USE_ASSERT */ - }; - - class scoped_lock; - friend class scoped_lock; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock : internal::no_copy { - public: - //! Construct lock that has not acquired a mutex. - scoped_lock() : my_mutex(NULL) {}; - - //! Acquire lock on given mutex. - scoped_lock( mutex& mutex ) { - acquire( mutex ); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if( my_mutex ) - release(); - } - - //! Acquire lock on given mutex. - void acquire( mutex& mutex ) { -#if TBB_USE_ASSERT - internal_acquire(mutex); -#else - mutex.lock(); - my_mutex = &mutex; -#endif /* TBB_USE_ASSERT */ - } - - //! Try acquire lock on given mutex. - bool try_acquire( mutex& mutex ) { -#if TBB_USE_ASSERT - return internal_try_acquire (mutex); -#else - bool result = mutex.try_lock(); - if( result ) - my_mutex = &mutex; - return result; -#endif /* TBB_USE_ASSERT */ - } - - //! Release lock - void release() { -#if TBB_USE_ASSERT - internal_release (); -#else - my_mutex->unlock(); - my_mutex = NULL; -#endif /* TBB_USE_ASSERT */ - } - - private: - //! The pointer to the current mutex to work - mutex* my_mutex; - - //! All checks from acquire using mutex.state were moved here - void __TBB_EXPORTED_METHOD internal_acquire( mutex& m ); - - //! All checks from try_acquire using mutex.state were moved here - bool __TBB_EXPORTED_METHOD internal_try_acquire( mutex& m ); - - //! All checks from release using mutex.state were moved here - void __TBB_EXPORTED_METHOD internal_release(); - - friend class mutex; - }; - - // Mutex traits - static const bool is_rw_mutex = false; - static const bool is_recursive_mutex = false; - static const bool is_fair_mutex = false; - - // ISO C++0x compatibility methods - - //! Acquire lock - void lock() { -#if TBB_USE_ASSERT - aligned_space tmp; - new(tmp.begin()) scoped_lock(*this); -#else - #if _WIN32||_WIN64 - EnterCriticalSection(&impl); - #else - int error_code = pthread_mutex_lock(&impl); - if( error_code ) - tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_lock failed"); - #endif /* _WIN32||_WIN64 */ -#endif /* TBB_USE_ASSERT */ - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. 
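
The class above exists mainly for its `scoped_lock`: lock lifetime is tied to scope, so an early return or an exception cannot leak a held mutex. A short usage sketch:

```cpp
// Sketch: the scoped locking pattern provided by the deleted tbb::mutex.
#include "tbb/mutex.h"

tbb::mutex counter_mutex;
long counter = 0;

void safe_increment() {
    tbb::mutex::scoped_lock lock(counter_mutex);  // acquires the mutex
    ++counter;
}  // scoped_lock destructor releases it, even if the body threw
```
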
*/ - bool try_lock() { -#if TBB_USE_ASSERT - aligned_space tmp; - scoped_lock& s = *tmp.begin(); - s.my_mutex = NULL; - return s.internal_try_acquire(*this); -#else - #if _WIN32||_WIN64 - return TryEnterCriticalSection(&impl)!=0; - #else - return pthread_mutex_trylock(&impl)==0; - #endif /* _WIN32||_WIN64 */ -#endif /* TBB_USE_ASSERT */ - } - - //! Release lock - void unlock() { -#if TBB_USE_ASSERT - aligned_space tmp; - scoped_lock& s = *tmp.begin(); - s.my_mutex = this; - s.internal_release(); -#else - #if _WIN32||_WIN64 - LeaveCriticalSection(&impl); - #else - pthread_mutex_unlock(&impl); - #endif /* _WIN32||_WIN64 */ -#endif /* TBB_USE_ASSERT */ - } - - //! Return native_handle - #if _WIN32||_WIN64 - typedef LPCRITICAL_SECTION native_handle_type; - #else - typedef pthread_mutex_t* native_handle_type; - #endif - native_handle_type native_handle() { return (native_handle_type) &impl; } - - enum state_t { - INITIALIZED=0x1234, - DESTROYED=0x789A, - HELD=0x56CD - }; -private: -#if _WIN32||_WIN64 - CRITICAL_SECTION impl; - enum state_t state; -#else - pthread_mutex_t impl; -#endif /* _WIN32||_WIN64 */ - - //! All checks from mutex constructor using mutex.state were moved here - void __TBB_EXPORTED_METHOD internal_construct(); - - //! All checks from mutex destructor using mutex.state were moved here - void __TBB_EXPORTED_METHOD internal_destroy(); - -#if _WIN32||_WIN64 -public: - //! Set the internal state - void set_state( state_t to ) { state = to; } -#endif -}; - -__TBB_DEFINE_PROFILING_SET_NAME(mutex) - -} // namespace tbb - -#endif /* __TBB_mutex_H */ diff --git a/lib/3rdParty/tbb/include/tbb/null_mutex.h b/lib/3rdParty/tbb/include/tbb/null_mutex.h deleted file mode 100644 index 85c660e3..00000000 --- a/lib/3rdParty/tbb/include/tbb/null_mutex.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_null_mutex_H -#define __TBB_null_mutex_H - -#include "tbb_stddef.h" - -namespace tbb { - -//! A mutex which does nothing -/** A null_mutex does no operation and simulates success. - @ingroup synchronization */ -class null_mutex : internal::mutex_copy_deprecated_and_disabled { -public: - //! Represents acquisition of a mutex. 
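
Why ship a mutex that does nothing? Generic code written against the mutex concept can be instantiated with `null_mutex` to strip locking out at zero cost. A sketch, using a hypothetical `Counter` template of our own for illustration:

```cpp
// Sketch: null_mutex lets one template serve both threaded and serial uses.
#include "tbb/mutex.h"
#include "tbb/null_mutex.h"

template <typename Mutex>
class Counter {
    Mutex my_mutex;
    long my_value = 0;
public:
    void increment() {
        typename Mutex::scoped_lock lock(my_mutex);  // no-op for null_mutex
        ++my_value;
    }
};

Counter<tbb::mutex> shared_counter;       // thread-safe
Counter<tbb::null_mutex> serial_counter;  // locking compiles away
```
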
- class scoped_lock : internal::no_copy { - public: - scoped_lock() {} - scoped_lock( null_mutex& ) {} - ~scoped_lock() {} - void acquire( null_mutex& ) {} - bool try_acquire( null_mutex& ) { return true; } - void release() {} - }; - - null_mutex() {} - - // Mutex traits - static const bool is_rw_mutex = false; - static const bool is_recursive_mutex = true; - static const bool is_fair_mutex = true; -}; - -} - -#endif /* __TBB_null_mutex_H */ diff --git a/lib/3rdParty/tbb/include/tbb/null_rw_mutex.h b/lib/3rdParty/tbb/include/tbb/null_rw_mutex.h deleted file mode 100644 index fa0c8035..00000000 --- a/lib/3rdParty/tbb/include/tbb/null_rw_mutex.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_null_rw_mutex_H -#define __TBB_null_rw_mutex_H - -#include "tbb_stddef.h" - -namespace tbb { - -//! A rw mutex which does nothing -/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. - @ingroup synchronization */ -class null_rw_mutex : internal::mutex_copy_deprecated_and_disabled { -public: - //! Represents acquisition of a mutex. - class scoped_lock : internal::no_copy { - public: - scoped_lock() {} - scoped_lock( null_rw_mutex& , bool = true ) {} - ~scoped_lock() {} - void acquire( null_rw_mutex& , bool = true ) {} - bool upgrade_to_writer() { return true; } - bool downgrade_to_reader() { return true; } - bool try_acquire( null_rw_mutex& , bool = true ) { return true; } - void release() {} - }; - - null_rw_mutex() {} - - // Mutex traits - static const bool is_rw_mutex = true; - static const bool is_recursive_mutex = true; - static const bool is_fair_mutex = true; -}; - -} - -#endif /* __TBB_null_rw_mutex_H */ diff --git a/lib/3rdParty/tbb/include/tbb/parallel_do.h b/lib/3rdParty/tbb/include/tbb/parallel_do.h deleted file mode 100644 index 15275682..00000000 --- a/lib/3rdParty/tbb/include/tbb/parallel_do.h +++ /dev/null @@ -1,551 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_parallel_do_H -#define __TBB_parallel_do_H - -#include "internal/_range_iterator.h" -#include "internal/_template_helpers.h" -#include "task.h" -#include "aligned_space.h" -#include - -namespace tbb { -namespace interface9 { -//! @cond INTERNAL -namespace internal { - template class parallel_do_feeder_impl; -} // namespace internal -//! @endcond - -//! 
Class the user supplied algorithm body uses to add new tasks -/** \param Item Work item type **/ - template - class parallel_do_feeder: ::tbb::internal::no_copy - { - parallel_do_feeder() {} - virtual ~parallel_do_feeder () {} - virtual void internal_add_copy( const Item& item ) = 0; -#if __TBB_CPP11_RVALUE_REF_PRESENT - virtual void internal_add_move( Item&& item ) = 0; -#endif - template friend class internal::parallel_do_feeder_impl; - public: - //! Add a work item to a running parallel_do. - void add( const Item& item ) {internal_add_copy(item);} -#if __TBB_CPP11_RVALUE_REF_PRESENT - void add( Item&& item ) {internal_add_move(std::move(item));} -#endif - }; - -//! @cond INTERNAL -namespace internal { - template class do_group_task; - - //! For internal use only. - /** Selects one of the two possible forms of function call member operator. - @ingroup algorithms **/ - template - class parallel_do_operator_selector - { - typedef parallel_do_feeder Feeder; - template - static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2&, void (Body::*)(CvItem) const ) { - obj(tbb::internal::forward(arg1)); - } - template - static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2& arg2, void (Body::*)(CvItem, parallel_do_feeder&) const ) { - obj(tbb::internal::forward(arg1), arg2); - } - template - static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2&, void (Body::*)(CvItem&) const ) { - obj(arg1); - } - template - static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2& arg2, void (Body::*)(CvItem&, parallel_do_feeder&) const ) { - obj(arg1, arg2); - } - public: - template - static void call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2& arg2 ) - { - internal_call( obj, tbb::internal::forward(arg1), arg2, &Body::operator() ); - } - }; - - //! For internal use only. - /** Executes one iteration of a do. - @ingroup algorithms */ - template - class do_iteration_task: public task - { - typedef parallel_do_feeder_impl feeder_type; - - Item my_value; - feeder_type& my_feeder; - - do_iteration_task( const Item& value, feeder_type& feeder ) : - my_value(value), my_feeder(feeder) - {} - -#if __TBB_CPP11_RVALUE_REF_PRESENT - do_iteration_task( Item&& value, feeder_type& feeder ) : - my_value(std::move(value)), my_feeder(feeder) - {} -#endif - - task* execute() __TBB_override - { - parallel_do_operator_selector::call(*my_feeder.my_body, tbb::internal::move(my_value), my_feeder); - return NULL; - } - - template friend class parallel_do_feeder_impl; - }; // class do_iteration_task - - template - class do_iteration_task_iter: public task - { - typedef parallel_do_feeder_impl feeder_type; - - Iterator my_iter; - feeder_type& my_feeder; - - do_iteration_task_iter( const Iterator& iter, feeder_type& feeder ) : - my_iter(iter), my_feeder(feeder) - {} - - task* execute() __TBB_override - { - parallel_do_operator_selector::call(*my_feeder.my_body, *my_iter, my_feeder); - return NULL; - } - - template friend class do_group_task_forward; - template friend class do_group_task_input; - template friend class do_task_iter; - }; // class do_iteration_task_iter - - //! For internal use only. - /** Implements new task adding procedure. 
- @ingroup algorithms **/ - template - class parallel_do_feeder_impl : public parallel_do_feeder - { -#if __TBB_CPP11_RVALUE_REF_PRESENT - //Avoiding use of copy constructor in a virtual method if the type does not support it - void internal_add_copy_impl(std::true_type, const Item& item) { - typedef do_iteration_task iteration_type; - iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this); - task::spawn(t); - } - void internal_add_copy_impl(std::false_type, const Item&) { - __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); - } - void internal_add_copy( const Item& item ) __TBB_override - { -#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT - internal_add_copy_impl(typename std::is_copy_constructible::type(), item); -#else - internal_add_copy_impl(std::true_type(), item); -#endif - } - void internal_add_move( Item&& item ) __TBB_override - { - typedef do_iteration_task iteration_type; - iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(std::move(item), *this); - task::spawn(t); - } -#else /* ! __TBB_CPP11_RVALUE_REF_PRESENT */ - void internal_add_copy(const Item& item) __TBB_override { - typedef do_iteration_task iteration_type; - iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this); - task::spawn(t); - } -#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */ - public: - const Body* my_body; - empty_task* my_barrier; - - parallel_do_feeder_impl() - { - my_barrier = new( task::allocate_root() ) empty_task(); - __TBB_ASSERT(my_barrier, "root task allocation failed"); - } - -#if __TBB_TASK_GROUP_CONTEXT - parallel_do_feeder_impl(tbb::task_group_context &context) - { - my_barrier = new( task::allocate_root(context) ) empty_task(); - __TBB_ASSERT(my_barrier, "root task allocation failed"); - } -#endif - - ~parallel_do_feeder_impl() - { - my_barrier->destroy(*my_barrier); - } - }; // class parallel_do_feeder_impl - - - //! For internal use only - /** Unpacks a block of iterations. 
- @ingroup algorithms */ - - template - class do_group_task_forward: public task - { - static const size_t max_arg_size = 4; - - typedef parallel_do_feeder_impl feeder_type; - - feeder_type& my_feeder; - Iterator my_first; - size_t my_size; - - do_group_task_forward( Iterator first, size_t size, feeder_type& feeder ) - : my_feeder(feeder), my_first(first), my_size(size) - {} - - task* execute() __TBB_override - { - typedef do_iteration_task_iter iteration_type; - __TBB_ASSERT( my_size>0, NULL ); - task_list list; - task* t; - size_t k=0; - for(;;) { - t = new( allocate_child() ) iteration_type( my_first, my_feeder ); - ++my_first; - if( ++k==my_size ) break; - list.push_back(*t); - } - set_ref_count(int(k+1)); - spawn(list); - spawn_and_wait_for_all(*t); - return NULL; - } - - template friend class do_task_iter; - }; // class do_group_task_forward - - template - class do_group_task_input: public task - { - static const size_t max_arg_size = 4; - - typedef parallel_do_feeder_impl feeder_type; - - feeder_type& my_feeder; - size_t my_size; - aligned_space my_arg; - - do_group_task_input( feeder_type& feeder ) - : my_feeder(feeder), my_size(0) - {} - - task* execute() __TBB_override - { -#if __TBB_CPP11_RVALUE_REF_PRESENT - typedef std::move_iterator Item_iterator; -#else - typedef Item* Item_iterator; -#endif - typedef do_iteration_task_iter iteration_type; - __TBB_ASSERT( my_size>0, NULL ); - task_list list; - task* t; - size_t k=0; - for(;;) { - t = new( allocate_child() ) iteration_type( Item_iterator(my_arg.begin() + k), my_feeder ); - if( ++k==my_size ) break; - list.push_back(*t); - } - set_ref_count(int(k+1)); - spawn(list); - spawn_and_wait_for_all(*t); - return NULL; - } - - ~do_group_task_input(){ - for( size_t k=0; k~Item(); - } - - template friend class do_task_iter; - }; // class do_group_task_input - - //! For internal use only. - /** Gets block of iterations and packages them into a do_group_task. - @ingroup algorithms */ - template - class do_task_iter: public task - { - typedef parallel_do_feeder_impl feeder_type; - - public: - do_task_iter( Iterator first, Iterator last , feeder_type& feeder ) : - my_first(first), my_last(last), my_feeder(feeder) - {} - - private: - Iterator my_first; - Iterator my_last; - feeder_type& my_feeder; - - /* Do not merge run(xxx) and run_xxx() methods. They are separated in order - to make sure that compilers will eliminate unused argument of type xxx - (that is will not put it on stack). The sole purpose of this argument - is overload resolution. - - An alternative could be using template functions, but explicit specialization - of member function templates is not supported for non specialized class - templates. Besides template functions would always fall back to the least - efficient variant (the one for input iterators) in case of iterators having - custom tags derived from basic ones. */ - task* execute() __TBB_override - { - typedef typename std::iterator_traits::iterator_category iterator_tag; - return run( (iterator_tag*)NULL ); - } - - /** This is the most restricted variant that operates on input iterators or - iterators with unknown tags (tags not derived from the standard ones). 
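
The long comment above is worth unpacking: `do_task_iter` picks its strategy by overloading `run` on a pointer to the iterator's category tag, so derived-to-base pointer conversion selects the closest matching variant at compile time. The trick in isolation, with a hypothetical `process` that prints instead of spawning tasks:

```cpp
// Sketch of the tag-dispatch idiom used by do_task_iter.
#include <cstdio>
#include <iterator>
#include <vector>

void run(std::random_access_iterator_tag*) { std::puts("split range recursively"); }
void run(std::forward_iterator_tag*)       { std::puts("walk a block, recycle task"); }
void run(void*)                            { std::puts("buffer items one by one"); }

template <typename Iterator>
void process(Iterator, Iterator) {
    typedef typename std::iterator_traits<Iterator>::iterator_category tag;
    run((tag*)NULL);  // best match wins: random-access tags hit the first
}                     // overload, unknown or input tags fall back to void*

int main() {
    std::vector<int> v(4);
    process(v.begin(), v.end());  // prints "split range recursively"
}
```
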
**/ - inline task* run( void* ) { return run_for_input_iterator(); } - - task* run_for_input_iterator() { - typedef do_group_task_input block_type; - - block_type& t = *new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(my_feeder); - size_t k=0; - while( !(my_first == my_last) ) { - // Move semantics are automatically used when supported by the iterator - new (t.my_arg.begin() + k) Item(*my_first); - ++my_first; - if( ++k==block_type::max_arg_size ) { - if ( !(my_first == my_last) ) - recycle_to_reexecute(); - break; - } - } - if( k==0 ) { - destroy(t); - return NULL; - } else { - t.my_size = k; - return &t; - } - } - - inline task* run( std::forward_iterator_tag* ) { return run_for_forward_iterator(); } - - task* run_for_forward_iterator() { - typedef do_group_task_forward block_type; - - Iterator first = my_first; - size_t k=0; - while( !(my_first==my_last) ) { - ++my_first; - if( ++k==block_type::max_arg_size ) { - if ( !(my_first==my_last) ) - recycle_to_reexecute(); - break; - } - } - return k==0 ? NULL : new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(first, k, my_feeder); - } - - inline task* run( std::random_access_iterator_tag* ) { return run_for_random_access_iterator(); } - - task* run_for_random_access_iterator() { - typedef do_group_task_forward block_type; - typedef do_iteration_task_iter iteration_type; - - size_t k = static_cast(my_last-my_first); - if( k > block_type::max_arg_size ) { - Iterator middle = my_first + k/2; - - empty_task& c = *new( allocate_continuation() ) empty_task; - do_task_iter& b = *new( c.allocate_child() ) do_task_iter(middle, my_last, my_feeder); - recycle_as_child_of(c); - - my_last = middle; - c.set_ref_count(2); - c.spawn(b); - return this; - }else if( k != 0 ) { - task_list list; - task* t; - size_t k1=0; - for(;;) { - t = new( allocate_child() ) iteration_type(my_first, my_feeder); - ++my_first; - if( ++k1==k ) break; - list.push_back(*t); - } - set_ref_count(int(k+1)); - spawn(list); - spawn_and_wait_for_all(*t); - } - return NULL; - } - }; // class do_task_iter - - //! For internal use only. - /** Implements parallel iteration over a range. - @ingroup algorithms */ - template - void run_parallel_do( Iterator first, Iterator last, const Body& body -#if __TBB_TASK_GROUP_CONTEXT - , task_group_context& context -#endif - ) - { - typedef do_task_iter root_iteration_task; -#if __TBB_TASK_GROUP_CONTEXT - parallel_do_feeder_impl feeder(context); -#else - parallel_do_feeder_impl feeder; -#endif - feeder.my_body = &body; - - root_iteration_task &t = *new( feeder.my_barrier->allocate_child() ) root_iteration_task(first, last, feeder); - - feeder.my_barrier->set_ref_count(2); - feeder.my_barrier->spawn_and_wait_for_all(t); - } - - //! For internal use only. - /** Detects types of Body's operator function arguments. - @ingroup algorithms **/ - template - void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item) const -#if __TBB_TASK_GROUP_CONTEXT - , task_group_context& context -#endif - ) - { - run_parallel_do::type>( first, last, body -#if __TBB_TASK_GROUP_CONTEXT - , context -#endif - ); - } - - //! For internal use only. - /** Detects types of Body's operator function arguments. 
- @ingroup algorithms **/ - template - void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item, parallel_do_feeder<_Item>&) const -#if __TBB_TASK_GROUP_CONTEXT - , task_group_context& context -#endif - ) - { - run_parallel_do::type>( first, last, body -#if __TBB_TASK_GROUP_CONTEXT - , context -#endif - ); - } - -} // namespace internal -} // namespace interface9 -//! @endcond - -/** \page parallel_do_body_req Requirements on parallel_do body - Class \c Body implementing the concept of parallel_do body must define: - - \code - B::operator()( - cv_item_type item, - parallel_do_feeder& feeder - ) const - - OR - - B::operator()( cv_item_type& item ) const - \endcode Process item. - May be invoked concurrently for the same \c this but different \c item. - - - \code item_type( const item_type& ) \endcode - Copy a work item. - - \code ~item_type() \endcode Destroy a work item -**/ - -/** \name parallel_do - See also requirements on \ref parallel_do_body_req "parallel_do Body". **/ -//@{ -//! Parallel iteration over a range, with optional addition of more work. -/** @ingroup algorithms */ -template -void parallel_do( Iterator first, Iterator last, const Body& body ) -{ - if ( first == last ) - return; -#if __TBB_TASK_GROUP_CONTEXT - task_group_context context; -#endif - interface9::internal::select_parallel_do( first, last, body, &Body::operator() -#if __TBB_TASK_GROUP_CONTEXT - , context -#endif - ); -} - -template -void parallel_do(Range& rng, const Body& body) { - parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body); -} - -template -void parallel_do(const Range& rng, const Body& body) { - parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body); -} - -#if __TBB_TASK_GROUP_CONTEXT -//! Parallel iteration over a range, with optional addition of more work and user-supplied context -/** @ingroup algorithms */ -template -void parallel_do( Iterator first, Iterator last, const Body& body, task_group_context& context ) -{ - if ( first == last ) - return; - interface9::internal::select_parallel_do( first, last, body, &Body::operator(), context ); -} - -template -void parallel_do(Range& rng, const Body& body, task_group_context& context) { - parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body, context); -} - -template -void parallel_do(const Range& rng, const Body& body, task_group_context& context) { - parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body, context); -} - -#endif // __TBB_TASK_GROUP_CONTEXT - -//@} - -using interface9::parallel_do_feeder; - -} // namespace - -#endif /* __TBB_parallel_do_H */ diff --git a/lib/3rdParty/tbb/include/tbb/parallel_for.h b/lib/3rdParty/tbb/include/tbb/parallel_for.h deleted file mode 100644 index 2cab6586..00000000 --- a/lib/3rdParty/tbb/include/tbb/parallel_for.h +++ /dev/null @@ -1,407 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
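
The body requirements spelled out above are easiest to see in use. A sketch of the deleted `parallel_do` with a feeder, over a hypothetical `Node` type:

```cpp
// Sketch: parallel_do lets the body feed newly discovered work back in.
#include "tbb/parallel_do.h"
#include <list>

struct Node { Node* left; Node* right; int value; };  // hypothetical

struct VisitBody {
    void operator()(Node* n, tbb::parallel_do_feeder<Node*>& feeder) const {
        // ... do something with n->value ...
        if (n->left)  feeder.add(n->left);   // new work joins the running loop
        if (n->right) feeder.add(n->right);
    }
};

void visit_all(std::list<Node*>& roots) {
    tbb::parallel_do(roots.begin(), roots.end(), VisitBody());
}
```
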
- - - - -*/ - -#ifndef __TBB_parallel_for_H -#define __TBB_parallel_for_H - -#include -#include "task.h" -#include "partitioner.h" -#include "blocked_range.h" -#include "tbb_exception.h" - -namespace tbb { - -namespace interface9 { -//! @cond INTERNAL -namespace internal { - - //! allocate right task with new parent - void* allocate_sibling(task* start_for_task, size_t bytes); - - //! Task type used in parallel_for - /** @ingroup algorithms */ - template - class start_for: public task { - Range my_range; - const Body my_body; - typename Partitioner::task_partition_type my_partition; - task* execute() __TBB_override; - - //! Update affinity info, if any. - void note_affinity( affinity_id id ) __TBB_override { - my_partition.note_affinity( id ); - } - - public: - //! Constructor for root task. - start_for( const Range& range, const Body& body, Partitioner& partitioner ) : - my_range(range), - my_body(body), - my_partition(partitioner) - { - } - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. */ - start_for( start_for& parent_, typename Partitioner::split_type& split_obj) : - my_range(parent_.my_range, split_obj), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split_obj) - { - my_partition.set_affinity(*this); - } - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. */ - start_for( start_for& parent_, const Range& r, depth_t d ) : - my_range(r), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split()) - { - my_partition.set_affinity(*this); - my_partition.align_depth( d ); - } - static void run( const Range& range, const Body& body, Partitioner& partitioner ) { - if( !range.empty() ) { -#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP - start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner); -#else - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_for in the try-block. - task_group_context context; - start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner); -#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ - task::spawn_root_and_wait(a); - } - } -#if __TBB_TASK_GROUP_CONTEXT - static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) { - if( !range.empty() ) { - start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner); - task::spawn_root_and_wait(a); - } - } -#endif /* __TBB_TASK_GROUP_CONTEXT */ - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { my_body( r ); } - - //! spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj) { - spawn( *new( allocate_sibling(static_cast(this), sizeof(start_for)) ) start_for(*this, split_obj) ); - } - //! spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d = 0) { - spawn( *new( allocate_sibling(static_cast(this), sizeof(start_for)) ) start_for(*this, r, d) ); - } - }; - - //! 
allocate right task with new parent - // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined - inline void* allocate_sibling(task* start_for_task, size_t bytes) { - task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task(); - start_for_task->set_parent(parent_ptr); - parent_ptr->set_ref_count(2); - return &parent_ptr->allocate_child().allocate(bytes); - } - - //! execute task for parallel_for - template - task* start_for::execute() { - my_partition.check_being_stolen( *this ); - my_partition.execute(*this, my_range); - return NULL; - } -} // namespace internal -//! @endcond -} // namespace interfaceX - -//! @cond INTERNAL -namespace internal { - using interface9::internal::start_for; - - //! Calls the function with values from range [begin, end) with a step provided - template - class parallel_for_body : internal::no_assign { - const Function &my_func; - const Index my_begin; - const Index my_step; - public: - parallel_for_body( const Function& _func, Index& _begin, Index& _step ) - : my_func(_func), my_begin(_begin), my_step(_step) {} - - void operator()( const tbb::blocked_range& r ) const { - // A set of local variables to help the compiler with vectorization of the following loop. - Index b = r.begin(); - Index e = r.end(); - Index ms = my_step; - Index k = my_begin + b*ms; - -#if __INTEL_COMPILER -#pragma ivdep -#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE -#pragma vector always assert -#endif -#endif - for ( Index i = b; i < e; ++i, k += ms ) { - my_func( k ); - } - } - }; -} // namespace internal -//! @endcond - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_for_body_req Requirements on parallel_for body - Class \c Body implementing the concept of parallel_for body must define: - - \code Body::Body( const Body& ); \endcode Copy constructor - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. -**/ - -/** \name parallel_for - See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ -//@{ - -//! Parallel iteration over range with default partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body ) { - internal::start_for::run(range,body,__TBB_DEFAULT_PARTITIONER()); -} - -//! Parallel iteration over range with simple partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { - internal::start_for::run(range,body,partitioner); -} - -//! Parallel iteration over range with auto_partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { - internal::start_for::run(range,body,partitioner); -} - -//! Parallel iteration over range with static_partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { - internal::start_for::run(range,body,partitioner); -} - -//! Parallel iteration over range with affinity_partitioner. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { - internal::start_for::run(range,body,partitioner); -} - -#if __TBB_TASK_GROUP_CONTEXT -//! 
Parallel iteration over range with default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, task_group_context& context ) { - internal::start_for::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); -} - -//! Parallel iteration over range with simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - internal::start_for::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with auto_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { - internal::start_for::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with static_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { - internal::start_for::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with affinity_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { - internal::start_for::run(range,body,partitioner, context); -} -#endif /* __TBB_TASK_GROUP_CONTEXT */ -//@} - -namespace strict_ppl { - -//@{ -//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner -template -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { - if (step <= 0 ) - internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - Index end = (last - first - Index(1)) / step + Index(1); - tbb::blocked_range range(static_cast(0), end); - internal::parallel_for_body body(f, first, step); - tbb::parallel_for(range, body, partitioner); - } -} - -//! Parallel iteration over a range of integers with a step provided and default partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f) { - parallel_for_impl(first, last, step, f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with a step provided and simple partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { - parallel_for_impl(first, last, step, f, partitioner); -} -//! Parallel iteration over a range of integers with a step provided and auto partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { - parallel_for_impl(first, last, step, f, partitioner); -} -//! Parallel iteration over a range of integers with a step provided and static partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { - parallel_for_impl(first, last, step, f, partitioner); -} -//! 
Parallel iteration over a range of integers with a step provided and affinity partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { - parallel_for_impl(first, last, step, f, partitioner); -} - -//! Parallel iteration over a range of integers with a default step value and default partitioner -template -void parallel_for(Index first, Index last, const Function& f) { - parallel_for_impl(first, last, static_cast(1), f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with a default step value and simple partitioner -template -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { - parallel_for_impl(first, last, static_cast(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and auto partitioner -template -void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { - parallel_for_impl(first, last, static_cast(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and static partitioner -template -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { - parallel_for_impl(first, last, static_cast(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and affinity partitioner -template -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { - parallel_for_impl(first, last, static_cast(1), f, partitioner); -} - -#if __TBB_TASK_GROUP_CONTEXT -//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner -template -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) { - if (step <= 0 ) - internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - Index end = (last - first - Index(1)) / step + Index(1); - tbb::blocked_range range(static_cast(0), end); - internal::parallel_for_body body(f, first, step); - tbb::parallel_for(range, body, partitioner, context); - } -} - -//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) { - parallel_for_impl(first, last, step, f, auto_partitioner(), context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner - template -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner - template -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, step, f, partitioner, context); -} -//! 
Parallel iteration over a range of integers with explicit step, task group context, and static partitioner -template -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner - template -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, step, f, partitioner, context); -} - - -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner -template -void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) { - parallel_for_impl(first, last, static_cast(1), f, auto_partitioner(), context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner - template -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, static_cast(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner - template -void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, static_cast(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner -template -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, static_cast(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner - template -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) { - parallel_for_impl(first, last, static_cast(1), f, partitioner, context); -} - -#endif /* __TBB_TASK_GROUP_CONTEXT */ -//@} - -} // namespace strict_ppl - -using strict_ppl::parallel_for; - -} // namespace tbb - -#if TBB_PREVIEW_SERIAL_SUBSET -#define __TBB_NORMAL_EXECUTION -#include "../serial/tbb/parallel_for.h" -#undef __TBB_NORMAL_EXECUTION -#endif - -#endif /* __TBB_parallel_for_H */ diff --git a/lib/3rdParty/tbb/include/tbb/parallel_for_each.h b/lib/3rdParty/tbb/include/tbb/parallel_for_each.h deleted file mode 100644 index 6c2ec9f0..00000000 --- a/lib/3rdParty/tbb/include/tbb/parallel_for_each.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
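
For reference, the two public shapes that all of the overloads above reduce to, in a short usage sketch:

```cpp
// Usage sketch for the deleted parallel_for: the compact integer-range
// form and the blocked_range form.
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#include <vector>

void scale(std::vector<float>& a, float c) {
    // Compact form: parallel_for(first, last, function)
    tbb::parallel_for(size_t(0), a.size(), [&](size_t i) { a[i] *= c; });

    // Range form: the body receives a subrange chosen by the partitioner.
    tbb::parallel_for(tbb::blocked_range<size_t>(0, a.size()),
        [&](const tbb::blocked_range<size_t>& r) {
            for (size_t i = r.begin(); i != r.end(); ++i) a[i] *= c;
        });
}
```
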
- See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_parallel_for_each_H -#define __TBB_parallel_for_each_H - -#include "parallel_do.h" -#include "parallel_for.h" - -namespace tbb { - -//! @cond INTERNAL -namespace internal { - // The class calls user function in operator() - template - class parallel_for_each_body_do : internal::no_assign { - const Function &my_func; - public: - parallel_for_each_body_do(const Function &_func) : my_func(_func) {} - - void operator()(typename std::iterator_traits::reference value) const { - my_func(value); - } - }; - - // The class calls user function in operator() - template - class parallel_for_each_body_for : internal::no_assign { - const Function &my_func; - public: - parallel_for_each_body_for(const Function &_func) : my_func(_func) {} - - void operator()(tbb::blocked_range range) const { -#if __INTEL_COMPILER -#pragma ivdep -#endif - for(Iterator it = range.begin(), end = range.end(); it != end; ++it) { - my_func(*it); - } - } - }; - - template - struct parallel_for_each_impl { -#if __TBB_TASK_GROUP_CONTEXT - static void doit(Iterator first, Iterator last, const Function& f, task_group_context &context) { - internal::parallel_for_each_body_do body(f); - tbb::parallel_do(first, last, body, context); - } -#endif - static void doit(Iterator first, Iterator last, const Function& f) { - internal::parallel_for_each_body_do body(f); - tbb::parallel_do(first, last, body); - } - }; - template - struct parallel_for_each_impl { -#if __TBB_TASK_GROUP_CONTEXT - static void doit(Iterator first, Iterator last, const Function& f, task_group_context &context) { - internal::parallel_for_each_body_for body(f); - tbb::parallel_for(tbb::blocked_range(first, last), body, context); - } -#endif - static void doit(Iterator first, Iterator last, const Function& f) { - internal::parallel_for_each_body_for body(f); - tbb::parallel_for(tbb::blocked_range(first, last), body); - } - }; -} // namespace internal -//! @endcond - -/** \name parallel_for_each - **/ -//@{ -//! Calls function f for all items from [first, last) interval using user-supplied context -/** @ingroup algorithms */ -#if __TBB_TASK_GROUP_CONTEXT -template -void parallel_for_each(Iterator first, Iterator last, const Function& f, task_group_context &context) { - internal::parallel_for_each_impl::iterator_category>::doit(first, last, f, context); -} - -//! Calls function f for all items from rng using user-supplied context -/** @ingroup algorithms */ -template -void parallel_for_each(Range& rng, const Function& f, task_group_context& context) { - parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f, context); -} - -//! Calls function f for all items from const rng user-supplied context -/** @ingroup algorithms */ -template -void parallel_for_each(const Range& rng, const Function& f, task_group_context& context) { - parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f, context); -} -#endif /* __TBB_TASK_GROUP_CONTEXT */ - -//! Uses default context -template -void parallel_for_each(Iterator first, Iterator last, const Function& f) { - internal::parallel_for_each_impl::iterator_category>::doit(first, last, f); -} - -//! Uses default context -template -void parallel_for_each(Range& rng, const Function& f) { - parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f); -} - -//! 
Uses default context -template -void parallel_for_each(const Range& rng, const Function& f) { - parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f); -} - -//@} - -} // namespace - -#endif /* __TBB_parallel_for_each_H */ diff --git a/lib/3rdParty/tbb/include/tbb/parallel_invoke.h b/lib/3rdParty/tbb/include/tbb/parallel_invoke.h deleted file mode 100644 index 0dd7590e..00000000 --- a/lib/3rdParty/tbb/include/tbb/parallel_invoke.h +++ /dev/null @@ -1,455 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_parallel_invoke_H -#define __TBB_parallel_invoke_H - -#include "task.h" - -#if __TBB_VARIADIC_PARALLEL_INVOKE - #include // std::forward -#endif - -namespace tbb { - -#if !__TBB_TASK_GROUP_CONTEXT - /** Dummy to avoid cluttering the bulk of the header with enormous amount of ifdefs. **/ - struct task_group_context {}; -#endif /* __TBB_TASK_GROUP_CONTEXT */ - -//! @cond INTERNAL -namespace internal { - // Simple task object, executing user method - template - class function_invoker : public task{ - public: - function_invoker(const function& _function) : my_function(_function) {} - private: - const function &my_function; - task* execute() __TBB_override - { - my_function(); - return NULL; - } - }; - - // The class spawns two or three child tasks - template - class spawner : public task { - private: - const function1& my_func1; - const function2& my_func2; - const function3& my_func3; - bool is_recycled; - - task* execute () __TBB_override { - if(is_recycled){ - return NULL; - }else{ - __TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong"); - set_ref_count(N); - recycle_as_safe_continuation(); - internal::function_invoker* invoker2 = new (allocate_child()) internal::function_invoker(my_func2); - __TBB_ASSERT(invoker2, "Child task allocation failed"); - spawn(*invoker2); - size_t n = N; // To prevent compiler warnings - if (n>2) { - internal::function_invoker* invoker3 = new (allocate_child()) internal::function_invoker(my_func3); - __TBB_ASSERT(invoker3, "Child task allocation failed"); - spawn(*invoker3); - } - my_func1(); - is_recycled = true; - return NULL; - } - } // execute - - public: - spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {} - }; - - // Creates and spawns child tasks - class parallel_invoke_helper : public empty_task { - public: - // Dummy functor class - class parallel_invoke_noop { - public: - void operator() () const {} - }; - // Creates a helper object with user-defined number of children expected - parallel_invoke_helper(int number_of_children) - { - set_ref_count(number_of_children + 1); - } - -#if __TBB_VARIADIC_PARALLEL_INVOKE - void add_children() {} - void add_children(tbb::task_group_context&) {} - - template - void add_children(function&& _func) - { - internal::function_invoker* invoker = new (allocate_child()) 
internal::function_invoker(std::forward(_func)); - __TBB_ASSERT(invoker, "Child task allocation failed"); - spawn(*invoker); - } - - template - void add_children(function&& _func, tbb::task_group_context&) - { - add_children(std::forward(_func)); - } - - // Adds child(ren) task(s) and spawns them - template - void add_children(function1&& _func1, function2&& _func2, function&&... _func) - { - // The third argument is dummy, it is ignored actually. - parallel_invoke_noop noop; - typedef internal::spawner<2, function1, function2, parallel_invoke_noop> spawner_type; - spawner_type & sub_root = *new(allocate_child()) spawner_type(std::forward(_func1), std::forward(_func2), noop); - spawn(sub_root); - add_children(std::forward(_func)...); - } -#else - // Adds child task and spawns it - template - void add_children (const function &_func) - { - internal::function_invoker* invoker = new (allocate_child()) internal::function_invoker(_func); - __TBB_ASSERT(invoker, "Child task allocation failed"); - spawn(*invoker); - } - - // Adds a task with multiple child tasks and spawns it - // two arguments - template - void add_children (const function1& _func1, const function2& _func2) - { - // The third argument is dummy, it is ignored actually. - parallel_invoke_noop noop; - internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop); - spawn(sub_root); - } - // three arguments - template - void add_children (const function1& _func1, const function2& _func2, const function3& _func3) - { - internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3); - spawn(sub_root); - } -#endif // __TBB_VARIADIC_PARALLEL_INVOKE - - // Waits for all child tasks - template - void run_and_finish(const F0& f0) - { - internal::function_invoker* invoker = new (allocate_child()) internal::function_invoker(f0); - __TBB_ASSERT(invoker, "Child task allocation failed"); - spawn_and_wait_for_all(*invoker); - } - }; - // The class destroys root if exception occurred as well as in normal case - class parallel_invoke_cleaner: internal::no_copy { - public: -#if __TBB_TASK_GROUP_CONTEXT - parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context) - : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children)) -#else - parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&) - : root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children)) -#endif /* !__TBB_TASK_GROUP_CONTEXT */ - {} - - ~parallel_invoke_cleaner(){ - root.destroy(root); - } - internal::parallel_invoke_helper& root; - }; - -#if __TBB_VARIADIC_PARALLEL_INVOKE -// Determine whether the last parameter in a pack is task_group_context - template struct impl_selector; // to workaround a GCC bug - - template struct impl_selector { - typedef typename impl_selector::type type; - }; - - template struct impl_selector { - typedef false_type type; - }; - template<> struct impl_selector { - typedef true_type type; - }; - - // Select task_group_context parameter from the back of a pack - inline task_group_context& get_context( task_group_context& tgc ) { return tgc; } - - template - task_group_context& get_context( T1&& /*ignored*/, T&&... t ) - { return get_context( std::forward(t)... 
); } - - // task_group_context is known to be at the back of the parameter pack - template - void parallel_invoke_impl(true_type, F0&& f0, F1&& f1, F&&... f) { - __TBB_STATIC_ASSERT(sizeof...(F)>0, "Variadic parallel_invoke implementation broken?"); - // # of child tasks: f0, f1, and a task for each two elements of the pack except the last - const size_t number_of_children = 2 + sizeof...(F)/2; - parallel_invoke_cleaner cleaner(number_of_children, get_context(std::forward(f)...)); - parallel_invoke_helper& root = cleaner.root; - - root.add_children(std::forward(f)...); - root.add_children(std::forward(f1)); - root.run_and_finish(std::forward(f0)); - } - - // task_group_context is not in the pack, needs to be added - template - void parallel_invoke_impl(false_type, F0&& f0, F1&& f1, F&&... f) { - tbb::task_group_context context; - // Add context to the arguments, and redirect to the other overload - parallel_invoke_impl(true_type(), std::forward(f0), std::forward(f1), std::forward(f)..., context); - } -#endif -} // namespace internal -//! @endcond - -/** \name parallel_invoke - **/ -//@{ -//! Executes a list of tasks in parallel and waits for all tasks to complete. -/** @ingroup algorithms */ - -#if __TBB_VARIADIC_PARALLEL_INVOKE - -// parallel_invoke for two or more arguments via variadic templates -// presence of task_group_context is defined automatically -template -void parallel_invoke(F0&& f0, F1&& f1, F&&... f) { - typedef typename internal::impl_selector::type selector_type; - internal::parallel_invoke_impl(selector_type(), std::forward(f0), std::forward(f1), std::forward(f)...); -} - -#else - -// parallel_invoke with user-defined context -// two arguments -template -void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) { - internal::parallel_invoke_cleaner cleaner(2, context); - internal::parallel_invoke_helper& root = cleaner.root; - - root.add_children(f1); - - root.run_and_finish(f0); -} - -// three arguments -template -void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) { - internal::parallel_invoke_cleaner cleaner(3, context); - internal::parallel_invoke_helper& root = cleaner.root; - - root.add_children(f2); - root.add_children(f1); - - root.run_and_finish(f0); -} - -// four arguments -template -void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, - tbb::task_group_context& context) -{ - internal::parallel_invoke_cleaner cleaner(4, context); - internal::parallel_invoke_helper& root = cleaner.root; - - root.add_children(f3); - root.add_children(f2); - root.add_children(f1); - - root.run_and_finish(f0); -} - -// five arguments -template -void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, - tbb::task_group_context& context) -{ - internal::parallel_invoke_cleaner cleaner(3, context); - internal::parallel_invoke_helper& root = cleaner.root; - - root.add_children(f4, f3); - root.add_children(f2, f1); - - root.run_and_finish(f0); -} - -// six arguments -template -void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5, - tbb::task_group_context& context) -{ - internal::parallel_invoke_cleaner cleaner(3, context); - internal::parallel_invoke_helper& root = cleaner.root; - - root.add_children(f5, f4, f3); - root.add_children(f2, f1); - - root.run_and_finish(f0); -} - -// seven arguments -template -void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, - const F5& f5, 
const F6& f6,
-                     tbb::task_group_context& context)
-{
-    internal::parallel_invoke_cleaner cleaner(3, context);
-    internal::parallel_invoke_helper& root = cleaner.root;
-
-    root.add_children(f6, f5, f4);
-    root.add_children(f3, f2, f1);
-
-    root.run_and_finish(f0);
-}
-
-// eight arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6, typename F7>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6, const F7& f7,
-                     tbb::task_group_context& context)
-{
-    internal::parallel_invoke_cleaner cleaner(4, context);
-    internal::parallel_invoke_helper& root = cleaner.root;
-
-    root.add_children(f7, f6, f5);
-    root.add_children(f4, f3);
-    root.add_children(f2, f1);
-
-    root.run_and_finish(f0);
-}
-
-// nine arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6, typename F7, typename F8>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6, const F7& f7, const F8& f8,
-                     tbb::task_group_context& context)
-{
-    internal::parallel_invoke_cleaner cleaner(4, context);
-    internal::parallel_invoke_helper& root = cleaner.root;
-
-    root.add_children(f8, f7, f6);
-    root.add_children(f5, f4, f3);
-    root.add_children(f2, f1);
-
-    root.run_and_finish(f0);
-}
-
-// ten arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6, typename F7, typename F8, typename F9>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
-                     tbb::task_group_context& context)
-{
-    internal::parallel_invoke_cleaner cleaner(4, context);
-    internal::parallel_invoke_helper& root = cleaner.root;
-
-    root.add_children(f9, f8, f7);
-    root.add_children(f6, f5, f4);
-    root.add_children(f3, f2, f1);
-
-    root.run_and_finish(f0);
-}
-
-// two arguments
-template<typename F0, typename F1>
-void parallel_invoke(const F0& f0, const F1& f1) {
-    task_group_context context;
-    parallel_invoke<F0, F1>(f0, f1, context);
-}
-// three arguments
-template<typename F0, typename F1, typename F2>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
-    task_group_context context;
-    parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
-}
-// four arguments
-template<typename F0, typename F1, typename F2, typename F3>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
-}
-// five arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
-}
-// six arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
-}
-// seven arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6)
-{
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
-}
-// eight arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6, typename F7>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6, const F7& f7)
-{
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
-}
-// nine arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6, typename F7, typename F8>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6, const F7& f7, const F8& f8)
-{
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
-}
-// ten arguments
-template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6, typename F7, typename F8, typename F9>
-void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
-                     const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
-{
-    task_group_context context;
-    parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
-}
-#endif // __TBB_VARIADIC_PARALLEL_INVOKE
-//@}
-
-} // namespace
-
-#endif /* __TBB_parallel_invoke_H */
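parallel_invoke, removed above, runs up to ten independent callables and waits for all of them; f0 executes in the calling task while the rest are spawned in groups of two or three. A short sketch against the removed TBB 2017 API; the lambdas are illustrative:

    #include <cstdio>
    #include "tbb/parallel_invoke.h"

    int main() {
        // Three independent pieces of work, all joined before main() continues.
        tbb::parallel_invoke([] { std::puts("track face"); },
                             [] { std::puts("estimate pose"); },
                             [] { std::puts("extract action units"); });
    }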
diff --git a/lib/3rdParty/tbb/include/tbb/parallel_reduce.h b/lib/3rdParty/tbb/include/tbb/parallel_reduce.h
deleted file mode 100644
index 0596ae03..00000000
--- a/lib/3rdParty/tbb/include/tbb/parallel_reduce.h
+++ /dev/null
@@ -1,569 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_parallel_reduce_H
-#define __TBB_parallel_reduce_H
-
-#include <new>
-#include "task.h"
-#include "aligned_space.h"
-#include "partitioner.h"
-#include "tbb_profiling.h"
-
-namespace tbb {
-
-namespace interface9 {
-//! @cond INTERNAL
-namespace internal {
-
-    using namespace tbb::internal;
-
-    /** Values for reduction_context. */
-    enum {
-        root_task, left_child, right_child
-    };
-
-    /** Represented as a char, not enum, for compactness. */
-    typedef char reduction_context;
-
-    //! Task type used to combine the partial results of parallel_reduce.
-    /** @ingroup algorithms */
-    template<typename Body>
-    class finish_reduce: public flag_task {
-        //! Pointer to body, or NULL if the left child has not yet finished.
-        bool has_right_zombie;
-        const reduction_context my_context;
-        Body* my_body;
-        aligned_space<Body> zombie_space;
-        finish_reduce( reduction_context context_ ) :
-            has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
-            my_context(context_),
-            my_body(NULL)
-        {
-        }
-        ~finish_reduce() {
-            if( has_right_zombie )
-                zombie_space.begin()->~Body();
-        }
-        task* execute() __TBB_override {
-            if( has_right_zombie ) {
-                // Right child was stolen.
-                Body* s = zombie_space.begin();
-                my_body->join( *s );
-                // Body::join() won't be called if canceled. Defer destruction to destructor
-            }
-            if( my_context==left_child )
-                itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
-            return NULL;
-        }
-        template<typename Range,typename Body_, typename Partitioner>
-        friend class start_reduce;
-    };
-
-    //! allocate right task with new parent
-    void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes);
-
-    //! Task type used to split the work of parallel_reduce.
-    /** @ingroup algorithms */
-    template<typename Range, typename Body, typename Partitioner>
-    class start_reduce: public task {
-        typedef finish_reduce<Body> finish_type;
-        Body* my_body;
-        Range my_range;
-        typename Partitioner::task_partition_type my_partition;
-        reduction_context my_context;
-        task* execute() __TBB_override;
-        //! Update affinity info, if any
-        void note_affinity( affinity_id id ) __TBB_override {
-            my_partition.note_affinity( id );
-        }
-        template<typename Body_>
-        friend class finish_reduce;
-
-public:
-        //! Constructor used for root task
-        start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
-            my_body(body),
-            my_range(range),
-            my_partition(partitioner),
-            my_context(root_task)
-        {
-        }
-        //! Splitting constructor used to generate children.
-        /** parent_ becomes left child.
Newly constructed object is right child. */ - start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj ) : - my_body(parent_.my_body), - my_range(parent_.my_range, split_obj), - my_partition(parent_.my_partition, split_obj), - my_context(right_child) - { - my_partition.set_affinity(*this); - parent_.my_context = left_child; - } - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. */ - start_reduce( start_reduce& parent_, const Range& r, depth_t d ) : - my_body(parent_.my_body), - my_range(r), - my_partition(parent_.my_partition, split()), - my_context(right_child) - { - my_partition.set_affinity(*this); - my_partition.align_depth( d ); // TODO: move into constructor of partitioner - parent_.my_context = left_child; - } - static void run( const Range& range, Body& body, Partitioner& partitioner ) { - if( !range.empty() ) { -#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP - task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) ); -#else - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_for in the try-block. - task_group_context context; - task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) ); -#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ - } - } -#if __TBB_TASK_GROUP_CONTEXT - static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) { - if( !range.empty() ) - task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) ); - } -#endif /* __TBB_TASK_GROUP_CONTEXT */ - //! Run body for range - void run_body( Range &r ) { (*my_body)( r ); } - - //! spawn right task, serves as callback for partitioner - // TODO: remove code duplication from 'offer_work' methods - void offer_work(typename Partitioner::split_type& split_obj) { - task *tasks[2]; - allocate_sibling(static_cast(this), tasks, sizeof(start_reduce), sizeof(finish_type)); - new((void*)tasks[0]) finish_type(my_context); - new((void*)tasks[1]) start_reduce(*this, split_obj); - spawn(*tasks[1]); - } - //! spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d = 0) { - task *tasks[2]; - allocate_sibling(static_cast(this), tasks, sizeof(start_reduce), sizeof(finish_type)); - new((void*)tasks[0]) finish_type(my_context); - new((void*)tasks[1]) start_reduce(*this, r, d); - spawn(*tasks[1]); - } - }; - - //! allocate right task with new parent - // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined - inline void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes) { - tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes); - start_reduce_task->set_parent(tasks[0]); - tasks[0]->set_ref_count(2); - tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes); - } - - template - task* start_reduce::execute() { - my_partition.check_being_stolen( *this ); - if( my_context==right_child ) { - finish_type* parent_ptr = static_cast(parent()); - if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2??? 
- my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split()); - parent_ptr->has_right_zombie = true; - } - } else __TBB_ASSERT(my_context==root_task,NULL);// because left leaf spawns right leafs without recycling - my_partition.execute(*this, my_range); - if( my_context==left_child ) { - finish_type* parent_ptr = static_cast(parent()); - __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL); - itt_store_word_with_release(parent_ptr->my_body, my_body ); - } - return NULL; - } - - //! Task type used to combine the partial results of parallel_deterministic_reduce. - /** @ingroup algorithms */ - template - class finish_deterministic_reduce: public task { - Body &my_left_body; - Body my_right_body; - - finish_deterministic_reduce( Body &body ) : - my_left_body( body ), - my_right_body( body, split() ) - { - } - task* execute() __TBB_override { - my_left_body.join( my_right_body ); - return NULL; - } - template - friend class start_deterministic_reduce; - }; - - //! Task type used to split the work of parallel_deterministic_reduce. - /** @ingroup algorithms */ - template - class start_deterministic_reduce: public task { - typedef finish_deterministic_reduce finish_type; - Body &my_body; - Range my_range; - task* execute() __TBB_override; - - //! Constructor used for root task - start_deterministic_reduce( const Range& range, Body& body ) : - my_body( body ), - my_range( range ) - { - } - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. */ - start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) : - my_body( c.my_right_body ), - my_range( parent_.my_range, split() ) - { - } - -public: - static void run( const Range& range, Body& body ) { - if( !range.empty() ) { -#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP - task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) ); -#else - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_for in the try-block. - task_group_context context; - task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) ); -#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ - } - } -#if __TBB_TASK_GROUP_CONTEXT - static void run( const Range& range, Body& body, task_group_context& context ) { - if( !range.empty() ) - task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) ); - } -#endif /* __TBB_TASK_GROUP_CONTEXT */ - }; - - template - task* start_deterministic_reduce::execute() { - if( !my_range.is_divisible() ) { - my_body( my_range ); - return NULL; - } else { - finish_type& c = *new( allocate_continuation() ) finish_type( my_body ); - recycle_as_child_of(c); - c.set_ref_count(2); - start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c ); - task::spawn(b); - return this; - } - } -} // namespace internal -//! @endcond -} //namespace interfaceX - -//! @cond INTERNAL -namespace internal { - using interface9::internal::start_reduce; - using interface9::internal::start_deterministic_reduce; - //! Auxiliary class for parallel_reduce; for internal use only. - /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" - using given \ref parallel_reduce_lambda_req "anonymous function objects". 
- **/ - /** @ingroup algorithms */ - template - class lambda_reduce_body { - -//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced -// (might require some performance measurements) - - const Value& identity_element; - const RealBody& my_real_body; - const Reduction& my_reduction; - Value my_value; - lambda_reduce_body& operator= ( const lambda_reduce_body& other ); - public: - lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) - : identity_element(identity) - , my_real_body(body) - , my_reduction(reduction) - , my_value(identity) - { } - lambda_reduce_body( const lambda_reduce_body& other ) - : identity_element(other.identity_element) - , my_real_body(other.my_real_body) - , my_reduction(other.my_reduction) - , my_value(other.my_value) - { } - lambda_reduce_body( lambda_reduce_body& other, tbb::split ) - : identity_element(other.identity_element) - , my_real_body(other.my_real_body) - , my_reduction(other.my_reduction) - , my_value(other.identity_element) - { } - void operator()(Range& range) { - my_value = my_real_body(range, const_cast(my_value)); - } - void join( lambda_reduce_body& rhs ) { - my_value = my_reduction(const_cast(my_value), const_cast(rhs.my_value)); - } - Value result() const { - return my_value; - } - }; - -} // namespace internal -//! @endcond - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_reduce_body_req Requirements on parallel_reduce body - Class \c Body implementing the concept of parallel_reduce body must define: - - \code Body::Body( Body&, split ); \endcode Splitting constructor. - Must be able to run concurrently with operator() and method \c join - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r - and accumulating the result - - \code void Body::join( Body& b ); \endcode Join results. - The result in \c b should be merged into the result of \c this -**/ - -/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) - TO BE DOCUMENTED -**/ - -/** \name parallel_reduce - See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ -//@{ - -//! Parallel iteration with reduction and default partitioner. -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body ) { - internal::start_reduce::run( range, body, __TBB_DEFAULT_PARTITIONER() ); -} - -//! Parallel iteration with reduction and simple_partitioner -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { - internal::start_reduce::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction and auto_partitioner -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { - internal::start_reduce::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction and static_partitioner -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { - internal::start_reduce::run( range, body, partitioner ); -} - -//! 
Parallel iteration with reduction and affinity_partitioner -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { - internal::start_reduce::run( range, body, partitioner ); -} - -#if __TBB_TASK_GROUP_CONTEXT -//! Parallel iteration with reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - internal::start_reduce::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, auto_partitioner and user-supplied context -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { - internal::start_reduce::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, static_partitioner and user-supplied context -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { - internal::start_reduce::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, affinity_partitioner and user-supplied context -/** @ingroup algorithms **/ -template -void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { - internal::start_reduce::run( range, body, partitioner, context ); -} -#endif /* __TBB_TASK_GROUP_CONTEXT */ - -/** parallel_reduce overloads that work with anonymous function objects - (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ - -//! Parallel iteration with reduction and default partitioner. -/** @ingroup algorithms **/ -template -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { - internal::lambda_reduce_body body(identity, real_body, reduction); - internal::start_reduce,const __TBB_DEFAULT_PARTITIONER> - ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); - return body.result(); -} - -//! Parallel iteration with reduction and simple_partitioner. -/** @ingroup algorithms **/ -template -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner ) { - internal::lambda_reduce_body body(identity, real_body, reduction); - internal::start_reduce,const simple_partitioner> - ::run(range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and auto_partitioner -/** @ingroup algorithms **/ -template -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const auto_partitioner& partitioner ) { - internal::lambda_reduce_body body(identity, real_body, reduction); - internal::start_reduce,const auto_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! 
Parallel iteration with reduction and static_partitioner
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                       const static_partitioner& partitioner ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
-                          ::run( range, body, partitioner );
-    return body.result();
-}
-
-//! Parallel iteration with reduction and affinity_partitioner
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                       affinity_partitioner& partitioner ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
-                          ::run( range, body, partitioner );
-    return body.result();
-}
-
-#if __TBB_TASK_GROUP_CONTEXT
-//! Parallel iteration with reduction, simple partitioner and user-supplied context.
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                       const simple_partitioner& partitioner, task_group_context& context ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
-                          ::run( range, body, partitioner, context );
-    return body.result();
-}
-
-//! Parallel iteration with reduction, auto_partitioner and user-supplied context
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                       const auto_partitioner& partitioner, task_group_context& context ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
-                          ::run( range, body, partitioner, context );
-    return body.result();
-}
-
-//! Parallel iteration with reduction, static_partitioner and user-supplied context
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                       const static_partitioner& partitioner, task_group_context& context ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
-                          ::run( range, body, partitioner, context );
-    return body.result();
-}
-
-//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                       affinity_partitioner& partitioner, task_group_context& context ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
-                          ::run( range, body, partitioner, context );
-    return body.result();
-}
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-//! Parallel iteration with deterministic reduction and default partitioner.
-/** @ingroup algorithms **/
-template<typename Range, typename Body>
-void parallel_deterministic_reduce( const Range& range, Body& body ) {
-    internal::start_deterministic_reduce<Range,Body>::run( range, body );
-}
-
-#if __TBB_TASK_GROUP_CONTEXT
-//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
-/** @ingroup algorithms **/
-template<typename Range, typename Body>
-void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
-    internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
-}
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-/** parallel_reduce overloads that work with anonymous function objects
-    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
-
-//! Parallel iteration with deterministic reduction and default partitioner.
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
-                          ::run(range, body);
-    return body.result();
-}
-
-#if __TBB_TASK_GROUP_CONTEXT
-//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
-/** @ingroup algorithms **/
-template<typename Range, typename Value, typename RealBody, typename Reduction>
-Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
-                                     task_group_context& context ) {
-    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
-    internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
-                          ::run( range, body, context );
-    return body.result();
-}
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-//@}
-
-} // namespace tbb
-
-#endif /* __TBB_parallel_reduce_H */
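The lambda overloads above wrap (identity, real_body, reduction) in lambda_reduce_body and drive the same task tree as the class-based form. A usage sketch of the lambda form as it existed in the removed header; the vector and lambdas are illustrative:

    #include <vector>
    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"

    int main() {
        std::vector<double> v(1000, 0.5);
        double sum = tbb::parallel_reduce(
            tbb::blocked_range<size_t>(0, v.size()),
            0.0,                                        // identity element
            [&](const tbb::blocked_range<size_t>& r, double acc) {
                for (size_t i = r.begin(); i != r.end(); ++i)
                    acc += v[i];                        // RealBody: accumulate one subrange
                return acc;
            },
            [](double a, double b) { return a + b; }    // Reduction: join partial sums
        );
        (void)sum; // 500.0
    }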
diff --git a/lib/3rdParty/tbb/include/tbb/parallel_scan.h b/lib/3rdParty/tbb/include/tbb/parallel_scan.h
deleted file mode 100644
index faf6b316..00000000
--- a/lib/3rdParty/tbb/include/tbb/parallel_scan.h
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_parallel_scan_H
-#define __TBB_parallel_scan_H
-
-#include "task.h"
-#include "aligned_space.h"
-#include <new>
-#include "partitioner.h"
-
-namespace tbb {
-
-//! Used to indicate that the initial scan is being performed.
-/** @ingroup algorithms */
-struct pre_scan_tag {
-    static bool is_final_scan() {return false;}
-};
-
-//! Used to indicate that the final scan is being performed.
-/** @ingroup algorithms */
-struct final_scan_tag {
-    static bool is_final_scan() {return true;}
-};
-
-//! @cond INTERNAL
-namespace internal {
-
-    //! Performs final scan for a leaf
-    /** @ingroup algorithms */
-    template<typename Range, typename Body>
-    class final_sum: public task {
-    public:
-        Body my_body;
-    private:
-        aligned_space<Range> my_range;
-        //! Where to put result of last subrange, or NULL if not last subrange.
- Body* my_stuff_last; - public: - final_sum( Body& body_ ) : - my_body(body_,split()) - { - poison_pointer(my_stuff_last); - } - ~final_sum() { - my_range.begin()->~Range(); - } - void finish_construction( const Range& range_, Body* stuff_last_ ) { - new( my_range.begin() ) Range(range_); - my_stuff_last = stuff_last_; - } - private: - task* execute() __TBB_override { - my_body( *my_range.begin(), final_scan_tag() ); - if( my_stuff_last ) - my_stuff_last->assign(my_body); - return NULL; - } - }; - - //! Split work to be done in the scan. - /** @ingroup algorithms */ - template - class sum_node: public task { - typedef final_sum final_sum_type; - public: - final_sum_type *my_incoming; - final_sum_type *my_body; - Body *my_stuff_last; - private: - final_sum_type *my_left_sum; - sum_node *my_left; - sum_node *my_right; - bool my_left_is_final; - Range my_range; - sum_node( const Range range_, bool left_is_final_ ) : - my_stuff_last(NULL), - my_left_sum(NULL), - my_left(NULL), - my_right(NULL), - my_left_is_final(left_is_final_), - my_range(range_) - { - // Poison fields that will be set by second pass. - poison_pointer(my_body); - poison_pointer(my_incoming); - } - task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) { - if( !n ) { - f.recycle_as_child_of( *this ); - f.finish_construction( range_, stuff_last_ ); - return &f; - } else { - n->my_body = &f; - n->my_incoming = incoming_; - n->my_stuff_last = stuff_last_; - return n; - } - } - task* execute() __TBB_override { - if( my_body ) { - if( my_incoming ) - my_left_sum->my_body.reverse_join( my_incoming->my_body ); - recycle_as_continuation(); - sum_node& c = *this; - task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last); - task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL); - set_ref_count( (a!=NULL)+(b!=NULL) ); - my_body = NULL; - if( a ) spawn(*b); - else a = b; - return a; - } else { - return NULL; - } - } - template - friend class start_scan; - - template - friend class finish_scan; - }; - - //! Combine partial results - /** @ingroup algorithms */ - template - class finish_scan: public task { - typedef sum_node sum_node_type; - typedef final_sum final_sum_type; - final_sum_type** const my_sum; - sum_node_type*& my_return_slot; - public: - final_sum_type* my_right_zombie; - sum_node_type& my_result; - - task* execute() __TBB_override { - __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL ); - if( my_result.my_left ) - my_result.my_left_is_final = false; - if( my_right_zombie && my_sum ) - ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body); - __TBB_ASSERT( !my_return_slot, NULL ); - if( my_right_zombie || my_result.my_right ) { - my_return_slot = &my_result; - } else { - destroy( my_result ); - } - if( my_right_zombie && !my_sum && !my_result.my_right ) { - destroy(*my_right_zombie); - my_right_zombie = NULL; - } - return NULL; - } - - finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : - my_sum(sum_), - my_return_slot(return_slot_), - my_right_zombie(NULL), - my_result(result_) - { - __TBB_ASSERT( !my_return_slot, NULL ); - } - }; - - //! Initial task to split the work - /** @ingroup algorithms */ - template - class start_scan: public task { - typedef sum_node sum_node_type; - typedef final_sum final_sum_type; - final_sum_type* my_body; - /** Non-null if caller is requesting total. 
*/ - final_sum_type** my_sum; - sum_node_type** my_return_slot; - /** Null if computing root. */ - sum_node_type* my_parent_sum; - bool my_is_final; - bool my_is_right_child; - Range my_range; - typename Partitioner::partition_type my_partition; - task* execute() __TBB_override ; - public: - start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) : - my_body(parent_.my_body), - my_sum(parent_.my_sum), - my_return_slot(&return_slot_), - my_parent_sum(parent_sum_), - my_is_final(parent_.my_is_final), - my_is_right_child(false), - my_range(parent_.my_range,split()), - my_partition(parent_.my_partition,split()) - { - __TBB_ASSERT( !*my_return_slot, NULL ); - } - - start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) : - my_body(&body_), - my_sum(NULL), - my_return_slot(&return_slot_), - my_parent_sum(NULL), - my_is_final(true), - my_is_right_child(false), - my_range(range_), - my_partition(partitioner_) - { - __TBB_ASSERT( !*my_return_slot, NULL ); - } - - static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) { - if( !range_.empty() ) { - typedef internal::start_scan start_pass1_type; - internal::sum_node* root = NULL; - typedef internal::final_sum final_sum_type; - final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ ); - start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type( - /*my_return_slot=*/root, - range_, - *temp_body, - partitioner_ ); - temp_body->my_body.reverse_join(body_); - task::spawn_root_and_wait( pass1 ); - if( root ) { - root->my_body = temp_body; - root->my_incoming = NULL; - root->my_stuff_last = &body_; - task::spawn_root_and_wait( *root ); - } else { - body_.assign(temp_body->my_body); - temp_body->finish_construction( range_, NULL ); - temp_body->destroy(*temp_body); - } - } - } - }; - - template - task* start_scan::execute() { - typedef internal::finish_scan finish_pass1_type; - finish_pass1_type* p = my_parent_sum ? static_cast( parent() ) : NULL; - // Inspecting p->result.left_sum would ordinarily be a race condition. - // But we inspect it only if we are not a stolen task, in which case we - // know that task assigning to p->result.left_sum has completed. 
-        bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
-        if( treat_as_stolen ) {
-            // Invocation is for right child that has been really stolen or needs to be virtually stolen
-            p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
-            my_is_final = false;
-        }
-        task* next_task = NULL;
-        if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
-            if( my_is_final )
-                (my_body->my_body)( my_range, final_scan_tag() );
-            else if( my_sum )
-                (my_body->my_body)( my_range, pre_scan_tag() );
-            if( my_sum )
-                *my_sum = my_body;
-            __TBB_ASSERT( !*my_return_slot, NULL );
-        } else {
-            sum_node_type* result;
-            if( my_parent_sum )
-                result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
-            else
-                result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
-            finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
-            // Split off right child
-            start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
-            b.my_is_right_child = true;
-            // Left child is recycling of *this.  Must recycle this before spawning b,
-            // otherwise b might complete and decrement c.ref_count() to zero, which
-            // would cause c.execute() to run prematurely.
-            recycle_as_child_of(c);
-            c.set_ref_count(2);
-            c.spawn(b);
-            my_sum = &result->my_left_sum;
-            my_return_slot = &result->my_left;
-            my_is_right_child = false;
-            next_task = this;
-            my_parent_sum = result;
-            __TBB_ASSERT( !*my_return_slot, NULL );
-        }
-        return next_task;
-    }
-} // namespace internal
-//! @endcond
-
-// Requirements on Range concept are documented in blocked_range.h
-
-/** \page parallel_scan_body_req Requirements on parallel_scan body
-    Class \c Body implementing the concept of parallel_scan body must define:
-    - \code Body::Body( Body&, split ); \endcode    Splitting constructor.
-                                                    Split \c b so that \c this and \c b can accumulate separately
-    - \code Body::~Body(); \endcode                 Destructor
-    - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
-                                                    Preprocess iterations for range \c r
-    - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
-                                                    Do final processing for iterations of range \c r
-    - \code void Body::reverse_join( Body& a ); \endcode
-                                                    Merge preprocessing state of \c a into \c this, where \c a was
-                                                    created earlier from \c b by b's splitting constructor
-**/
-
-/** \name parallel_scan
-    See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
-//@{
-
-//! Parallel prefix with default partitioner
-/** @ingroup algorithms **/
-template<typename Range, typename Body>
-void parallel_scan( const Range& range, Body& body ) {
-    internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
-}
-
-//! Parallel prefix with simple_partitioner
-/** @ingroup algorithms **/
-template<typename Range, typename Body>
-void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
-    internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
-}
-
-//! Parallel prefix with auto_partitioner
-/** @ingroup algorithms **/
-template<typename Range, typename Body>
-void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
-    internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
-}
-//@}
-
-} // namespace tbb
-
-#endif /* __TBB_parallel_scan_H */
-
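A parallel_scan Body must supply the splitting constructor, a tag-dispatched operator(), reverse_join, and assign, as the requirements page above spells out. A running-sum sketch against the removed interface; the class name and data are illustrative:

    #include <cstddef>
    #include <vector>
    #include "tbb/parallel_scan.h"
    #include "tbb/blocked_range.h"

    // Prefix-sum body satisfying the parallel_scan requirements.
    class RunningSum {
        const std::vector<int>& in;
        std::vector<int>& out;
    public:
        int sum;
        RunningSum(const std::vector<int>& i, std::vector<int>& o) : in(i), out(o), sum(0) {}
        RunningSum(RunningSum& b, tbb::split) : in(b.in), out(b.out), sum(0) {}
        template<typename Tag>
        void operator()(const tbb::blocked_range<size_t>& r, Tag) {
            int tmp = sum;
            for (size_t i = r.begin(); i != r.end(); ++i) {
                tmp += in[i];
                if (Tag::is_final_scan()) out[i] = tmp; // only write on the final pass
            }
            sum = tmp;
        }
        void reverse_join(RunningSum& a) { sum = a.sum + sum; }
        void assign(const RunningSum& b) { sum = b.sum; }
    };

    int main() {
        std::vector<int> in(100, 1), out(100);
        RunningSum body(in, out);
        tbb::parallel_scan(tbb::blocked_range<size_t>(0, in.size()), body);
    }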
diff --git a/lib/3rdParty/tbb/include/tbb/parallel_sort.h b/lib/3rdParty/tbb/include/tbb/parallel_sort.h
deleted file mode 100644
index 07d3907b..00000000
--- a/lib/3rdParty/tbb/include/tbb/parallel_sort.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_parallel_sort_H
-#define __TBB_parallel_sort_H
-
-#include "parallel_for.h"
-#include "blocked_range.h"
-#include "internal/_range_iterator.h"
-#include <algorithm>
-#include <iterator>
-#include <functional>
-
-namespace tbb {
-
-namespace interface9 {
-//! @cond INTERNAL
-namespace internal {
-
-using tbb::internal::no_assign;
-
-//! Range used in quicksort to split elements into subranges based on a value.
-/** The split operation selects a splitter and places all elements less than or equal
-    to the value in the first range and the remaining elements in the second range.
-    @ingroup algorithms */
-template<typename RandomAccessIterator, typename Compare>
-class quick_sort_range: private no_assign {
-
-    inline size_t median_of_three(const RandomAccessIterator &array, size_t l, size_t m, size_t r) const {
-        return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp( array[l], array[r]) ? r : l ) )
-                                        : ( comp(array[r], array[m]) ? m : ( comp( array[r], array[l] ) ? r : l ) );
-    }
-
-    inline size_t pseudo_median_of_nine( const RandomAccessIterator &array, const quick_sort_range &range ) const {
-        size_t offset = range.size/8u;
-        return median_of_three(array,
-                               median_of_three(array, 0, offset, offset*2),
-                               median_of_three(array, offset*3, offset*4, offset*5),
-                               median_of_three(array, offset*6, offset*7, range.size - 1) );
-
-    }
-
-    size_t split_range( quick_sort_range& range ) {
-        using std::iter_swap;
-        RandomAccessIterator array = range.begin;
-        RandomAccessIterator key0 = range.begin;
-        size_t m = pseudo_median_of_nine(array, range);
-        if (m) iter_swap ( array, array+m );
-
-        size_t i=0;
-        size_t j=range.size;
-        // Partition interval [i+1,j-1] with key *key0.
-        for(;;) {
-            __TBB_ASSERT( i<j, NULL );
-            // Loop must terminate since array[l]==*key0.
-            do {
-                --j;
-                __TBB_ASSERT( i<=j, "bad ordering relation?" );
-            } while( comp( *key0, array[j] ));
-            do {
-                __TBB_ASSERT( i<=j, NULL );
-                if( i==j ) goto partition;
-                ++i;
-            } while( comp( array[i], *key0 ));
-            if( i==j ) goto partition;
-            iter_swap( array+i, array+j );
-        }
-partition:
-        // Put the partition key where it belongs
-        iter_swap( array+j, key0 );
-        // array[l..j) is less or equal to key.
-        // array(j..r) is greater or equal to key.
-        // array[j] is equal to key
-        i=j+1;
-        size_t new_range_size = range.size-i;
-        range.size = j;
-        return new_range_size;
-    }
-
-public:
-
-    static const size_t grainsize = 500;
-    const Compare &comp;
-    size_t size;
-    RandomAccessIterator begin;
-
-    quick_sort_range( RandomAccessIterator begin_, size_t size_, const Compare &comp_ ) :
-        comp(comp_), size(size_), begin(begin_) {}
-
-    bool empty() const {return size==0;}
-    bool is_divisible() const {return size>=grainsize;}
-
-    quick_sort_range( quick_sort_range& range, split )
-        : comp(range.comp)
-        , size(split_range(range))
-          // +1 accounts for the pivot element, which is at its correct place
-          // already and, therefore, is not included into subranges.
-        , begin(range.begin+range.size+1) {}
-};
-
-#if __TBB_TASK_GROUP_CONTEXT
-//!
Body class used to test if elements in a range are presorted
-/** @ingroup algorithms */
-template<typename RandomAccessIterator, typename Compare>
-class quick_sort_pretest_body : no_assign {
-    const Compare &comp;
-
-public:
-    quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {}
-
-    void operator()( const blocked_range<RandomAccessIterator>& range ) const {
-        task &my_task = task::self();
-        RandomAccessIterator my_end = range.end();
-
-        int i = 0;
-        for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i) {
-            if ( i%64 == 0 && my_task.is_cancelled() ) break;
-
-            // The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
-            if ( comp( *(k), *(k-1) ) ) {
-                my_task.cancel_group_execution();
-                break;
-            }
-        }
-    }
-
-};
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-//! Body class used to sort elements in a range that is smaller than the grainsize.
-/** @ingroup algorithms */
-template<typename RandomAccessIterator, typename Compare>
-struct quick_sort_body {
-    void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const {
-        //SerialQuickSort( range.begin, range.size, range.comp );
-        std::sort( range.begin, range.begin + range.size, range.comp );
-    }
-};
-
-//! Wrapper method to initiate the sort by calling parallel_for.
-/** @ingroup algorithms */
-template<typename RandomAccessIterator, typename Compare>
-void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) {
-#if __TBB_TASK_GROUP_CONTEXT
-    task_group_context my_context;
-    const int serial_cutoff = 9;
-
-    __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" );
-    RandomAccessIterator k = begin;
-    for ( ; k != begin + serial_cutoff; ++k ) {
-        if ( comp( *(k+1), *k ) ) {
-            goto do_parallel_quick_sort;
-        }
-    }
-
-    parallel_for( blocked_range<RandomAccessIterator>(k+1, end),
-                  quick_sort_pretest_body<RandomAccessIterator,Compare>(comp),
-                  auto_partitioner(),
-                  my_context);
-
-    if (my_context.is_group_execution_cancelled())
-do_parallel_quick_sort:
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-        parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ),
-                      quick_sort_body<RandomAccessIterator,Compare>(),
-                      auto_partitioner() );
-}
-
-} // namespace internal
-//! @endcond
-} // namespace interfaceX
-
-/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
-    Requirements on the iterator type \c It and its value type \c T for \c parallel_sort:
-
-    - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given
-    iterators \c a and \c b are pointing to. \c It should be a random access iterator.
-
-    - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y;
-**/
-
-/** \name parallel_sort
-    See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
-//@{
-
-//! Sorts the data in [begin,end) using the given comparator
-/** The compare function object is used for all comparisons between elements during sorting.
-    The compare object must define a bool operator() function.
-    @ingroup algorithms **/
-template<typename RandomAccessIterator, typename Compare>
-void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp) {
-    const int min_parallel_size = 500;
-    if( end > begin ) {
-        if (end - begin < min_parallel_size) {
-            std::sort(begin, end, comp);
-        } else {
-            interface9::internal::parallel_quick_sort(begin, end, comp);
-        }
-    }
-}
-
-//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator>
-/** @ingroup algorithms **/
-template<typename RandomAccessIterator>
-inline void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) {
-    parallel_sort( begin, end, std::less< typename std::iterator_traits<RandomAccessIterator>::value_type >() );
-}
-
-//! Sorts the data in rng using the given comparator
-/** @ingroup algorithms **/
-template<typename Range, typename Compare>
-void parallel_sort(Range& rng, const Compare& comp) {
-    parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng), comp);
-}
-
-//! Sorts the data in const rng using the given comparator
-/** @ingroup algorithms **/
-template<typename Range, typename Compare>
-void parallel_sort(const Range& rng, const Compare& comp) {
-    parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng), comp);
-}
-
-//! Sorts the data in rng with a default comparator \c std::less
-/** @ingroup algorithms **/
-template<typename Range>
-void parallel_sort(Range& rng) {
-    parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng));
-}
-
-//! Sorts the data in const rng with a default comparator \c std::less
-/** @ingroup algorithms **/
-template<typename Range>
-void parallel_sort(const Range& rng) {
-    parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng));
-}
-
-//! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
-/** @ingroup algorithms **/
-template<typename T>
-inline void parallel_sort( T * begin, T * end ) {
-    parallel_sort( begin, end, std::less< T >() );
-}
-//@}
-
-
-} // namespace tbb
-
-#endif
-
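As the code above shows, parallel_sort falls back to std::sort below 500 elements and otherwise runs a parallel quicksort whose pretest can cancel the whole group when the input is already sorted. A minimal usage sketch against the removed API; the data and comparator are illustrative:

    #include <functional>
    #include <vector>
    #include "tbb/parallel_sort.h"

    int main() {
        std::vector<int> v = {5, 3, 8, 1, 9, 2};
        tbb::parallel_sort(v.begin(), v.end());                       // ascending, std::less
        tbb::parallel_sort(v.begin(), v.end(), std::greater<int>()); // descending
    }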
diff --git a/lib/3rdParty/tbb/include/tbb/parallel_while.h b/lib/3rdParty/tbb/include/tbb/parallel_while.h
deleted file mode 100644
index 2f37a41a..00000000
--- a/lib/3rdParty/tbb/include/tbb/parallel_while.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_parallel_while
-#define __TBB_parallel_while
-
-#include "task.h"
-#include <new>
-
-namespace tbb {
-
-template<typename Body>
-class parallel_while;
-
-//! @cond INTERNAL
-namespace internal {
-
-    template<typename Stream, typename Body> class while_task;
-
-    //! For internal use only.
-    /** Executes one iteration of a while.
-        @ingroup algorithms */
-    template<typename Body>
-    class while_iteration_task: public task {
-        const Body& my_body;
-        typename Body::argument_type my_value;
-        task* execute() __TBB_override {
-            my_body(my_value);
-            return NULL;
-        }
-        while_iteration_task( const typename Body::argument_type& value, const Body& body ) :
-            my_body(body), my_value(value)
-        {}
-        template<typename Body_> friend class while_group_task;
-        friend class tbb::parallel_while<Body>;
-    };
-
-    //! For internal use only
-    /** Unpacks a block of iterations.
-        @ingroup algorithms */
-    template<typename Body>
-    class while_group_task: public task {
-        static const size_t max_arg_size = 4;
-        const Body& my_body;
-        size_t size;
-        typename Body::argument_type my_arg[max_arg_size];
-        while_group_task( const Body& body ) : my_body(body), size(0) {}
-        task* execute() __TBB_override {
-            typedef while_iteration_task<Body> iteration_type;
-            __TBB_ASSERT( size>0, NULL );
-            task_list list;
-            task* t;
-            size_t k=0;
-            for(;;) {
-                t = new( allocate_child() ) iteration_type(my_arg[k],my_body);
-                if( ++k==size ) break;
-                list.push_back(*t);
-            }
-            set_ref_count(int(k+1));
-            spawn(list);
-            spawn_and_wait_for_all(*t);
-            return NULL;
-        }
-        template<typename Stream, typename Body_> friend class while_task;
-    };
-
-    //! For internal use only.
-    /** Gets block of iterations from a stream and packages them into a while_group_task.
-        @ingroup algorithms */
-    template<typename Stream, typename Body>
-    class while_task: public task {
-        Stream& my_stream;
-        const Body& my_body;
-        empty_task& my_barrier;
-        task* execute() __TBB_override {
-            typedef while_group_task<Body> block_type;
-            block_type& t = *new( allocate_additional_child_of(my_barrier) ) block_type(my_body);
-            size_t k=0;
-            while( my_stream.pop_if_present(t.my_arg[k]) ) {
-                if( ++k==block_type::max_arg_size ) {
-                    // There might be more iterations.
-                    recycle_to_reexecute();
-                    break;
-                }
-            }
-            if( k==0 ) {
-                destroy(t);
-                return NULL;
-            } else {
-                t.size = k;
-                return &t;
-            }
-        }
-        while_task( Stream& stream, const Body& body, empty_task& barrier ) :
-            my_stream(stream),
-            my_body(body),
-            my_barrier(barrier)
-        {}
-        friend class tbb::parallel_while<Body>;
-    };
-
-} // namespace internal
-//! @endcond
-
-//! Parallel iteration over a stream, with optional addition of more work.
-/** The Body b has the requirement: \n
-        "b(v)" \n
-        "b.argument_type" \n
-    where v is an argument_type
-    @ingroup algorithms */
-template<typename Body>
-class parallel_while: internal::no_copy {
-public:
-    //! Construct empty non-running parallel while.
-    parallel_while() : my_body(NULL), my_barrier(NULL) {}
-
-    //! Destructor cleans up data members before returning.
-    ~parallel_while() {
-        if( my_barrier ) {
-            my_barrier->destroy(*my_barrier);
-            my_barrier = NULL;
-        }
-    }
-
-    //! Type of items
-    typedef typename Body::argument_type value_type;
-
-    //! Apply body.apply to each item in the stream.
-    /** A Stream s has the requirements \n
-         "S::value_type" \n
-         "s.pop_if_present(value) is convertible to bool */
-    template<typename Stream>
-    void run( Stream& stream, const Body& body );
-
-    //! Add a work item while running.
-    /** Should be executed only by body.apply or a thread spawned therefrom. */
-    void add( const value_type& item );
-
-private:
-    const Body* my_body;
-    empty_task* my_barrier;
-};
-
-template<typename Body>
-template<typename Stream>
-void parallel_while<Body>::run( Stream& stream, const Body& body ) {
-    using namespace internal;
-    empty_task& barrier = *new( task::allocate_root() ) empty_task();
-    my_body = &body;
-    my_barrier = &barrier;
-    my_barrier->set_ref_count(2);
-    while_task<Stream,Body>& w = *new( my_barrier->allocate_child() ) while_task<Stream,Body>( stream, body, barrier );
-    my_barrier->spawn_and_wait_for_all(w);
-    my_barrier->destroy(*my_barrier);
-    my_barrier = NULL;
-    my_body = NULL;
-}
-
-template<typename Body>
-void parallel_while<Body>::add( const value_type& item ) {
-    __TBB_ASSERT(my_barrier,"attempt to add to parallel_while that is not running");
-    typedef internal::while_iteration_task<Body> iteration_type;
-    iteration_type& i = *new( task::allocate_additional_child_of(*my_barrier) ) iteration_type(item,*my_body);
-    task::self().spawn( i );
-}
-
-} // namespace
-
-#endif /* __TBB_parallel_while */
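parallel_while, deleted above, pulls items from a stream via pop_if_present and lets the body inject extra items via add(); it is the precursor of parallel_do. A sketch against the removed interface; the stream and body types here are illustrative:

    #include <deque>
    #include "tbb/parallel_while.h"

    // Illustrative item stream with the pop_if_present() shape parallel_while expects.
    struct WorkStream {
        std::deque<int> items;
        bool pop_if_present(int& v) {
            if (items.empty()) return false;
            v = items.front();
            items.pop_front();
            return true;
        }
    };

    struct SquareBody {
        typedef int argument_type;   // required typedef
        void operator()(int v) const {
            volatile int r = v * v;  // stand-in for real per-item work
            (void)r;
        }
    };

    int main() {
        WorkStream s;
        for (int i = 0; i < 100; ++i) s.items.push_back(i);
        tbb::parallel_while<SquareBody> w;
        w.run(s, SquareBody());      // blocks until the stream is drained
    }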
diff --git a/lib/3rdParty/tbb/include/tbb/partitioner.h b/lib/3rdParty/tbb/include/tbb/partitioner.h
deleted file mode 100644
index 96a0b757..00000000
--- a/lib/3rdParty/tbb/include/tbb/partitioner.h
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_partitioner_H
-#define __TBB_partitioner_H
-
-#ifndef __TBB_INITIAL_CHUNKS
-// initial task divisions per thread
-#define __TBB_INITIAL_CHUNKS 2
-#endif
-#ifndef __TBB_RANGE_POOL_CAPACITY
-// maximum number of elements in range pool
-#define __TBB_RANGE_POOL_CAPACITY 8
-#endif
-#ifndef __TBB_INIT_DEPTH
-// initial value for depth of range pool
-#define __TBB_INIT_DEPTH 5
-#endif
-#ifndef __TBB_DEMAND_DEPTH_ADD
-// when imbalance is found range splits this value times more
-#define __TBB_DEMAND_DEPTH_ADD 1
-#endif
-#ifndef __TBB_STATIC_THRESHOLD
-// necessary number of clocks for the work to be distributed among all tasks
-#define __TBB_STATIC_THRESHOLD 40000
-#endif
-#if __TBB_DEFINE_MIC
-#define __TBB_NONUNIFORM_TASK_CREATION 1
-#ifdef __TBB_time_stamp
-#define __TBB_USE_MACHINE_TIME_STAMPS 1
-#define __TBB_task_duration() __TBB_STATIC_THRESHOLD
-#endif // __TBB_machine_time_stamp
-#endif // __TBB_DEFINE_MIC
-
-#include "task.h"
-#include "aligned_space.h"
-#include "atomic.h"
-#include "internal/_template_helpers.h"
-
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-    // Workaround for overzealous compiler warnings
-    #pragma warning (push)
-    #pragma warning (disable: 4244)
-#endif
-
-namespace tbb {
-
-class auto_partitioner;
-class simple_partitioner;
-class static_partitioner;
-class affinity_partitioner;
-
-namespace interface9 {
-    namespace internal {
-        class affinity_partition_type;
-    }
-}
-
-namespace internal { //< @cond INTERNAL
-size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
-
-//! Defines entry point for affinity partitioner into tbb run-time library.
-class affinity_partitioner_base_v3: no_copy {
-    friend class tbb::affinity_partitioner;
-    friend class tbb::interface9::internal::affinity_partition_type;
-    //! Array that remembers affinities of tree positions to affinity_id.
-    /** NULL if my_size==0. */
-    affinity_id* my_array;
-    //! Number of elements in my_array.
-    size_t my_size;
-    //! Zeros the fields.
-    affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
-    //! Deallocates my_array.
-    ~affinity_partitioner_base_v3() {resize(0);}
-    //! Resize my_array.
-    /** Retains values if resulting size is the same. */
-    void __TBB_EXPORTED_METHOD resize( unsigned factor );
-};
-
-//! Provides backward-compatible methods for partition objects without affinity.
-class partition_type_base {
-public:
-    void set_affinity( task & ) {}
-    void note_affinity( task::affinity_id ) {}
-    task* continue_after_execute_range() {return NULL;}
-    bool decide_whether_to_delay() {return false;}
-    void spawn_or_delay( bool, task& b ) {
-        task::spawn(b);
-    }
-};
-
-template<typename Range, typename Body, typename Partitioner> class start_scan;
-
-} //< namespace internal @endcond
-
-namespace serial {
-namespace interface9 {
-template<typename Range, typename Body, typename Partitioner> class start_for;
-}
-}
-
-namespace interface9 {
-//! @cond INTERNAL
-namespace internal {
-using namespace tbb::internal;
-template<typename Range, typename Body, typename Partitioner> class start_for;
-template<typename Range, typename Body, typename Partitioner> class start_reduce;
-
-//!
-//! Join task node that contains shared flag for stealing feedback
-class flag_task: public task {
-public:
-    tbb::atomic<bool> my_child_stolen;
-    flag_task() { my_child_stolen = false; }
-    task* execute() __TBB_override { return NULL; }
-    static void mark_task_stolen(task &t) {
-        tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
-#if TBB_USE_THREADING_TOOLS
-        // Threading tools respect lock prefix but report false-positive data-race via plain store
-        flag.fetch_and_store<release>(true);
-#else
-        flag = true;
-#endif //TBB_USE_THREADING_TOOLS
-    }
-    static bool is_peer_stolen(task &t) {
-        return static_cast<flag_task*>(t.parent())->my_child_stolen;
-    }
-};
-
-//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows
-//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented
-//! by a number that cannot fit into machine word.
-typedef unsigned char depth_t;
-
-//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
-template <typename T, depth_t MaxCapacity>
-class range_vector {
-    depth_t my_head;
-    depth_t my_tail;
-    depth_t my_size;
-    depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
-    tbb::aligned_space<T, MaxCapacity> my_pool;
-
-public:
-    //! initialize via first range in pool
-    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
-        my_depth[0] = 0;
-        new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move?
-    }
-    ~range_vector() {
-        while( !empty() ) pop_back();
-    }
-    bool empty() const { return my_size == 0; }
-    depth_t size() const { return my_size; }
-    //! Populates range pool via ranges up to max depth or while divisible
-    //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces
-    void split_to_fill(depth_t max_depth) {
-        while( my_size < MaxCapacity && is_divisible(max_depth) ) {
-            depth_t prev = my_head;
-            my_head = (my_head + 1) % MaxCapacity;
-            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
-            my_pool.begin()[prev].~T(); // instead of assignment
-            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); // do 'inverse' split
-            my_depth[my_head] = ++my_depth[prev];
-            my_size++;
-        }
-    }
-    void pop_back() {
-        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
-        my_pool.begin()[my_head].~T();
-        my_size--;
-        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
-    }
-    void pop_front() {
-        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
-        my_pool.begin()[my_tail].~T();
-        my_size--;
-        my_tail = (my_tail + 1) % MaxCapacity;
-    }
-    T& back() {
-        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
-        return my_pool.begin()[my_head];
-    }
-    T& front() {
-        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
-        return my_pool.begin()[my_tail];
-    }
-    //! similarly to front(), returns depth of the first range in the pool
-    depth_t front_depth() {
-        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
-        return my_depth[my_tail];
-    }
-    depth_t back_depth() {
-        __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
-        return my_depth[my_head];
-    }
-    bool is_divisible(depth_t max_depth) {
-        return back_depth() < max_depth && back().is_divisible();
-    }
-};
-
-//! Provides default methods for partition objects and common algorithm blocks.
-template <typename Partition>
-struct partition_type_base {
-    typedef split split_type;
-    // decision makers
-    void set_affinity( task & ) {}
-    void note_affinity( task::affinity_id ) {}
-    bool check_being_stolen(task &) { return false; } // part of old should_execute_range()
-    bool check_for_demand(task &) { return false; }
-    bool is_divisible() { return true; } // part of old should_execute_range()
-    depth_t max_depth() { return 0; }
-    void align_depth(depth_t) { }
-    template <typename Range> split_type get_split() { return split(); }
-    Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper
-
-    template<typename StartType, typename Range>
-    void work_balance(StartType &start, Range &range) {
-        start.run_body( range ); // simple partitioner goes always here
-    }
-
-    template<typename StartType, typename Range>
-    void execute(StartType &start, Range &range) {
-        // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
-        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
-        // If range is divisible {
-        //    Spread the work while [initial divisions left];
-        //    Create trap task [if necessary];
-        // }
-        // If not divisible or [max depth is reached], execute, else do the range pool part
-        if ( range.is_divisible() ) {
-            if ( self().is_divisible() ) {
-                do { // split until is divisible
-                    typename Partition::split_type split_obj = self().template get_split<Range>();
-                    start.offer_work( split_obj );
-                } while ( range.is_divisible() && self().is_divisible() );
-            }
-        }
-        self().work_balance(start, range);
-    }
-};
-
-//! Provides default splitting strategy for partition objects.
-template <typename Partition>
-struct adaptive_mode : partition_type_base<Partition> {
-    typedef Partition my_partition;
-    using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes
-    size_t my_divisor;
-    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
-    // A task which has only one index must produce the right split without reserved index in order to avoid
-    // it to be overwritten in note_affinity() of the created (right) task.
-    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order)
-    static const unsigned factor = 1;
-    adaptive_mode() : my_divisor(tbb::internal::get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
-    adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
-    adaptive_mode(adaptive_mode &src, const proportional_split& split_obj) : my_divisor(do_split(src, split_obj)) {}
-    /*! Override do_split methods in order to specify splitting strategy */
-    size_t do_split(adaptive_mode &src, split) {
-        return src.my_divisor /= 2u;
-    }
-    size_t do_split(adaptive_mode &src, const proportional_split& split_obj) {
-#if __TBB_ENABLE_RANGE_FEEDBACK
-        size_t portion = size_t(float(src.my_divisor) * float(split_obj.right())
-                    / float(split_obj.left() + split_obj.right()) + 0.5f);
-#else
-        size_t portion = split_obj.right() * my_partition::factor;
-#endif
-        portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
-#if __TBB_ENABLE_RANGE_FEEDBACK
-        /** Corner case handling */
-        if (!portion)
-            portion = my_partition::factor;
-        else if (portion == src.my_divisor)
-            portion = src.my_divisor - my_partition::factor;
-#endif
-        src.my_divisor -= portion;
-        return portion;
-    }
-    bool is_divisible() { // part of old should_execute_range()
-        return my_divisor > my_partition::factor;
-    }
-};
-
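The masked addition in do_split above rounds the reserved portion to a multiple of the partitioner's factor, which must be a power of two. A standalone recreation of just that arithmetic, using factor 16 as affinity_partition_type does later in this header:

    #include <cstdio>

    // Round 'portion' to the nearest multiple of 'factor' (a power of two).
    // (0ul - factor) is the two's-complement mask equivalent of ~(factor - 1).
    static size_t round_to_factor(size_t portion, size_t factor) {
        return (portion + factor / 2) & (0ul - factor);
    }

    int main() {
        std::printf("%zu\n", round_to_factor(7, 16));   // 0  (7 + 8 = 15, masked down)
        std::printf("%zu\n", round_to_factor(9, 16));   // 16 (9 + 8 = 17, rounds up)
        std::printf("%zu\n", round_to_factor(40, 16));  // 48
        return 0;
    }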
-//! Provides default linear indexing of partitioner's sequence
-template <typename Partition>
-struct linear_affinity_mode : adaptive_mode<Partition> {
-    using adaptive_mode<Partition>::my_divisor;
-    size_t my_head;
-    using adaptive_mode<Partition>::self;
-    linear_affinity_mode() : adaptive_mode<Partition>(), my_head(0) {}
-    linear_affinity_mode(linear_affinity_mode &src, split) : adaptive_mode<Partition>(src, split())
-        , my_head(src.my_head + src.my_divisor) {}
-    linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : adaptive_mode<Partition>(src, split_obj)
-        , my_head(src.my_head + src.my_divisor) {}
-    void set_affinity( task &t ) {
-        if( my_divisor )
-            t.set_affinity( affinity_id(my_head) + 1 );
-    }
-};
-
-//! Class determines whether template parameter has static boolean constant
-//! 'is_splittable_in_proportion' initialized with value of 'true' or not.
-/** If template parameter has such field that has been initialized with non-zero
- *  value then class field will be set to 'true', otherwise - 'false'
- */
-template <typename Range>
-class is_splittable_in_proportion {
-private:
-    typedef char yes[1];
-    typedef char no [2];
-
-    template <typename range_type> static yes& decide(typename enable_if<range_type::is_splittable_in_proportion>::type *);
-    template <typename range_type> static no& decide(...);
-public:
-    // equals to 'true' if and only if static const variable 'is_splittable_in_proportion' of template parameter
-    // initialized with the value of 'true'
-    static const bool value = (sizeof(decide<Range>(0)) == sizeof(yes));
-};
-
-//! Provides default methods for non-balancing partition objects.
-template <typename Mode>
-struct unbalancing_partition_type : Mode {
-    using Mode::self;
-    unbalancing_partition_type() : Mode() {}
-    unbalancing_partition_type(unbalancing_partition_type& p, split) : Mode(p, split()) {}
-    unbalancing_partition_type(unbalancing_partition_type& p, const proportional_split& split_obj) : Mode(p, split_obj) {}
-#if _MSC_VER && !defined(__INTEL_COMPILER)
-    // Suppress "conditional expression is constant" warning.
-    #pragma warning( push )
-    #pragma warning( disable: 4127 )
-#endif
-    template <typename Range>
-    proportional_split get_split() {
-        if (is_splittable_in_proportion<Range>::value) {
-            size_t size = self().my_divisor / Mode::my_partition::factor;
-#if __TBB_NONUNIFORM_TASK_CREATION
-            size_t right = (size + 2) / 3;
-#else
-            size_t right = size / 2;
-#endif
-            size_t left = size - right;
-            return proportional_split(left, right);
-        } else {
-            return proportional_split(1, 1);
-        }
-    }
-#if _MSC_VER && !defined(__INTEL_COMPILER)
-    #pragma warning( pop )
-#endif // warning 4127 is back
-};
-
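is_splittable_in_proportion above is the classic sizeof-based SFINAE member-detection idiom. A minimal self-contained recreation, using std::enable_if in place of TBB's internal enable_if; the two range types are invented for this demo:

    #include <iostream>
    #include <type_traits>

    struct plain_range { /* no is_splittable_in_proportion member */ };
    struct proportional_range { static const bool is_splittable_in_proportion = true; };

    template <typename Range>
    class detects_proportional_split {
        typedef char yes[1];
        typedef char no [2];
        // Viable only when Range::is_splittable_in_proportion exists and is true;
        // otherwise substitution fails and the ellipsis overload is chosen.
        template <typename R> static yes& decide(typename std::enable_if<R::is_splittable_in_proportion>::type*);
        template <typename R> static no&  decide(...);
    public:
        static const bool value = (sizeof(decide<Range>(0)) == sizeof(yes));
    };

    int main() {
        std::cout << detects_proportional_split<plain_range>::value        << "\n"; // 0
        std::cout << detects_proportional_split<proportional_range>::value << "\n"; // 1
        return 0;
    }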
-/*! Determine work-balance phase implementing splitting & stealing actions */
-template<class Mode>
-struct balancing_partition_type : unbalancing_partition_type<Mode> {
-    using Mode::self;
-#ifdef __TBB_USE_MACHINE_TIME_STAMPS
-    tbb::internal::machine_tsc_t my_dst_tsc;
-#endif
-    enum {
-        begin = 0,
-        run,
-        pass
-    } my_delay;
-    depth_t my_max_depth;
-    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
-    balancing_partition_type(): unbalancing_partition_type<Mode>()
-#ifdef __TBB_USE_MACHINE_TIME_STAMPS
-        , my_dst_tsc(0)
-#endif
-        , my_delay(begin)
-        , my_max_depth(__TBB_INIT_DEPTH) {}
-    balancing_partition_type(balancing_partition_type& p, split)
-        : unbalancing_partition_type<Mode>(p, split())
-#ifdef __TBB_USE_MACHINE_TIME_STAMPS
-        , my_dst_tsc(0)
-#endif
-        , my_delay(pass)
-        , my_max_depth(p.my_max_depth) {}
-    balancing_partition_type(balancing_partition_type& p, const proportional_split& split_obj)
-        : unbalancing_partition_type<Mode>(p, split_obj)
-#ifdef __TBB_USE_MACHINE_TIME_STAMPS
-        , my_dst_tsc(0)
-#endif
-        , my_delay(begin)
-        , my_max_depth(p.my_max_depth) {}
-    bool check_being_stolen( task &t) { // part of old should_execute_range()
-        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
-            self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
-            if( t.is_stolen_task() && t.parent()->ref_count() >= 2 ) { // runs concurrently with the left task
-#if __TBB_USE_OPTIONAL_RTTI
-                // RTTI is available, check whether the cast is valid
-                __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
-                // correctness of the cast relies on avoiding the root task for which:
-                // - initial value of my_divisor != 0 (protected by separate assertion)
-                // - is_stolen_task() always returns false for the root task.
-#endif
-                flag_task::mark_task_stolen(t);
-                if( !my_max_depth ) my_max_depth++;
-                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
-                return true;
-            }
-        }
-        return false;
-    }
-    depth_t max_depth() { return my_max_depth; }
-    void align_depth(depth_t base) {
-        __TBB_ASSERT(base <= my_max_depth, 0);
-        my_max_depth -= base;
-    }
-    template<typename StartType, typename Range>
-    void work_balance(StartType &start, Range &range) {
-        if( !range.is_divisible() || !self().max_depth() ) {
-            start.run_body( range ); // simple partitioner goes always here
-        }
-        else { // do range pool
-            internal::range_vector<Range, range_pool_size> range_pool(range);
-            do {
-                range_pool.split_to_fill(self().max_depth()); // fill range pool
-                if( self().check_for_demand( start ) ) {
-                    if( range_pool.size() > 1 ) {
-                        start.offer_work( range_pool.front(), range_pool.front_depth() );
-                        range_pool.pop_front();
-                        continue;
-                    }
-                    if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
-                        continue; // note: next split_to_fill() should split range at least once
-                }
-                start.run_body( range_pool.back() );
-                range_pool.pop_back();
-            } while( !range_pool.empty() && !start.is_cancelled() );
-        }
-    }
-    bool check_for_demand( task &t ) {
-        if( pass == my_delay ) {
-            if( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array
-                return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
-            else if( self().my_divisor && my_max_depth ) { // make balancing task
-                self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
-                return true;
-            }
-            else if( flag_task::is_peer_stolen(t) ) {
-                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
-                return true;
-            }
-        } else if( begin == my_delay ) {
-#ifndef __TBB_USE_MACHINE_TIME_STAMPS
-            my_delay = pass;
-#else
my_dst_tsc = __TBB_time_stamp() + __TBB_task_duration(); - my_delay = run; - } else if( run == my_delay ) { - if( __TBB_time_stamp() < my_dst_tsc ) { - __TBB_ASSERT(my_max_depth > 0, NULL); - my_max_depth--; // increase granularity since tasks seem having too small work - return false; - } - my_delay = pass; - return true; -#endif // __TBB_USE_MACHINE_TIME_STAMPS - } - return false; - } -}; - -class auto_partition_type: public balancing_partition_type > { -public: - auto_partition_type( const auto_partitioner& ) - : balancing_partition_type >() { - my_divisor *= __TBB_INITIAL_CHUNKS; - } - auto_partition_type( auto_partition_type& src, split) - : balancing_partition_type >(src, split()) {} - bool is_divisible() { // part of old should_execute_range() - if( my_divisor > 1 ) return true; - if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead - // keep same fragmentation while splitting for the local task pool - my_max_depth--; - my_divisor = 0; // decrease max_depth once per task - return true; - } else return false; - } - bool check_for_demand(task &t) { - if( flag_task::is_peer_stolen(t) ) { - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } else return false; - } -}; - -class simple_partition_type: public partition_type_base { -public: - simple_partition_type( const simple_partitioner& ) {} - simple_partition_type( const simple_partition_type&, split ) {} - //! simplified algorithm - template - void execute(StartType &start, Range &range) { - split_type split_obj = split(); // start.offer_work accepts split_type as reference - while( range.is_divisible() ) - start.offer_work( split_obj ); - start.run_body( range ); - } -}; - -class static_partition_type : public unbalancing_partition_type > { -public: - typedef proportional_split split_type; - static_partition_type( const static_partitioner& ) - : unbalancing_partition_type >() {} - static_partition_type( static_partition_type& p, split ) - : unbalancing_partition_type >(p, split()) {} - static_partition_type( static_partition_type& p, const proportional_split& split_obj ) - : unbalancing_partition_type >(p, split_obj) {} -}; - -class affinity_partition_type : public balancing_partition_type > { - static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units - tbb::internal::affinity_id* my_array; -public: - static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task - typedef proportional_split split_type; - affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) - : balancing_partition_type >() { - __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); - ap.resize(factor); - my_array = ap.my_array; - my_max_depth = factor_power + 1; - __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); - } - affinity_partition_type(affinity_partition_type& p, split) - : balancing_partition_type >(p, split()) - , my_array(p.my_array) {} - affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) - : balancing_partition_type >(p, split_obj) - , my_array(p.my_array) {} - void set_affinity( task &t ) { - if( my_divisor ) { - if( !my_array[my_head] ) - // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse - t.set_affinity( affinity_id(my_head / factor + 1) ); - else - t.set_affinity( my_array[my_head] ); - } - } - void note_affinity( task::affinity_id id ) { - if( my_divisor ) - my_array[my_head] = id; - } -}; - -//! 
Backward-compatible partition for auto and affinity partition objects.
-class old_auto_partition_type: public tbb::internal::partition_type_base {
-    size_t num_chunks;
-    static const size_t VICTIM_CHUNKS = 4;
-public:
-    bool should_execute_range(const task &t) {
-        if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
-            num_chunks = VICTIM_CHUNKS;
-        return num_chunks==1;
-    }
-    old_auto_partition_type( const auto_partitioner& )
-      : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
-    old_auto_partition_type( const affinity_partitioner& )
-      : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
-    old_auto_partition_type( old_auto_partition_type& pt, split ) {
-        num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
-    }
-};
-
-} // namespace interface9::internal
-//! @endcond
-} // namespace interface9
-
-//! A simple partitioner
-/** Divides the range until the range is not divisible.
-    @ingroup algorithms */
-class simple_partitioner {
-public:
-    simple_partitioner() {}
-private:
-    template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
-    template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
-    template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
-    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
-    // backward compatibility
-    class partition_type: public internal::partition_type_base {
-    public:
-        bool should_execute_range(const task& ) {return false;}
-        partition_type( const simple_partitioner& ) {}
-        partition_type( const partition_type&, split ) {}
-    };
-    // new implementation just extends existing interface
-    typedef interface9::internal::simple_partition_type task_partition_type;
-
-    // TODO: consider to make split_type public
-    typedef interface9::internal::simple_partition_type::split_type split_type;
-};
-
-//! An auto partitioner
-/** The range is initially divided into several large chunks.
-    Chunks are further subdivided into smaller pieces if demand detected and they are divisible.
-    @ingroup algorithms */
-class auto_partitioner {
-public:
-    auto_partitioner() {}
-
-private:
-    template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
-    template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
-    template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
-    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
-    // backward compatibility
-    typedef interface9::internal::old_auto_partition_type partition_type;
-    // new implementation just extends existing interface
-    typedef interface9::internal::auto_partition_type task_partition_type;
-
-    // TODO: consider to make split_type public
-    typedef interface9::internal::auto_partition_type::split_type split_type;
-};
-
-//! A static partitioner
-class static_partitioner {
-public:
-    static_partitioner() {}
-private:
-    template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
-    template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
-    template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
-    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
-    // backward compatibility
-    typedef interface9::internal::old_auto_partition_type partition_type;
-    // new implementation just extends existing interface
-    typedef interface9::internal::static_partition_type task_partition_type;
-
-    // TODO: consider to make split_type public
-    typedef interface9::internal::static_partition_type::split_type split_type;
-};
-
-//!
An affinity partitioner -class affinity_partitioner: internal::affinity_partitioner_base_v3 { -public: - affinity_partitioner() {} - -private: - template friend class serial::interface9::start_for; - template friend class interface9::internal::start_for; - template friend class interface9::internal::start_reduce; - template friend class internal::start_scan; - // backward compatibility - for parallel_scan only - typedef interface9::internal::old_auto_partition_type partition_type; - // new implementation just extends existing interface - typedef interface9::internal::affinity_partition_type task_partition_type; - - // TODO: consider to make split_type public - typedef interface9::internal::affinity_partition_type::split_type split_type; -}; - -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4244 is back -#undef __TBB_INITIAL_CHUNKS -#undef __TBB_RANGE_POOL_CAPACITY -#undef __TBB_INIT_DEPTH -#endif /* __TBB_partitioner_H */ diff --git a/lib/3rdParty/tbb/include/tbb/pipeline.h b/lib/3rdParty/tbb/include/tbb/pipeline.h deleted file mode 100644 index 20a8ec9b..00000000 --- a/lib/3rdParty/tbb/include/tbb/pipeline.h +++ /dev/null @@ -1,665 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_pipeline_H -#define __TBB_pipeline_H - -#include "atomic.h" -#include "task.h" -#include "tbb_allocator.h" -#include - -#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT || __TBB_TR1_TYPE_PROPERTIES_IN_STD_PRESENT -#include -#endif - -namespace tbb { - -class pipeline; -class filter; - -//! @cond INTERNAL -namespace internal { - -// The argument for PIPELINE_VERSION should be an integer between 2 and 9 -#define __TBB_PIPELINE_VERSION(x) ((unsigned char)(x-2)<<1) - -typedef unsigned long Token; -typedef long tokendiff_t; -class stage_task; -class input_buffer; -class pipeline_root_task; -class pipeline_cleaner; - -} // namespace internal - -namespace interface6 { - template class filter_t; - - namespace internal { - class pipeline_proxy; - } -} - -//! @endcond - -//! A stage in a pipeline. -/** @ingroup algorithms */ -class filter: internal::no_copy { -private: - //! Value used to mark "not in pipeline" - static filter* not_in_pipeline() {return reinterpret_cast(intptr_t(-1));} -protected: - //! The lowest bit 0 is for parallel vs. serial - static const unsigned char filter_is_serial = 0x1; - - //! 4th bit distinguishes ordered vs unordered filters. - /** The bit was not set for parallel filters in TBB 2.1 and earlier, - but is_ordered() function always treats parallel filters as out of order. */ - static const unsigned char filter_is_out_of_order = 0x1<<4; - - //! 5th bit distinguishes thread-bound and regular filters. - static const unsigned char filter_is_bound = 0x1<<5; - - //! 6th bit marks input filters emitting small objects - static const unsigned char filter_may_emit_null = 0x1<<6; - - //! 7th bit defines exception propagation mode expected by the application. 
- static const unsigned char exact_exception_propagation = -#if TBB_USE_CAPTURED_EXCEPTION - 0x0; -#else - 0x1<<7; -#endif /* TBB_USE_CAPTURED_EXCEPTION */ - - static const unsigned char current_version = __TBB_PIPELINE_VERSION(5); - static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for version -public: - enum mode { - //! processes multiple items in parallel and in no particular order - parallel = current_version | filter_is_out_of_order, - //! processes items one at a time; all such filters process items in the same order - serial_in_order = current_version | filter_is_serial, - //! processes items one at a time and in no particular order - serial_out_of_order = current_version | filter_is_serial | filter_is_out_of_order, - //! @deprecated use serial_in_order instead - serial = serial_in_order - }; -protected: - explicit filter( bool is_serial_ ) : - next_filter_in_pipeline(not_in_pipeline()), - my_input_buffer(NULL), - my_filter_mode(static_cast((is_serial_ ? serial : parallel) | exact_exception_propagation)), - prev_filter_in_pipeline(not_in_pipeline()), - my_pipeline(NULL), - next_segment(NULL) - {} - - explicit filter( mode filter_mode ) : - next_filter_in_pipeline(not_in_pipeline()), - my_input_buffer(NULL), - my_filter_mode(static_cast(filter_mode | exact_exception_propagation)), - prev_filter_in_pipeline(not_in_pipeline()), - my_pipeline(NULL), - next_segment(NULL) - {} - - // signal end-of-input for concrete_filters - void __TBB_EXPORTED_METHOD set_end_of_input(); - -public: - //! True if filter is serial. - bool is_serial() const { - return bool( my_filter_mode & filter_is_serial ); - } - - //! True if filter must receive stream in order. - bool is_ordered() const { - return (my_filter_mode & (filter_is_out_of_order|filter_is_serial))==filter_is_serial; - } - - //! True if filter is thread-bound. - bool is_bound() const { - return ( my_filter_mode & filter_is_bound )==filter_is_bound; - } - - //! true if an input filter can emit null - bool object_may_be_null() { - return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; - } - - //! Operate on an item from the input stream, and return item for output stream. - /** Returns NULL if filter is a sink. */ - virtual void* operator()( void* item ) = 0; - - //! Destroy filter. - /** If the filter was added to a pipeline, the pipeline must be destroyed first. */ - virtual __TBB_EXPORTED_METHOD ~filter(); - -#if __TBB_TASK_GROUP_CONTEXT - //! Destroys item if pipeline was cancelled. - /** Required to prevent memory leaks. - Note it can be called concurrently even for serial filters.*/ - virtual void finalize( void* /*item*/ ) {}; -#endif - -private: - //! Pointer to next filter in the pipeline. - filter* next_filter_in_pipeline; - - //! has the filter not yet processed all the tokens it will ever see? - // (pipeline has not yet reached end_of_input or this filter has not yet - // seen the last token produced by input_filter) - bool has_more_work(); - - //! Buffer for incoming tokens, or NULL if not required. - /** The buffer is required if the filter is serial or follows a thread-bound one. */ - internal::input_buffer* my_input_buffer; - - friend class internal::stage_task; - friend class internal::pipeline_root_task; - friend class pipeline; - friend class thread_bound_filter; - - //! Storage for filter mode and dynamically checked implementation version. - const unsigned char my_filter_mode; - - //! Pointer to previous filter in the pipeline. - filter* prev_filter_in_pipeline; - - //! 
Pointer to the pipeline. - pipeline* my_pipeline; - - //! Pointer to the next "segment" of filters, or NULL if not required. - /** In each segment, the first filter is not thread-bound but follows a thread-bound one. */ - filter* next_segment; -}; - -//! A stage in a pipeline served by a user thread. -/** @ingroup algorithms */ -class thread_bound_filter: public filter { -public: - enum result_type { - // item was processed - success, - // item is currently not available - item_not_available, - // there are no more items to process - end_of_stream - }; -protected: - explicit thread_bound_filter(mode filter_mode): - filter(static_cast(filter_mode | filter::filter_is_bound)) - { - __TBB_ASSERT(filter_mode & filter::filter_is_serial, "thread-bound filters must be serial"); - } -public: - //! If a data item is available, invoke operator() on that item. - /** This interface is non-blocking. - Returns 'success' if an item was processed. - Returns 'item_not_available' if no item can be processed now - but more may arrive in the future, or if token limit is reached. - Returns 'end_of_stream' if there are no more items to process. */ - result_type __TBB_EXPORTED_METHOD try_process_item(); - - //! Wait until a data item becomes available, and invoke operator() on that item. - /** This interface is blocking. - Returns 'success' if an item was processed. - Returns 'end_of_stream' if there are no more items to process. - Never returns 'item_not_available', as it blocks until another return condition applies. */ - result_type __TBB_EXPORTED_METHOD process_item(); - -private: - //! Internal routine for item processing - result_type internal_process_item(bool is_blocking); -}; - -//! A processing pipeline that applies filters to items. -/** @ingroup algorithms */ -class pipeline { -public: - //! Construct empty pipeline. - __TBB_EXPORTED_METHOD pipeline(); - - /** Though the current implementation declares the destructor virtual, do not rely on this - detail. The virtualness is deprecated and may disappear in future versions of TBB. */ - virtual __TBB_EXPORTED_METHOD ~pipeline(); - - //! Add filter to end of pipeline. - void __TBB_EXPORTED_METHOD add_filter( filter& filter_ ); - - //! Run the pipeline to completion. - void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens ); - -#if __TBB_TASK_GROUP_CONTEXT - //! Run the pipeline to completion with user-supplied context. - void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens, tbb::task_group_context& context ); -#endif - - //! Remove all filters from the pipeline. - void __TBB_EXPORTED_METHOD clear(); - -private: - friend class internal::stage_task; - friend class internal::pipeline_root_task; - friend class filter; - friend class thread_bound_filter; - friend class internal::pipeline_cleaner; - friend class tbb::interface6::internal::pipeline_proxy; - - //! Pointer to first filter in the pipeline. - filter* filter_list; - - //! Pointer to location where address of next filter to be added should be stored. - filter* filter_end; - - //! task who's reference count is used to determine when all stages are done. - task* end_counter; - - //! Number of idle tokens waiting for input stage. - atomic input_tokens; - - //! Global counter of tokens - atomic token_counter; - - //! False until fetch_input returns NULL. - bool end_of_input; - - //! True if the pipeline contains a thread-bound filter; false otherwise. - bool has_thread_bound_filters; - - //! Remove filter from pipeline. - void remove_filter( filter& filter_ ); - - //! 
Not used, but retained to satisfy old export files. - void __TBB_EXPORTED_METHOD inject_token( task& self ); - -#if __TBB_TASK_GROUP_CONTEXT - //! Does clean up if pipeline is cancelled or exception occurred - void clear_filters(); -#endif -}; - -//------------------------------------------------------------------------ -// Support for lambda-friendly parallel_pipeline interface -//------------------------------------------------------------------------ - -namespace interface6 { - -namespace internal { - template class concrete_filter; -} - -//! input_filter control to signal end-of-input for parallel_pipeline -class flow_control { - bool is_pipeline_stopped; - flow_control() { is_pipeline_stopped = false; } - template friend class internal::concrete_filter; -public: - void stop() { is_pipeline_stopped = true; } -}; - -//! @cond INTERNAL -namespace internal { - -template struct tbb_large_object {enum { value = sizeof(T) > sizeof(void *) }; }; - -// Obtain type properties in one or another way -#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT -template struct tbb_trivially_copyable { enum { value = std::is_trivially_copyable::value }; }; -#elif __TBB_TR1_TYPE_PROPERTIES_IN_STD_PRESENT -template struct tbb_trivially_copyable { enum { value = std::has_trivial_copy_constructor::value }; }; -#else -// Explicitly list the types we wish to be placed as-is in the pipeline input_buffers. -template struct tbb_trivially_copyable { enum { value = false }; }; -template struct tbb_trivially_copyable { enum { value = true }; }; -template<> struct tbb_trivially_copyable { enum { value = true }; }; -template<> struct tbb_trivially_copyable { enum { value = true }; }; -template<> struct tbb_trivially_copyable { enum { value = !tbb_large_object::value }; }; -template<> struct tbb_trivially_copyable { enum { value = !tbb_large_object::value }; }; -template<> struct tbb_trivially_copyable { enum { value = !tbb_large_object::value }; }; -template<> struct tbb_trivially_copyable { enum { value = !tbb_large_object::value }; }; -template<> struct tbb_trivially_copyable { enum { value = !tbb_large_object::value }; }; -template<> struct tbb_trivially_copyable { enum { value = !tbb_large_object::value }; }; -#endif // Obtaining type properties - -template struct is_large_object {enum { value = tbb_large_object::value || !tbb_trivially_copyable::value }; }; - -template class token_helper; - -// large object helper (uses tbb_allocator) -template -class token_helper { - public: - typedef typename tbb::tbb_allocator allocator; - typedef T* pointer; - typedef T value_type; - static pointer create_token(const value_type & source) { - pointer output_t = allocator().allocate(1); - return new (output_t) T(source); - } - static value_type & token(pointer & t) { return *t;} - static void * cast_to_void_ptr(pointer ref) { return (void *) ref; } - static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; } - static void destroy_token(pointer token) { - allocator().destroy(token); - allocator().deallocate(token,1); - } -}; - -// pointer specialization -template -class token_helper { - public: - typedef T* pointer; - typedef T* value_type; - static pointer create_token(const value_type & source) { return source; } - static value_type & token(pointer & t) { return t;} - static void * cast_to_void_ptr(pointer ref) { return (void *)ref; } - static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; } - static void destroy_token( pointer /*token*/) {} -}; - -// small object specialization (converts void* to the correct 
type, passes objects directly.) -template -class token_helper { - typedef union { - T actual_value; - void * void_overlay; - } type_to_void_ptr_map; - public: - typedef T pointer; // not really a pointer in this case. - typedef T value_type; - static pointer create_token(const value_type & source) { - return source; } - static value_type & token(pointer & t) { return t;} - static void * cast_to_void_ptr(pointer ref) { - type_to_void_ptr_map mymap; - mymap.void_overlay = NULL; - mymap.actual_value = ref; - return mymap.void_overlay; - } - static pointer cast_from_void_ptr(void * ref) { - type_to_void_ptr_map mymap; - mymap.void_overlay = ref; - return mymap.actual_value; - } - static void destroy_token( pointer /*token*/) {} -}; - -template -class concrete_filter: public tbb::filter { - const Body& my_body; - typedef token_helper::value > t_helper; - typedef typename t_helper::pointer t_pointer; - typedef token_helper::value > u_helper; - typedef typename u_helper::pointer u_pointer; - - void* operator()(void* input) __TBB_override { - t_pointer temp_input = t_helper::cast_from_void_ptr(input); - u_pointer output_u = u_helper::create_token(my_body(t_helper::token(temp_input))); - t_helper::destroy_token(temp_input); - return u_helper::cast_to_void_ptr(output_u); - } - - void finalize(void * input) __TBB_override { - t_pointer temp_input = t_helper::cast_from_void_ptr(input); - t_helper::destroy_token(temp_input); - } - -public: - concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} -}; - -// input -template -class concrete_filter: public filter { - const Body& my_body; - typedef token_helper::value > u_helper; - typedef typename u_helper::pointer u_pointer; - - void* operator()(void*) __TBB_override { - flow_control control; - u_pointer output_u = u_helper::create_token(my_body(control)); - if(control.is_pipeline_stopped) { - u_helper::destroy_token(output_u); - set_end_of_input(); - return NULL; - } - return u_helper::cast_to_void_ptr(output_u); - } - -public: - concrete_filter(tbb::filter::mode filter_mode, const Body& body) : - filter(static_cast(filter_mode | filter_may_emit_null)), - my_body(body) - {} -}; - -template -class concrete_filter: public filter { - const Body& my_body; - typedef token_helper::value > t_helper; - typedef typename t_helper::pointer t_pointer; - - void* operator()(void* input) __TBB_override { - t_pointer temp_input = t_helper::cast_from_void_ptr(input); - my_body(t_helper::token(temp_input)); - t_helper::destroy_token(temp_input); - return NULL; - } - void finalize(void* input) __TBB_override { - t_pointer temp_input = t_helper::cast_from_void_ptr(input); - t_helper::destroy_token(temp_input); - } - -public: - concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} -}; - -template -class concrete_filter: public filter { - const Body& my_body; - - /** Override privately because it is always called virtually */ - void* operator()(void*) __TBB_override { - flow_control control; - my_body(control); - void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_t)-1; - return output; - } -public: - concrete_filter(filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} -}; - -//! The class that represents an object of the pipeline for parallel_pipeline(). -/** It primarily serves as RAII class that deletes heap-allocated filter instances. 
*/ -class pipeline_proxy { - tbb::pipeline my_pipe; -public: - pipeline_proxy( const filter_t& filter_chain ); - ~pipeline_proxy() { - while( filter* f = my_pipe.filter_list ) - delete f; // filter destructor removes it from the pipeline - } - tbb::pipeline* operator->() { return &my_pipe; } -}; - -//! Abstract base class that represents a node in a parse tree underlying a filter_t. -/** These nodes are always heap-allocated and can be shared by filter_t objects. */ -class filter_node: tbb::internal::no_copy { - /** Count must be atomic because it is hidden state for user, but might be shared by threads. */ - tbb::atomic ref_count; -protected: - filter_node() { - ref_count = 0; -#ifdef __TBB_TEST_FILTER_NODE_COUNT - ++(__TBB_TEST_FILTER_NODE_COUNT); -#endif - } -public: - //! Add concrete_filter to pipeline - virtual void add_to( pipeline& ) = 0; - //! Increment reference count - void add_ref() {++ref_count;} - //! Decrement reference count and delete if it becomes zero. - void remove_ref() { - __TBB_ASSERT(ref_count>0,"ref_count underflow"); - if( --ref_count==0 ) - delete this; - } - virtual ~filter_node() { -#ifdef __TBB_TEST_FILTER_NODE_COUNT - --(__TBB_TEST_FILTER_NODE_COUNT); -#endif - } -}; - -//! Node in parse tree representing result of make_filter. -template -class filter_node_leaf: public filter_node { - const tbb::filter::mode mode; - const Body body; - void add_to( pipeline& p ) __TBB_override { - concrete_filter* f = new concrete_filter(mode,body); - p.add_filter( *f ); - } -public: - filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body(b) {} -}; - -//! Node in parse tree representing join of two filters. -class filter_node_join: public filter_node { - friend class filter_node; // to suppress GCC 3.2 warnings - filter_node& left; - filter_node& right; - ~filter_node_join() { - left.remove_ref(); - right.remove_ref(); - } - void add_to( pipeline& p ) __TBB_override { - left.add_to(p); - right.add_to(p); - } -public: - filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y) { - left.add_ref(); - right.add_ref(); - } -}; - -} // namespace internal -//! @endcond - -//! Create a filter to participate in parallel_pipeline -template -filter_t make_filter(tbb::filter::mode mode, const Body& body) { - return new internal::filter_node_leaf(mode, body); -} - -template -filter_t operator& (const filter_t& left, const filter_t& right) { - __TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left argument of '&'"); - __TBB_ASSERT(right.root,"cannot use default-constructed filter_t as right argument of '&'"); - return new internal::filter_node_join(*left.root,*right.root); -} - -//! Class representing a chain of type-safe pipeline filters -template -class filter_t { - typedef internal::filter_node filter_node; - filter_node* root; - filter_t( filter_node* root_ ) : root(root_) { - root->add_ref(); - } - friend class internal::pipeline_proxy; - template - friend filter_t make_filter(tbb::filter::mode, const Body& ); - template - friend filter_t operator& (const filter_t& , const filter_t& ); -public: - // TODO: add move-constructors, move-assignment, etc. where C++11 is available. 
-    filter_t() : root(NULL) {}
-    filter_t( const filter_t<T,U>& rhs ) : root(rhs.root) {
-        if( root ) root->add_ref();
-    }
-    template<typename Body>
-    filter_t( tbb::filter::mode mode, const Body& body ) :
-        root( new internal::filter_node_leaf<T,U,Body>(mode, body) ) {
-        root->add_ref();
-    }
-
-    void operator=( const filter_t<T,U>& rhs ) {
-        // Order of operations below carefully chosen so that reference counts remain correct
-        // in unlikely event that remove_ref throws exception.
-        filter_node* old = root;
-        root = rhs.root;
-        if( root ) root->add_ref();
-        if( old ) old->remove_ref();
-    }
-    ~filter_t() {
-        if( root ) root->remove_ref();
-    }
-    void clear() {
-        // Like operator= with filter_t() on right side.
-        if( root ) {
-            filter_node* old = root;
-            root = NULL;
-            old->remove_ref();
-        }
-    }
-};
-
-inline internal::pipeline_proxy::pipeline_proxy( const filter_t<void,void>& filter_chain ) : my_pipe() {
-    __TBB_ASSERT( filter_chain.root, "cannot apply parallel_pipeline to default-constructed filter_t" );
-    filter_chain.root->add_to(my_pipe);
-}
-
-inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain
-#if __TBB_TASK_GROUP_CONTEXT
-    , tbb::task_group_context& context
-#endif
-    ) {
-    internal::pipeline_proxy pipe(filter_chain);
-    // tbb::pipeline::run() is called via the proxy
-    pipe->run(max_number_of_live_tokens
-#if __TBB_TASK_GROUP_CONTEXT
-              , context
-#endif
-    );
-}
-
-#if __TBB_TASK_GROUP_CONTEXT
-inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain) {
-    tbb::task_group_context context;
-    parallel_pipeline(max_number_of_live_tokens, filter_chain, context);
-}
-#endif // __TBB_TASK_GROUP_CONTEXT
-
-} // interface6
-
-using interface6::flow_control;
-using interface6::filter_t;
-using interface6::make_filter;
-using interface6::parallel_pipeline;
-
-} // tbb
-
-#endif /* __TBB_pipeline_H */
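For context, the lambda-friendly interface deleted above composed filters with make_filter and operator& and ran the chain with parallel_pipeline. An illustrative sketch; the stage bodies and the token limit of 8 are arbitrary choices made for this example:

    #include <iostream>
    #include "tbb/pipeline.h"

    int main() {
        int counter = 0;
        long long sum = 0;
        tbb::parallel_pipeline( /*max_number_of_live_tokens=*/8,
            // Serial input stage: emits 0..99, then signals end of input.
            tbb::make_filter<void,int>( tbb::filter::serial_in_order,
                [&]( tbb::flow_control& fc ) -> int {
                    if( counter == 100 ) { fc.stop(); return 0; }
                    return counter++;
                } ) &
            // Parallel transform stage: tokens may be processed out of order.
            tbb::make_filter<int,long long>( tbb::filter::parallel,
                []( int x ) -> long long { return (long long)x * x; } ) &
            // Serial output stage: receives tokens back in input order.
            tbb::make_filter<long long,void>( tbb::filter::serial_in_order,
                [&]( long long x ) { sum += x; } ) );
        std::cout << sum << std::endl; // 328350, the sum of squares 0..99
        return 0;
    }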
diff --git a/lib/3rdParty/tbb/include/tbb/queuing_mutex.h b/lib/3rdParty/tbb/include/tbb/queuing_mutex.h
deleted file mode 100644
index 0fe4b3ea..00000000
--- a/lib/3rdParty/tbb/include/tbb/queuing_mutex.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_queuing_mutex_H
-#define __TBB_queuing_mutex_H
-
-#include "tbb_config.h"
-
-#if !TBB_USE_EXCEPTIONS && _MSC_VER
-    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
-    #pragma warning (push)
-    #pragma warning (disable: 4530)
-#endif
-
-#include <cstring>
-
-#if !TBB_USE_EXCEPTIONS && _MSC_VER
-    #pragma warning (pop)
-#endif
-
-#include "atomic.h"
-#include "tbb_profiling.h"
-
-namespace tbb {
-
-//! Queuing mutex with local-only spinning.
-/** @ingroup synchronization */
-class queuing_mutex : internal::mutex_copy_deprecated_and_disabled {
-public:
-    //! Construct unacquired mutex.
-    queuing_mutex() {
-        q_tail = NULL;
-#if TBB_USE_THREADING_TOOLS
-        internal_construct();
-#endif
-    }
-
-    //! The scoped locking pattern
-    /** It helps to avoid the common problem of forgetting to release lock.
-        It also nicely provides the "node" for queuing locks. */
-    class scoped_lock: internal::no_copy {
-        //! Initialize fields to mean "no lock held".
-        void initialize() {
-            mutex = NULL;
-#if TBB_USE_ASSERT
-            internal::poison_pointer(next);
-#endif /* TBB_USE_ASSERT */
-        }
-
-    public:
-        //! Construct lock that has not acquired a mutex.
-        /** Equivalent to zero-initialization of *this. */
-        scoped_lock() {initialize();}
-
-        //! Acquire lock on given mutex.
-        scoped_lock( queuing_mutex& m ) {
-            initialize();
-            acquire(m);
-        }
-
-        //! Release lock (if lock is held).
-        ~scoped_lock() {
-            if( mutex ) release();
-        }
-
-        //! Acquire lock on given mutex.
-        void __TBB_EXPORTED_METHOD acquire( queuing_mutex& m );
-
-        //! Acquire lock on given mutex if free (i.e. non-blocking)
-        bool __TBB_EXPORTED_METHOD try_acquire( queuing_mutex& m );
-
-        //! Release lock.
-        void __TBB_EXPORTED_METHOD release();
-
-    private:
-        //! The pointer to the mutex owned, or NULL if not holding a mutex.
-        queuing_mutex* mutex;
-
-        //! The pointer to the next competitor for a mutex
-        scoped_lock *next;
-
-        //! The local spin-wait variable
-        /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of
-            zero-initialization. Defining it as an entire word instead of
-            a byte seems to help performance slightly. */
-        uintptr_t going;
-    };
-
-    void __TBB_EXPORTED_METHOD internal_construct();
-
-    // Mutex traits
-    static const bool is_rw_mutex = false;
-    static const bool is_recursive_mutex = false;
-    static const bool is_fair_mutex = true;
-
-private:
-    //! The last competitor requesting the lock
-    atomic<scoped_lock*> q_tail;
-
-};
-
-__TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex)
-
-} // namespace tbb
-
-#endif /* __TBB_queuing_mutex_H */
diff --git a/lib/3rdParty/tbb/include/tbb/queuing_rw_mutex.h b/lib/3rdParty/tbb/include/tbb/queuing_rw_mutex.h
deleted file mode 100644
index e0224ed5..00000000
--- a/lib/3rdParty/tbb/include/tbb/queuing_rw_mutex.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_queuing_rw_mutex_H
-#define __TBB_queuing_rw_mutex_H
-
-#include "tbb_config.h"
-
-#if !TBB_USE_EXCEPTIONS && _MSC_VER
-    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
-    #pragma warning (push)
-    #pragma warning (disable: 4530)
-#endif
-
-#include <cstring>
-
-#if !TBB_USE_EXCEPTIONS && _MSC_VER
-    #pragma warning (pop)
-#endif
-
-#include "atomic.h"
-#include "tbb_profiling.h"
-
-namespace tbb {
-
-//! Queuing reader-writer mutex with local-only spinning.
-/** Adapted from Krieger, Stumm, et al. pseudocode at
-    http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93
-    @ingroup synchronization */
-class queuing_rw_mutex : internal::mutex_copy_deprecated_and_disabled {
-public:
-    //! Construct unacquired mutex.
-    queuing_rw_mutex() {
-        q_tail = NULL;
-#if TBB_USE_THREADING_TOOLS
-        internal_construct();
-#endif
-    }
-
-    //! Destructor asserts if the mutex is acquired, i.e.
q_tail is non-NULL - ~queuing_rw_mutex() { -#if TBB_USE_ASSERT - __TBB_ASSERT( !q_tail, "destruction of an acquired mutex"); -#endif - } - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock: internal::no_copy { - //! Initialize fields to mean "no lock held". - void initialize() { - my_mutex = NULL; -#if TBB_USE_ASSERT - my_state = 0xFF; // Set to invalid state - internal::poison_pointer(my_next); - internal::poison_pointer(my_prev); -#endif /* TBB_USE_ASSERT */ - } - - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - scoped_lock() {initialize();} - - //! Acquire lock on given mutex. - scoped_lock( queuing_rw_mutex& m, bool write=true ) { - initialize(); - acquire(m,write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if( my_mutex ) release(); - } - - //! Acquire lock on given mutex. - void acquire( queuing_rw_mutex& m, bool write=true ); - - //! Acquire lock on given mutex if free (i.e. non-blocking) - bool try_acquire( queuing_rw_mutex& m, bool write=true ); - - //! Release lock. - void release(); - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade_to_writer(); - - //! Downgrade writer to become a reader. - bool downgrade_to_reader(); - - private: - //! The pointer to the mutex owned, or NULL if not holding a mutex. - queuing_rw_mutex* my_mutex; - - //! The pointer to the previous and next competitors for a mutex - scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next; - - typedef unsigned char state_t; - - //! State of the request: reader, writer, active reader, other service states - atomic my_state; - - //! The local spin-wait variable - /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ - unsigned char __TBB_atomic my_going; - - //! A tiny internal lock - unsigned char my_internal_lock; - - //! Acquire the internal lock - void acquire_internal_lock(); - - //! Try to acquire the internal lock - /** Returns true if lock was successfully acquired. */ - bool try_acquire_internal_lock(); - - //! Release the internal lock - void release_internal_lock(); - - //! Wait for internal lock to be released - void wait_for_release_of_internal_lock(); - - //! A helper function - void unblock_or_wait_on_internal_lock( uintptr_t ); - }; - - void __TBB_EXPORTED_METHOD internal_construct(); - - // Mutex traits - static const bool is_rw_mutex = true; - static const bool is_recursive_mutex = false; - static const bool is_fair_mutex = true; - -private: - //! The last competitor requesting the lock - atomic q_tail; - -}; - -__TBB_DEFINE_PROFILING_SET_NAME(queuing_rw_mutex) - -} // namespace tbb - -#endif /* __TBB_queuing_rw_mutex_H */ diff --git a/lib/3rdParty/tbb/include/tbb/reader_writer_lock.h b/lib/3rdParty/tbb/include/tbb/reader_writer_lock.h deleted file mode 100644 index 353beec5..00000000 --- a/lib/3rdParty/tbb/include/tbb/reader_writer_lock.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_reader_writer_lock_H -#define __TBB_reader_writer_lock_H - -#include "tbb_thread.h" -#include "tbb_allocator.h" -#include "atomic.h" - -namespace tbb { -namespace interface5 { -//! Writer-preference reader-writer lock with local-only spinning on readers. -/** Loosely adapted from Mellor-Crummey and Scott pseudocode at - http://www.cs.rochester.edu/research/synchronization/pseudocode/rw.html#s_wp - @ingroup synchronization */ - class reader_writer_lock : tbb::internal::no_copy { - public: - friend class scoped_lock; - friend class scoped_lock_read; - //! Status type for nodes associated with lock instances - /** waiting_nonblocking: the wait state for nonblocking lock - instances; for writes, these transition straight to active - states; for reads, these are unused. - - waiting: the start and spin state for all lock instances; these will - transition to active state when appropriate. Non-blocking write locks - transition from this state to waiting_nonblocking immediately. - - active: the active state means that the lock instance holds - the lock; it will transition to invalid state during node deletion - - invalid: the end state for all nodes; this is set in the - destructor so if we encounter this state, we are looking at - memory that has already been freed - - The state diagrams below describe the status transitions. - Single arrows indicate that the thread that owns the node is - responsible for the transition; double arrows indicate that - any thread could make the transition. - - State diagram for scoped_lock status: - - waiting ----------> waiting_nonblocking - | _____________/ | - V V V - active -----------------> invalid - - State diagram for scoped_lock_read status: - - waiting - | - V - active ----------------->invalid - - */ - enum status_t { waiting_nonblocking, waiting, active, invalid }; - - //! Constructs a new reader_writer_lock - reader_writer_lock() { - internal_construct(); - } - - //! Destructs a reader_writer_lock object - ~reader_writer_lock() { - internal_destroy(); - } - - //! The scoped lock pattern for write locks - /** Scoped locks help avoid the common problem of forgetting to release the lock. - This type also serves as the node for queuing locks. */ - class scoped_lock : tbb::internal::no_copy { - public: - friend class reader_writer_lock; - - //! Construct with blocking attempt to acquire write lock on the passed-in lock - scoped_lock(reader_writer_lock& lock) { - internal_construct(lock); - } - - //! Destructor, releases the write lock - ~scoped_lock() { - internal_destroy(); - } - - void* operator new(size_t s) { - return tbb::internal::allocate_via_handler_v3(s); - } - void operator delete(void* p) { - tbb::internal::deallocate_via_handler_v3(p); - } - - private: - //! The pointer to the mutex to lock - reader_writer_lock *mutex; - //! The next queued competitor for the mutex - scoped_lock* next; - //! Status flag of the thread associated with this node - atomic status; - - //! 
Construct scoped_lock that is not holding lock - scoped_lock(); - - void __TBB_EXPORTED_METHOD internal_construct(reader_writer_lock&); - void __TBB_EXPORTED_METHOD internal_destroy(); - }; - - //! The scoped lock pattern for read locks - class scoped_lock_read : tbb::internal::no_copy { - public: - friend class reader_writer_lock; - - //! Construct with blocking attempt to acquire read lock on the passed-in lock - scoped_lock_read(reader_writer_lock& lock) { - internal_construct(lock); - } - - //! Destructor, releases the read lock - ~scoped_lock_read() { - internal_destroy(); - } - - void* operator new(size_t s) { - return tbb::internal::allocate_via_handler_v3(s); - } - void operator delete(void* p) { - tbb::internal::deallocate_via_handler_v3(p); - } - - private: - //! The pointer to the mutex to lock - reader_writer_lock *mutex; - //! The next queued competitor for the mutex - scoped_lock_read *next; - //! Status flag of the thread associated with this node - atomic status; - - //! Construct scoped_lock_read that is not holding lock - scoped_lock_read(); - - void __TBB_EXPORTED_METHOD internal_construct(reader_writer_lock&); - void __TBB_EXPORTED_METHOD internal_destroy(); - }; - - //! Acquires the reader_writer_lock for write. - /** If the lock is currently held in write mode by another - context, the writer will block by spinning on a local - variable. Exceptions thrown: improper_lock The context tries - to acquire a reader_writer_lock that it already has write - ownership of.*/ - void __TBB_EXPORTED_METHOD lock(); - - //! Tries to acquire the reader_writer_lock for write. - /** This function does not block. Return Value: True or false, - depending on whether the lock is acquired or not. If the lock - is already held by this acquiring context, try_lock() returns - false. */ - bool __TBB_EXPORTED_METHOD try_lock(); - - //! Acquires the reader_writer_lock for read. - /** If the lock is currently held by a writer, this reader will - block and wait until the writers are done. Exceptions thrown: - improper_lock The context tries to acquire a - reader_writer_lock that it already has write ownership of. */ - void __TBB_EXPORTED_METHOD lock_read(); - - //! Tries to acquire the reader_writer_lock for read. - /** This function does not block. Return Value: True or false, - depending on whether the lock is acquired or not. */ - bool __TBB_EXPORTED_METHOD try_lock_read(); - - //! Releases the reader_writer_lock - void __TBB_EXPORTED_METHOD unlock(); - - private: - void __TBB_EXPORTED_METHOD internal_construct(); - void __TBB_EXPORTED_METHOD internal_destroy(); - - //! Attempts to acquire write lock - /** If unavailable, spins in blocking case, returns false in non-blocking case. */ - bool start_write(scoped_lock *); - //! Sets writer_head to w and attempts to unblock - void set_next_writer(scoped_lock *w); - //! Relinquishes write lock to next waiting writer or group of readers - void end_write(scoped_lock *); - //! Checks if current thread holds write lock - bool is_current_writer(); - - //! Attempts to acquire read lock - /** If unavailable, spins in blocking case, returns false in non-blocking case. */ - void start_read(scoped_lock_read *); - //! Unblocks pending readers - void unblock_readers(); - //! Relinquishes read lock by decrementing counter; last reader wakes pending writer - void end_read(); - - //! The list of pending readers - atomic reader_head; - //! The list of pending writers - atomic writer_head; - //! The last node in the list of pending writers - atomic writer_tail; - //! 
-    tbb_thread::id my_current_writer;
-    //! Status of mutex
-    atomic<uintptr_t> rdr_count_and_flags; // used with __TBB_AtomicOR, which assumes uintptr_t
-};
-
-} // namespace interface5
-
-using interface5::reader_writer_lock;
-
-} // namespace tbb
-
-#endif /* __TBB_reader_writer_lock_H */
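Editor's note: a minimal usage sketch of the lock deleted above, assuming the TBB 2017-era API shown in this hunk; it is an illustration by the editor, not code from this repository.

    // Writers take scoped_lock, readers take scoped_lock_read; both release on scope exit.
    #include "tbb/reader_writer_lock.h"

    tbb::reader_writer_lock rw_lock;
    int shared_value = 0;

    void writer() {
        tbb::reader_writer_lock::scoped_lock guard(rw_lock);       // blocks for write ownership
        ++shared_value;
    }                                                              // write lock released here

    int reader() {
        tbb::reader_writer_lock::scoped_lock_read guard(rw_lock);  // readers may run concurrently
        return shared_value;
    }                                                              // read lock released here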
diff --git a/lib/3rdParty/tbb/include/tbb/recursive_mutex.h b/lib/3rdParty/tbb/include/tbb/recursive_mutex.h
deleted file mode 100644
index 5a23c097..00000000
--- a/lib/3rdParty/tbb/include/tbb/recursive_mutex.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_recursive_mutex_H
-#define __TBB_recursive_mutex_H
-
-#if _WIN32||_WIN64
-#include "machine/windows_api.h"
-#else
-#include <pthread.h>
-#endif /* _WIN32||_WIN64 */
-
-#include <new>
-#include "aligned_space.h"
-#include "tbb_stddef.h"
-#include "tbb_profiling.h"
-
-namespace tbb {
-//! Mutex that allows recursive mutex acquisition.
-/** Mutex that allows recursive mutex acquisition.
-    @ingroup synchronization */
-class recursive_mutex : internal::mutex_copy_deprecated_and_disabled {
-public:
-    //! Construct unacquired recursive_mutex.
-    recursive_mutex() {
-#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS
-        internal_construct();
-#else
-  #if _WIN32||_WIN64
-        InitializeCriticalSectionEx(&impl, 4000, 0);
-  #else
-        pthread_mutexattr_t mtx_attr;
-        int error_code = pthread_mutexattr_init( &mtx_attr );
-        if( error_code )
-            tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutexattr_init failed");
-
-        pthread_mutexattr_settype( &mtx_attr, PTHREAD_MUTEX_RECURSIVE );
-        error_code = pthread_mutex_init( &impl, &mtx_attr );
-        if( error_code )
-            tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutex_init failed");
-
-        pthread_mutexattr_destroy( &mtx_attr );
-  #endif /* _WIN32||_WIN64*/
-#endif /* TBB_USE_ASSERT */
-    };
-
-    ~recursive_mutex() {
-#if TBB_USE_ASSERT
-        internal_destroy();
-#else
-  #if _WIN32||_WIN64
-        DeleteCriticalSection(&impl);
-  #else
-        pthread_mutex_destroy(&impl);
-
-  #endif /* _WIN32||_WIN64 */
-#endif /* TBB_USE_ASSERT */
-    };
-
-    class scoped_lock;
-    friend class scoped_lock;
-
-    //! The scoped locking pattern
-    /** It helps to avoid the common problem of forgetting to release lock.
-        It also nicely provides the "node" for queuing locks. */
-    class scoped_lock: internal::no_copy {
-    public:
-        //! Construct lock that has not acquired a recursive_mutex.
-        scoped_lock() : my_mutex(NULL) {};
-
-        //! Acquire lock on given mutex.
-        scoped_lock( recursive_mutex& mutex ) {
-#if TBB_USE_ASSERT
-            my_mutex = &mutex;
-#endif /* TBB_USE_ASSERT */
-            acquire( mutex );
-        }
-
-        //! Release lock (if lock is held).
-        ~scoped_lock() {
-            if( my_mutex )
-                release();
-        }
-
-        //! Acquire lock on given mutex.
-        void acquire( recursive_mutex& mutex ) {
-#if TBB_USE_ASSERT
-            internal_acquire( mutex );
-#else
-            my_mutex = &mutex;
-            mutex.lock();
-#endif /* TBB_USE_ASSERT */
-        }
-
-        //! Try acquire lock on given recursive_mutex.
-        bool try_acquire( recursive_mutex& mutex ) {
-#if TBB_USE_ASSERT
-            return internal_try_acquire( mutex );
-#else
-            bool result = mutex.try_lock();
-            if( result )
-                my_mutex = &mutex;
-            return result;
-#endif /* TBB_USE_ASSERT */
-        }
-
-        //! Release lock
-        void release() {
-#if TBB_USE_ASSERT
-            internal_release();
-#else
-            my_mutex->unlock();
-            my_mutex = NULL;
-#endif /* TBB_USE_ASSERT */
-        }
-
-    private:
-        //! The pointer to the current recursive_mutex to work
-        recursive_mutex* my_mutex;
-
-        //! All checks from acquire using mutex.state were moved here
-        void __TBB_EXPORTED_METHOD internal_acquire( recursive_mutex& m );
-
-        //! All checks from try_acquire using mutex.state were moved here
-        bool __TBB_EXPORTED_METHOD internal_try_acquire( recursive_mutex& m );
-
-        //! All checks from release using mutex.state were moved here
-        void __TBB_EXPORTED_METHOD internal_release();
-
-        friend class recursive_mutex;
-    };
-
-    // Mutex traits
-    static const bool is_rw_mutex = false;
-    static const bool is_recursive_mutex = true;
-    static const bool is_fair_mutex = false;
-
-    // C++0x compatibility interface
-
-    //! Acquire lock
-    void lock() {
-#if TBB_USE_ASSERT
-        aligned_space<scoped_lock> tmp;
-        new(tmp.begin()) scoped_lock(*this);
-#else
-  #if _WIN32||_WIN64
-        EnterCriticalSection(&impl);
-  #else
-        int error_code = pthread_mutex_lock(&impl);
-        if( error_code )
-            tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutex_lock failed");
-  #endif /* _WIN32||_WIN64 */
-#endif /* TBB_USE_ASSERT */
-    }
-
-    //! Try acquiring lock (non-blocking)
-    /** Return true if lock acquired; false otherwise. */
-    bool try_lock() {
-#if TBB_USE_ASSERT
-        aligned_space<scoped_lock> tmp;
-        return (new(tmp.begin()) scoped_lock)->internal_try_acquire(*this);
-#else
-  #if _WIN32||_WIN64
-        return TryEnterCriticalSection(&impl)!=0;
-  #else
-        return pthread_mutex_trylock(&impl)==0;
-  #endif /* _WIN32||_WIN64 */
-#endif /* TBB_USE_ASSERT */
-    }
-
-    //! Release lock
-    void unlock() {
-#if TBB_USE_ASSERT
-        aligned_space<scoped_lock> tmp;
-        scoped_lock& s = *tmp.begin();
-        s.my_mutex = this;
-        s.internal_release();
-#else
-  #if _WIN32||_WIN64
-        LeaveCriticalSection(&impl);
-  #else
-        pthread_mutex_unlock(&impl);
-  #endif /* _WIN32||_WIN64 */
-#endif /* TBB_USE_ASSERT */
-    }
-
-    //! Return native_handle
-  #if _WIN32||_WIN64
-    typedef LPCRITICAL_SECTION native_handle_type;
-  #else
-    typedef pthread_mutex_t* native_handle_type;
-  #endif
-    native_handle_type native_handle() { return (native_handle_type) &impl; }
-
-private:
-#if _WIN32||_WIN64
-    CRITICAL_SECTION impl;
-    enum state_t {
-        INITIALIZED=0x1234,
-        DESTROYED=0x789A,
-    } state;
-#else
-    pthread_mutex_t impl;
-#endif /* _WIN32||_WIN64 */
-
-    //! All checks from mutex constructor using mutex.state were moved here
-    void __TBB_EXPORTED_METHOD internal_construct();
-
-    //! All checks from mutex destructor using mutex.state were moved here
-    void __TBB_EXPORTED_METHOD internal_destroy();
-};
-
-__TBB_DEFINE_PROFILING_SET_NAME(recursive_mutex)
-
-} // namespace tbb
-
-#endif /* __TBB_recursive_mutex_H */
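Editor's note: a sketch of the recursive acquisition this header permits (editor's illustration under the API deleted above, not code from this repository):

    #include "tbb/recursive_mutex.h"

    tbb::recursive_mutex m;

    void inner() {
        tbb::recursive_mutex::scoped_lock guard(m);  // re-acquisition by the owning thread succeeds
    }

    void outer() {
        tbb::recursive_mutex::scoped_lock guard(m);  // first acquisition
        inner();                                     // a non-recursive mutex would deadlock here
    }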
diff --git a/lib/3rdParty/tbb/include/tbb/runtime_loader.h b/lib/3rdParty/tbb/include/tbb/runtime_loader.h
deleted file mode 100644
index df284645..00000000
--- a/lib/3rdParty/tbb/include/tbb/runtime_loader.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_runtime_loader_H
-#define __TBB_runtime_loader_H
-
-#if ! TBB_PREVIEW_RUNTIME_LOADER
-    #error Set TBB_PREVIEW_RUNTIME_LOADER to include runtime_loader.h
-#endif
-
-#include "tbb_stddef.h"
-#include <climits>
-
-#if _MSC_VER
-    #if ! __TBB_NO_IMPLICIT_LINKAGE
-        #ifdef _DEBUG
-            #pragma comment( linker, "/nodefaultlib:tbb_debug.lib" )
-            #pragma comment( linker, "/defaultlib:tbbproxy_debug.lib" )
-        #else
-            #pragma comment( linker, "/nodefaultlib:tbb.lib" )
-            #pragma comment( linker, "/defaultlib:tbbproxy.lib" )
-        #endif
-    #endif
-#endif
-
-namespace tbb {
-
-namespace interface6 {
-
-//! Load TBB at runtime.
-/*!
-
-\b Usage:
-
-In source code:
-
-\code
-#include "tbb/runtime_loader.h"
-
-char const * path[] = { "/lib/ia32", NULL };
-tbb::runtime_loader loader( path );
-
-// Now use TBB.
-\endcode
-
-Link with \c tbbproxy.lib (or \c libtbbproxy.a) instead of \c tbb.lib (\c libtbb.dylib,
-\c libtbb.so).
-
-TBB library will be loaded at runtime from \c /lib/ia32 directory.
-
-\b Attention:
-
-All \c runtime_loader objects (in the same module, i.e. exe or dll) share some global state.
-The most noticeable piece of global state is loaded TBB library.
-There are some implications:
-
-    - Only one TBB library can be loaded per module.
-
-    - If one object has already loaded TBB library, another object will not load TBB.
-      If the loaded TBB library is suitable for the second object, both will use TBB
-      cooperatively, otherwise the second object will report an error.
-
-    - \c runtime_loader objects will not work (correctly) in parallel due to absence of
-      synchronization.
-
-*/
-
-class runtime_loader : tbb::internal::no_copy {
-
-    public:
-
-        //! Error mode constants.
-        enum error_mode {
-            em_status,     //!< Save status of operation and continue.
-            em_throw,      //!< Throw an exception of tbb::runtime_loader::error_code type.
-            em_abort       //!< Print message to \c stderr and call \c abort().
-        }; // error_mode
-
-        //! Error codes.
-        enum error_code {
-            ec_ok,         //!< No errors.
-            ec_bad_call,   //!< Invalid function call (e. g. load() called when TBB is already loaded).
-            ec_bad_arg,    //!< Invalid argument passed.
-            ec_bad_lib,    //!< Invalid library found (e. g. \c TBB_runtime_version symbol not found).
-            ec_bad_ver,    //!< TBB found but version is not suitable.
-            ec_no_lib      //!< No suitable TBB library found.
-        }; // error_code
-
-        //! Initialize object but do not load TBB.
-        runtime_loader( error_mode mode = em_abort );
-
-        //! Initialize object and load TBB.
-        /*!
-            See load() for details.
-
-            If error mode is \c em_status, call status() to check whether TBB was loaded or not.
-        */
-        runtime_loader(
-            char const * path[],                        //!< List of directories to search TBB in.
-            int          min_ver = TBB_INTERFACE_VERSION,  //!< Minimal suitable version of TBB.
-            int          max_ver = INT_MAX,                //!< Maximal suitable version of TBB.
-            error_mode   mode    = em_abort                //!< Error mode for this object.
-        );
-
-        //! Destroy object.
-        ~runtime_loader();
-        //! Load TBB.
-        /*!
-            The method searches the directories specified in \c path[] array for the TBB library.
-            When the library is found, it is loaded and its version is checked. If the version is
-            not suitable, the library is unloaded, and the search continues.
-
-            \b Note:
-
-            For security reasons, avoid using relative directory names. For example, never load
-            TBB from current (\c "."), parent (\c "..") or any other relative directory (like
-            \c "lib" ). Use only absolute directory names (e. g. "/usr/local/lib").
-
-            For the same security reasons, avoid using system default directories (\c "") on
-            Windows. (See http://www.microsoft.com/technet/security/advisory/2269637.mspx for
-            details.)
-
-            Neglecting these rules may cause your program to execute 3-rd party malicious code.
-
-            \b Errors:
-                - \c ec_bad_call - TBB already loaded by this object.
-                - \c ec_bad_arg - \p min_ver and/or \p max_ver negative or zero,
-                  or \p min_ver > \p max_ver.
-                - \c ec_bad_ver - TBB of unsuitable version already loaded by another object.
-                - \c ec_no_lib - No suitable library found.
-        */
-        error_code
-        load(
-            char const * path[],                           //!< List of directories to search TBB in.
-            int          min_ver = TBB_INTERFACE_VERSION,  //!< Minimal suitable version of TBB.
-            int          max_ver = INT_MAX                 //!< Maximal suitable version of TBB.
-
-        );
-
-
-        //! Report status.
-        /*!
-            If error mode is \c em_status, the function returns status of the last operation.
-        */
-        error_code status();
-
-    private:
-
-        error_mode const my_mode;
-        error_code       my_status;
-        bool             my_loaded;
-
-}; // class runtime_loader
-
-} // namespace interface6
-
-using interface6::runtime_loader;
-
-} // namespace tbb
-
-#endif /* __TBB_runtime_loader_H */
-
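Editor's note: the header's own \code sample uses the aborting default; below is a sketch of the non-aborting em_status mode described above (editor's illustration only; the search path is hypothetical, and the program must link tbbproxy instead of tbb):

    #define TBB_PREVIEW_RUNTIME_LOADER 1
    #include "tbb/runtime_loader.h"
    #include <cstdio>

    int main() {
        char const * path[] = { "/usr/local/lib", NULL };             // hypothetical install location
        tbb::runtime_loader loader( tbb::runtime_loader::em_status );
        if( loader.load( path ) != tbb::runtime_loader::ec_ok ) {    // no abort(), just a status code
            std::printf( "no suitable TBB library found\n" );
            return 1;
        }
        return 0;  // TBB entry points are usable from here on
    }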
diff --git a/lib/3rdParty/tbb/include/tbb/scalable_allocator.h b/lib/3rdParty/tbb/include/tbb/scalable_allocator.h
deleted file mode 100644
index c2a81493..00000000
--- a/lib/3rdParty/tbb/include/tbb/scalable_allocator.h
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_scalable_allocator_H
-#define __TBB_scalable_allocator_H
-/** @file */
-
-#include <stddef.h> /* Need ptrdiff_t and size_t from here. */
-#if !_MSC_VER
-#include <stdint.h> /* Need intptr_t from here. */
-#endif
-
-#if !defined(__cplusplus) && __ICC==1100
-    #pragma warning (push)
-    #pragma warning (disable: 991)
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#if _MSC_VER >= 1400
-#define __TBB_EXPORTED_FUNC   __cdecl
-#else
-#define __TBB_EXPORTED_FUNC
-#endif
-
-/** The "malloc" analogue to allocate a block of memory of size bytes.
-  * @ingroup memory_allocation */
-void * __TBB_EXPORTED_FUNC scalable_malloc (size_t size);
-
-/** The "free" analogue to discard a previously allocated piece of memory.
-    @ingroup memory_allocation */
-void   __TBB_EXPORTED_FUNC scalable_free (void* ptr);
-
-/** The "realloc" analogue complementing scalable_malloc.
-    @ingroup memory_allocation */
-void * __TBB_EXPORTED_FUNC scalable_realloc (void* ptr, size_t size);
-
-/** The "calloc" analogue complementing scalable_malloc.
-    @ingroup memory_allocation */
-void * __TBB_EXPORTED_FUNC scalable_calloc (size_t nobj, size_t size);
-
-/** The "posix_memalign" analogue.
-    @ingroup memory_allocation */
-int __TBB_EXPORTED_FUNC scalable_posix_memalign (void** memptr, size_t alignment, size_t size);
-
-/** The "_aligned_malloc" analogue.
-    @ingroup memory_allocation */
-void * __TBB_EXPORTED_FUNC scalable_aligned_malloc (size_t size, size_t alignment);
-
-/** The "_aligned_realloc" analogue.
-    @ingroup memory_allocation */
-void * __TBB_EXPORTED_FUNC scalable_aligned_realloc (void* ptr, size_t size, size_t alignment);
-
-/** The "_aligned_free" analogue.
-    @ingroup memory_allocation */
-void __TBB_EXPORTED_FUNC scalable_aligned_free (void* ptr);
-
-/** The analogue of _msize/malloc_size/malloc_usable_size.
-    Returns the usable size of a memory block previously allocated by scalable_*,
-    or 0 (zero) if ptr does not point to such a block.
-    @ingroup memory_allocation */
-size_t __TBB_EXPORTED_FUNC scalable_msize (void* ptr);
-
-/* Results for scalable_allocation_* functions */
-typedef enum {
-    TBBMALLOC_OK,
-    TBBMALLOC_INVALID_PARAM,
-    TBBMALLOC_UNSUPPORTED,
-    TBBMALLOC_NO_MEMORY,
-    TBBMALLOC_NO_EFFECT
-} ScalableAllocationResult;
-
-/* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages.
-   scalable_allocation_mode call has priority over environment variable. */
-typedef enum {
-    TBBMALLOC_USE_HUGE_PAGES,  /* value turns using huge pages on and off */
-    /* deprecated, kept for backward compatibility only */
-    USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES,
-    /* try to limit memory consumption to the given value in bytes; clean
-       internal buffers if the limit is exceeded, but do not prevent further
-       memory requests to the OS */
-    TBBMALLOC_SET_SOFT_HEAP_LIMIT
-} AllocationModeParam;
-
-/** Set TBB allocator-specific allocation modes.
-    @ingroup memory_allocation */
-int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value);
-
-typedef enum {
-    /* Clean internal allocator buffers for all threads.
-       Returns TBBMALLOC_NO_EFFECT if no buffers cleaned,
-       TBBMALLOC_OK if some memory released from buffers. */
-    TBBMALLOC_CLEAN_ALL_BUFFERS,
-    /* Clean internal allocator buffer for current thread only.
-       Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */
-    TBBMALLOC_CLEAN_THREAD_BUFFERS
-} ScalableAllocationCmd;
-
-/** Call TBB allocator-specific commands.
-    @ingroup memory_allocation */
-int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif /* __cplusplus */
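Editor's note: a sketch of the C-level API above (editor's illustration only). Blocks obtained from scalable_malloc must be released with scalable_free, never with the standard free:

    #include "tbb/scalable_allocator.h"
    #include <string.h>

    void c_api_demo(void) {
        double *buf = (double*)scalable_malloc( 1024 * sizeof(double) );
        if( buf ) {
            memset( buf, 0, scalable_msize(buf) );  // usable size may exceed the request
            scalable_free( buf );                   // pair with scalable_*, not free()
        }
    }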
-
-#ifdef __cplusplus
-
-//! The namespace rml contains components of low-level memory pool interface.
-namespace rml {
-class MemoryPool;
-
-typedef void *(*rawAllocType)(intptr_t pool_id, size_t &bytes);
-// returns non-zero in case of error
-typedef int   (*rawFreeType)(intptr_t pool_id, void* raw_ptr, size_t raw_bytes);
-
-/*
-MemPoolPolicy extension must be compatible with such structure fields layout
-
-struct MemPoolPolicy {
-    rawAllocType pAlloc;
-    rawFreeType  pFree;
-    size_t       granularity;   // granularity of pAlloc allocations
-};
-*/
-
-struct MemPoolPolicy {
-    enum {
-        TBBMALLOC_POOL_VERSION = 1
-    };
-
-    rawAllocType pAlloc;
-    rawFreeType  pFree;
-                 // granularity of pAlloc allocations. 0 means default used.
-    size_t       granularity;
-    int          version;
-                 // all memory consumed at 1st pAlloc call and never returned,
-                 // no more pAlloc calls after 1st
-    unsigned     fixedPool : 1,
-                 // memory consumed but returned only at pool termination
-                 keepAllMemory : 1,
-                 reserved : 30;
-
-    MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_,
-                  size_t granularity_ = 0, bool fixedPool_ = false,
-                  bool keepAllMemory_ = false) :
-        pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION),
-        fixedPool(fixedPool_), keepAllMemory(keepAllMemory_),
-        reserved(0) {}
-};
-
-// enums have same values as appropriate enums from ScalableAllocationResult
-// TODO: use ScalableAllocationResult in pool_create directly
-enum MemPoolError {
-    // pool created successfully
-    POOL_OK = TBBMALLOC_OK,
-    // invalid policy parameters found
-    INVALID_POLICY = TBBMALLOC_INVALID_PARAM,
-    // requested pool policy is not supported by allocator library
-    UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED,
-    // lack of memory during pool creation
-    NO_MEMORY = TBBMALLOC_NO_MEMORY,
-    // action takes no effect
-    NO_EFFECT = TBBMALLOC_NO_EFFECT
-};
-
-MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
-                            rml::MemoryPool **pool);
-
-bool  pool_destroy(MemoryPool* memPool);
-void *pool_malloc(MemoryPool* memPool, size_t size);
-void *pool_realloc(MemoryPool* memPool, void *object, size_t size);
-void *pool_aligned_malloc(MemoryPool* mPool, size_t size, size_t alignment);
-void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, size_t size, size_t alignment);
-bool  pool_reset(MemoryPool* memPool);
-bool  pool_free(MemoryPool *memPool, void *object);
-MemoryPool *pool_identify(void *object);
-}
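Editor's note: a sketch of the low-level pool interface above (editor's illustration only; the callbacks are hypothetical, and the functions themselves are implemented by the tbbmalloc library):

    #include "tbb/scalable_allocator.h"
    #include <cstdlib>

    static void* raw_alloc( intptr_t /*pool_id*/, size_t &bytes ) {
        return std::malloc( bytes );        // the pool requests large slabs, not single objects
    }
    static int raw_free( intptr_t /*pool_id*/, void* ptr, size_t /*bytes*/ ) {
        std::free( ptr );
        return 0;                           // non-zero would signal an error
    }

    void pool_demo() {
        rml::MemPoolPolicy policy( raw_alloc, raw_free );
        rml::MemoryPool *pool = NULL;
        if( rml::pool_create_v1( /*pool_id=*/0, &policy, &pool ) == rml::POOL_OK ) {
            void *obj = rml::pool_malloc( pool, 128 );
            rml::pool_free( pool, obj );
            rml::pool_destroy( pool );      // returns all slabs through raw_free
        }
    }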
-
-#include <new>      /* To use new with the placement argument */
-
-/* Ensure that including this header does not cause implicit linkage with TBB */
-#ifndef __TBB_NO_IMPLICIT_LINKAGE
-    #define __TBB_NO_IMPLICIT_LINKAGE 1
-    #include "tbb_stddef.h"
-    #undef  __TBB_NO_IMPLICIT_LINKAGE
-#else
-    #include "tbb_stddef.h"
-#endif
-
-#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
-    #include <utility> // std::forward
-#endif
-
-namespace tbb {
-
-#if _MSC_VER && !defined(__INTEL_COMPILER)
-    // Workaround for erroneous "unreferenced parameter" warning in method destroy.
-    #pragma warning (push)
-    #pragma warning (disable: 4100)
-#endif
-
-//! @cond INTERNAL
-namespace internal {
-
-#if TBB_USE_EXCEPTIONS
-// forward declaration is for inlining prevention
-template<typename E> __TBB_NOINLINE( void throw_exception(const E &e) );
-#endif
-
-// keep throw in a separate function to prevent code bloat
-template<typename E>
-void throw_exception(const E &e) {
-    __TBB_THROW(e);
-}
-
-} // namespace internal
-//! @endcond
-
-//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
-/** The members are ordered the same way they are in section 20.4.1
-    of the ISO C++ standard.
-    @ingroup memory_allocation */
-template<typename T>
-class scalable_allocator {
-public:
-    typedef typename internal::allocator_type<T>::value_type value_type;
-    typedef value_type* pointer;
-    typedef const value_type* const_pointer;
-    typedef value_type& reference;
-    typedef const value_type& const_reference;
-    typedef size_t size_type;
-    typedef ptrdiff_t difference_type;
-    template<class U> struct rebind {
-        typedef scalable_allocator<U> other;
-    };
-
-    scalable_allocator() throw() {}
-    scalable_allocator( const scalable_allocator& ) throw() {}
-    template<typename U> scalable_allocator(const scalable_allocator<U>&) throw() {}
-
-    pointer address(reference x) const {return &x;}
-    const_pointer address(const_reference x) const {return &x;}
-
-    //! Allocate space for n objects.
-    pointer allocate( size_type n, const void* /*hint*/ =0 ) {
-        pointer p = static_cast<pointer>( scalable_malloc( n * sizeof(value_type) ) );
-        if (!p)
-            internal::throw_exception(std::bad_alloc());
-        return p;
-    }
-
-    //! Free previously allocated block of memory
-    void deallocate( pointer p, size_type ) {
-        scalable_free( p );
-    }
-
-    //! Largest value for which method allocate might succeed.
-    size_type max_size() const throw() {
-        size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type);
-        return (absolutemax > 0 ? absolutemax : 1);
-    }
-#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
-    template<typename U, typename... Args>
-    void construct(U *p, Args&&... args)
-        { ::new((void *)p) U(std::forward<Args>(args)...); }
-#else /* __TBB_ALLOCATOR_CONSTRUCT_VARIADIC */
-#if __TBB_CPP11_RVALUE_REF_PRESENT
-    void construct( pointer p, value_type&& value ) { ::new((void*)(p)) value_type( std::move( value ) ); }
-#endif
-    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
-#endif /* __TBB_ALLOCATOR_CONSTRUCT_VARIADIC */
-    void destroy( pointer p ) {p->~value_type();}
-};
-
-#if _MSC_VER && !defined(__INTEL_COMPILER)
-    #pragma warning (pop)
-#endif /* warning 4100 is back */
-
-//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
-/** @ingroup memory_allocation */
-template<>
-class scalable_allocator<void> {
-public:
-    typedef void* pointer;
-    typedef const void* const_pointer;
-    typedef void value_type;
-    template<class U> struct rebind {
-        typedef scalable_allocator<U> other;
-    };
-};
-
-template<typename T, typename U>
-inline bool operator==( const scalable_allocator<T>&, const scalable_allocator<U>& ) {return true;}
-
-template<typename T, typename U>
-inline bool operator!=( const scalable_allocator<T>&, const scalable_allocator<U>& ) {return false;}
-
-} // namespace tbb
-
-#if _MSC_VER
-    #if (__TBB_BUILD || __TBBMALLOC_BUILD) && !defined(__TBBMALLOC_NO_IMPLICIT_LINKAGE)
-        #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
-    #endif
-
-    #if !__TBBMALLOC_NO_IMPLICIT_LINKAGE
-        #ifdef _DEBUG
-            #pragma comment(lib, "tbbmalloc_debug.lib")
-        #else
-            #pragma comment(lib, "tbbmalloc.lib")
-        #endif
-    #endif
-
-
-#endif
-
-#endif /* __cplusplus */
-
-#if !defined(__cplusplus) && __ICC==1100
-    #pragma warning (pop)
-#endif /* ICC 11.0 warning 991 is back */
-
-#endif /* __TBB_scalable_allocator_H */
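Editor's note: the class above is a drop-in STL allocator; a sketch of typical use (editor's illustration only):

    #include "tbb/scalable_allocator.h"
    #include <vector>

    int allocator_demo() {
        std::vector<int, tbb::scalable_allocator<int> > v;  // element storage comes from tbbmalloc
        for( int i = 0; i < 1000; ++i )
            v.push_back( i );
        return (int)v.size();
    }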
diff --git a/lib/3rdParty/tbb/include/tbb/spin_mutex.h b/lib/3rdParty/tbb/include/tbb/spin_mutex.h
deleted file mode 100644
index 99ef15c2..00000000
--- a/lib/3rdParty/tbb/include/tbb/spin_mutex.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_spin_mutex_H
-#define __TBB_spin_mutex_H
-
-#include <cstddef>
-#include <new>
-#include "aligned_space.h"
-#include "tbb_stddef.h"
-#include "tbb_machine.h"
-#include "tbb_profiling.h"
-#include "internal/_mutex_padding.h"
-
-namespace tbb {
-
-//! A lock that occupies a single byte.
-/** A spin_mutex is a spin mutex that fits in a single byte.
-    It should be used only for locking short critical sections
-    (typically less than 20 instructions) when fairness is not an issue.
-    If zero-initialized, the mutex is considered unheld.
-    @ingroup synchronization */
-class spin_mutex : internal::mutex_copy_deprecated_and_disabled {
-    //! 0 if lock is released, 1 if lock is acquired.
-    __TBB_atomic_flag flag;
-
-public:
-    //! Construct unacquired lock.
-    /** Equivalent to zero-initialization of *this. */
-    spin_mutex() : flag(0) {
-#if TBB_USE_THREADING_TOOLS
-        internal_construct();
-#endif
-    }
-
-    //! Represents acquisition of a mutex.
-    class scoped_lock : internal::no_copy {
-    private:
-        //! Points to currently held mutex, or NULL if no lock is held.
-        spin_mutex* my_mutex;
-
-        //! Value to store into spin_mutex::flag to unlock the mutex.
-        /** This variable is no longer used. Instead, 0 and 1 are used to
-            represent that the lock is free and acquired, respectively.
-            We keep the member variable here to ensure backward compatibility */
-        __TBB_Flag my_unlock_value;
-
-        //! Like acquire, but with ITT instrumentation.
-        void __TBB_EXPORTED_METHOD internal_acquire( spin_mutex& m );
-
-        //! Like try_acquire, but with ITT instrumentation.
-        bool __TBB_EXPORTED_METHOD internal_try_acquire( spin_mutex& m );
-
-        //! Like release, but with ITT instrumentation.
-        void __TBB_EXPORTED_METHOD internal_release();
-
-        friend class spin_mutex;
-
-    public:
-        //! Construct without acquiring a mutex.
-        scoped_lock() : my_mutex(NULL), my_unlock_value(0) {}
-
-        //! Construct and acquire lock on a mutex.
-        scoped_lock( spin_mutex& m ) : my_unlock_value(0) {
-            internal::suppress_unused_warning(my_unlock_value);
-#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
-            my_mutex=NULL;
-            internal_acquire(m);
-#else
-            my_mutex=&m;
-            __TBB_LockByte(m.flag);
-#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
-        }
-
-        //! Acquire lock.
-        void acquire( spin_mutex& m ) {
-#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
-            internal_acquire(m);
-#else
-            my_mutex = &m;
-            __TBB_LockByte(m.flag);
-#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
-        }
-
-        //! Try acquiring lock (non-blocking)
-        /** Return true if lock acquired; false otherwise. */
-        bool try_acquire( spin_mutex& m ) {
-#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
-            return internal_try_acquire(m);
-#else
-            bool result = __TBB_TryLockByte(m.flag);
-            if( result )
-                my_mutex = &m;
-            return result;
-#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
-        }
-
-        //! Release lock
-        void release() {
-#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
-            internal_release();
-#else
-            __TBB_UnlockByte(my_mutex->flag);
-            my_mutex = NULL;
-#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
-        }
-
-        //! Destroy lock. If holding a lock, releases the lock first.
-        ~scoped_lock() {
-            if( my_mutex ) {
-#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
-                internal_release();
-#else
-                __TBB_UnlockByte(my_mutex->flag);
-#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
-            }
-        }
-    };
-
-    //! Internal constructor with ITT instrumentation.
-    void __TBB_EXPORTED_METHOD internal_construct();
-
-    // Mutex traits
-    static const bool is_rw_mutex = false;
-    static const bool is_recursive_mutex = false;
-    static const bool is_fair_mutex = false;
-
-    // ISO C++0x compatibility methods
-
-    //! Acquire lock
-    void lock() {
-#if TBB_USE_THREADING_TOOLS
-        aligned_space<scoped_lock> tmp;
-        new(tmp.begin()) scoped_lock(*this);
-#else
-        __TBB_LockByte(flag);
-#endif /* TBB_USE_THREADING_TOOLS*/
-    }
-
-    //! Try acquiring lock (non-blocking)
-    /** Return true if lock acquired; false otherwise. */
-    bool try_lock() {
-#if TBB_USE_THREADING_TOOLS
-        aligned_space<scoped_lock> tmp;
-        return (new(tmp.begin()) scoped_lock)->internal_try_acquire(*this);
-#else
-        return __TBB_TryLockByte(flag);
-#endif /* TBB_USE_THREADING_TOOLS*/
-    }
-
-    //! Release lock
-    void unlock() {
-#if TBB_USE_THREADING_TOOLS
-        aligned_space<scoped_lock> tmp;
-        scoped_lock& s = *tmp.begin();
-        s.my_mutex = this;
-        s.internal_release();
-#else
-        __TBB_UnlockByte(flag);
-#endif /* TBB_USE_THREADING_TOOLS */
-    }
-
-    friend class scoped_lock;
-}; // end of spin_mutex
-
-__TBB_DEFINE_PROFILING_SET_NAME(spin_mutex)
-
-} // namespace tbb
-
-#if ( __TBB_x86_32 || __TBB_x86_64 )
-#include "internal/_x86_eliding_mutex_impl.h"
-#endif
-
-namespace tbb {
-//! A cross-platform spin mutex with speculative lock acquisition.
-/** On platforms with proper HW support, this lock may speculatively execute
-    its critical sections, using HW mechanisms to detect real data races and
-    ensure atomicity of the critical sections. In particular, it uses
-    Intel(R) Transactional Synchronization Extensions (Intel(R) TSX).
-    Without such HW support, it behaves like a spin_mutex.
-    It should be used for locking short critical sections where the lock is
-    contended but the data it protects are not. If zero-initialized, the
-    mutex is considered unheld.
-    @ingroup synchronization */
-
-#if ( __TBB_x86_32 || __TBB_x86_64 )
-typedef interface7::internal::padded_mutex<internal::x86_eliding_mutex,false> speculative_spin_mutex;
-#else
-typedef interface7::internal::padded_mutex<spin_mutex,false> speculative_spin_mutex;
-#endif
-__TBB_DEFINE_PROFILING_SET_NAME(speculative_spin_mutex)
-
-} // namespace tbb
-
-#endif /* __TBB_spin_mutex_H */
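Editor's note: a sketch of the short-critical-section usage the comments above prescribe (editor's illustration only):

    #include "tbb/spin_mutex.h"

    tbb::spin_mutex counter_mutex;
    long counter = 0;

    void bump() {
        tbb::spin_mutex::scoped_lock guard(counter_mutex);  // spins rather than sleeps
        ++counter;                                          // keep the locked region tiny
    }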
diff --git a/lib/3rdParty/tbb/include/tbb/spin_rw_mutex.h b/lib/3rdParty/tbb/include/tbb/spin_rw_mutex.h
deleted file mode 100644
index b20f4ebd..00000000
--- a/lib/3rdParty/tbb/include/tbb/spin_rw_mutex.h
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_spin_rw_mutex_H
-#define __TBB_spin_rw_mutex_H
-
-#include "tbb_stddef.h"
-#include "tbb_machine.h"
-#include "tbb_profiling.h"
-#include "internal/_mutex_padding.h"
-
-namespace tbb {
-
-#if __TBB_TSX_AVAILABLE
-namespace interface8 { namespace internal {
-    class x86_rtm_rw_mutex;
-}}
-#endif
-
-class spin_rw_mutex_v3;
-typedef spin_rw_mutex_v3 spin_rw_mutex;
-
-//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference
-/** @ingroup synchronization */
-class spin_rw_mutex_v3 : internal::mutex_copy_deprecated_and_disabled {
-    //! @cond INTERNAL
-
-    //! Internal acquire write lock.
-    bool __TBB_EXPORTED_METHOD internal_acquire_writer();
-
-    //! Out of line code for releasing a write lock.
-    /** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
-    void __TBB_EXPORTED_METHOD internal_release_writer();
-
-    //! Internal acquire read lock.
-    void __TBB_EXPORTED_METHOD internal_acquire_reader();
-
-    //! Internal upgrade reader to become a writer.
-    bool __TBB_EXPORTED_METHOD internal_upgrade();
-
-    //! Out of line code for downgrading a writer to a reader.
-    /** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
-    void __TBB_EXPORTED_METHOD internal_downgrade();
-
-    //! Internal release read lock.
-    void __TBB_EXPORTED_METHOD internal_release_reader();
-
-    //! Internal try_acquire write lock.
-    bool __TBB_EXPORTED_METHOD internal_try_acquire_writer();
-
-    //! Internal try_acquire read lock.
-    bool __TBB_EXPORTED_METHOD internal_try_acquire_reader();
-
-    //! @endcond
-public:
-    //! Construct unacquired mutex.
-    spin_rw_mutex_v3() : state(0) {
-#if TBB_USE_THREADING_TOOLS
-        internal_construct();
-#endif
-    }
-
-#if TBB_USE_ASSERT
-    //! Destructor asserts that the mutex is not held, i.e. that state is zero.
-    ~spin_rw_mutex_v3() {
-        __TBB_ASSERT( !state, "destruction of an acquired mutex");
-    };
-#endif /* TBB_USE_ASSERT */
-
-    //! The scoped locking pattern
-    /** It helps to avoid the common problem of forgetting to release lock.
-        It also nicely provides the "node" for queuing locks. */
-    class scoped_lock : internal::no_copy {
-#if __TBB_TSX_AVAILABLE
-        friend class tbb::interface8::internal::x86_rtm_rw_mutex;
-        // helper methods for x86_rtm_rw_mutex
-        spin_rw_mutex *internal_get_mutex() const { return mutex; }
-        void internal_set_mutex(spin_rw_mutex* m) { mutex = m; }
-#endif
-    public:
-        //! Construct lock that has not acquired a mutex.
-        /** Equivalent to zero-initialization of *this. */
-        scoped_lock() : mutex(NULL), is_writer(false) {}
-
-        //! Acquire lock on given mutex.
-        scoped_lock( spin_rw_mutex& m, bool write = true ) : mutex(NULL) {
-            acquire(m, write);
-        }
-
-        //! Release lock (if lock is held).
-        ~scoped_lock() {
-            if( mutex ) release();
-        }
-
-        //! Acquire lock on given mutex.
-        void acquire( spin_rw_mutex& m, bool write = true ) {
-            __TBB_ASSERT( !mutex, "holding mutex already" );
-            is_writer = write;
-            mutex = &m;
-            if( write ) mutex->internal_acquire_writer();
-            else        mutex->internal_acquire_reader();
-        }
-
-        //! Upgrade reader to become a writer.
-        /** Returns whether the upgrade happened without releasing and re-acquiring the lock */
-        bool upgrade_to_writer() {
-            __TBB_ASSERT( mutex, "lock is not acquired" );
-            __TBB_ASSERT( !is_writer, "not a reader" );
-            is_writer = true;
-            return mutex->internal_upgrade();
-        }
-
-        //! Release lock.
- void release() { - __TBB_ASSERT( mutex, "lock is not acquired" ); - spin_rw_mutex *m = mutex; - mutex = NULL; -#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT - if( is_writer ) m->internal_release_writer(); - else m->internal_release_reader(); -#else - if( is_writer ) __TBB_AtomicAND( &m->state, READERS ); - else __TBB_FetchAndAddWrelease( &m->state, -(intptr_t)ONE_READER); -#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ - } - - //! Downgrade writer to become a reader. - bool downgrade_to_reader() { - __TBB_ASSERT( mutex, "lock is not acquired" ); - __TBB_ASSERT( is_writer, "not a writer" ); -#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT - mutex->internal_downgrade(); -#else - __TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER)); -#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ - is_writer = false; - return true; - } - - //! Try acquire lock on given mutex. - bool try_acquire( spin_rw_mutex& m, bool write = true ) { - __TBB_ASSERT( !mutex, "holding mutex already" ); - bool result; - is_writer = write; - result = write? m.internal_try_acquire_writer() - : m.internal_try_acquire_reader(); - if( result ) - mutex = &m; - return result; - } - - protected: - - //! The pointer to the current mutex that is held, or NULL if no mutex is held. - spin_rw_mutex* mutex; - - //! If mutex!=NULL, then is_writer is true if holding a writer lock, false if holding a reader lock. - /** Not defined if not holding a lock. */ - bool is_writer; - }; - - // Mutex traits - static const bool is_rw_mutex = true; - static const bool is_recursive_mutex = false; - static const bool is_fair_mutex = false; - - // ISO C++0x compatibility methods - - //! Acquire writer lock - void lock() {internal_acquire_writer();} - - //! Try acquiring writer lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_lock() {return internal_try_acquire_writer();} - - //! Release lock - void unlock() { -#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT - if( state&WRITER ) internal_release_writer(); - else internal_release_reader(); -#else - if( state&WRITER ) __TBB_AtomicAND( &state, READERS ); - else __TBB_FetchAndAddWrelease( &state, -(intptr_t)ONE_READER); -#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ - } - - // Methods for reader locks that resemble ISO C++0x compatibility methods. - - //! Acquire reader lock - void lock_read() {internal_acquire_reader();} - - //! Try acquiring reader lock (non-blocking) - /** Return true if reader lock acquired; false otherwise. */ - bool try_lock_read() {return internal_try_acquire_reader();} - -protected: - typedef intptr_t state_t; - static const state_t WRITER = 1; - static const state_t WRITER_PENDING = 2; - static const state_t READERS = ~(WRITER | WRITER_PENDING); - static const state_t ONE_READER = 4; - static const state_t BUSY = WRITER | READERS; - //! State of lock - /** Bit 0 = writer is holding lock - Bit 1 = request by a writer to acquire lock (hint to readers to wait) - Bit 2..N = number of readers holding lock */ - state_t state; - -private: - void __TBB_EXPORTED_METHOD internal_construct(); -}; - -__TBB_DEFINE_PROFILING_SET_NAME(spin_rw_mutex) - -} // namespace tbb - -#if __TBB_TSX_AVAILABLE -#include "internal/_x86_rtm_rw_mutex_impl.h" -#endif - -namespace tbb { -namespace interface8 { -//! A cross-platform spin reader/writer mutex with speculative lock acquisition. 
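Editor's note: a sketch of the read-mostly pattern with an in-place upgrade (editor's illustration only; upgrade_to_writer may release and re-acquire the lock, so the lookup is repeated):

    #include "tbb/spin_rw_mutex.h"
    #include <map>

    tbb::spin_rw_mutex table_mutex;
    std::map<int,int> table;

    int lookup_or_insert( int key ) {
        tbb::spin_rw_mutex::scoped_lock guard( table_mutex, /*write=*/false );
        std::map<int,int>::iterator it = table.find( key );
        if( it != table.end() )
            return it->second;
        if( !guard.upgrade_to_writer() ) {
            it = table.find( key );          // lock was dropped; another thread may have inserted
            if( it != table.end() )
                return it->second;
        }
        table[key] = 0;                      // now holding the write lock
        return 0;
    }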
-/** On platforms with proper HW support, this lock may speculatively execute
-    its critical sections, using HW mechanisms to detect real data races and
-    ensure atomicity of the critical sections. In particular, it uses
-    Intel(R) Transactional Synchronization Extensions (Intel(R) TSX).
-    Without such HW support, it behaves like a spin_rw_mutex.
-    It should be used for locking short critical sections where the lock is
-    contended but the data it protects are not.
-    @ingroup synchronization */
-#if __TBB_TSX_AVAILABLE
-typedef interface7::internal::padded_mutex<tbb::interface8::internal::x86_rtm_rw_mutex,true> speculative_spin_rw_mutex;
-#else
-typedef interface7::internal::padded_mutex<tbb::spin_rw_mutex,true> speculative_spin_rw_mutex;
-#endif
-} // namespace interface8
-
-using interface8::speculative_spin_rw_mutex;
-__TBB_DEFINE_PROFILING_SET_NAME(speculative_spin_rw_mutex)
-} // namespace tbb
-#endif /* __TBB_spin_rw_mutex_H */
diff --git a/lib/3rdParty/tbb/include/tbb/task.h b/lib/3rdParty/tbb/include/tbb/task.h
deleted file mode 100644
index 246684ab..00000000
--- a/lib/3rdParty/tbb/include/tbb/task.h
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_task_H
-#define __TBB_task_H
-
-#include "tbb_stddef.h"
-#include "tbb_machine.h"
-#include "tbb_profiling.h"
-#include <climits>
-
-typedef struct ___itt_caller *__itt_caller;
-
-namespace tbb {
-
-class task;
-class task_list;
-class task_group_context;
-
-// MSVC does not allow taking the address of a member that was defined
-// privately in task_base and made public in class task via a using declaration.
-#if _MSC_VER || (__GNUC__==3 && __GNUC_MINOR__<3)
-#define __TBB_TASK_BASE_ACCESS public
-#else
-#define __TBB_TASK_BASE_ACCESS private
-#endif
-
-namespace internal {      //< @cond INTERNAL
-
-    class allocate_additional_child_of_proxy: no_assign {
-        //! No longer used, but retained for binary layout compatibility. Always NULL.
-        task* self;
-        task& parent;
-    public:
-        explicit allocate_additional_child_of_proxy( task& parent_ ) : self(NULL), parent(parent_) {}
-        task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
-        void __TBB_EXPORTED_METHOD free( task& ) const;
-    };
-
-    struct cpu_ctl_env_space { int space[sizeof(internal::uint64_t)/sizeof(int)]; };
-} //< namespace internal @endcond
-
-namespace interface5 {
-    namespace internal {
-        //! Base class for methods that became static in TBB 3.0.
-        /** TBB's evolution caused the "this" argument for several methods to become obsolete.
-            However, for backwards binary compatibility, the new methods need distinct names,
-            otherwise the One Definition Rule would be broken. Hence the new methods are
-            defined in this private base class, and then exposed in class task via
-            using declarations. */
-        class task_base: tbb::internal::no_copy {
-        __TBB_TASK_BASE_ACCESS:
-            friend class tbb::task;
-
-            //! Schedule task for execution when a worker becomes available.
-            static void spawn( task& t );
-
-            //! Spawn multiple tasks and clear list.
-            static void spawn( task_list& list );
-
-            //!
Like allocate_child, except that task's parent becomes "t", not this. - /** Typically used in conjunction with schedule_to_reexecute to implement while loops. - Atomically increments the reference count of t.parent() */ - static tbb::internal::allocate_additional_child_of_proxy allocate_additional_child_of( task& t ) { - return tbb::internal::allocate_additional_child_of_proxy(t); - } - - //! Destroy a task. - /** Usually, calling this method is unnecessary, because a task is - implicitly deleted after its execute() method runs. However, - sometimes a task needs to be explicitly deallocated, such as - when a root task is used as the parent in spawn_and_wait_for_all. */ - static void __TBB_EXPORTED_FUNC destroy( task& victim ); - }; - } // internal -} // interface5 - -//! @cond INTERNAL -namespace internal { - - class scheduler: no_copy { - public: - //! For internal use only - virtual void spawn( task& first, task*& next ) = 0; - - //! For internal use only - virtual void wait_for_all( task& parent, task* child ) = 0; - - //! For internal use only - virtual void spawn_root_and_wait( task& first, task*& next ) = 0; - - //! Pure virtual destructor; - // Have to have it just to shut up overzealous compilation warnings - virtual ~scheduler() = 0; - - //! For internal use only - virtual void enqueue( task& t, void* reserved ) = 0; - }; - - //! A reference count - /** Should always be non-negative. A signed type is used so that underflow can be detected. */ - typedef intptr_t reference_count; - - //! An id as used for specifying affinity. - typedef unsigned short affinity_id; - -#if __TBB_TASK_ISOLATION - //! A tag for task isolation. - typedef intptr_t isolation_tag; - const isolation_tag no_isolation = 0; -#endif /* __TBB_TASK_ISOLATION */ - -#if __TBB_TASK_GROUP_CONTEXT - class generic_scheduler; - - struct context_list_node_t { - context_list_node_t *my_prev, - *my_next; - }; - - class allocate_root_with_context_proxy: no_assign { - task_group_context& my_context; - public: - allocate_root_with_context_proxy ( task_group_context& ctx ) : my_context(ctx) {} - task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; - void __TBB_EXPORTED_METHOD free( task& ) const; - }; -#endif /* __TBB_TASK_GROUP_CONTEXT */ - - class allocate_root_proxy: no_assign { - public: - static task& __TBB_EXPORTED_FUNC allocate( size_t size ); - static void __TBB_EXPORTED_FUNC free( task& ); - }; - - class allocate_continuation_proxy: no_assign { - public: - task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; - void __TBB_EXPORTED_METHOD free( task& ) const; - }; - - class allocate_child_proxy: no_assign { - public: - task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; - void __TBB_EXPORTED_METHOD free( task& ) const; - }; - - //! Memory prefix to a task object. - /** This class is internal to the library. - Do not reference it directly, except within the library itself. - Fields are ordered in way that preserves backwards compatibility and yields - good packing on typical 32-bit and 64-bit platforms. New fields should be - added at the beginning for backward compatibility with accesses to the task - prefix inlined into application code. - - In case task prefix size exceeds 32 or 64 bytes on IA32 and Intel64 - architectures correspondingly, consider dynamic setting of task_alignment - and task_prefix_reservation_size based on the maximal operand size supported - by the current CPU. 
-
-    @ingroup task_scheduling */
-    class task_prefix {
-    private:
-        friend class tbb::task;
-        friend class tbb::interface5::internal::task_base;
-        friend class tbb::task_list;
-        friend class internal::scheduler;
-        friend class internal::allocate_root_proxy;
-        friend class internal::allocate_child_proxy;
-        friend class internal::allocate_continuation_proxy;
-        friend class internal::allocate_additional_child_of_proxy;
-
-#if __TBB_TASK_ISOLATION
-        //! The tag used for task isolation.
-        isolation_tag isolation;
-#endif /* __TBB_TASK_ISOLATION */
-
-#if __TBB_TASK_GROUP_CONTEXT
-        //! Shared context that is used to communicate asynchronous state changes
-        /** Currently it is used to broadcast cancellation requests generated both
-            by users and as the result of unhandled exceptions in the task::execute()
-            methods. */
-        task_group_context *context;
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-        //! The scheduler that allocated the task, or NULL if the task is big.
-        /** Small tasks are pooled by the scheduler that allocated the task.
-            If a scheduler needs to free a small task allocated by another scheduler,
-            it returns the task to that other scheduler. This policy avoids
-            memory space blowup issues for memory allocators that allocate from
-            thread-specific pools. */
-        scheduler* origin;
-
-#if __TBB_TASK_PRIORITY
-        union {
-#endif /* __TBB_TASK_PRIORITY */
-        //! Obsolete. The scheduler that owns the task.
-        /** Retained only for the sake of backward binary compatibility.
-            Still used by inline methods in the task.h header. **/
-        scheduler* owner;
-
-#if __TBB_TASK_PRIORITY
-            //! Pointer to the next offloaded lower priority task.
-            /** Used to maintain a list of offloaded tasks inside the scheduler. **/
-            task* next_offloaded;
-        };
-#endif /* __TBB_TASK_PRIORITY */
-
-        //! The task whose reference count includes me.
-        /** In the "blocking style" of programming, this field points to the parent task.
-            In the "continuation-passing style" of programming, this field points to the
-            continuation of the parent. */
-        tbb::task* parent;
-
-        //! Reference count used for synchronization.
-        /** In the "continuation-passing style" of programming, this field is
-            the difference of the number of allocated children minus the
-            number of children that have completed.
-            In the "blocking style" of programming, this field is one more than the difference. */
-        __TBB_atomic reference_count ref_count;
-
-        //! Obsolete. Used to be scheduling depth before TBB 2.2
-        /** Retained only for the sake of backward binary compatibility.
-            Not used by TBB anymore. **/
-        int depth;
-
-        //! A task::state_type, stored as a byte for compactness.
-        /** This state is exposed to users via method task::state(). */
-        unsigned char state;
-
-        //! Miscellaneous state that is not directly visible to users, stored as a byte for compactness.
-        /** 0x0 -> version 1.0 task
-            0x1 -> version >=2.1 task
-            0x10 -> task was enqueued
-            0x20 -> task_proxy
-            0x40 -> task has live ref_count
-            0x80 -> a stolen task */
-        unsigned char extra_state;
-
-        affinity_id affinity;
-
-        //! "next" field for list of task
-        tbb::task* next;
-
-        //! The task corresponding to this task_prefix.
-        tbb::task& task() {return *reinterpret_cast<tbb::task*>(this+1);}
-    };
-
-} // namespace internal
-//!
@endcond - -#if __TBB_TASK_GROUP_CONTEXT - -#if __TBB_TASK_PRIORITY -namespace internal { - static const int priority_stride_v4 = INT_MAX / 4; -} - -enum priority_t { - priority_normal = internal::priority_stride_v4 * 2, - priority_low = priority_normal - internal::priority_stride_v4, - priority_high = priority_normal + internal::priority_stride_v4 -}; - -#endif /* __TBB_TASK_PRIORITY */ - -#if TBB_USE_CAPTURED_EXCEPTION - class tbb_exception; -#else - namespace internal { - class tbb_exception_ptr; - } -#endif /* !TBB_USE_CAPTURED_EXCEPTION */ - -class task_scheduler_init; -namespace interface7 { class task_arena; } - -//! Used to form groups of tasks -/** @ingroup task_scheduling - The context services explicit cancellation requests from user code, and unhandled - exceptions intercepted during tasks execution. Intercepting an exception results - in generating internal cancellation requests (which is processed in exactly the - same way as external ones). - - The context is associated with one or more root tasks and defines the cancellation - group that includes all the descendants of the corresponding root task(s). Association - is established when a context object is passed as an argument to the task::allocate_root() - method. See task_group_context::task_group_context for more details. - - The context can be bound to another one, and other contexts can be bound to it, - forming a tree-like structure: parent -> this -> children. Arrows here designate - cancellation propagation direction. If a task in a cancellation group is cancelled - all the other tasks in this group and groups bound to it (as children) get cancelled too. - - IMPLEMENTATION NOTE: - When adding new members to task_group_context or changing types of existing ones, - update the size of both padding buffers (_leading_padding and _trailing_padding) - appropriately. See also VERSIONING NOTE at the constructor definition below. **/ -class task_group_context : internal::no_copy { -private: - friend class internal::generic_scheduler; - friend class task_scheduler_init; - friend class interface7::task_arena; - -#if TBB_USE_CAPTURED_EXCEPTION - typedef tbb_exception exception_container_type; -#else - typedef internal::tbb_exception_ptr exception_container_type; -#endif - - enum version_traits_word_layout { - traits_offset = 16, - version_mask = 0xFFFF, - traits_mask = 0xFFFFul << traits_offset - }; - -public: - enum kind_type { - isolated, - bound - }; - - enum traits_type { - exact_exception = 0x0001ul << traits_offset, -#if __TBB_FP_CONTEXT - fp_settings = 0x0002ul << traits_offset, -#endif - concurrent_wait = 0x0004ul << traits_offset, -#if TBB_USE_CAPTURED_EXCEPTION - default_traits = 0 -#else - default_traits = exact_exception -#endif /* !TBB_USE_CAPTURED_EXCEPTION */ - }; - -private: - enum state { - may_have_children = 1, - // the following enumerations must be the last, new 2^x values must go above - next_state_value, low_unused_state_bit = (next_state_value-1)*2 - }; - - union { - //! Flavor of this context: bound or isolated. - // TODO: describe asynchronous use, and whether any memory semantics are needed - __TBB_atomic kind_type my_kind; - uintptr_t _my_kind_aligner; - }; - - //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. - task_group_context *my_parent; - - //! Used to form the thread specific list of contexts without additional memory allocation. - /** A context is included into the list of the current thread when its binding to - its parent happens. 
Any context can be present in the list of one thread only. **/
-    internal::context_list_node_t my_node;
-
-    //! Used to set and maintain stack stitching point for Intel Performance Tools.
-    __itt_caller itt_caller;
-
-    //! Leading padding protecting accesses to frequently used members from false sharing.
-    /** Read accesses to the field my_cancellation_requested are on the hot path inside
-        the scheduler. This padding ensures that this field never shares the same cache
-        line with a local variable that is frequently written to. **/
-    char _leading_padding[internal::NFS_MaxLineSize
-                          - 2 * sizeof(uintptr_t)- sizeof(void*) - sizeof(internal::context_list_node_t)
-                          - sizeof(__itt_caller)
-#if __TBB_FP_CONTEXT
-                          - sizeof(internal::cpu_ctl_env_space)
-#endif
-                         ];
-
-#if __TBB_FP_CONTEXT
-    //! Space for platform-specific FPU settings.
-    /** Must only be accessed inside TBB binaries, and never directly in user
-        code or inline methods. */
-    internal::cpu_ctl_env_space my_cpu_ctl_env;
-#endif
-
-    //! Specifies whether cancellation was requested for this task group.
-    uintptr_t my_cancellation_requested;
-
-    //! Version for run-time checks and behavioral traits of the context.
-    /** Version occupies low 16 bits, and traits (zero or more ORed enumerators
-        from the traits_type enumerations) take the next 16 bits.
-        Original (zeroth) version of the context did not support any traits. **/
-    uintptr_t my_version_and_traits;
-
-    //! Pointer to the container storing exception being propagated across this task group.
-    exception_container_type *my_exception;
-
-    //! Scheduler instance that registered this context in its thread specific list.
-    internal::generic_scheduler *my_owner;
-
-    //! Internal state (combination of state flags, currently only may_have_children).
-    uintptr_t my_state;
-
-#if __TBB_TASK_PRIORITY
-    //! Priority level of the task group (in normalized representation)
-    intptr_t my_priority;
-#endif /* __TBB_TASK_PRIORITY */
-
-    //! Trailing padding protecting accesses to frequently used members from false sharing
-    /** \sa _leading_padding **/
-    char _trailing_padding[internal::NFS_MaxLineSize - 2 * sizeof(uintptr_t) - 2 * sizeof(void*)
-#if __TBB_TASK_PRIORITY
-                            - sizeof(intptr_t)
-#endif /* __TBB_TASK_PRIORITY */
-                          ];
-
-public:
-    //! Default & binding constructor.
-    /** By default a bound context is created. That is this context will be bound
-        (as child) to the context of the task calling task::allocate_root(this_context)
-        method. Cancellation requests passed to the parent context are propagated
-        to all the contexts bound to it. Similarly priority change is propagated
-        from the parent context to its children.
-
-        If task_group_context::isolated is used as the argument, then the tasks associated
-        with this context will never be affected by events in any other context.
-
-        Creating isolated contexts involves much less overhead, but they have limited
-        utility. Normally when an exception occurs in an algorithm that has nested
-        ones running, it is desirable to have all the nested algorithms cancelled
-        as well. Such a behavior requires nested algorithms to use bound contexts.
-
-        There is one good place where using isolated contexts is beneficial: a
-        master thread. That is, if a particular algorithm is invoked directly from
-        the master thread (not from a TBB task), supplying it with an explicitly
-        created isolated context will result in faster algorithm startup.
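Editor's note: a sketch of explicit cancellation through a context, as described above (editor's illustration only; the body and data are hypothetical):

    #include "tbb/task.h"
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"

    struct ScanBody {
        const int *data;
        tbb::task_group_context *ctx;
        void operator()( const tbb::blocked_range<size_t> &r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i )
                if( data[i] < 0 )
                    ctx->cancel_group_execution();   // cancels every task in the group
        }
    };

    void cancellable_scan( const int *data, size_t n ) {
        tbb::task_group_context ctx;                 // bound to the enclosing context by default
        ScanBody body = { data, &ctx };
        tbb::parallel_for( tbb::blocked_range<size_t>(0, n), body, ctx );
    }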
- - VERSIONING NOTE: - Implementation(s) of task_group_context constructor(s) cannot be made - entirely out-of-line because the run-time version must be set by the user - code. This will become critically important for binary compatibility, if - we ever have to change the size of the context object. - - Boosting the runtime version will also be necessary if new data fields are - introduced in the currently unused padding areas and these fields are updated - by inline methods. **/ - task_group_context ( kind_type relation_with_parent = bound, - uintptr_t t = default_traits ) - : my_kind(relation_with_parent) - , my_version_and_traits(2 | t) - { - init(); - } - - // Do not introduce standalone unbind method since it will break state propagation assumptions - __TBB_EXPORTED_METHOD ~task_group_context (); - - //! Forcefully reinitializes the context after the task tree it was associated with is completed. - /** Because the method assumes that all the tasks that used to be associated with - this context have already finished, calling it while the context is still - in use somewhere in the task hierarchy leads to undefined behavior. - - IMPORTANT: This method is not thread safe! - - The method does not change the context's parent if it is set. **/ - void __TBB_EXPORTED_METHOD reset (); - - //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. - /** \return false if cancellation has already been requested, true otherwise. - - Note that canceling never fails. When false is returned, it just means that - another thread (or this one) has already sent cancellation request to this - context or to one of its ancestors (if this context is bound). It is guaranteed - that when this method is concurrently called on the same not yet cancelled - context, true will be returned by one and only one invocation. **/ - bool __TBB_EXPORTED_METHOD cancel_group_execution (); - - //! Returns true if the context received cancellation request. - bool __TBB_EXPORTED_METHOD is_group_execution_cancelled () const; - - //! Records the pending exception, and cancels the task group. - /** May be called only from inside a catch-block. If the context is already - cancelled, does nothing. - The method brings the task group associated with this context exactly into - the state it would be in, if one of its tasks threw the currently pending - exception during its execution. In other words, it emulates the actions - of the scheduler's dispatch loop exception handler. **/ - void __TBB_EXPORTED_METHOD register_pending_exception (); - -#if __TBB_FP_CONTEXT - //! Captures the current FPU control settings to the context. - /** Because the method assumes that all the tasks that used to be associated with - this context have already finished, calling it while the context is still - in use somewhere in the task hierarchy leads to undefined behavior. - - IMPORTANT: This method is not thread safe! - - The method does not change the FPU control settings of the context's parent. **/ - void __TBB_EXPORTED_METHOD capture_fp_settings (); -#endif - -#if __TBB_TASK_PRIORITY - //! Changes priority of the task group - void set_priority ( priority_t ); - - //! Retrieves current priority of the current task group - priority_t priority () const; -#endif /* __TBB_TASK_PRIORITY */ - - //! Returns the context's trait - uintptr_t traits() const { return my_version_and_traits & traits_mask; } - -protected: - //! Out-of-line part of the constructor. 
-    /** Singled out to ensure backward binary compatibility of the future versions. **/
-    void __TBB_EXPORTED_METHOD init ();
-
-private:
-    friend class task;
-    friend class internal::allocate_root_with_context_proxy;
-
-    static const kind_type binding_required = bound;
-    static const kind_type binding_completed = kind_type(bound+1);
-    static const kind_type detached = kind_type(binding_completed+1);
-    static const kind_type dying = kind_type(detached+1);
-
-    //! Propagates any state change detected to *this, and as an optimisation possibly also upward along the heritage line.
-    template <typename T>
-    void propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state );
-
-    //! Registers this context with the local scheduler and binds it to its parent context
-    void bind_to ( internal::generic_scheduler *local_sched );
-
-    //! Registers this context with the local scheduler
-    void register_with ( internal::generic_scheduler *local_sched );
-
-#if __TBB_FP_CONTEXT
-    //! Copies FPU control setting from another context
-    // TODO: Consider adding #else stub in order to omit #if sections in other code
-    void copy_fp_settings( const task_group_context &src );
-#endif /* __TBB_FP_CONTEXT */
-}; // class task_group_context
-
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-//! Base class for user-defined tasks.
-/** @ingroup task_scheduling */
-class task: __TBB_TASK_BASE_ACCESS interface5::internal::task_base {
-
-    //! Set reference count
-    void __TBB_EXPORTED_METHOD internal_set_ref_count( int count );
-
-    //! Decrement reference count and return its new value.
-    internal::reference_count __TBB_EXPORTED_METHOD internal_decrement_ref_count();
-
-protected:
-    //! Default constructor.
-    task() {prefix().extra_state=1;}
-
-public:
-    //! Destructor.
-    virtual ~task() {}
-
-    //! Should be overridden by derived classes.
-    virtual task* execute() = 0;
-
-    //! Enumeration of task states that the scheduler considers.
-    enum state_type {
-        //! task is running, and will be destroyed after method execute() completes.
-        executing,
-        //! task to be rescheduled.
-        reexecute,
-        //! task is in ready pool, or is going to be put there, or was just taken off.
-        ready,
-        //! task object is freshly allocated or recycled.
-        allocated,
-        //! task object is on free list, or is going to be put there, or was just taken off.
-        freed,
-        //! task to be recycled as continuation
-        recycle
-#if __TBB_RECYCLE_TO_ENQUEUE
-        //! task to be scheduled for starvation-resistant execution
-        ,to_enqueue
-#endif
-    };
-
-    //------------------------------------------------------------------------
-    // Allocating tasks
-    //------------------------------------------------------------------------
-
-    //! Returns proxy for overloaded new that allocates a root task.
-    static internal::allocate_root_proxy allocate_root() {
-        return internal::allocate_root_proxy();
-    }
-
-#if __TBB_TASK_GROUP_CONTEXT
-    //! Returns proxy for overloaded new that allocates a root task associated with user supplied context.
-    static internal::allocate_root_with_context_proxy allocate_root( task_group_context& ctx ) {
-        return internal::allocate_root_with_context_proxy(ctx);
-    }
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-    //! Returns proxy for overloaded new that allocates a continuation task of *this.
-    /** The continuation's parent becomes the parent of *this. */
-    internal::allocate_continuation_proxy& allocate_continuation() {
-        return *reinterpret_cast<internal::allocate_continuation_proxy*>(this);
-    }
-
-    //! Returns proxy for overloaded new that allocates a child task of *this.
-    internal::allocate_child_proxy& allocate_child() {
-        return *reinterpret_cast<internal::allocate_child_proxy*>(this);
-    }
-
-    //! Define recommended static form via import from base class.
-    using task_base::allocate_additional_child_of;
-
-#if __TBB_DEPRECATED_TASK_INTERFACE
-    //! Destroy a task.
-    /** Usually, calling this method is unnecessary, because a task is
-        implicitly deleted after its execute() method runs. However,
-        sometimes a task needs to be explicitly deallocated, such as
-        when a root task is used as the parent in spawn_and_wait_for_all. */
-    void __TBB_EXPORTED_METHOD destroy( task& t );
-#else /* !__TBB_DEPRECATED_TASK_INTERFACE */
-    //! Define recommended static form via import from base class.
-    using task_base::destroy;
-#endif /* !__TBB_DEPRECATED_TASK_INTERFACE */
-
-    //------------------------------------------------------------------------
-    // Recycling of tasks
-    //------------------------------------------------------------------------
-
-    //! Change this to be a continuation of its former self.
-    /** The caller must guarantee that the task's refcount does not become zero until
-        after the method execute() returns. Typically, this is done by having
-        method execute() return a pointer to a child of the task. If the guarantee
-        cannot be made, use method recycle_as_safe_continuation instead.
-
-        Because of the hazard, this method may be deprecated in the future. */
-    void recycle_as_continuation() {
-        __TBB_ASSERT( prefix().state==executing, "execute not running?" );
-        prefix().state = allocated;
-    }
-
-    //! Recommended to use, safe variant of recycle_as_continuation
-    /** For safety, it requires additional increment of ref_count.
-        With no descendants and ref_count of 1, it has the semantics of recycle_to_reexecute. */
-    void recycle_as_safe_continuation() {
-        __TBB_ASSERT( prefix().state==executing, "execute not running?" );
-        prefix().state = recycle;
-    }
-
-    //! Change this to be a child of new_parent.
-    void recycle_as_child_of( task& new_parent ) {
-        internal::task_prefix& p = prefix();
-        __TBB_ASSERT( prefix().state==executing||prefix().state==allocated, "execute not running, or already recycled" );
-        __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when recycled as a child" );
-        __TBB_ASSERT( p.parent==NULL, "parent must be null" );
-        __TBB_ASSERT( new_parent.prefix().state<=recycle, "corrupt parent's state" );
-        __TBB_ASSERT( new_parent.prefix().state!=freed, "parent already freed" );
-        p.state = allocated;
-        p.parent = &new_parent;
-#if __TBB_TASK_GROUP_CONTEXT
-        p.context = new_parent.prefix().context;
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-    }
-
-    //! Schedule this for reexecution after current execute() returns.
-    /** Made obsolete by recycle_as_safe_continuation; may become deprecated. */
-    void recycle_to_reexecute() {
-        __TBB_ASSERT( prefix().state==executing, "execute not running, or already recycled" );
-        __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when recycled for reexecution" );
-        prefix().state = reexecute;
-    }
-
-#if __TBB_RECYCLE_TO_ENQUEUE
-    //! Schedule this to enqueue after descendant tasks complete.
-    /** Save enqueue/spawn difference, it has the semantics of recycle_as_safe_continuation.
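To make the recycling protocol concrete: a sketch of the re-execution idiom, leaning on the guarantee stated above that recycle_as_safe_continuation() with no descendants and a ref_count of 1 behaves like recycle_to_reexecute(). PollTask and should_poll_again() are illustrative, not part of TBB.

    #include "tbb/task.h"

    bool should_poll_again();   // hypothetical predicate

    class PollTask : public tbb::task {
        tbb::task* execute() /*override*/ {
            if( should_poll_again() ) {
                set_ref_count( 1 );               // the one reference held by *this
                recycle_as_safe_continuation();   // re-run once the count drops to zero
            }
            return NULL;
        }
    };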
*/ - void recycle_to_enqueue() { - __TBB_ASSERT( prefix().state==executing, "execute not running, or already recycled" ); - prefix().state = to_enqueue; - } -#endif /* __TBB_RECYCLE_TO_ENQUEUE */ - - //------------------------------------------------------------------------ - // Spawning and blocking - //------------------------------------------------------------------------ - - //! Set reference count - void set_ref_count( int count ) { -#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT - internal_set_ref_count(count); -#else - prefix().ref_count = count; -#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ - } - - //! Atomically increment reference count. - /** Has acquire semantics */ - void increment_ref_count() { - __TBB_FetchAndIncrementWacquire( &prefix().ref_count ); - } - - //! Atomically adds to reference count and returns its new value. - /** Has release-acquire semantics */ - int add_ref_count( int count ) { - internal::call_itt_notify( internal::releasing, &prefix().ref_count ); - internal::reference_count k = count+__TBB_FetchAndAddW( &prefix().ref_count, count ); - __TBB_ASSERT( k>=0, "task's reference count underflowed" ); - if( k==0 ) - internal::call_itt_notify( internal::acquired, &prefix().ref_count ); - return int(k); - } - - //! Atomically decrement reference count and returns its new value. - /** Has release semantics. */ - int decrement_ref_count() { -#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT - return int(internal_decrement_ref_count()); -#else - return int(__TBB_FetchAndDecrementWrelease( &prefix().ref_count ))-1; -#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ - } - - //! Define recommended static forms via import from base class. - using task_base::spawn; - - //! Similar to spawn followed by wait_for_all, but more efficient. - void spawn_and_wait_for_all( task& child ) { - prefix().owner->wait_for_all( *this, &child ); - } - - //! Similar to spawn followed by wait_for_all, but more efficient. - void __TBB_EXPORTED_METHOD spawn_and_wait_for_all( task_list& list ); - - //! Spawn task allocated by allocate_root, wait for it to complete, and deallocate it. - static void spawn_root_and_wait( task& root ) { - root.prefix().owner->spawn_root_and_wait( root, root.prefix().next ); - } - - //! Spawn root tasks on list and wait for all of them to finish. - /** If there are more tasks than worker threads, the tasks are spawned in - order of front to back. */ - static void spawn_root_and_wait( task_list& root_list ); - - //! Wait for reference count to become one, and set reference count to zero. - /** Works on tasks while waiting. */ - void wait_for_all() { - prefix().owner->wait_for_all( *this, NULL ); - } - - //! Enqueue task for starvation-resistant execution. -#if __TBB_TASK_PRIORITY - /** The task will be enqueued on the normal priority level disregarding the - priority of its task group. - - The rationale of such semantics is that priority of an enqueued task is - statically fixed at the moment of its enqueuing, while task group priority - is dynamic. Thus automatic priority inheritance would be generally a subject - to the race, which may result in unexpected behavior. - - Use enqueue() overload with explicit priority value and task::group_priority() - method to implement such priority inheritance when it is really necessary. **/ -#endif /* __TBB_TASK_PRIORITY */ - static void enqueue( task& t ) { - t.prefix().owner->enqueue( t, NULL ); - } - -#if __TBB_TASK_PRIORITY - //! Enqueue task for starvation-resistant execution on the specified priority level. 
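The blocking calls above combine into the classic fork-join protocol; this is essentially the Fibonacci example from the TBB tutorial, trimmed down. Note the reference count of 3: two children plus one for the wait itself.

    #include "tbb/task.h"

    class FibTask : public tbb::task {
        const long n;
        long* const sum;
    public:
        FibTask( long n_, long* sum_ ) : n(n_), sum(sum_) {}
        tbb::task* execute() /*override*/ {
            if( n < 2 ) {
                *sum = n;
            } else {
                long x, y;
                FibTask& a = *new( allocate_child() ) FibTask( n-1, &x );
                FibTask& b = *new( allocate_child() ) FibTask( n-2, &y );
                set_ref_count( 3 );           // two children + one for the wait below
                spawn( b );
                spawn_and_wait_for_all( a );  // runs a, then waits for both children
                *sum = x + y;
            }
            return NULL;
        }
    };

    long parallel_fib( long n ) {
        long sum;
        FibTask& root = *new( tbb::task::allocate_root() ) FibTask( n, &sum );
        tbb::task::spawn_root_and_wait( root );   // spawns, waits, deallocates the root
        return sum;
    }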
- static void enqueue( task& t, priority_t p ) { - __TBB_ASSERT( p == priority_low || p == priority_normal || p == priority_high, "Invalid priority level value" ); - t.prefix().owner->enqueue( t, (void*)p ); - } -#endif /* __TBB_TASK_PRIORITY */ - - //! The innermost task being executed or destroyed by the current thread at the moment. - static task& __TBB_EXPORTED_FUNC self(); - - //! task on whose behalf this task is working, or NULL if this is a root. - task* parent() const {return prefix().parent;} - - //! sets parent task pointer to specified value - void set_parent(task* p) { -#if __TBB_TASK_GROUP_CONTEXT - __TBB_ASSERT(!p || prefix().context == p->prefix().context, "The tasks must be in the same context"); -#endif - prefix().parent = p; - } - -#if __TBB_TASK_GROUP_CONTEXT - //! This method is deprecated and will be removed in the future. - /** Use method group() instead. **/ - task_group_context* context() {return prefix().context;} - - //! Pointer to the task group descriptor. - task_group_context* group () { return prefix().context; } -#endif /* __TBB_TASK_GROUP_CONTEXT */ - - //! True if task was stolen from the task pool of another thread. - bool is_stolen_task() const { - return (prefix().extra_state & 0x80)!=0; - } - - //------------------------------------------------------------------------ - // Debugging - //------------------------------------------------------------------------ - - //! Current execution state - state_type state() const {return state_type(prefix().state);} - - //! The internal reference count. - int ref_count() const { -#if TBB_USE_ASSERT - internal::reference_count ref_count_ = prefix().ref_count; - __TBB_ASSERT( ref_count_==int(ref_count_), "integer overflow error"); -#endif - return int(prefix().ref_count); - } - - //! Obsolete, and only retained for the sake of backward compatibility. Always returns true. - bool __TBB_EXPORTED_METHOD is_owned_by_current_thread() const; - - //------------------------------------------------------------------------ - // Affinity - //------------------------------------------------------------------------ - - //! An id as used for specifying affinity. - /** Guaranteed to be integral type. Value of 0 means no affinity. */ - typedef internal::affinity_id affinity_id; - - //! Set affinity for this task. - void set_affinity( affinity_id id ) {prefix().affinity = id;} - - //! Current affinity of this task - affinity_id affinity() const {return prefix().affinity;} - - //! Invoked by scheduler to notify task that it ran on unexpected thread. - /** Invoked before method execute() runs, if task is stolen, or task has - affinity but will be executed on another thread. - - The default action does nothing. */ - virtual void __TBB_EXPORTED_METHOD note_affinity( affinity_id id ); - -#if __TBB_TASK_GROUP_CONTEXT - //! Moves this task from its current group into another one. - /** Argument ctx specifies the new group. - - The primary purpose of this method is to associate unique task group context - with a task allocated for subsequent enqueuing. In contrast to spawned tasks - enqueued ones normally outlive the scope where they were created. This makes - traditional usage model where task group context are allocated locally on - the stack inapplicable. Dynamic allocation of context objects is performance - inefficient. Method change_group() allows to make task group context object - a member of the task class, and then associate it with its containing task - object in the latter's constructor. 
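A sketch of the member-context idiom that note describes; the task type is hypothetical:

    #include "tbb/task.h"

    class EnqueuedWork : public tbb::task {
        tbb::task_group_context my_ctx;   // lives inside the task, so it outlives the creating scope
    public:
        EnqueuedWork() { change_group( my_ctx ); }   // bind *this to the member context
        tbb::task* execute() /*override*/ { /* ... long-running work ... */ return NULL; }
    };

    // tbb::task::enqueue( *new( tbb::task::allocate_root() ) EnqueuedWork );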
-    **/
-    void __TBB_EXPORTED_METHOD change_group ( task_group_context& ctx );
-
-    //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups.
-    /** \return false if cancellation has already been requested, true otherwise. **/
-    bool cancel_group_execution () { return prefix().context->cancel_group_execution(); }
-
-    //! Returns true if the context has received cancellation request.
-    bool is_cancelled () const { return prefix().context->is_group_execution_cancelled(); }
-#else
-    bool is_cancelled () const { return false; }
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-#if __TBB_TASK_PRIORITY
-    //! Changes priority of the task group this task belongs to.
-    void set_group_priority ( priority_t p ) { prefix().context->set_priority(p); }
-
-    //! Retrieves current priority of the task group this task belongs to.
-    priority_t group_priority () const { return prefix().context->priority(); }
-
-#endif /* __TBB_TASK_PRIORITY */
-
-private:
-    friend class interface5::internal::task_base;
-    friend class task_list;
-    friend class internal::scheduler;
-    friend class internal::allocate_root_proxy;
-#if __TBB_TASK_GROUP_CONTEXT
-    friend class internal::allocate_root_with_context_proxy;
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-    friend class internal::allocate_continuation_proxy;
-    friend class internal::allocate_child_proxy;
-    friend class internal::allocate_additional_child_of_proxy;
-
-    //! Get reference to corresponding task_prefix.
-    /** Version tag prevents loader on Linux from using the wrong symbol in debug builds. **/
-    internal::task_prefix& prefix( internal::version_tag* = NULL ) const {
-        return reinterpret_cast<internal::task_prefix*>(const_cast<task*>(this))[-1];
-    }
-}; // class task
-
-//! task that does nothing. Useful for synchronization.
-/** @ingroup task_scheduling */
-class empty_task: public task {
-    task* execute() __TBB_override {
-        return NULL;
-    }
-};
-
-//! @cond INTERNAL
-namespace internal {
-    template<typename F>
-    class function_task : public task {
-#if __TBB_ALLOW_MUTABLE_FUNCTORS
-        F my_func;
-#else
-        const F my_func;
-#endif
-        task* execute() __TBB_override {
-            my_func();
-            return NULL;
-        }
-    public:
-        function_task( const F& f ) : my_func(f) {}
-    };
-} // namespace internal
-//! @endcond
-
-//! A list of children.
-/** Used for method task::spawn_children
-    @ingroup task_scheduling */
-class task_list: internal::no_copy {
-private:
-    task* first;
-    task** next_ptr;
-    friend class task;
-    friend class interface5::internal::task_base;
-public:
-    //! Construct empty list
-    task_list() : first(NULL), next_ptr(&first) {}
-
-    //! Destroys the list, but does not destroy the task objects.
-    ~task_list() {}
-
-    //! True if list if empty; false otherwise.
-    bool empty() const {return !first;}
-
-    //! Push task onto back of list.
-    void push_back( task& task ) {
-        task.prefix().next = NULL;
-        *next_ptr = &task;
-        next_ptr = &task.prefix().next;
-    }
-#if __TBB_TODO
-    // TODO: add this method and implement&document the local execution ordering. See more in generic_scheduler::local_spawn
-    //! Push task onto front of list (FIFO local execution, like individual spawning in the same order).
-    void push_front( task& task ) {
-        if( empty() ) {
-            push_back(task);
-        } else {
-            task.prefix().next = first;
-            first = &task;
-        }
-    }
-#endif
-    //! Pop the front task from the list.
-    task& pop_front() {
-        __TBB_ASSERT( !empty(), "attempt to pop item from empty task_list" );
-        task* result = first;
-        first = result->prefix().next;
-        if( !first ) next_ptr = &first;
-        return *result;
-    }
-
-    //! Clear the list
-    void clear() {
-        first=NULL;
-        next_ptr=&first;
-    }
-};
-
-inline void interface5::internal::task_base::spawn( task& t ) {
-    t.prefix().owner->spawn( t, t.prefix().next );
-}
-
-inline void interface5::internal::task_base::spawn( task_list& list ) {
-    if( task* t = list.first ) {
-        t->prefix().owner->spawn( *t, *list.next_ptr );
-        list.clear();
-    }
-}
-
-inline void task::spawn_root_and_wait( task_list& root_list ) {
-    if( task* t = root_list.first ) {
-        t->prefix().owner->spawn_root_and_wait( *t, *root_list.next_ptr );
-        root_list.clear();
-    }
-}
-
-} // namespace tbb
-
-inline void *operator new( size_t bytes, const tbb::internal::allocate_root_proxy& ) {
-    return &tbb::internal::allocate_root_proxy::allocate(bytes);
-}
-
-inline void operator delete( void* task, const tbb::internal::allocate_root_proxy& ) {
-    tbb::internal::allocate_root_proxy::free( *static_cast<tbb::task*>(task) );
-}
-
-#if __TBB_TASK_GROUP_CONTEXT
-inline void *operator new( size_t bytes, const tbb::internal::allocate_root_with_context_proxy& p ) {
-    return &p.allocate(bytes);
-}
-
-inline void operator delete( void* task, const tbb::internal::allocate_root_with_context_proxy& p ) {
-    p.free( *static_cast<tbb::task*>(task) );
-}
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-inline void *operator new( size_t bytes, const tbb::internal::allocate_continuation_proxy& p ) {
-    return &p.allocate(bytes);
-}
-
-inline void operator delete( void* task, const tbb::internal::allocate_continuation_proxy& p ) {
-    p.free( *static_cast<tbb::task*>(task) );
-}
-
-inline void *operator new( size_t bytes, const tbb::internal::allocate_child_proxy& p ) {
-    return &p.allocate(bytes);
-}
-
-inline void operator delete( void* task, const tbb::internal::allocate_child_proxy& p ) {
-    p.free( *static_cast<tbb::task*>(task) );
-}
-
-inline void *operator new( size_t bytes, const tbb::internal::allocate_additional_child_of_proxy& p ) {
-    return &p.allocate(bytes);
-}
-
-inline void operator delete( void* task, const tbb::internal::allocate_additional_child_of_proxy& p ) {
-    p.free( *static_cast<tbb::task*>(task) );
-}
-
-#endif /* __TBB_task_H */
diff --git a/lib/3rdParty/tbb/include/tbb/task_arena.h b/lib/3rdParty/tbb/include/tbb/task_arena.h
deleted file mode 100644
index f33135b6..00000000
--- a/lib/3rdParty/tbb/include/tbb/task_arena.h
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_task_arena_H
-#define __TBB_task_arena_H
-
-#include "task.h"
-#include "tbb_exception.h"
-#if TBB_USE_THREADING_TOOLS
-#include "atomic.h" // for as_atomic
-#endif
-
-namespace tbb {
-
-namespace this_task_arena {
-    int max_concurrency();
-} // namespace this_task_arena
-
-//! @cond INTERNAL
-namespace internal {
-    //! Internal to library. Should not be used by clients.
-    /** @ingroup task_scheduling */
-    class arena;
-    class task_scheduler_observer_v3;
-} // namespace internal
-//! @endcond
-
-namespace interface7 {
-class task_arena;
-
-//! @cond INTERNAL
-namespace internal {
-using namespace tbb::internal; //e.g. function_task from task.h
-
-class delegate_base : no_assign {
-public:
-    virtual void operator()() const = 0;
-    virtual ~delegate_base() {}
-};
-
-template<typename F>
-class delegated_function : public delegate_base {
-    F &my_func;
-    void operator()() const __TBB_override {
-        my_func();
-    }
-public:
-    delegated_function ( F& f ) : my_func(f) {}
-};
-
-class task_arena_base {
-protected:
-    //! NULL if not currently initialized.
-    internal::arena* my_arena;
-
-#if __TBB_TASK_GROUP_CONTEXT
-    //! default context of the arena
-    task_group_context *my_context;
-#endif
-
-    //! Concurrency level for deferred initialization
-    int my_max_concurrency;
-
-    //! Reserved master slots
-    unsigned my_master_slots;
-
-    //! Special settings
-    intptr_t my_version_and_traits;
-
-    enum {
-        default_flags = 0
-#if __TBB_TASK_GROUP_CONTEXT
-        | (task_group_context::default_traits & task_group_context::exact_exception) // 0 or 1 << 16
-        , exact_exception_flag = task_group_context::exact_exception // used to specify flag for context directly
-#endif
-    };
-
-    task_arena_base(int max_concurrency, unsigned reserved_for_masters)
-        : my_arena(0)
-#if __TBB_TASK_GROUP_CONTEXT
-        , my_context(0)
-#endif
-        , my_max_concurrency(max_concurrency)
-        , my_master_slots(reserved_for_masters)
-        , my_version_and_traits(default_flags)
-    {}
-
-    void __TBB_EXPORTED_METHOD internal_initialize();
-    void __TBB_EXPORTED_METHOD internal_terminate();
-    void __TBB_EXPORTED_METHOD internal_attach();
-    void __TBB_EXPORTED_METHOD internal_enqueue( task&, intptr_t ) const;
-    void __TBB_EXPORTED_METHOD internal_execute( delegate_base& ) const;
-    void __TBB_EXPORTED_METHOD internal_wait() const;
-    static int __TBB_EXPORTED_FUNC internal_current_slot();
-    static int __TBB_EXPORTED_FUNC internal_max_concurrency( const task_arena * );
-public:
-    //! Typedef for number of threads that is automatic.
-    static const int automatic = -1;
-    static const int not_initialized = -2;
-
-};
-
-#if __TBB_TASK_ISOLATION
-void __TBB_EXPORTED_FUNC isolate_within_arena( delegate_base& d, intptr_t reserved = 0 );
-#endif /* __TBB_TASK_ISOLATION */
-} // namespace internal
-//! @endcond
-
-/** 1-to-1 proxy representation class of scheduler's arena
- * Constructors set up settings only, real construction is deferred till the first method invocation
- * Destructor only removes one of the references to the inner arena representation.
- * Final destruction happens when all the references (and the work) are gone.
- */
-class task_arena : public internal::task_arena_base {
-    friend class tbb::internal::task_scheduler_observer_v3;
-    friend int tbb::this_task_arena::max_concurrency();
-    bool my_initialized;
-    void mark_initialized() {
-        __TBB_ASSERT( my_arena, "task_arena initialization is incomplete" );
-#if __TBB_TASK_GROUP_CONTEXT
-        __TBB_ASSERT( my_context, "task_arena initialization is incomplete" );
-#endif
-#if TBB_USE_THREADING_TOOLS
-        // Actual synchronization happens in internal_initialize & internal_attach.
-        // The race on setting my_initialized is benign, but should be hidden from Intel(R) Inspector
-        internal::as_atomic(my_initialized).fetch_and_store<release>(true);
-#else
-        my_initialized = true;
-#endif
-    }
-
-public:
-    //! Creates task_arena with certain concurrency limits
-    /** Sets up settings only, real construction is deferred till the first method invocation
-     *  @arg max_concurrency specifies total number of slots in arena where threads work
-     *  @arg reserved_for_masters specifies number of slots to be used by master threads only.
-     *       Value of 1 is default and reflects behavior of implicit arenas.
-     **/
-    task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1)
-        : task_arena_base(max_concurrency_, reserved_for_masters)
-        , my_initialized(false)
-    {}
-
-    //! Copies settings from another task_arena
-    task_arena(const task_arena &s) // copy settings but not the reference or instance
-        : task_arena_base(s.my_max_concurrency, s.my_master_slots)
-        , my_initialized(false)
-    {}
-
-    //! Tag class used to indicate the "attaching" constructor
-    struct attach {};
-
-    //! Creates an instance of task_arena attached to the current arena of the thread
-    explicit task_arena( attach )
-        : task_arena_base(automatic, 1) // use default settings if attach fails
-        , my_initialized(false)
-    {
-        internal_attach();
-        if( my_arena ) my_initialized = true;
-    }
-
-    //! Forces allocation of the resources for the task_arena as specified in constructor arguments
-    inline void initialize() {
-        if( !my_initialized ) {
-            internal_initialize();
-            mark_initialized();
-        }
-    }
-
-    //! Overrides concurrency level and forces initialization of internal representation
-    inline void initialize(int max_concurrency_, unsigned reserved_for_masters = 1) {
-        // TODO: decide if this call must be thread-safe
-        __TBB_ASSERT( !my_arena, "Impossible to modify settings of an already initialized task_arena");
-        if( !my_initialized ) {
-            my_max_concurrency = max_concurrency_;
-            my_master_slots = reserved_for_masters;
-            initialize();
-        }
-    }
-
-    //! Attaches this instance to the current arena of the thread
-    inline void initialize(attach) {
-        // TODO: decide if this call must be thread-safe
-        __TBB_ASSERT( !my_arena, "Impossible to modify settings of an already initialized task_arena");
-        if( !my_initialized ) {
-            internal_attach();
-            if( !my_arena ) internal_initialize();
-            mark_initialized();
-        }
-    }
-
-    //! Removes the reference to the internal arena representation.
-    //! Not thread safe wrt concurrent invocations of other methods.
-    inline void terminate() {
-        if( my_initialized ) {
-            internal_terminate();
-            my_initialized = false;
-        }
-    }
-
-    //! Removes the reference to the internal arena representation, and destroys the external object.
-    //! Not thread safe wrt concurrent invocations of other methods.
-    ~task_arena() {
-        terminate();
-    }
-
-    //! Returns true if the arena is active (initialized); false otherwise.
-    //! The name was chosen to match a task_scheduler_init method with the same semantics.
-    bool is_active() const { return my_initialized; }
-
-    //! Enqueues a task into the arena to process a functor, and immediately returns.
-    //! Does not require the calling thread to join the arena
-    template<typename F>
-    void enqueue( const F& f ) {
-        initialize();
-#if __TBB_TASK_GROUP_CONTEXT
-        internal_enqueue( *new( task::allocate_root(*my_context) ) internal::function_task<F>(f), 0 );
-#else
-        internal_enqueue( *new( task::allocate_root() ) internal::function_task<F>(f), 0 );
-#endif
-    }
-
-#if __TBB_TASK_PRIORITY
-    //! Enqueues a task with priority p into the arena to process a functor f, and immediately returns.
-    //! Does not require the calling thread to join the arena
-    template<typename F>
-    void enqueue( const F& f, priority_t p ) {
-        __TBB_ASSERT( p == priority_low || p == priority_normal || p == priority_high, "Invalid priority level value" );
-        initialize();
-#if __TBB_TASK_GROUP_CONTEXT
-        internal_enqueue( *new( task::allocate_root(*my_context) ) internal::function_task<F>(f), (intptr_t)p );
-#else
-        internal_enqueue( *new( task::allocate_root() ) internal::function_task<F>(f), (intptr_t)p );
-#endif
-    }
-#endif// __TBB_TASK_PRIORITY
-
-    //! Joins the arena and executes a functor, then returns
-    //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion
-    //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread
-    template<typename F>
-    void execute(F& f) {
-        initialize();
-        internal::delegated_function<F> d(f);
-        internal_execute( d );
-    }
-
-    //! Joins the arena and executes a functor, then returns
-    //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion
-    //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread
-    template<typename F>
-    void execute(const F& f) {
-        initialize();
-        internal::delegated_function<const F> d(f);
-        internal_execute( d );
-    }
-
-#if __TBB_EXTRA_DEBUG
-    //! Wait for all work in the arena to be completed
-    //! Even submitted by other application threads
-    //! Joins arena if/when possible (in the same way as execute())
-    void debug_wait_until_empty() {
-        initialize();
-        internal_wait();
-    }
-#endif //__TBB_EXTRA_DEBUG
-
-    //! Returns the index, aka slot number, of the calling thread in its current arena
-    //! This method is deprecated and replaced with this_task_arena::current_thread_index()
-    inline static int current_thread_index() {
-        return internal_current_slot();
-    }
-
-    //! Returns the maximal number of threads that can work inside the arena
-    inline int max_concurrency() const {
-        // Handle special cases inside the library
-        return (my_max_concurrency>1) ? my_max_concurrency : internal_max_concurrency(this);
-    }
-};
-
-#if __TBB_TASK_ISOLATION
-namespace this_task_arena {
-    template<typename F>
-    void isolate( const F& f ) {
-        internal::delegated_function<const F> d(f);
-        internal::isolate_within_arena( d );
-    }
-}
-#endif /* __TBB_TASK_ISOLATION */
-
-} // namespace interfaceX
-
-using interface7::task_arena;
-#if __TBB_TASK_ISOLATION
-namespace this_task_arena {
-    using namespace interface7::this_task_arena;
-}
-#endif /* __TBB_TASK_ISOLATION */
-
-namespace this_task_arena {
-    //! Returns the index, aka slot number, of the calling thread in its current arena
-    inline int current_thread_index() {
-        int idx = tbb::task_arena::current_thread_index();
-        return idx == -1 ? tbb::task_arena::not_initialized : idx;
-    }
-
-    //! Returns the maximal number of threads that can work inside the arena
-    inline int max_concurrency() {
-        return tbb::task_arena::internal_max_concurrency(NULL);
-    }
-
-} // namespace this_task_arena
-
-} // namespace tbb
-
-#endif /* __TBB_task_arena_H */
diff --git a/lib/3rdParty/tbb/include/tbb/task_group.h b/lib/3rdParty/tbb/include/tbb/task_group.h
deleted file mode 100644
index bf6922b9..00000000
--- a/lib/3rdParty/tbb/include/tbb/task_group.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
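Before the next deleted header, a usage sketch for the task_arena interface removed above. Construction is cheap and deferred; execute() joins the arena, or enqueues the functor and waits when joining is impossible. The lambdas assume a C++11 compiler.

    #include "tbb/task_arena.h"
    #include "tbb/parallel_for.h"

    tbb::task_arena arena( 4 );   // at most 4 threads; 1 slot reserved for a master by default

    void run_limited() {
        arena.execute( []{
            tbb::parallel_for( 0, 1000, []( int i ) { /* process item i */ } );
        } );
    }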
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_task_group_H
-#define __TBB_task_group_H
-
-#include "task.h"
-#include "tbb_exception.h"
-
-#if __TBB_TASK_GROUP_CONTEXT
-
-namespace tbb {
-
-namespace internal {
-    template<typename F> class task_handle_task;
-}
-
-class task_group;
-class structured_task_group;
-
-template<typename F>
-class task_handle : internal::no_assign {
-    template<typename _F> friend class internal::task_handle_task;
-    friend class task_group;
-    friend class structured_task_group;
-
-    static const intptr_t scheduled = 0x1;
-
-    F my_func;
-    intptr_t my_state;
-
-    void mark_scheduled () {
-        // The check here is intentionally lax to avoid the impact of interlocked operation
-        if ( my_state & scheduled )
-            internal::throw_exception( internal::eid_invalid_multiple_scheduling );
-        my_state |= scheduled;
-    }
-public:
-    task_handle( const F& f ) : my_func(f), my_state(0) {}
-
-    void operator() () const { my_func(); }
-};
-
-enum task_group_status {
-    not_complete,
-    complete,
-    canceled
-};
-
-namespace internal {
-
-template<typename F>
-class task_handle_task : public task {
-    task_handle<F>& my_handle;
-    task* execute() __TBB_override {
-        my_handle();
-        return NULL;
-    }
-public:
-    task_handle_task( task_handle<F>& h ) : my_handle(h) { h.mark_scheduled(); }
-};
-
-class task_group_base : internal::no_copy {
-protected:
-    empty_task* my_root;
-    task_group_context my_context;
-
-    task& owner () { return *my_root; }
-
-    template<typename F>
-    task_group_status internal_run_and_wait( F& f ) {
-        __TBB_TRY {
-            if ( !my_context.is_group_execution_cancelled() )
-                f();
-        } __TBB_CATCH( ... ) {
-            my_context.register_pending_exception();
-        }
-        return wait();
-    }
-
-    template<typename F, typename Task>
-    void internal_run( F& f ) {
-        owner().spawn( *new( owner().allocate_additional_child_of(*my_root) ) Task(f) );
-    }
-
-public:
-    task_group_base( uintptr_t traits = 0 )
-        : my_context(task_group_context::bound, task_group_context::default_traits | traits)
-    {
-        my_root = new( task::allocate_root(my_context) ) empty_task;
-        my_root->set_ref_count(1);
-    }
-
-    ~task_group_base() __TBB_NOEXCEPT(false) {
-        if( my_root->ref_count() > 1 ) {
-            bool stack_unwinding_in_progress = std::uncaught_exception();
-            // Always attempt to do proper cleanup to avoid inevitable memory corruption
-            // in case of missing wait (for the sake of better testability & debuggability)
-            if ( !is_canceling() )
-                cancel();
-            __TBB_TRY {
-                my_root->wait_for_all();
-            } __TBB_CATCH (...) {
-                task::destroy(*my_root);
-                __TBB_RETHROW();
-            }
-            task::destroy(*my_root);
-            if ( !stack_unwinding_in_progress )
-                internal::throw_exception( internal::eid_missing_wait );
-        }
-        else {
-            task::destroy(*my_root);
-        }
-    }
-
-    template<typename F>
-    void run( task_handle<F>& h ) {
-        internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
-    }
-
-    task_group_status wait() {
-        __TBB_TRY {
-            my_root->wait_for_all();
-        } __TBB_CATCH( ... ) {
-            my_context.reset();
-            __TBB_RETHROW();
-        }
-        if ( my_context.is_group_execution_cancelled() ) {
-            // TODO: the reset method is not thread-safe. Ensure the correct behavior.
-            my_context.reset();
-            return canceled;
-        }
-        return complete;
-    }
-
-    bool is_canceling() {
-        return my_context.is_group_execution_cancelled();
-    }
-
-    void cancel() {
-        my_context.cancel_group_execution();
-    }
-}; // class task_group_base
-
-} // namespace internal
-
-class task_group : public internal::task_group_base {
-public:
-    task_group () : task_group_base( task_group_context::concurrent_wait ) {}
-
-#if __SUNPRO_CC
-    template<typename F>
-    void run( task_handle<F>& h ) {
-        internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
-    }
-#else
-    using task_group_base::run;
-#endif
-
-    template<typename F>
-    void run( const F& f ) {
-        internal_run< const F, internal::function_task<const F> >( f );
-    }
-
-    template<typename F>
-    task_group_status run_and_wait( const F& f ) {
-        return internal_run_and_wait( f );
-    }
-
-    template<typename F>
-    task_group_status run_and_wait( task_handle<F>& h ) {
-        h.mark_scheduled();
-        return internal_run_and_wait< task_handle<F> >( h );
-    }
-}; // class task_group
-
-class structured_task_group : public internal::task_group_base {
-public:
-    template<typename F>
-    task_group_status run_and_wait ( task_handle<F>& h ) {
-        h.mark_scheduled();
-        return internal_run_and_wait< task_handle<F> >( h );
-    }
-
-    task_group_status wait() {
-        task_group_status res = task_group_base::wait();
-        my_root->set_ref_count(1);
-        return res;
-    }
-}; // class structured_task_group
-
-inline
-bool is_current_task_group_canceling() {
-    return task::self().is_cancelled();
-}
-
-template<class F>
-task_handle<F> make_task( const F& f ) {
-    return task_handle<F>( f );
-}
-
-} // namespace tbb
-
-#endif /* __TBB_TASK_GROUP_CONTEXT */
-
-#endif /* __TBB_task_group_H */
diff --git a/lib/3rdParty/tbb/include/tbb/task_scheduler_init.h b/lib/3rdParty/tbb/include/tbb/task_scheduler_init.h
deleted file mode 100644
index 928e7a4e..00000000
--- a/lib/3rdParty/tbb/include/tbb/task_scheduler_init.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_task_scheduler_init_H
-#define __TBB_task_scheduler_init_H
-
-#include "tbb_stddef.h"
-#include "limits.h"
-#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
-#include <new> // nothrow_t
-#endif
-
-namespace tbb {
-
-typedef std::size_t stack_size_type;
-
-//! @cond INTERNAL
-namespace internal {
-    //! Internal to library. Should not be used by clients.
-    /** @ingroup task_scheduling */
-    class scheduler;
-} // namespace internal
-//! @endcond
-
-//! Class delimiting the scope of task scheduler activity.
-/** A thread can construct a task_scheduler_init object and keep it alive
-    while it uses TBB's tasking subsystem (including parallel algorithms).
-
-    This class allows to customize properties of the TBB task pool to some extent.
-    For example it can limit concurrency level of parallel work initiated by the
-    given thread. It also can be used to specify stack size of the TBB worker threads,
-    though this setting is not effective if the thread pool has already been created.
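For reference, the high-level interface of the just-deleted task_group.h is typically used like the sketch below; Node and traverse() are hypothetical, and the lambdas assume a C++11 compiler.

    #include "tbb/task_group.h"

    struct Node;                // hypothetical user type
    int traverse( Node* n );    // hypothetical user function

    int fork_join( Node* left, Node* right ) {
        int x = 0, y = 0;
        tbb::task_group g;
        g.run( [&]{ x = traverse( left ); } );   // may run concurrently
        y = traverse( right );                   // keep this thread busy too
        if( g.wait() == tbb::canceled )          // rethrows a pending exception, if any
            return -1;
        return x + y;
    }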
- - If a parallel construct is used without task_scheduler_init object previously - created, the scheduler will be initialized automatically with default settings, - and will persist until this thread exits. Default concurrency level is defined - as described in task_scheduler_init::initialize(). - @ingroup task_scheduling */ -class task_scheduler_init: internal::no_copy { - enum ExceptionPropagationMode { - propagation_mode_exact = 1u, - propagation_mode_captured = 2u, - propagation_mode_mask = propagation_mode_exact | propagation_mode_captured - }; - - /** NULL if not currently initialized. */ - internal::scheduler* my_scheduler; - - bool internal_terminate( bool blocking ); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - bool __TBB_EXPORTED_METHOD internal_blocking_terminate( bool throwing ); -#endif -public: - - //! Typedef for number of threads that is automatic. - static const int automatic = -1; - - //! Argument to initialize() or constructor that causes initialization to be deferred. - static const int deferred = -2; - - //! Ensure that scheduler exists for this thread - /** A value of -1 lets TBB decide on the number of threads, which is usually - maximal hardware concurrency for this process, that is the number of logical - CPUs on the machine (possibly limited by the processor affinity mask of this - process (Windows) or of this thread (Linux, FreeBSD). It is preferable option - for production code because it helps to avoid nasty surprises when several - TBB based components run side-by-side or in a nested fashion inside the same - process. - - The number_of_threads is ignored if any other task_scheduler_inits - currently exist. A thread may construct multiple task_scheduler_inits. - Doing so does no harm because the underlying scheduler is reference counted. */ - void __TBB_EXPORTED_METHOD initialize( int number_of_threads=automatic ); - - //! The overloaded method with stack size parameter - /** Overloading is necessary to preserve ABI compatibility */ - void __TBB_EXPORTED_METHOD initialize( int number_of_threads, stack_size_type thread_stack_size ); - - //! Inverse of method initialize. - void __TBB_EXPORTED_METHOD terminate(); - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -#if TBB_USE_EXCEPTIONS - //! terminate() that waits for worker threads termination. Throws exception on error. - void blocking_terminate() { - internal_blocking_terminate( /*throwing=*/true ); - } -#endif - //! terminate() that waits for worker threads termination. Returns false on error. - bool blocking_terminate(const std::nothrow_t&) __TBB_NOEXCEPT(true) { - return internal_blocking_terminate( /*throwing=*/false ); - } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - - //! Shorthand for default constructor followed by call to initialize(number_of_threads). - task_scheduler_init( int number_of_threads=automatic, stack_size_type thread_stack_size=0 ) : my_scheduler(NULL) - { - // Two lowest order bits of the stack size argument may be taken to communicate - // default exception propagation mode of the client to be used when the - // client manually creates tasks in the master thread and does not use - // explicit task group context object. This is necessary because newer - // TBB binaries with exact propagation enabled by default may be used - // by older clients that expect tbb::captured_exception wrapper. - // All zeros mean old client - no preference. 
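(The assertion just below checks exactly this encoding of the two low-order stack-size bits.) As a usage sketch, the whole class usually appears as a single RAII object near the top of main():

    #include "tbb/task_scheduler_init.h"

    int main() {
        // Limit parallelism initiated from this thread to 4 threads and request
        // 8 MB worker stacks; the destructor calls terminate() automatically.
        tbb::task_scheduler_init init( 4, 8*1024*1024 );
        /* ... parallel algorithms ... */
        return 0;
    }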
-        __TBB_ASSERT( !(thread_stack_size & propagation_mode_mask), "Requested stack size is not aligned" );
-#if TBB_USE_EXCEPTIONS
-        thread_stack_size |= TBB_USE_CAPTURED_EXCEPTION ? propagation_mode_captured : propagation_mode_exact;
-#endif /* TBB_USE_EXCEPTIONS */
-        initialize( number_of_threads, thread_stack_size );
-    }
-
-    //! Destroy scheduler for this thread if thread has no other live task_scheduler_inits.
-    ~task_scheduler_init() {
-        if( my_scheduler )
-            terminate();
-        internal::poison_pointer( my_scheduler );
-    }
-    //! Returns the number of threads TBB scheduler would create if initialized by default.
-    /** Result returned by this method does not depend on whether the scheduler
-        has already been initialized.
-
-        Because tbb 2.0 does not support blocking tasks yet, you may use this method
-        to boost the number of threads in the tbb's internal pool, if your tasks are
-        doing I/O operations. The optimal number of additional threads depends on how
-        much time your tasks spend in the blocked state.
-
-        Before TBB 3.0 U4 this method returned the number of logical CPU in the
-        system. Currently on Windows, Linux and FreeBSD it returns the number of
-        logical CPUs available to the current process in accordance with its affinity
-        mask.
-
-        NOTE: The return value of this method never changes after its first invocation.
-        This means that changes in the process affinity mask that took place after
-        this method was first invoked will not affect the number of worker threads
-        in the TBB worker threads pool. */
-    static int __TBB_EXPORTED_FUNC default_num_threads ();
-
-    //! Returns true if scheduler is active (initialized); false otherwise
-    bool is_active() const { return my_scheduler != NULL; }
-};
-
-} // namespace tbb
-
-#endif /* __TBB_task_scheduler_init_H */
diff --git a/lib/3rdParty/tbb/include/tbb/task_scheduler_observer.h b/lib/3rdParty/tbb/include/tbb/task_scheduler_observer.h
deleted file mode 100644
index 5586ad4f..00000000
--- a/lib/3rdParty/tbb/include/tbb/task_scheduler_observer.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_task_scheduler_observer_H
-#define __TBB_task_scheduler_observer_H
-
-#include "atomic.h"
-#if __TBB_ARENA_OBSERVER || __TBB_SLEEP_PERMISSION
-#include "task_arena.h"
-#endif
-
-#if __TBB_SCHEDULER_OBSERVER
-
-namespace tbb {
-namespace interface6 {
-class task_scheduler_observer;
-}
-namespace internal {
-
-class observer_proxy;
-class observer_list;
-
-class task_scheduler_observer_v3 {
-    friend class observer_proxy;
-    friend class observer_list;
-    friend class interface6::task_scheduler_observer;
-
-    //! Pointer to the proxy holding this observer.
-    /** Observers are proxied by the scheduler to maintain persistent lists of them. **/
-    observer_proxy* my_proxy;
-
-    //! Counter preventing the observer from being destroyed while in use by the scheduler.
-    /** Valid only when observation is on. **/
-    atomic<intptr_t> my_busy_count;
-
-public:
-    //!
Enable or disable observation - /** For local observers the method can be used only when the current thread - has the task scheduler initialized or is attached to an arena. - - Repeated calls with the same state are no-ops. **/ - void __TBB_EXPORTED_METHOD observe( bool state=true ); - - //! Returns true if observation is enabled, false otherwise. - bool is_observing() const {return my_proxy!=NULL;} - - //! Construct observer with observation disabled. - task_scheduler_observer_v3() : my_proxy(NULL) { my_busy_count.store(0); } - - //! Entry notification - /** Invoked from inside observe(true) call and whenever a worker enters the arena - this observer is associated with. If a thread is already in the arena when - the observer is activated, the entry notification is called before it - executes the first stolen task. - - Obsolete semantics. For global observers it is called by a thread before - the first steal since observation became enabled. **/ - virtual void on_scheduler_entry( bool /*is_worker*/ ) {} - - //! Exit notification - /** Invoked from inside observe(false) call and whenever a worker leaves the - arena this observer is associated with. - - Obsolete semantics. For global observers it is called by a thread before - the first steal since observation became enabled. **/ - virtual void on_scheduler_exit( bool /*is_worker*/ ) {} - - //! Destructor automatically switches observation off if it is enabled. - virtual ~task_scheduler_observer_v3() { if(my_proxy) observe(false);} -}; - -} // namespace internal - -#if __TBB_ARENA_OBSERVER || __TBB_SLEEP_PERMISSION -namespace interface6 { -class task_scheduler_observer : public internal::task_scheduler_observer_v3 { - friend class internal::task_scheduler_observer_v3; - friend class internal::observer_proxy; - friend class internal::observer_list; - - /** Negative numbers with the largest absolute value to minimize probability - of coincidence in case of a bug in busy count usage. **/ - // TODO: take more high bits for version number - static const intptr_t v6_trait = (intptr_t)((~(uintptr_t)0 >> 1) + 1); - - //! contains task_arena pointer or tag indicating local or global semantics of the observer - intptr_t my_context_tag; - enum { global_tag = 0, implicit_tag = 1 }; - -public: - //! Construct local or global observer in inactive state (observation disabled). - /** For a local observer entry/exit notifications are invoked whenever a worker - thread joins/leaves the arena of the observer's owner thread. If a thread is - already in the arena when the observer is activated, the entry notification is - called before it executes the first stolen task. **/ - /** TODO: Obsolete. - Global observer semantics is obsolete as it violates master thread isolation - guarantees and is not composable. Thus the current default behavior of the - constructor is obsolete too and will be changed in one of the future versions - of the library. **/ - explicit task_scheduler_observer( bool local = false ) { -#if __TBB_ARENA_OBSERVER - my_context_tag = local? implicit_tag : global_tag; -#else - __TBB_ASSERT_EX( !local, NULL ); - my_context_tag = global_tag; -#endif - } - -#if __TBB_ARENA_OBSERVER - //! Construct local observer for a given arena in inactive state (observation disabled). - /** entry/exit notifications are invoked whenever a thread joins/leaves arena. - If a thread is already in the arena when the observer is activated, the entry notification - is called before it executes the first stolen task. 
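A sketch of how these hooks are typically overridden; the logging body is illustrative:

    #include "tbb/task_scheduler_observer.h"

    class EntryLogger : public tbb::task_scheduler_observer {
    public:
        EntryLogger() : tbb::task_scheduler_observer( /*local=*/true ) {
            observe( true );    // start receiving callbacks
        }
        void on_scheduler_entry( bool is_worker ) /*override*/ {
            // runs in the entering thread, e.g. to pin it or tag thread-local state
        }
        void on_scheduler_exit( bool is_worker ) /*override*/ {}
        ~EntryLogger() { observe( false ); }   // disable before destruction completes
    };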
**/ - explicit task_scheduler_observer( task_arena & a) { - my_context_tag = (intptr_t)&a; - } -#endif /* __TBB_ARENA_OBSERVER */ - - /** Destructor protects instance of the observer from concurrent notification. - It is recommended to disable observation before destructor of a derived class starts, - otherwise it can lead to concurrent notification callback on partly destroyed object **/ - virtual ~task_scheduler_observer() { if(my_proxy) observe(false); } - - //! Enable or disable observation - /** Warning: concurrent invocations of this method are not safe. - Repeated calls with the same state are no-ops. **/ - void observe( bool state=true ) { - if( state && !my_proxy ) { - __TBB_ASSERT( !my_busy_count, "Inconsistent state of task_scheduler_observer instance"); - my_busy_count.store(v6_trait); - } - internal::task_scheduler_observer_v3::observe(state); - } - -#if __TBB_SLEEP_PERMISSION - //! Return commands for may_sleep() - enum { keep_awake = false, allow_sleep = true }; - - //! The callback can be invoked by a worker thread before it goes to sleep. - /** If it returns false ('keep_awake'), the thread will keep spinning and looking for work. - It will not be called for master threads. **/ - virtual bool may_sleep() { return allow_sleep; } -#endif /*__TBB_SLEEP_PERMISSION*/ -}; - -} //namespace interface6 -using interface6::task_scheduler_observer; -#else /*__TBB_ARENA_OBSERVER || __TBB_SLEEP_PERMISSION*/ -typedef tbb::internal::task_scheduler_observer_v3 task_scheduler_observer; -#endif /*__TBB_ARENA_OBSERVER || __TBB_SLEEP_PERMISSION*/ - -} // namespace tbb - -#endif /* __TBB_SCHEDULER_OBSERVER */ - -#endif /* __TBB_task_scheduler_observer_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbb.h b/lib/3rdParty/tbb/include/tbb/tbb.h deleted file mode 100644 index 5e385ea2..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbb.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_tbb_H -#define __TBB_tbb_H - -/** - This header bulk-includes declarations or definitions of all the functionality - provided by TBB (save for malloc dependent headers). - - If you use only a few TBB constructs, consider including specific headers only. - Any header listed below can be included independently of others. 
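In practice that means a quick start can pull in everything at once, at some cost in compile time; a minimal sketch:

    #include "tbb/tbb.h"   // bulk include of the headers listed below

    int main() {
        tbb::parallel_for( 0, 100, []( int i ) { /* process item i */ } );
        return 0;
    }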
-**/
-
-#if TBB_PREVIEW_AGGREGATOR
-#include "aggregator.h"
-#endif
-#include "aligned_space.h"
-#include "atomic.h"
-#include "blocked_range.h"
-#include "blocked_range2d.h"
-#include "blocked_range3d.h"
-#include "cache_aligned_allocator.h"
-#include "combinable.h"
-#include "concurrent_hash_map.h"
-#if TBB_PREVIEW_CONCURRENT_LRU_CACHE
-#include "concurrent_lru_cache.h"
-#endif
-#include "concurrent_priority_queue.h"
-#include "concurrent_queue.h"
-#include "concurrent_unordered_map.h"
-#include "concurrent_unordered_set.h"
-#include "concurrent_vector.h"
-#include "critical_section.h"
-#include "enumerable_thread_specific.h"
-#include "flow_graph.h"
-#if TBB_PREVIEW_GLOBAL_CONTROL
-#include "global_control.h"
-#endif
-#include "mutex.h"
-#include "null_mutex.h"
-#include "null_rw_mutex.h"
-#include "parallel_do.h"
-#include "parallel_for.h"
-#include "parallel_for_each.h"
-#include "parallel_invoke.h"
-#include "parallel_reduce.h"
-#include "parallel_scan.h"
-#include "parallel_sort.h"
-#include "partitioner.h"
-#include "pipeline.h"
-#include "queuing_mutex.h"
-#include "queuing_rw_mutex.h"
-#include "reader_writer_lock.h"
-#include "recursive_mutex.h"
-#include "spin_mutex.h"
-#include "spin_rw_mutex.h"
-#include "task.h"
-#include "task_arena.h"
-#include "task_group.h"
-#include "task_scheduler_init.h"
-#include "task_scheduler_observer.h"
-#include "tbb_allocator.h"
-#include "tbb_exception.h"
-#include "tbb_thread.h"
-#include "tick_count.h"
-
-#endif /* __TBB_tbb_H */
diff --git a/lib/3rdParty/tbb/include/tbb/tbb_allocator.h b/lib/3rdParty/tbb/include/tbb/tbb_allocator.h
deleted file mode 100644
index 6346d866..00000000
--- a/lib/3rdParty/tbb/include/tbb/tbb_allocator.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
-    Copyright (c) 2005-2017 Intel Corporation
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_tbb_allocator_H
-#define __TBB_tbb_allocator_H
-
-#include "tbb_stddef.h"
-#include <cstring>
-#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
-    #include <utility> // std::forward
-#endif
-
-#if !TBB_USE_EXCEPTIONS && _MSC_VER
-    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
-    #pragma warning (push)
-    #pragma warning (disable: 4530)
-#endif
-
-#include <new>
-
-#if !TBB_USE_EXCEPTIONS && _MSC_VER
-    #pragma warning (pop)
-#endif
-
-namespace tbb {
-
-//! @cond INTERNAL
-namespace internal {
-
-    //! Deallocates memory using FreeHandler
-    /** The function uses scalable_free if scalable allocator is available and free if not*/
-    void __TBB_EXPORTED_FUNC deallocate_via_handler_v3( void *p );
-
-    //! Allocates memory using MallocHandler
-    /** The function uses scalable_malloc if scalable allocator is available and malloc if not*/
-    void* __TBB_EXPORTED_FUNC allocate_via_handler_v3( size_t n );
-
-    //! Returns true if standard malloc/free are used to work with memory.
-    bool __TBB_EXPORTED_FUNC is_malloc_used_v3();
-}
-//! @endcond
-
-#if _MSC_VER && !defined(__INTEL_COMPILER)
-    // Workaround for erroneous "unreferenced parameter" warning in method destroy.
-    #pragma warning (push)
-    #pragma warning (disable: 4100)
-#endif
-
-//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
-/** The class selects the best memory allocation mechanism available
-    from scalable_malloc and standard malloc.
-    The members are ordered the same way they are in section 20.4.1
-    of the ISO C++ standard.
-    @ingroup memory_allocation */
-template<typename T>
-class tbb_allocator {
-public:
-    typedef typename internal::allocator_type<T>::value_type value_type;
-    typedef value_type* pointer;
-    typedef const value_type* const_pointer;
-    typedef value_type& reference;
-    typedef const value_type& const_reference;
-    typedef size_t size_type;
-    typedef ptrdiff_t difference_type;
-    template<typename U> struct rebind {
-        typedef tbb_allocator<U> other;
-    };
-
-    //! Specifies current allocator
-    enum malloc_type {
-        scalable,
-        standard
-    };
-
-    tbb_allocator() throw() {}
-    tbb_allocator( const tbb_allocator& ) throw() {}
-    template<typename U> tbb_allocator(const tbb_allocator<U>&) throw() {}
-
-    pointer address(reference x) const {return &x;}
-    const_pointer address(const_reference x) const {return &x;}
-
-    //! Allocate space for n objects.
-    pointer allocate( size_type n, const void* /*hint*/ = 0) {
-        return pointer(internal::allocate_via_handler_v3( n * sizeof(value_type) ));
-    }
-
-    //! Free previously allocated block of memory.
-    void deallocate( pointer p, size_type ) {
-        internal::deallocate_via_handler_v3(p);
-    }
-
-    //! Largest value for which method allocate might succeed.
-    size_type max_size() const throw() {
-        size_type max = static_cast<size_type>(-1) / sizeof (value_type);
-        return (max > 0 ? max : 1);
-    }
-
-    //! Copy-construct value at location pointed to by p.
-#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
-    template<typename U, typename... Args>
-    void construct(U *p, Args&&... args)
-        { ::new((void *)p) U(std::forward<Args>(args)...); }
-#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
-#if __TBB_CPP11_RVALUE_REF_PRESENT
-    void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));}
-#endif
-    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
-#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
-
-    //! Destroy value at location pointed to by p.
-    void destroy( pointer p ) {p->~value_type();}
-
-    //! Returns current allocator
-    static malloc_type allocator_type() {
-        return internal::is_malloc_used_v3() ? standard : scalable;
-    }
-};
-
-#if _MSC_VER && !defined(__INTEL_COMPILER)
-    #pragma warning (pop)
-#endif // warning 4100 is back
-
-//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
-/** @ingroup memory_allocation */
-template<>
-class tbb_allocator<void> {
-public:
-    typedef void* pointer;
-    typedef const void* const_pointer;
-    typedef void value_type;
-    template<typename U> struct rebind {
-        typedef tbb_allocator<U> other;
-    };
-};
-
-template<typename T, typename U>
-inline bool operator==( const tbb_allocator<T>&, const tbb_allocator<U>& ) {return true;}
-
-template<typename T, typename U>
-inline bool operator!=( const tbb_allocator<T>&, const tbb_allocator<U>& ) {return false;}
-
-//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
-/** The class is an adapter over an actual allocator that fills the allocation
-    using memset function with template argument C as the value.
-    The members are ordered the same way they are in section 20.4.1
-    of the ISO C++ standard.
- @ingroup memory_allocation */ -template class Allocator = tbb_allocator> -class zero_allocator : public Allocator -{ -public: - typedef Allocator base_allocator_type; - typedef typename base_allocator_type::value_type value_type; - typedef typename base_allocator_type::pointer pointer; - typedef typename base_allocator_type::const_pointer const_pointer; - typedef typename base_allocator_type::reference reference; - typedef typename base_allocator_type::const_reference const_reference; - typedef typename base_allocator_type::size_type size_type; - typedef typename base_allocator_type::difference_type difference_type; - template struct rebind { - typedef zero_allocator other; - }; - - zero_allocator() throw() { } - zero_allocator(const zero_allocator &a) throw() : base_allocator_type( a ) { } - template - zero_allocator(const zero_allocator &a) throw() : base_allocator_type( Allocator( a ) ) { } - - pointer allocate(const size_type n, const void *hint = 0 ) { - pointer ptr = base_allocator_type::allocate( n, hint ); - std::memset( ptr, 0, n * sizeof(value_type) ); - return ptr; - } -}; - -//! Analogous to std::allocator, as defined in ISO C++ Standard, Section 20.4.1 -/** @ingroup memory_allocation */ -template class Allocator> -class zero_allocator : public Allocator { -public: - typedef Allocator base_allocator_type; - typedef typename base_allocator_type::value_type value_type; - typedef typename base_allocator_type::pointer pointer; - typedef typename base_allocator_type::const_pointer const_pointer; - template struct rebind { - typedef zero_allocator other; - }; -}; - -template class B1, typename T2, template class B2> -inline bool operator==( const zero_allocator &a, const zero_allocator &b) { - return static_cast< B1 >(a) == static_cast< B2 >(b); -} -template class B1, typename T2, template class B2> -inline bool operator!=( const zero_allocator &a, const zero_allocator &b) { - return static_cast< B1 >(a) != static_cast< B2 >(b); -} - -} // namespace tbb - -#endif /* __TBB_tbb_allocator_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbb_config.h b/lib/3rdParty/tbb/include/tbb/tbb_config.h deleted file mode 100644 index 25f9f24d..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbb_config.h +++ /dev/null @@ -1,779 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_tbb_config_H -#define __TBB_tbb_config_H - -/** This header is supposed to contain macro definitions and C style comments only. - The macros defined here are intended to control such aspects of TBB build as - - presence of compiler features - - compilation modes - - feature sets - - known compiler/platform issues -**/ - -/* This macro marks incomplete code or comments describing ideas which are considered for the future. - * See also for plain comment with TODO and FIXME marks for small improvement opportunities. 
- */ -#define __TBB_TODO 0 - -/*Check which standard library we use on macOS*.*/ -/*__TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed.*/ -#if !defined(__TBB_SYMBOL) && (__APPLE__ || __ANDROID__) - #include -#endif - -// note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match -// the actual GCC version on the system. -#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - -// Since GNU libstdc++ does not have a convenient macro for its version, -// we rely on the version of GCC or the user-specified macro below. -// The format of TBB_USE_GLIBCXX_VERSION should match the __TBB_GCC_VERSION above, -// e.g. it should be set to 40902 for libstdc++ coming with GCC 4.9.2. -#ifdef TBB_USE_GLIBCXX_VERSION -#define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION -#else -#define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION -//TODO: analyze __GLIBCXX__ instead of __TBB_GCC_VERSION ? -#endif - -#if __clang__ - /**according to clang documentation version can be vendor specific **/ - #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) -#endif - -/** Target OS is either iOS* or iOS* simulator **/ -#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ - #define __TBB_IOS 1 -#endif - -/** Preprocessor symbols to determine HW architecture **/ - -#if _WIN32||_WIN64 -# if defined(_M_X64)||defined(__x86_64__) // the latter for MinGW support -# define __TBB_x86_64 1 -# elif defined(_M_IA64) -# define __TBB_ipf 1 -# elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support -# define __TBB_x86_32 1 -# else -# define __TBB_generic_arch 1 -# endif -#else /* Assume generic Unix */ -# if !__linux__ && !__APPLE__ -# define __TBB_generic_os 1 -# endif -# if __TBB_IOS -# define __TBB_generic_arch 1 -# elif __x86_64__ -# define __TBB_x86_64 1 -# elif __ia64__ -# define __TBB_ipf 1 -# elif __i386__||__i386 // __i386 is for Sun OS -# define __TBB_x86_32 1 -# else -# define __TBB_generic_arch 1 -# endif -#endif - -#if __MIC__ || __MIC2__ -#define __TBB_DEFINE_MIC 1 -#endif - -#define __TBB_TSX_AVAILABLE ((__TBB_x86_32 || __TBB_x86_64) && !__TBB_DEFINE_MIC) - -/** Presence of compiler features **/ - -#if __INTEL_COMPILER == 9999 && __INTEL_COMPILER_BUILD_DATE == 20110811 -/* Intel(R) Composer XE 2011 Update 6 incorrectly sets __INTEL_COMPILER. Fix it. */ - #undef __INTEL_COMPILER - #define __INTEL_COMPILER 1210 -#endif - -#if __TBB_GCC_VERSION >= 40400 && !defined(__INTEL_COMPILER) - /** warning suppression pragmas available in GCC since 4.4 **/ - #define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1 -#endif - -/* Select particular features of C++11 based on compiler version. - ICC 12.1 (Linux*), GCC 4.3 and higher, clang 2.9 and higher - set __GXX_EXPERIMENTAL_CXX0X__ in c++11 mode. - - Compilers that mimics other compilers (ICC, clang) must be processed before - compilers they mimic (GCC, MSVC). - - TODO: The following conditions should be extended when new compilers/runtimes - support added. - */ - -/** C++11 mode detection macros for Intel(R) C++ compiler (enabled by -std=c++XY option): - __INTEL_CXX11_MODE__ for version >=13.0 (not available for ICC 15.0 if -std=c++14 is used), - __STDC_HOSTED__ for version >=12.0 (useful only on Windows), - __GXX_EXPERIMENTAL_CXX0X__ for version >=12.0 on Linux and macOS. 
**/ -#if __INTEL_COMPILER && !__INTEL_CXX11_MODE__ - // __INTEL_CXX11_MODE__ is not set, try to deduce it - #define __INTEL_CXX11_MODE__ (__GXX_EXPERIMENTAL_CXX0X__ || (_MSC_VER && __STDC_HOSTED__)) -#endif - -// Intel(R) C++ Compiler offloading API to the Intel(R) Graphics Technology presence macro -// TODO: add support for ICC 15.00 _GFX_enqueue API and then decrease Intel compiler supported version -// TODO: add linux support and restict it with (__linux__ && __TBB_x86_64 && !__ANDROID__) macro -#if __INTEL_COMPILER >= 1600 && _WIN32 -#define __TBB_GFX_PRESENT 1 -#endif - -#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MODE__) - // On Windows, C++11 features supported by Visual Studio 2010 and higher are enabled by default, - // so in absence of /Qstd= use MSVC branch for __TBB_CPP11_* detection. - // On other platforms, no -std= means C++03. - - #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (__INTEL_CXX11_MODE__ && __VARIADIC_TEMPLATES) - // Both r-value reference support in compiler and std::move/std::forward - // presence in C++ standard library is checked. - #define __TBB_CPP11_RVALUE_REF_PRESENT ((_MSC_VER >= 1700 || __GXX_EXPERIMENTAL_CXX0X__ && (__TBB_GLIBCXX_VERSION >= 40500 || _LIBCPP_VERSION)) && __INTEL_COMPILER >= 1400) - #define __TBB_IMPLICIT_MOVE_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400 && (_MSC_VER >= 1900 || __TBB_GCC_VERSION >= 40600 || __clang__)) - #if _MSC_VER >= 1600 - #define __TBB_EXCEPTION_PTR_PRESENT ( __INTEL_COMPILER > 1300 \ - /*ICC 12.1 Upd 10 and 13 beta Upd 2 fixed exception_ptr linking issue*/ \ - || (__INTEL_COMPILER == 1300 && __INTEL_COMPILER_BUILD_DATE >= 20120530) \ - || (__INTEL_COMPILER == 1210 && __INTEL_COMPILER_BUILD_DATE >= 20120410) ) - /** libstdc++ that comes with GCC 4.6 use C++11 features not supported by ICC 12.1. 
- * Because of that ICC 12.1 does not support C++11 mode with gcc 4.6 (or higher), - * and therefore does not define __GXX_EXPERIMENTAL_CXX0X__ macro **/ - #elif __TBB_GLIBCXX_VERSION >= 40404 && __TBB_GLIBCXX_VERSION < 40600 - #define __TBB_EXCEPTION_PTR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1200) - #elif __TBB_GLIBCXX_VERSION >= 40600 - #define __TBB_EXCEPTION_PTR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1300) - #elif _LIBCPP_VERSION - #define __TBB_EXCEPTION_PTR_PRESENT __GXX_EXPERIMENTAL_CXX0X__ - #else - #define __TBB_EXCEPTION_PTR_PRESENT 0 - #endif - #define __TBB_STATIC_ASSERT_PRESENT (__INTEL_CXX11_MODE__ || _MSC_VER >= 1600) - #define __TBB_CPP11_TUPLE_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && (__TBB_GLIBCXX_VERSION >= 40300 || _LIBCPP_VERSION)) - #if (__clang__ && __INTEL_COMPILER > 1400) - /* Older versions of Intel Compiler do not have __has_include */ - #if (__has_feature(__cxx_generalized_initializers__) && __has_include()) - #define __TBB_INITIALIZER_LISTS_PRESENT 1 - #endif - #else - #define __TBB_INITIALIZER_LISTS_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400 && (_MSC_VER >= 1800 || __TBB_GLIBCXX_VERSION >= 40400 || _LIBCPP_VERSION)) - #endif - #define __TBB_CONSTEXPR_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400) - #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1200) - /** ICC seems to disable support of noexcept event in c++11 when compiling in compatibility mode for gcc <4.6 **/ - #define __TBB_NOEXCEPT_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1300 && (__TBB_GLIBCXX_VERSION >= 40600 || _LIBCPP_VERSION || _MSC_VER)) - #define __TBB_CPP11_STD_BEGIN_END_PRESENT (_MSC_VER >= 1700 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1310 && (__TBB_GLIBCXX_VERSION >= 40600 || _LIBCPP_VERSION)) - #define __TBB_CPP11_AUTO_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1210) - #define __TBB_CPP11_DECLTYPE_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1210) - #define __TBB_CPP11_LAMBDAS_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1200) - #define __TBB_CPP11_DEFAULT_FUNC_TEMPLATE_ARGS_PRESENT (_MSC_VER >= 1800 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1210) - #define __TBB_OVERRIDE_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400) - #define __TBB_ALIGNAS_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1500) - #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1210) -#elif __clang__ -/** TODO: these options need to be rechecked **/ -/** on macOS the only way to get C++11 is to use clang. For library features (e.g. exception_ptr) libc++ is also - * required. So there is no need to check GCC version for clang**/ - #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __has_feature(__cxx_variadic_templates__) - #define __TBB_CPP11_RVALUE_REF_PRESENT (__has_feature(__cxx_rvalue_references__) && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40500)) - #define __TBB_IMPLICIT_MOVE_PRESENT __has_feature(cxx_implicit_moves) -/** TODO: extend exception_ptr related conditions to cover libstdc++ **/ - #define __TBB_EXCEPTION_PTR_PRESENT (__cplusplus >= 201103L && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40600)) - #define __TBB_STATIC_ASSERT_PRESENT __has_feature(__cxx_static_assert__) - /**Clang (preprocessor) has problems with dealing with expression having __has_include in #ifs - * used inside C++ code. 
(At least version that comes with OS X 10.8 : Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)) **/ - #if (__GXX_EXPERIMENTAL_CXX0X__ && __has_include()) - #define __TBB_CPP11_TUPLE_PRESENT 1 - #endif - #if (__has_feature(__cxx_generalized_initializers__) && __has_include()) - #define __TBB_INITIALIZER_LISTS_PRESENT 1 - #endif - #define __TBB_CONSTEXPR_PRESENT __has_feature(__cxx_constexpr__) - #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (__has_feature(__cxx_defaulted_functions__) && __has_feature(__cxx_deleted_functions__)) - /**For some unknown reason __has_feature(__cxx_noexcept) does not yield true for all cases. Compiler bug ? **/ - #define __TBB_NOEXCEPT_PRESENT (__cplusplus >= 201103L) - #define __TBB_CPP11_STD_BEGIN_END_PRESENT (__has_feature(__cxx_range_for__) && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40600)) - #define __TBB_CPP11_AUTO_PRESENT __has_feature(__cxx_auto_type__) - #define __TBB_CPP11_DECLTYPE_PRESENT __has_feature(__cxx_decltype__) - #define __TBB_CPP11_LAMBDAS_PRESENT __has_feature(cxx_lambdas) - #define __TBB_CPP11_DEFAULT_FUNC_TEMPLATE_ARGS_PRESENT __has_feature(cxx_default_function_template_args) - #define __TBB_OVERRIDE_PRESENT __has_feature(cxx_override_control) - #define __TBB_ALIGNAS_PRESENT __has_feature(cxx_alignas) - #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT __has_feature(cxx_alias_templates) -#elif __GNUC__ - #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X__ - #define __TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700) - #define __TBB_CPP11_RVALUE_REF_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40500) - #define __TBB_IMPLICIT_MOVE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600) - /** __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 here is a substitution for _GLIBCXX_ATOMIC_BUILTINS_4, which is a prerequisite - for exception_ptr but cannot be used in this file because it is defined in a header, not by the compiler. - If the compiler has no atomic intrinsics, the C++ library should not expect those as well. 
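Note: the per-compiler branches in this deleted header all reduce to the same idiom, which a compressed illustration (hypothetical macro names, not TBB's exact logic) makes easier to see: fold the compiler version into one integer, then gate each language feature on a version threshold or, on Clang, on a direct __has_feature probe.

#if defined(__clang__)
    // Clang: ask the compiler directly instead of trusting version numbers.
    #define MY_HAS_RVALUE_REFS __has_feature(cxx_rvalue_references)
#elif defined(__GNUC__)
    // GCC 4.9.2 folds to 40902, so ">= 40500" reads as "GCC 4.5 or newer".
    #define MY_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
    #define MY_HAS_RVALUE_REFS (MY_GCC_VERSION >= 40500)
#else
    #define MY_HAS_RVALUE_REFS 0
#endif

#if MY_HAS_RVALUE_REFS
    // move-enabled overloads would be declared here
#endif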
**/ - #define __TBB_EXCEPTION_PTR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40404 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) - #define __TBB_STATIC_ASSERT_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300) - #define __TBB_CPP11_TUPLE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300) - #define __TBB_INITIALIZER_LISTS_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400) - /** gcc seems have to support constexpr from 4.4 but tests in (test_atomic) seeming reasonable fail to compile prior 4.6**/ - #define __TBB_CONSTEXPR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400) - #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400) - #define __TBB_NOEXCEPT_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600) - #define __TBB_CPP11_STD_BEGIN_END_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600) - #define __TBB_CPP11_AUTO_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400) - #define __TBB_CPP11_DECLTYPE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400) - #define __TBB_CPP11_LAMBDAS_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40500) - #define __TBB_CPP11_DEFAULT_FUNC_TEMPLATE_ARGS_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300) - #define __TBB_OVERRIDE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700) - #define __TBB_ALIGNAS_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40800) - #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700) -#elif _MSC_VER - // These definitions are also used with Intel Compiler in "default" mode; see a comment above. - - #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (_MSC_VER >= 1800) - // Contains a workaround for ICC 13 - #define __TBB_CPP11_RVALUE_REF_PRESENT (_MSC_VER >= 1700 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1400)) - #define __TBB_IMPLICIT_MOVE_PRESENT (_MSC_VER >= 1900) - #define __TBB_EXCEPTION_PTR_PRESENT (_MSC_VER >= 1600) - #define __TBB_STATIC_ASSERT_PRESENT (_MSC_VER >= 1600) - #define __TBB_CPP11_TUPLE_PRESENT (_MSC_VER >= 1600) - #define __TBB_INITIALIZER_LISTS_PRESENT (_MSC_VER >= 1800) - #define __TBB_CONSTEXPR_PRESENT (_MSC_VER >= 1900) - #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (_MSC_VER >= 1800) - #define __TBB_NOEXCEPT_PRESENT (_MSC_VER >= 1900) - #define __TBB_CPP11_STD_BEGIN_END_PRESENT (_MSC_VER >= 1700) - #define __TBB_CPP11_AUTO_PRESENT (_MSC_VER >= 1600) - #define __TBB_CPP11_DECLTYPE_PRESENT (_MSC_VER >= 1600) - #define __TBB_CPP11_LAMBDAS_PRESENT (_MSC_VER >= 1600) - #define __TBB_CPP11_DEFAULT_FUNC_TEMPLATE_ARGS_PRESENT (_MSC_VER >= 1800) - #define __TBB_OVERRIDE_PRESENT (_MSC_VER >= 1700) - #define __TBB_ALIGNAS_PRESENT (_MSC_VER >= 1900) - #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT (_MSC_VER >= 1800) -#else - #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0 - #define __TBB_CPP11_RVALUE_REF_PRESENT 0 - #define __TBB_IMPLICIT_MOVE_PRESENT 0 - #define __TBB_EXCEPTION_PTR_PRESENT 0 - #define __TBB_STATIC_ASSERT_PRESENT 0 - #define __TBB_CPP11_TUPLE_PRESENT 0 - #define __TBB_INITIALIZER_LISTS_PRESENT 0 - #define __TBB_CONSTEXPR_PRESENT 0 - #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT 0 - #define __TBB_NOEXCEPT_PRESENT 0 - #define __TBB_CPP11_STD_BEGIN_END_PRESENT 0 - #define __TBB_CPP11_AUTO_PRESENT 0 - #define __TBB_CPP11_DECLTYPE_PRESENT 0 - #define __TBB_CPP11_LAMBDAS_PRESENT 0 - #define 
__TBB_CPP11_DEFAULT_FUNC_TEMPLATE_ARGS_PRESENT 0 - #define __TBB_OVERRIDE_PRESENT 0 - #define __TBB_ALIGNAS_PRESENT 0 - #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT 0 -#endif - -// C++11 standard library features - -#ifndef __TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT -#define __TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT -#endif -#define __TBB_CPP11_VARIADIC_TUPLE_PRESENT (!_MSC_VER || _MSC_VER >=1800) - -#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) -#define __TBB_TR1_TYPE_PROPERTIES_IN_STD_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION >= 40300 || _MSC_VER >= 1600) -// GCC supported some of type properties since 4.7 -#define __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION >= 40700 || __TBB_CPP11_TYPE_PROPERTIES_PRESENT) - -// In GCC, std::move_if_noexcept appeared later than noexcept -#define __TBB_MOVE_IF_NOEXCEPT_PRESENT (__TBB_NOEXCEPT_PRESENT && (__TBB_GLIBCXX_VERSION >= 40700 || _MSC_VER >= 1900 || _LIBCPP_VERSION)) -#define __TBB_ALLOCATOR_TRAITS_PRESENT (__cplusplus >= 201103L && _LIBCPP_VERSION || _MSC_VER >= 1700 || \ - __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION >= 40700 && !(__TBB_GLIBCXX_VERSION == 40700 && __TBB_DEFINE_MIC)) -#define __TBB_MAKE_EXCEPTION_PTR_PRESENT (__TBB_EXCEPTION_PTR_PRESENT && (_MSC_VER >= 1700 || __TBB_GLIBCXX_VERSION >= 40600 || _LIBCPP_VERSION)) - -#define __TBB_CPP11_FUTURE_PRESENT (_MSC_VER >= 1700 || __TBB_GLIBCXX_VERSION >= 40600 && _GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) - -// std::swap is in only since C++11, though MSVC had it at least since VS2005 -#if _MSC_VER>=1400 || _LIBCPP_VERSION || __GXX_EXPERIMENTAL_CXX0X__ -#define __TBB_STD_SWAP_HEADER -#else -#define __TBB_STD_SWAP_HEADER -#endif - -//TODO: not clear how exactly this macro affects exception_ptr - investigate -// On linux ICC fails to find existing std::exception_ptr in libstdc++ without this define -#if __INTEL_COMPILER && __GNUC__ && __TBB_EXCEPTION_PTR_PRESENT && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) - #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 -#endif - -// Work around a bug in MinGW32 -#if __MINGW32__ && __TBB_EXCEPTION_PTR_PRESENT && !defined(_GLIBCXX_ATOMIC_BUILTINS_4) - #define _GLIBCXX_ATOMIC_BUILTINS_4 -#endif - -#if __GNUC__ || __SUNPRO_CC || __IBMCPP__ - /* ICC defines __GNUC__ and so is covered */ - #define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1 -#elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER) - #define __TBB_DECLSPEC_ALIGN_PRESENT 1 -#endif - -/* Actually ICC supports gcc __sync_* intrinsics starting 11.1, - * but 64 bit support for 32 bit target comes in later ones*/ -/* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become optional */ -/* Assumed that all clang versions have these gcc compatible intrinsics. 
*/ -#if __TBB_GCC_VERSION >= 40306 || __INTEL_COMPILER >= 1200 || __clang__ - /** built-in atomics available in GCC since 4.1.2 **/ - #define __TBB_GCC_BUILTIN_ATOMICS_PRESENT 1 -#endif - -#if __INTEL_COMPILER >= 1200 - /** built-in C++11 style atomics available in ICC since 12.0 **/ - #define __TBB_ICC_BUILTIN_ATOMICS_PRESENT 1 -#endif - -#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || _MSC_VER>=1700 || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) - -/** Macro helpers **/ -#define __TBB_CONCAT_AUX(A,B) A##B -// The additional level of indirection is needed to expand macros A and B (not to get the AB macro). -// See [cpp.subst] and [cpp.concat] for more details. -#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) -// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). -#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) -#define __TBB_MACRO_EMPTY 1 - -/** User controlled TBB features & modes **/ -#ifndef TBB_USE_DEBUG -/* -There are four cases that are supported: - 1. "_DEBUG is undefined" means "no debug"; - 2. "_DEBUG defined to something that is evaluated to 0 (the "garbage" is also evaluated to 0 [cpp.cond])" means "no debug"; - 3. "_DEBUG defined to something that is evaluated to non-zero value" means "debug"; - 4. "_DEBUG defined to nothing (empty)" means "debug". -*/ -#ifdef _DEBUG -// Check if _DEBUG is empty. -#define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) -#if __TBB_IS__DEBUG_EMPTY -#define TBB_USE_DEBUG 1 -#else -#define TBB_USE_DEBUG _DEBUG -#endif /* __TBB_IS__DEBUG_EMPTY */ -#else -#define TBB_USE_DEBUG 0 -#endif -#endif /* TBB_USE_DEBUG */ - -#ifndef TBB_USE_ASSERT -#define TBB_USE_ASSERT TBB_USE_DEBUG -#endif /* TBB_USE_ASSERT */ - -#ifndef TBB_USE_THREADING_TOOLS -#define TBB_USE_THREADING_TOOLS TBB_USE_DEBUG -#endif /* TBB_USE_THREADING_TOOLS */ - -#ifndef TBB_USE_PERFORMANCE_WARNINGS -#ifdef TBB_PERFORMANCE_WARNINGS -#define TBB_USE_PERFORMANCE_WARNINGS TBB_PERFORMANCE_WARNINGS -#else -#define TBB_USE_PERFORMANCE_WARNINGS TBB_USE_DEBUG -#endif /* TBB_PEFORMANCE_WARNINGS */ -#endif /* TBB_USE_PERFORMANCE_WARNINGS */ - -#if __TBB_DEFINE_MIC - #if TBB_USE_EXCEPTIONS - #error The platform does not properly support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. - #elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 0 - #endif -#elif !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) - #if TBB_USE_EXCEPTIONS - #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. 
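Note: the _DEBUG handling above relies on a token-pasting trick that is easy to miss. A standalone sketch with hypothetical names: pasting the macro's body onto a known prefix either reproduces the prefix (the body was empty, so the test compares 1 == 1) or forms an unknown token, which the preprocessor evaluates as 0.

#define CONCAT_AUX(A,B) A##B
#define EMPTY_PROBE 1
#define IS_EMPTY(A) (CONCAT_AUX(EMPTY_PROBE,A) == EMPTY_PROBE)

#define FOO              // defined, but to nothing
#if IS_EMPTY(FOO)        // pastes to EMPTY_PROBE, i.e. 1 == 1: branch taken
    #define FOO_ENABLED 1
#endif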
- #elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 0 - #endif -#elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 1 -#endif - -#if __clang__ && !__INTEL_COMPILER -#define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) -#elif defined(_CPPRTTI) -#define __TBB_USE_OPTIONAL_RTTI 1 -#else -#define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) -#endif - -#ifndef TBB_IMPLEMENT_CPP0X -/** By default, use C++11 classes if available **/ - #if __clang__ - /* Old versions of Intel Compiler do not have __has_include or cannot use it in #define */ - #if (__INTEL_COMPILER && (__INTEL_COMPILER < 1500 || __INTEL_COMPILER == 1500 && __INTEL_COMPILER_UPDATE <= 1)) - #define TBB_IMPLEMENT_CPP0X (__cplusplus < 201103L || !_LIBCPP_VERSION) - #else - #define TBB_IMPLEMENT_CPP0X (__cplusplus < 201103L || (!__has_include() && !__has_include())) - #endif - #elif __GNUC__ - #define TBB_IMPLEMENT_CPP0X (__TBB_GCC_VERSION < 40400 || !__GXX_EXPERIMENTAL_CXX0X__) - #elif _MSC_VER - #define TBB_IMPLEMENT_CPP0X (_MSC_VER < 1700) - #else - // TODO: Reconsider general approach to be more reliable, e.g. (!(__cplusplus >= 201103L && __ STDC_HOSTED__)) - #define TBB_IMPLEMENT_CPP0X (!__STDCPP_THREADS__) - #endif -#endif /* TBB_IMPLEMENT_CPP0X */ - -/* TBB_USE_CAPTURED_EXCEPTION should be explicitly set to either 0 or 1, as it is used as C++ const */ -#ifndef TBB_USE_CAPTURED_EXCEPTION - /** IA-64 architecture pre-built TBB binaries do not support exception_ptr. **/ - #if __TBB_EXCEPTION_PTR_PRESENT && !defined(__ia64__) - #define TBB_USE_CAPTURED_EXCEPTION 0 - #else - #define TBB_USE_CAPTURED_EXCEPTION 1 - #endif -#else /* defined TBB_USE_CAPTURED_EXCEPTION */ - #if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT - #error Current runtime does not support std::exception_ptr. Set TBB_USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb::captured_exception. - #endif -#endif /* defined TBB_USE_CAPTURED_EXCEPTION */ - -/** Check whether the request to use GCC atomics can be satisfied **/ -#if TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT - #error "GCC atomic built-ins are not supported." 
-#endif - -/** Internal TBB features & modes **/ - -/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ -#ifndef __TBB_WEAK_SYMBOLS_PRESENT -#define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) -#endif - -/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ -#ifndef __TBB_DYNAMIC_LOAD_ENABLED - #define __TBB_DYNAMIC_LOAD_ENABLED 1 -#endif - -/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when - it's necessary to test internal functions not exported from TBB DLLs -**/ -#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) - #define __TBB_NO_IMPLICIT_LINKAGE 1 - #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 -#endif - -#ifndef __TBB_COUNT_TASK_NODES - #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT -#endif - -#ifndef __TBB_TASK_GROUP_CONTEXT - #define __TBB_TASK_GROUP_CONTEXT 1 -#endif /* __TBB_TASK_GROUP_CONTEXT */ - -#ifndef __TBB_SCHEDULER_OBSERVER - #define __TBB_SCHEDULER_OBSERVER 1 -#endif /* __TBB_SCHEDULER_OBSERVER */ - -#ifndef __TBB_FP_CONTEXT - #define __TBB_FP_CONTEXT __TBB_TASK_GROUP_CONTEXT -#endif /* __TBB_FP_CONTEXT */ - -#if __TBB_FP_CONTEXT && !__TBB_TASK_GROUP_CONTEXT - #error __TBB_FP_CONTEXT requires __TBB_TASK_GROUP_CONTEXT to be enabled -#endif - -#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official - -#ifndef __TBB_ARENA_OBSERVER - #define __TBB_ARENA_OBSERVER ((__TBB_BUILD||TBB_PREVIEW_LOCAL_OBSERVER)&& __TBB_SCHEDULER_OBSERVER) -#endif /* __TBB_ARENA_OBSERVER */ - -#ifndef __TBB_SLEEP_PERMISSION - #define __TBB_SLEEP_PERMISSION ((__TBB_CPF_BUILD||TBB_PREVIEW_LOCAL_OBSERVER)&& __TBB_SCHEDULER_OBSERVER) -#endif /* __TBB_SLEEP_PERMISSION */ - -#ifndef __TBB_TASK_ISOLATION - #define __TBB_TASK_ISOLATION (__TBB_CPF_BUILD||TBB_PREVIEW_TASK_ISOLATION) -#endif /* __TBB_TASK_ISOLATION */ - -#if TBB_PREVIEW_FLOW_GRAPH_TRACE -// Users of flow-graph trace need to explicitly link against the preview library. This -// prevents the linker from implicitly linking an application with a preview version of -// TBB and unexpectedly bringing in other community preview features, which might change -// the behavior of the application. 
-#define __TBB_NO_IMPLICIT_LINKAGE 1 -#endif /* TBB_PREVIEW_FLOW_GRAPH_TRACE */ - -#ifndef __TBB_ITT_STRUCTURE_API -#define __TBB_ITT_STRUCTURE_API ( !__TBB_DEFINE_MIC && (__TBB_CPF_BUILD || TBB_PREVIEW_FLOW_GRAPH_TRACE) ) -#endif - -#if TBB_USE_EXCEPTIONS && !__TBB_TASK_GROUP_CONTEXT - #error TBB_USE_EXCEPTIONS requires __TBB_TASK_GROUP_CONTEXT to be enabled -#endif - -#ifndef __TBB_TASK_PRIORITY - #define __TBB_TASK_PRIORITY (__TBB_TASK_GROUP_CONTEXT) -#endif /* __TBB_TASK_PRIORITY */ - -#if __TBB_TASK_PRIORITY && !__TBB_TASK_GROUP_CONTEXT - #error __TBB_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enabled -#endif - -#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD - #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 -#endif - -#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY - #define __TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 -#endif - -#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ - (_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__)) - #define __TBB_SURVIVE_THREAD_SWITCH 1 -#endif /* __TBB_SURVIVE_THREAD_SWITCH */ - -#ifndef __TBB_DEFAULT_PARTITIONER -#if TBB_DEPRECATED -/** Default partitioner for parallel loop templates in TBB 1.0-2.1 */ -#define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner -#else -/** Default partitioner for parallel loop templates since TBB 2.2 */ -#define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner -#endif /* TBB_DEPRECATED */ -#endif /* !defined(__TBB_DEFAULT_PARTITIONER */ - -#ifndef __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES -#define __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES 1 -#endif - -#ifndef __TBB_ENABLE_RANGE_FEEDBACK -#define __TBB_ENABLE_RANGE_FEEDBACK 0 -#endif - -#ifdef _VARIADIC_MAX - #define __TBB_VARIADIC_MAX _VARIADIC_MAX -#else - #if _MSC_VER == 1700 - #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 - #elif _MSC_VER == 1600 - #define __TBB_VARIADIC_MAX 10 // VS10 setting - #else - #define __TBB_VARIADIC_MAX 15 - #endif -#endif - -/** __TBB_WIN8UI_SUPPORT enables support of New Windows*8 Store Apps and limit a possibility to load - shared libraries at run time only from application container **/ -#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP - #define __TBB_WIN8UI_SUPPORT 1 -#else - #define __TBB_WIN8UI_SUPPORT 0 -#endif - -/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by - the bugs in compilers, standard or OS specific libraries. They should be - removed as soon as the corresponding bugs are fixed or the buggy OS/compiler - versions go out of the support list. -**/ - -#if __SIZEOF_POINTER__ < 8 && __ANDROID__ && __TBB_GCC_VERSION <= 40403 && !__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 - /** Necessary because on Android 8-byte CAS and F&A are not available for some processor architectures, - but no mandatory warning message appears from GCC 4.4.3. Instead, only a linkage error occurs when - these atomic operations are used (such as in unit test test_atomic.exe). **/ - #define __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN 1 -#elif __TBB_x86_32 && __TBB_GCC_VERSION == 40102 && ! __GNUC_RH_RELEASE__ - /** GCC 4.1.2 erroneously emit call to external function for 64 bit sync_ intrinsics. - However these functions are not defined anywhere. It seems that this problem was fixed later on - and RHEL got an updated version of gcc 4.1.2. 
**/ - #define __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN 1 -#endif - -#if __GNUC__ && __TBB_x86_64 && __INTEL_COMPILER == 1200 - #define __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN 1 -#endif - -#if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPILER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012) - /** Necessary to avoid ICL error (or warning in non-strict mode): - "exception specification for implicitly declared virtual destructor is - incompatible with that of overridden one". **/ - #define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1 -#endif - -#if !__INTEL_COMPILER && (_MSC_VER && _MSC_VER < 1500 || __GNUC__ && __TBB_GCC_VERSION < 40102) - /** gcc 3.4.6 (and earlier) and VS2005 (and earlier) do not allow declaring template class as a friend - of classes defined in other namespaces. **/ - #define __TBB_TEMPLATE_FRIENDS_BROKEN 1 -#endif - -#if __GLIBC__==2 && __GLIBC_MINOR__==3 || (__APPLE__ && ( __INTEL_COMPILER==1200 && !TBB_USE_DEBUG)) - /** Macro controlling EH usages in TBB tests. - Some older versions of glibc crash when exception handling happens concurrently. **/ - #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 -#endif - -#if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 - /** That's a bug in Intel(R) C++ Compiler 11.1.044/IA-32 architecture/Windows* OS, that leads to a worker thread crash on the thread's startup. **/ - #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 -#endif - -#if __clang__ || (__GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER)) - /** Bugs with access to nested classes declared in protected area */ - #define __TBB_PROTECTED_NESTED_CLASS_BROKEN 1 -#endif - -#if __MINGW32__ && __TBB_GCC_VERSION < 40200 - /** MinGW has a bug with stack alignment for routines invoked from MS RTLs. - Since GCC 4.2, the bug can be worked around via a special attribute. **/ - #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1 -#endif - -#if __TBB_GCC_VERSION==40300 && !__INTEL_COMPILER && !__clang__ - /* GCC of this version may rashly ignore control dependencies */ - #define __TBB_GCC_OPTIMIZER_ORDERING_BROKEN 1 -#endif - -#if __FreeBSD__ - /** A bug in FreeBSD 8.0 results in kernel panic when there is contention - on a mutex created with this attribute. **/ - #define __TBB_PRIO_INHERIT_BROKEN 1 - - /** A bug in FreeBSD 8.0 results in test hanging when an exception occurs - during (concurrent?) object construction by means of placement new operator. **/ - #define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1 -#endif /* __FreeBSD__ */ - -#if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER) - /** The Intel(R) C++ Compiler for IA-32 architecture (Linux* OS|macOS) crashes or generates - incorrect code when __asm__ arguments have a cast to volatile. **/ - #define __TBB_ICC_ASM_VOLATILE_BROKEN 1 -#endif - -#if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2) - /** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __alignof(T) - when T has not yet been instantiated. **/ - #define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1 -#endif - -#if __TBB_DEFINE_MIC - /** Main thread and user's thread have different default thread affinity masks. 
**/ - #define __TBB_MAIN_THREAD_AFFINITY_BROKEN 1 -#endif - -#if __GXX_EXPERIMENTAL_CXX0X__ && !defined(__EXCEPTIONS) && \ - ((!__INTEL_COMPILER && !__clang__ && (__TBB_GCC_VERSION>=40400 && __TBB_GCC_VERSION<40600)) || \ - (__INTEL_COMPILER<=1400 && (__TBB_GLIBCXX_VERSION>=40400 && __TBB_GLIBCXX_VERSION<=40801))) -/* There is an issue for specific GCC toolchain when C++11 is enabled - and exceptions are disabled: - exceprion_ptr.h/nested_exception.h use throw unconditionally. - GCC can ignore 'throw' since 4.6; but with ICC the issue still exists. - */ - #define __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN 1 -#endif - -#if __INTEL_COMPILER==1300 && __TBB_GLIBCXX_VERSION>=40700 && defined(__GXX_EXPERIMENTAL_CXX0X__) -/* Some C++11 features used inside libstdc++ are not supported by Intel compiler. - * Checking version of gcc instead of libstdc++ because - * - they are directly connected, - * - for now it is not possible to check version of any standard library in this file - */ - #define __TBB_ICC_13_0_CPP11_STDLIB_SUPPORT_BROKEN 1 -#endif - -#if (__GNUC__==4 && __GNUC_MINOR__==4 ) && !defined(__INTEL_COMPILER) && !defined(__clang__) - /** excessive warnings related to strict aliasing rules in GCC 4.4 **/ - #define __TBB_GCC_STRICT_ALIASING_BROKEN 1 - /* topical remedy: #pragma GCC diagnostic ignored "-Wstrict-aliasing" */ - #if !__TBB_GCC_WARNING_SUPPRESSION_PRESENT - #error Warning suppression is not supported, while should. - #endif -#endif - -/*In a PIC mode some versions of GCC 4.1.2 generate incorrect inlined code for 8 byte __sync_val_compare_and_swap intrinsic */ -#if __TBB_GCC_VERSION == 40102 && __PIC__ && !defined(__INTEL_COMPILER) && !defined(__clang__) - #define __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN 1 -#endif - -#if __TBB_x86_32 && ( __INTEL_COMPILER || (__GNUC__==5 && __GNUC_MINOR__>=2 && __GXX_EXPERIMENTAL_CXX0X__) \ - || (__GNUC__==3 && __GNUC_MINOR__==3) || (__MINGW32__ && __GNUC__==4 && __GNUC_MINOR__==5) || __SUNPRO_CC ) - // Some compilers for IA-32 architecture fail to provide 8-byte alignment of objects on the stack, - // even if the object specifies 8-byte alignment. On such platforms, the implementation - // of 64 bit atomics for IA-32 architecture (e.g. atomic) use different tactics - // depending upon whether the object is properly aligned or not. - #define __TBB_FORCE_64BIT_ALIGNMENT_BROKEN 1 -#else - // Define to 0 explicitly because the macro is used in a compiled code of test_atomic - #define __TBB_FORCE_64BIT_ALIGNMENT_BROKEN 0 -#endif - -#if __GNUC__ && !__INTEL_COMPILER && !__clang__ && __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && __TBB_GCC_VERSION < 40700 - #define __TBB_ZERO_INIT_WITH_DEFAULTED_CTOR_BROKEN 1 -#endif - -#if _MSC_VER && _MSC_VER <= 1800 && !__INTEL_COMPILER - // With MSVC, when an array is passed by const reference to a template function, - // constness from the function parameter may get propagated to the template parameter. 
- #define __TBB_CONST_REF_TO_ARRAY_TEMPLATE_PARAM_BROKEN 1 -#endif - -// A compiler bug: a disabled copy constructor prevents use of the moving constructor -#define __TBB_IF_NO_COPY_CTOR_MOVE_SEMANTICS_BROKEN (_MSC_VER && (__INTEL_COMPILER >= 1300 && __INTEL_COMPILER <= 1310) && !__INTEL_CXX11_MODE__) - -#define __TBB_CPP11_DECLVAL_BROKEN (_MSC_VER == 1600 || (__GNUC__ && __TBB_GCC_VERSION < 40500) ) - -// Intel C++ compiler has difficulties with copying std::pair with VC11 std::reference_wrapper being a const member -#define __TBB_COPY_FROM_NON_CONST_REF_BROKEN (_MSC_VER == 1700 && __INTEL_COMPILER && __INTEL_COMPILER < 1600) - -// The implicit upcasting of the tuple of a reference of a derived class to a base class fails on icc 13.X if the system's gcc environment is 4.8 -// Also in gcc 4.4 standard library the implementation of the tuple<&> conversion (tuple a = tuple, B is inherited from A) is broken. -#if __GXX_EXPERIMENTAL_CXX0X__ && ((__INTEL_COMPILER >=1300 && __INTEL_COMPILER <=1310 && __TBB_GLIBCXX_VERSION>=40700) || (__TBB_GLIBCXX_VERSION < 40500)) -#define __TBB_UPCAST_OF_TUPLE_OF_REF_BROKEN 1 -#endif - -/** End of __TBB_XXX_BROKEN macro section **/ - -#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) - // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) - #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 -#endif - -#define __TBB_ATOMIC_CTORS (__TBB_CONSTEXPR_PRESENT && __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && (!__TBB_ZERO_INIT_WITH_DEFAULTED_CTOR_BROKEN)) - -// Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang -// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) -#if __ANDROID__ -#include -#define __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND (__ANDROID_API__ < 19) -#endif - -#define __TBB_ALLOCATOR_CONSTRUCT_VARIADIC (__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT) - -#define __TBB_VARIADIC_PARALLEL_INVOKE (TBB_PREVIEW_VARIADIC_PARALLEL_INVOKE && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT) -#define __TBB_FLOW_GRAPH_CPP11_FEATURES (__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT \ - && __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_AUTO_PRESENT) \ - && __TBB_CPP11_VARIADIC_TUPLE_PRESENT && __TBB_CPP11_DEFAULT_FUNC_TEMPLATE_ARGS_PRESENT \ - && !__TBB_UPCAST_OF_TUPLE_OF_REF_BROKEN -#define __TBB_PREVIEW_STREAMING_NODE (__TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT && __TBB_FLOW_GRAPH_CPP11_FEATURES \ - && TBB_PREVIEW_FLOW_GRAPH_NODES && !TBB_IMPLEMENT_CPP0X && !__TBB_UPCAST_OF_TUPLE_OF_REF_BROKEN) -#define __TBB_PREVIEW_OPENCL_NODE (__TBB_PREVIEW_STREAMING_NODE && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT) -#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES || __TBB_PREVIEW_OPENCL_NODE) -#define __TBB_PREVIEW_ASYNC_MSG (TBB_PREVIEW_FLOW_GRAPH_FEATURES && __TBB_FLOW_GRAPH_CPP11_FEATURES) - -#define __TBB_PREVIEW_GFX_FACTORY (__TBB_GFX_PRESENT && TBB_PREVIEW_FLOW_GRAPH_FEATURES && !__TBB_MIC_OFFLOAD \ - && __TBB_FLOW_GRAPH_CPP11_FEATURES && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT \ - && __TBB_CPP11_FUTURE_PRESENT) -#endif /* __TBB_tbb_config_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbb_exception.h b/lib/3rdParty/tbb/include/tbb/tbb_exception.h deleted file mode 100644 index 1c843309..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbb_exception.h +++ /dev/null @@ -1,371 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache 
License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_exception_H -#define __TBB_exception_H - -#include "tbb_stddef.h" - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include -#include //required for bad_alloc definition, operators new -#include // required to construct std exception classes - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -namespace tbb { - -//! Exception for concurrent containers -class bad_last_alloc : public std::bad_alloc { -public: - const char* what() const throw() __TBB_override; -#if __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN - ~bad_last_alloc() throw() __TBB_override {} -#endif -}; - -//! Exception for PPL locks -class improper_lock : public std::exception { -public: - const char* what() const throw() __TBB_override; -}; - -//! Exception for user-initiated abort -class user_abort : public std::exception { -public: - const char* what() const throw() __TBB_override; -}; - -//! Exception for missing wait on structured_task_group -class missing_wait : public std::exception { -public: - const char* what() const throw() __TBB_override; -}; - -//! Exception for repeated scheduling of the same task_handle -class invalid_multiple_scheduling : public std::exception { -public: - const char* what() const throw() __TBB_override; -}; - -namespace internal { -//! Obsolete -void __TBB_EXPORTED_FUNC throw_bad_last_alloc_exception_v4(); - -enum exception_id { - eid_bad_alloc = 1, - eid_bad_last_alloc, - eid_nonpositive_step, - eid_out_of_range, - eid_segment_range_error, - eid_index_range_error, - eid_missing_wait, - eid_invalid_multiple_scheduling, - eid_improper_lock, - eid_possible_deadlock, - eid_operation_not_permitted, - eid_condvar_wait_failed, - eid_invalid_load_factor, - eid_reserved, // free slot for backward compatibility, can be reused. - eid_invalid_swap, - eid_reservation_length_error, - eid_invalid_key, - eid_user_abort, - eid_reserved1, -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - // This id is used only from inside the library and only for support of CPF functionality. - // So, if we drop the functionality, eid_reserved1 can be safely renamed and reused. - eid_blocking_thread_join_impossible = eid_reserved1, -#endif - eid_bad_tagged_msg_cast, - //! The last enumerator tracks the number of defined IDs. It must remain the last one. - /** When adding new IDs, place them immediately _before_ this comment (that is - _after_ all the existing IDs. NEVER insert new IDs between the existing ones. **/ - eid_max -}; - -//! Gathers all throw operators in one place. -/** Its purpose is to minimize code bloat that can be caused by throw operators - scattered in multiple places, especially in templates. **/ -void __TBB_EXPORTED_FUNC throw_exception_v4 ( exception_id ); - -//! 
Versionless convenience wrapper for throw_exception_v4() -inline void throw_exception ( exception_id eid ) { throw_exception_v4(eid); } - -} // namespace internal -} // namespace tbb - -#if __TBB_TASK_GROUP_CONTEXT -#include "tbb_allocator.h" -#include //for typeid - -namespace tbb { - -//! Interface to be implemented by all exceptions TBB recognizes and propagates across the threads. -/** If an unhandled exception of the type derived from tbb::tbb_exception is intercepted - by the TBB scheduler in one of the worker threads, it is delivered to and re-thrown in - the root thread. The root thread is the thread that has started the outermost algorithm - or root task sharing the same task_group_context with the guilty algorithm/task (the one - that threw the exception first). - - Note: when documentation mentions workers with respect to exception handling, - masters are implied as well, because they are completely equivalent in this context. - Consequently a root thread can be master or worker thread. - - NOTE: In case of nested algorithms or complex task hierarchies when the nested - levels share (explicitly or by means of implicit inheritance) the task group - context of the outermost level, the exception may be (re-)thrown multiple times - (ultimately - in each worker on each nesting level) before reaching the root - thread at the outermost level. IMPORTANT: if you intercept an exception derived - from this class on a nested level, you must re-throw it in the catch block by means - of the "throw;" operator. - - TBB provides two implementations of this interface: tbb::captured_exception and - template class tbb::movable_exception. See their declarations for more info. **/ -class tbb_exception : public std::exception -{ - /** No operator new is provided because the TBB usage model assumes dynamic - creation of the TBB exception objects only by means of applying move() - operation on an exception thrown out of TBB scheduler. **/ - void* operator new ( size_t ); - -public: -#if __clang__ - // At -O3 or even -O2 optimization level, Clang may fully throw away an empty destructor - // of tbb_exception from destructors of derived classes. As a result, it does not create - // vtable for tbb_exception, which is a required part of TBB binary interface. - // Making the destructor non-empty (with just a semicolon) prevents that optimization. - ~tbb_exception() throw() { /* keep the semicolon! */ ; } -#endif - - //! Creates and returns pointer to the deep copy of this exception object. - /** Move semantics is allowed. **/ - virtual tbb_exception* move () throw() = 0; - - //! Destroys objects created by the move() method. - /** Frees memory and calls destructor for this exception object. - Can and must be used only on objects created by the move method. **/ - virtual void destroy () throw() = 0; - - //! Throws this exception object. - /** Make sure that if you have several levels of derivation from this interface - you implement or override this method on the most derived level. The implementation - is as simple as "throw *this;". Failure to do this will result in exception - of a base class type being thrown. **/ - virtual void throw_self () = 0; - - //! Returns RTTI name of the originally intercepted exception - virtual const char* name() const throw() = 0; - - //! Returns the result of originally intercepted exception's what() method. 
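Note: the exception_id enum and throw_exception_v4 above implement a code-size optimization worth naming: template-heavy callers invoke one shared, out-of-line thrower keyed by an id, so each call site costs a function call instead of a full throw site. The shape in isolation (a sketch, not TBB's implementation):

#include <new>
#include <stdexcept>

enum exception_id { eid_bad_alloc, eid_out_of_range };

void throw_exception(exception_id eid) {  // the single throw location
    switch (eid) {
        case eid_bad_alloc:    throw std::bad_alloc();
        case eid_out_of_range: throw std::out_of_range("index out of range");
    }
}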
- virtual const char* what() const throw() __TBB_override = 0; - - /** Operator delete is provided only to allow using existing smart pointers - with TBB exception objects obtained as the result of applying move() - operation on an exception thrown out of TBB scheduler. - - When overriding method move() make sure to override operator delete as well - if memory is allocated not by TBB's scalable allocator. **/ - void operator delete ( void* p ) { - internal::deallocate_via_handler_v3(p); - } -}; - -//! This class is used by TBB to propagate information about unhandled exceptions into the root thread. -/** Exception of this type is thrown by TBB in the root thread (thread that started a parallel - algorithm ) if an unhandled exception was intercepted during the algorithm execution in one - of the workers. - \sa tbb::tbb_exception **/ -class captured_exception : public tbb_exception -{ -public: - captured_exception ( const captured_exception& src ) - : tbb_exception(src), my_dynamic(false) - { - set(src.my_exception_name, src.my_exception_info); - } - - captured_exception ( const char* name_, const char* info ) - : my_dynamic(false) - { - set(name_, info); - } - - __TBB_EXPORTED_METHOD ~captured_exception () throw(); - - captured_exception& operator= ( const captured_exception& src ) { - if ( this != &src ) { - clear(); - set(src.my_exception_name, src.my_exception_info); - } - return *this; - } - - captured_exception* __TBB_EXPORTED_METHOD move () throw() __TBB_override; - - void __TBB_EXPORTED_METHOD destroy () throw() __TBB_override; - - void throw_self () __TBB_override { __TBB_THROW(*this); } - - const char* __TBB_EXPORTED_METHOD name() const throw() __TBB_override; - - const char* __TBB_EXPORTED_METHOD what() const throw() __TBB_override; - - void __TBB_EXPORTED_METHOD set ( const char* name, const char* info ) throw(); - void __TBB_EXPORTED_METHOD clear () throw(); - -private: - //! Used only by method clone(). - captured_exception() {} - - //! Functionally equivalent to {captured_exception e(name,info); return e.clone();} - static captured_exception* allocate ( const char* name, const char* info ); - - bool my_dynamic; - const char* my_exception_name; - const char* my_exception_info; -}; - -//! Template that can be used to implement exception that transfers arbitrary ExceptionData to the root thread -/** Code using TBB can instantiate this template with an arbitrary ExceptionData type - and throw this exception object. Such exceptions are intercepted by the TBB scheduler - and delivered to the root thread (). 
- \sa tbb::tbb_exception **/ -template -class movable_exception : public tbb_exception -{ - typedef movable_exception self_type; - -public: - movable_exception ( const ExceptionData& data_ ) - : my_exception_data(data_) - , my_dynamic(false) - , my_exception_name( -#if TBB_USE_EXCEPTIONS - typeid(self_type).name() -#else /* !TBB_USE_EXCEPTIONS */ - "movable_exception" -#endif /* !TBB_USE_EXCEPTIONS */ - ) - {} - - movable_exception ( const movable_exception& src ) throw () - : tbb_exception(src) - , my_exception_data(src.my_exception_data) - , my_dynamic(false) - , my_exception_name(src.my_exception_name) - {} - - ~movable_exception () throw() {} - - const movable_exception& operator= ( const movable_exception& src ) { - if ( this != &src ) { - my_exception_data = src.my_exception_data; - my_exception_name = src.my_exception_name; - } - return *this; - } - - ExceptionData& data () throw() { return my_exception_data; } - - const ExceptionData& data () const throw() { return my_exception_data; } - - const char* name () const throw() __TBB_override { return my_exception_name; } - - const char* what () const throw() __TBB_override { return "tbb::movable_exception"; } - - movable_exception* move () throw() __TBB_override { - void* e = internal::allocate_via_handler_v3(sizeof(movable_exception)); - if ( e ) { - ::new (e) movable_exception(*this); - ((movable_exception*)e)->my_dynamic = true; - } - return (movable_exception*)e; - } - void destroy () throw() __TBB_override { - __TBB_ASSERT ( my_dynamic, "Method destroy can be called only on dynamically allocated movable_exceptions" ); - if ( my_dynamic ) { - this->~movable_exception(); - internal::deallocate_via_handler_v3(this); - } - } - void throw_self () __TBB_override { __TBB_THROW( *this ); } - -protected: - //! User data - ExceptionData my_exception_data; - -private: - //! Flag specifying whether this object has been dynamically allocated (by the move method) - bool my_dynamic; - - //! RTTI name of this class - /** We rely on the fact that RTTI names are static string constants. **/ - const char* my_exception_name; -}; - -#if !TBB_USE_CAPTURED_EXCEPTION -namespace internal { - -//! Exception container that preserves the exact copy of the original exception -/** This class can be used only when the appropriate runtime support (mandated - by C++0x) is present **/ -class tbb_exception_ptr { - std::exception_ptr my_ptr; - -public: - static tbb_exception_ptr* allocate (); - static tbb_exception_ptr* allocate ( const tbb_exception& tag ); - //! This overload uses move semantics (i.e. it empties src) - static tbb_exception_ptr* allocate ( captured_exception& src ); - - //! Destroys this objects - /** Note that objects of this type can be created only by the allocate() method. **/ - void destroy () throw(); - - //! Throws the contained exception . 
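Note: movable_exception::move() above is the core pre-C++11 trick for carrying an exception across threads: clone it into manually allocated storage with placement new on the throwing thread, destroy it explicitly on the receiving one. Reduced to its essentials (hypothetical names, std::malloc standing in for TBB's handler allocator):

#include <cstdlib>
#include <new>

template<typename E>
E* move_exception(const E& src) {
    void* mem = std::malloc(sizeof(E));  // TBB used allocate_via_handler_v3
    return mem ? ::new (mem) E(src) : 0; // placement-copy into the new block
}

template<typename E>
void destroy_exception(E* e) {
    e->~E();                             // explicit dtor, required by placement new
    std::free(e);
}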
- void throw_self () { std::rethrow_exception(my_ptr); } - -private: - tbb_exception_ptr ( const std::exception_ptr& src ) : my_ptr(src) {} - tbb_exception_ptr ( const captured_exception& src ) : - #if __TBB_MAKE_EXCEPTION_PTR_PRESENT - my_ptr(std::make_exception_ptr(src)) // the final function name in C++11 - #else - my_ptr(std::copy_exception(src)) // early C++0x drafts name - #endif - {} -}; // class tbb::internal::tbb_exception_ptr - -} // namespace internal -#endif /* !TBB_USE_CAPTURED_EXCEPTION */ - -} // namespace tbb - -#endif /* __TBB_TASK_GROUP_CONTEXT */ - -#endif /* __TBB_exception_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbb_machine.h b/lib/3rdParty/tbb/include/tbb/tbb_machine.h deleted file mode 100644 index 68d1d5d0..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbb_machine.h +++ /dev/null @@ -1,974 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_machine_H -#define __TBB_machine_H - -/** This header provides basic platform abstraction layer by hooking up appropriate - architecture/OS/compiler specific headers from the /include/tbb/machine directory. - If a plug-in header does not implement all the required APIs, it must specify - the missing ones by setting one or more of the following macros: - - __TBB_USE_GENERIC_PART_WORD_CAS - __TBB_USE_GENERIC_PART_WORD_FETCH_ADD - __TBB_USE_GENERIC_PART_WORD_FETCH_STORE - __TBB_USE_GENERIC_FETCH_ADD - __TBB_USE_GENERIC_FETCH_STORE - __TBB_USE_GENERIC_DWORD_FETCH_ADD - __TBB_USE_GENERIC_DWORD_FETCH_STORE - __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE - __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE - __TBB_USE_GENERIC_RELAXED_LOAD_STORE - __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE - - In this case tbb_machine.h will add missing functionality based on a minimal set - of APIs that are required to be implemented by all plug-n headers as described - further. - Note that these generic implementations may be sub-optimal for a particular - architecture, and thus should be relied upon only after careful evaluation - or as the last resort. - - Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to - indicate that the port is not going to support double word atomics. It may also - be set to 1 explicitly, though normally this is not necessary as tbb_machine.h - will set it automatically. - - __TBB_ENDIANNESS macro can be defined by the implementation as well. - It is used only if __TBB_USE_GENERIC_PART_WORD_CAS is set (or for testing), - and must specify the layout of aligned 16-bit and 32-bit data anywhere within a process - (while the details of unaligned 16-bit or 32-bit data or of 64-bit data are irrelevant). - The layout must be the same at all relevant memory locations within the current process; - in case of page-specific endianness, one endianness must be kept "out of sight". - Possible settings, reflecting hardware and possibly O.S. 
convention, are: - - __TBB_ENDIAN_BIG for big-endian data, - - __TBB_ENDIAN_LITTLE for little-endian data, - - __TBB_ENDIAN_DETECT for run-time detection iff exactly one of the above, - - __TBB_ENDIAN_UNSUPPORTED to prevent undefined behavior if none of the above. - - Prerequisites for each architecture port - ---------------------------------------- - The following functions and macros have no generic implementation. Therefore they must be - implemented in each machine architecture specific header either as a conventional - function or as a functional macro. - - __TBB_WORDSIZE - This is the size of machine word in bytes, i.e. for 32 bit systems it - should be defined to 4. - - __TBB_Yield() - Signals OS that the current thread is willing to relinquish the remainder - of its time quantum. - - __TBB_full_memory_fence() - Must prevent all memory operations from being reordered across it (both - by hardware and compiler). All such fences must be totally ordered (or - sequentially consistent). - - __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) - Must be provided if __TBB_USE_FENCED_ATOMICS is not set. - - __TBB_machine_cmpswp8( volatile void *ptr, int32_t value, int64_t comparand ) - Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set, - and for 32-bit architectures if __TBB_64BIT_ATOMICS is set - - __TBB_machine_(...), where - = {cmpswp, fetchadd, fetchstore} - = {1, 2, 4, 8} - = {full_fence, acquire, release, relaxed} - Must be provided if __TBB_USE_FENCED_ATOMICS is set. - - __TBB_control_consistency_helper() - Bridges the memory-semantics gap between architectures providing only - implicit C++0x "consume" semantics (like Power Architecture) and those - also implicitly obeying control dependencies (like IA-64 architecture). - It must be used only in conditional code where the condition is itself - data-dependent, and will then make subsequent code behave as if the - original data dependency were acquired. - It needs only a compiler fence where implied by the architecture - either specifically (like IA-64 architecture) or because generally stronger - "acquire" semantics are enforced (like x86). - It is always valid, though potentially suboptimal, to replace - control with acquire on the load and then remove the helper. - - __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() - Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set. - Enforce acquire and release semantics in generic implementations of fenced - store and load operations. Depending on the particular architecture/compiler - combination they may be a hardware fence, a compiler fence, both or nothing. - **/ - -#include "tbb_stddef.h" - -namespace tbb { -namespace internal { //< @cond INTERNAL - -//////////////////////////////////////////////////////////////////////////////// -// Overridable helpers declarations -// -// A machine/*.h file may choose to define these templates, otherwise it must -// request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s). 
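Note: the plug-in scheme described above lets a port supply only compare-and-swap and have the generic layer synthesize the remaining primitives (the __TBB_MACHINE_DEFINE_STORE8/LOAD8 macros below do exactly this with a retry loop). In modern C++ the same derivation is a few lines over std::atomic; a sketch, not TBB's code:

#include <atomic>
#include <cstdint>

// fetch_store (atomic exchange) built from nothing but a CAS retry loop.
inline int64_t generic_fetch_store(std::atomic<int64_t>& loc, int64_t value) {
    int64_t old = loc.load(std::memory_order_relaxed);
    while (!loc.compare_exchange_weak(old, value)) {
        // on failure, compare_exchange_weak refreshed 'old'; retry the swap
    }
    return old;  // the value that was replaced
}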
-// -template -struct machine_load_store; - -template -struct machine_load_store_relaxed; - -template -struct machine_load_store_seq_cst; -// -// End of overridable helpers declarations -//////////////////////////////////////////////////////////////////////////////// - -template struct atomic_selector; - -template<> struct atomic_selector<1> { - typedef int8_t word; - inline static word fetch_store ( volatile void* location, word value ); -}; - -template<> struct atomic_selector<2> { - typedef int16_t word; - inline static word fetch_store ( volatile void* location, word value ); -}; - -template<> struct atomic_selector<4> { -#if _MSC_VER && !_WIN64 - // Work-around that avoids spurious /Wp64 warnings - typedef intptr_t word; -#else - typedef int32_t word; -#endif - inline static word fetch_store ( volatile void* location, word value ); -}; - -template<> struct atomic_selector<8> { - typedef int64_t word; - inline static word fetch_store ( volatile void* location, word value ); -}; - -}} //< namespaces internal @endcond, tbb - -#define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M) \ - inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) { \ - for(;;) { \ - int64_t result = *(volatile int64_t *)ptr; \ - if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break; \ - } \ - } \ - -#define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M) \ - inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) { \ - /* Comparand and new value may be anything, they only must be equal, and */ \ - /* the value should have a low probability to be actually found in 'location'.*/ \ - const int64_t anyvalue = 2305843009213693951LL; \ - return __TBB_machine_cmpswp8##M(const_cast(ptr),anyvalue,anyvalue); \ - } \ - -// The set of allowed values for __TBB_ENDIANNESS (see above for details) -#define __TBB_ENDIAN_UNSUPPORTED -1 -#define __TBB_ENDIAN_LITTLE 0 -#define __TBB_ENDIAN_BIG 1 -#define __TBB_ENDIAN_DETECT 2 - -#if _WIN32||_WIN64 - -#ifdef _MANAGED -#pragma managed(push, off) -#endif - - #if __MINGW64__ || __MINGW32__ - extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); - #define __TBB_Yield() SwitchToThread() - #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT) - #include "machine/gcc_generic.h" - #elif __MINGW64__ - #include "machine/linux_intel64.h" - #elif __MINGW32__ - #include "machine/linux_ia32.h" - #endif - #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) - #include "machine/icc_generic.h" - #elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS) - #include "machine/windows_ia32.h" - #elif defined(_M_X64) - #include "machine/windows_intel64.h" - #elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS) - #include "machine/msvc_armv7.h" - #endif - -#ifdef _MANAGED -#pragma managed(pop) -#endif - -#elif __TBB_DEFINE_MIC - - #include "machine/mic_common.h" - #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) - #include "machine/icc_generic.h" - #else - #include "machine/linux_intel64.h" - #endif - -#elif __linux__ || __FreeBSD__ || __NetBSD__ - - #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT) - #include "machine/gcc_generic.h" - #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) - #include "machine/icc_generic.h" - #elif __i386__ - #include "machine/linux_ia32.h" - #elif __x86_64__ - #include "machine/linux_intel64.h" - #elif __ia64__ - #include "machine/linux_ia64.h" - #elif __powerpc__ - #include "machine/mac_ppc.h" - #elif __ARM_ARCH_7A__ - #include 
"machine/gcc_armv7.h" - #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT - #include "machine/gcc_generic.h" - #endif - #include "machine/linux_common.h" - -#elif __APPLE__ - //TODO: TBB_USE_GCC_BUILTINS is not used for Mac, Sun, Aix - #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) - #include "machine/icc_generic.h" - #elif __TBB_x86_32 - #include "machine/linux_ia32.h" - #elif __TBB_x86_64 - #include "machine/linux_intel64.h" - #elif __POWERPC__ - #include "machine/mac_ppc.h" - #endif - #include "machine/macos_common.h" - -#elif _AIX - - #include "machine/ibm_aix51.h" - -#elif __sun || __SUNPRO_CC - - #define __asm__ asm - #define __volatile__ volatile - - #if __i386 || __i386__ - #include "machine/linux_ia32.h" - #elif __x86_64__ - #include "machine/linux_intel64.h" - #elif __sparc - #include "machine/sunos_sparc.h" - #endif - #include - - #define __TBB_Yield() sched_yield() - -#endif /* OS selection */ - -#ifndef __TBB_64BIT_ATOMICS - #define __TBB_64BIT_ATOMICS 1 -#endif - -//TODO: replace usage of these functions with usage of tbb::atomic, and then remove them -//TODO: map functions with W suffix to use cast to tbb::atomic and according op, i.e. as_atomic().op() -// Special atomic functions -#if __TBB_USE_FENCED_ATOMICS - #define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence - #define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence - #define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence - #define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence - - #if __TBB_WORDSIZE==8 - #define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8full_fence - #define __TBB_machine_fetchstore8 __TBB_machine_fetchstore8full_fence - #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V) - #define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8acquire(P,1) - #define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8release(P,(-1)) - #else - #define __TBB_machine_fetchadd4 __TBB_machine_fetchadd4full_fence - #define __TBB_machine_fetchstore4 __TBB_machine_fetchstore4full_fence - #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd4release(P,V) - #define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd4acquire(P,1) - #define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd4release(P,(-1)) - #endif /* __TBB_WORDSIZE==4 */ -#else /* !__TBB_USE_FENCED_ATOMICS */ - #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) - #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) - #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) -#endif /* !__TBB_USE_FENCED_ATOMICS */ - -#if __TBB_WORDSIZE==4 - #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C) - #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V) - #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V) -#elif __TBB_WORDSIZE==8 - #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE - #error These macros should only be used on 32-bit platforms. - #endif - - #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) - #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V) - #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V) -#else /* __TBB_WORDSIZE != 8 */ - #error Unsupported machine word size. -#endif /* __TBB_WORDSIZE */ - -#ifndef __TBB_Pause - inline void __TBB_Pause(int32_t) { - __TBB_Yield(); - } -#endif - -namespace tbb { - -//! Sequentially consistent full memory fence. 
-inline void atomic_fence () { __TBB_full_memory_fence(); }
-
-namespace internal { //< @cond INTERNAL
-
-//! Class that implements exponential backoff.
-/** See implementation of spin_wait_while_eq for an example. */
-class atomic_backoff : no_copy {
- //! Time delay, in units of "pause" instructions.
- /** Should be equal to approximately the number of "pause" instructions
- that take the same time as a context switch. Must be a power of two.*/
- static const int32_t LOOPS_BEFORE_YIELD = 16;
- int32_t count;
-public:
- // In many cases, an object of this type is initialized eagerly on hot path,
- // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ }
- // For this reason, the construction cost must be very small!
- atomic_backoff() : count(1) {}
- // This constructor pauses immediately; do not use on hot paths!
- atomic_backoff( bool ) : count(1) { pause(); }
-
- //! Pause for a while.
- void pause() {
- if( count<=LOOPS_BEFORE_YIELD ) {
- __TBB_Pause(count);
- // Pause twice as long the next time.
- count*=2;
- } else {
- // Pause is so long that we might as well yield CPU to scheduler.
- __TBB_Yield();
- }
- }
-
- //! Pause for a few times and return false if saturated.
- bool bounded_pause() {
- __TBB_Pause(count);
- if( count<LOOPS_BEFORE_YIELD ) {
- // Pause twice as long the next time.
- count*=2;
- return true;
- } else {
- return false;
- }
- }
-
- void reset() {
- count = 1;
- }
-};
-
-//! Spin WHILE the value of the variable is equal to a given value
-/** T and U should be comparable types. */
-template<typename T, typename U>
-void spin_wait_while_eq( const volatile T& location, U value ) {
- atomic_backoff backoff;
- while( location==value ) backoff.pause();
-}
-
-//! Spin UNTIL the value of the variable is equal to a given value
-/** T and U should be comparable types. */
-template<typename T, typename U>
-void spin_wait_until_eq( const volatile T& location, const U value ) {
- atomic_backoff backoff;
- while( location!=value ) backoff.pause();
-}
-
-template<typename predicate_type>
-void spin_wait_while(predicate_type condition){
- atomic_backoff backoff;
- while( condition() ) backoff.pause();
-}
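The exponential-backoff spin loop deleted above is easy to reproduce with standard C++11 atomics. The sketch below is our illustration of the same policy, not TBB code; `demo::backoff` and `demo::wait_until_eq` are hypothetical names, and the busy loop stands in for the machine-level "pause" instruction.

#include <atomic>
#include <thread>

namespace demo {
struct backoff {
    static const int loops_before_yield = 16;
    int count;
    backoff() : count(1) {}
    void pause() {
        if( count <= loops_before_yield ) {
            for( volatile int i = 0; i < count; ++i ) {} // stand-in for "pause" instructions
            count *= 2;                                  // back off twice as long next time
        } else {
            std::this_thread::yield();                   // saturated: hand the CPU to the scheduler
        }
    }
};

template<typename T, typename U>
void wait_until_eq( const std::atomic<T>& location, U value ) {
    backoff b;
    while( location.load(std::memory_order_acquire) != value )
        b.pause();
}
} // namespace demo

The policy spins cheaply while a wait is short and degrades to yielding once the wait proves long, which is exactly what pause() above does via __TBB_Pause/__TBB_Yield.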
-////////////////////////////////////////////////////////////////////////////////
-// Generic compare-and-swap applied to only a part of a machine word.
-//
-#ifndef __TBB_ENDIANNESS
-#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
-#endif
-
-#if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED
-#error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED
-#endif
-
-#if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
-//
-// This function is the only use of __TBB_ENDIANNESS.
-// The following restrictions/limitations apply for this operation:
-// - T must be an integer type of at most 4 bytes for the casts and calculations to work
-// - T must also be less than 4 bytes to avoid compiler warnings when computing mask
-// (and for the operation to be useful at all, so no workaround is applied)
-// - the architecture must consistently use either little-endian or big-endian (same for all locations)
-//
-// TODO: static_assert for the type requirements stated above
-template<typename T>
-inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
- struct endianness{ static bool is_big_endian(){
- #if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT
- const uint32_t probe = 0x03020100;
- return (((const char*)(&probe))[0]==0x03);
- #elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE
- return __TBB_ENDIANNESS==__TBB_ENDIAN_BIG;
- #else
- #error Unexpected value of __TBB_ENDIANNESS
- #endif
- }};
-
- const uint32_t byte_offset = (uint32_t) ((uintptr_t)ptr & 0x3);
- volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
-
- // location of T within uint32_t for a C++ shift operation
- const uint32_t bits_to_shift = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
- const uint32_t mask = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
- // for signed T, any sign extension bits in cast value/comparand are immediately clipped by mask
- const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
- const uint32_t shifted_value = ((uint32_t)value << bits_to_shift)&mask;
-
- for( atomic_backoff b;;b.pause() ) {
- const uint32_t surroundings = *aligned_ptr & ~mask; // may have changed during the pause
- const uint32_t big_comparand = surroundings | shifted_comparand;
- const uint32_t big_value = surroundings | shifted_value;
- // __TBB_machine_cmpswp4 presumed to have full fence.
- // Cast shuts up /Wp64 warning
- const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
- if( big_result == big_comparand // CAS succeeded
- || ((big_result ^ big_comparand) & mask) != 0 ) // CAS failed and the bits of interest have changed
- {
- return T((big_result & mask) >> bits_to_shift);
- }
- else continue; // CAS failed but the bits of interest were not changed
- }
-}
-#endif // __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
-////////////////////////////////////////////////////////////////////////////////
-
-template<size_t S, typename T>
-inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
-
-template<>
-inline int8_t __TBB_CompareAndSwapGeneric <1,int8_t> (volatile void *ptr, int8_t value, int8_t comparand ) {
-#if __TBB_USE_GENERIC_PART_WORD_CAS
- return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr,value,comparand);
-#else
- return __TBB_machine_cmpswp1(ptr,value,comparand);
-#endif
-}
-
-template<>
-inline int16_t __TBB_CompareAndSwapGeneric <2,int16_t> (volatile void *ptr, int16_t value, int16_t comparand ) {
-#if __TBB_USE_GENERIC_PART_WORD_CAS
- return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr,value,comparand);
-#else
- return __TBB_machine_cmpswp2(ptr,value,comparand);
-#endif
-}
-
-template<>
-inline int32_t __TBB_CompareAndSwapGeneric <4,int32_t> (volatile void *ptr, int32_t value, int32_t comparand ) {
- // Cast shuts up /Wp64 warning
- return (int32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
-}
-
-#if __TBB_64BIT_ATOMICS
-template<>
-inline int64_t __TBB_CompareAndSwapGeneric <8,int64_t> (volatile void *ptr, int64_t value, int64_t comparand ) {
- return __TBB_machine_cmpswp8(ptr,value,comparand);
-}
-#endif
-
-template<size_t S, typename T>
-inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
- T result;
- for( atomic_backoff b;;b.pause() ) {
- result = *reinterpret_cast<volatile T *>(ptr);
- // __TBB_CompareAndSwapGeneric presumed to have full fence.
- if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
- break;
- }
- return result;
-}
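__TBB_FetchAndAddGeneric above is the classic read/modify/CAS retry loop. For comparison, here is a minimal sketch of the same reduction written against std::atomic (our names, not a TBB API):

#include <atomic>

namespace demo {
template<typename T>
T fetch_and_add( std::atomic<T>& location, T addend ) {
    T expected = location.load(std::memory_order_relaxed);
    // On failure compare_exchange_weak reloads 'expected' with the current
    // value, so the loop retries against fresh contents automatically.
    while( !location.compare_exchange_weak( expected, expected + addend ) ) {}
    return expected; // value observed before the addition, i.e. fetch-and-add semantics
}
} // namespace demo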
-template<size_t S, typename T>
-inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
- T result;
- for( atomic_backoff b;;b.pause() ) {
- result = *reinterpret_cast<volatile T *>(ptr);
- // __TBB_CompareAndSwapGeneric presumed to have full fence.
- if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
- break;
- }
- return result;
-}
-
-#if __TBB_USE_GENERIC_PART_WORD_CAS
-#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t>
-#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t>
-#endif
-
-#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
-#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t>
-#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t>
-#endif
-
-#if __TBB_USE_GENERIC_FETCH_ADD
-#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>
-#endif
-
-#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
-#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t>
-#endif
-
-#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
-#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t>
-#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t>
-#endif
-
-#if __TBB_USE_GENERIC_FETCH_STORE
-#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t>
-#endif
-
-#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
-#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t>
-#endif
-
-#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
-#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S) \
- atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \
- return __TBB_machine_fetchstore##S( location, value ); \
- }
-
-__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
-__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
-__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
-__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
-
-#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
-#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
-
-#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
-/*TODO: find a more elegant way to handle function names difference*/
-#if ! __TBB_USE_FENCED_ATOMICS
- /* This name forwarding is needed for generic implementation of
- * load8/store8 defined below (via macro) to pick the right CAS function*/
- #define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
-#endif
-__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
-__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
-
-#if ! __TBB_USE_FENCED_ATOMICS
- #undef __TBB_machine_cmpswp8full_fence
-#endif
-
-#define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
-#define __TBB_machine_load8 tbb::internal::__TBB_machine_generic_load8full_fence
-#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
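The generic load8 defined through __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED above relies on a neat trick: a CAS whose comparand and new value are identical never changes memory, yet always returns the full 64-bit contents atomically. A hedged modern-C++ rendering of the same idea (demo::atomic_load8_via_cas is our name, not TBB's):

#include <atomic>
#include <cstdint>

namespace demo {
inline int64_t atomic_load8_via_cas( std::atomic<int64_t>& location ) {
    // Comparand and desired value are identical, so memory is never modified;
    // the CAS still reports the full 8-byte contents atomically. The constant
    // mirrors the 'anyvalue' used above: unlikely to be stored there for real,
    // so the common path is a single failed CAS that just reads the word.
    int64_t expected = 2305843009213693951LL;
    location.compare_exchange_strong( expected, expected );
    return expected; // on failure, 'expected' was overwritten with the actual value
}
} // namespace demo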
-#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
-/** Fenced operations use volatile qualifier to prevent compiler from optimizing
- them out, and on architectures with weak memory ordering to induce compiler
- to generate code with appropriate acquire/release semantics.
- On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has
- no effect on code gen, and consistency helpers serve as a compiler fence (the
- latter being true for IA64/gcc as well to fix a bug in some gcc versions).
- This code assumes that the generated instructions will operate atomically,
- which typically requires a type that can be moved in a single instruction,
- cooperation from the compiler for effective use of such an instruction,
- and appropriate alignment of the data. **/
-template <typename T, size_t S>
-struct machine_load_store {
- static T load_with_acquire ( const volatile T& location ) {
- T to_return = location;
- __TBB_acquire_consistency_helper();
- return to_return;
- }
- static void store_with_release ( volatile T &location, T value ) {
- __TBB_release_consistency_helper();
- location = value;
- }
-};
-
-//in general, plain load and store of 32bit compiler is not atomic for 64bit types
-#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
-template <typename T>
-struct machine_load_store<T,8> {
- static T load_with_acquire ( const volatile T& location ) {
- return (T)__TBB_machine_load8( (const volatile void*)&location );
- }
- static void store_with_release ( volatile T& location, T value ) {
- __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
- }
-};
-#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
-#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
-
-#if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
-template <typename T, size_t S>
-struct machine_load_store_seq_cst {
- static T load ( const volatile T& location ) {
- __TBB_full_memory_fence();
- return machine_load_store<T,S>::load_with_acquire( location );
- }
-#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
- static void store ( volatile T &location, T value ) {
- atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
- }
-#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
- static void store ( volatile T &location, T value ) {
- machine_load_store<T,S>::store_with_release( location, value );
- __TBB_full_memory_fence();
- }
-#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
-};
-
-#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
-/** The implementation does not use functions __TBB_machine_load8/store8 as they
- are not required to be sequentially consistent. **/
-template <typename T>
-struct machine_load_store_seq_cst<T,8> {
- static T load ( const volatile T& location ) {
- // Comparand and new value may be anything, they only must be equal, and
- // the value should have a low probability to be actually found in 'location'.
- const int64_t anyvalue = 2305843009213693951LL;
- return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
- }
- static void store ( volatile T &location, T value ) {
- int64_t result = (volatile int64_t&)location;
- while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
- result = (volatile int64_t&)location;
- }
-};
-#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
-#endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
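In C++11 terms, the generic flavors above map directly onto std::memory_order. The following dictionary is our illustration of the correspondence, not TBB code:

#include <atomic>

namespace demo {
template <typename T>
T load_with_acquire( const std::atomic<T>& loc ) {
    return loc.load(std::memory_order_acquire);   // machine_load_store::load_with_acquire
}
template <typename T>
void store_with_release( std::atomic<T>& loc, T value ) {
    loc.store(value, std::memory_order_release);  // machine_load_store::store_with_release
}
template <typename T>
T load_seq_cst( const std::atomic<T>& loc ) {
    return loc.load(std::memory_order_seq_cst);   // machine_load_store_seq_cst::load
}
template <typename T>
void store_seq_cst( std::atomic<T>& loc, T value ) {
    loc.store(value, std::memory_order_seq_cst);  // full fence after release store, or a fetch-store
}
} // namespace demo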
-#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
-// Relaxed operations add volatile qualifier to prevent compiler from optimizing them out.
-/** Volatile should not incur any additional cost on IA32, Intel64, and Sparc TSO
- architectures. However on architectures with weak memory ordering compiler may
- generate code with acquire/release semantics for operations on volatile data. **/
-template <typename T, size_t S>
-struct machine_load_store_relaxed {
- static inline T load ( const volatile T& location ) {
- return location;
- }
- static inline void store ( volatile T& location, T value ) {
- location = value;
- }
-};
-
-#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
-template <typename T>
-struct machine_load_store_relaxed<T,8> {
- static inline T load ( const volatile T& location ) {
- return (T)__TBB_machine_load8( (const volatile void*)&location );
- }
- static inline void store ( volatile T& location, T value ) {
- __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
- }
-};
-#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
-#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
-
-#undef __TBB_WORDSIZE //this macro is forbidden to use outside of atomic machinery
-
-template<typename T>
-inline T __TBB_load_with_acquire(const volatile T &location) {
- return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
-}
-template<typename T, typename V>
-inline void __TBB_store_with_release(volatile T& location, V value) {
- machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
-}
-//! Overload that exists solely to avoid /Wp64 warnings.
-inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
- machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
-}
-
-template<typename T>
-inline T __TBB_load_full_fence(const volatile T &location) {
- return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
-}
-template<typename T, typename V>
-inline void __TBB_store_full_fence(volatile T& location, V value) {
- machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
-}
-//! Overload that exists solely to avoid /Wp64 warnings.
-inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
- machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
-}
-
-template<typename T>
-inline T __TBB_load_relaxed (const volatile T& location) {
- return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<const T&>(location) );
-}
-template<typename T, typename V>
-inline void __TBB_store_relaxed ( volatile T& location, V value ) {
- machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
-}
-//! Overload that exists solely to avoid /Wp64 warnings.
-inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
- machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
-}
-
-// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
-// strict as type T. The type should have a trivial default constructor and destructor, so that
-// arrays of that type can be declared without initializers.
-// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
-// to a type bigger than T.
-// The default definition here works on machines where integers are naturally aligned and the
-// strictest alignment is 64.
-#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict - -#if __TBB_ALIGNAS_PRESENT - -// Use C++11 keywords alignas and alignof -#define __TBB_DefineTypeWithAlignment(PowerOf2) \ -struct alignas(PowerOf2) __TBB_machine_type_with_alignment_##PowerOf2 { \ - uint32_t member[PowerOf2/sizeof(uint32_t)]; \ -}; -#define __TBB_alignof(T) alignof(T) - -#elif __TBB_ATTRIBUTE_ALIGNED_PRESENT - -#define __TBB_DefineTypeWithAlignment(PowerOf2) \ -struct __TBB_machine_type_with_alignment_##PowerOf2 { \ - uint32_t member[PowerOf2/sizeof(uint32_t)]; \ -} __attribute__((aligned(PowerOf2))); -#define __TBB_alignof(T) __alignof__(T) - -#elif __TBB_DECLSPEC_ALIGN_PRESENT - -#define __TBB_DefineTypeWithAlignment(PowerOf2) \ -__declspec(align(PowerOf2)) \ -struct __TBB_machine_type_with_alignment_##PowerOf2 { \ - uint32_t member[PowerOf2/sizeof(uint32_t)]; \ -}; -#define __TBB_alignof(T) __alignof(T) - -#else /* A compiler with unknown syntax for data alignment */ -#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) -#endif - -/* Now declare types aligned to useful powers of two */ -// TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms? -__TBB_DefineTypeWithAlignment(16) -__TBB_DefineTypeWithAlignment(32) -__TBB_DefineTypeWithAlignment(64) - -typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment; - -// Primary template is a declaration of incomplete type so that it fails with unknown alignments -template struct type_with_alignment; - -// Specializations for allowed alignments -template<> struct type_with_alignment<1> { char member; }; -template<> struct type_with_alignment<2> { uint16_t member; }; -template<> struct type_with_alignment<4> { uint32_t member; }; -template<> struct type_with_alignment<8> { uint64_t member; }; -template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; }; -template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; }; -template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; }; - -#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN -//! Work around for bug in GNU 3.2 and MSVC compilers. -/** Bug is that compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated. - The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */ -template -struct work_around_alignment_bug { - static const size_t alignment = __TBB_alignof(T); -}; -#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment::alignment> -#else -#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)> -#endif /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */ - -#endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */ - -// Template class here is to avoid instantiation of the static data for modules that don't use it -template -struct reverse { - static const T byte_table[256]; -}; -// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed -// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost. 
-template -const T reverse::byte_table[256] = { - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, - 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, - 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, - 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, - 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, - 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, - 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, - 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, - 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, - 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, - 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF -}; - -} // namespace internal @endcond -} // namespace tbb - -// Preserving access to legacy APIs -using tbb::internal::__TBB_load_with_acquire; -using tbb::internal::__TBB_store_with_release; - -// Mapping historically used names to the ones expected by atomic_load_store_traits -#define __TBB_load_acquire __TBB_load_with_acquire -#define __TBB_store_release __TBB_store_with_release - -#ifndef __TBB_Log2 -inline intptr_t __TBB_Log2( uintptr_t x ) { - if( x==0 ) return -1; - intptr_t result = 0; - -#if !defined(_M_ARM) - uintptr_t tmp_; - if( sizeof(x)>4 && (tmp_ = ((uint64_t)x)>>32) ) { x=tmp_; result += 32; } -#endif - if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; } - if( uintptr_t tmp = x>>8 ) { x=tmp; result += 8; } - if( uintptr_t tmp = x>>4 ) { x=tmp; result += 4; } - if( uintptr_t tmp = x>>2 ) { x=tmp; result += 2; } - - return (x&2)? 
result+1: result; -} -#endif - -#ifndef __TBB_AtomicOR -inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) { - for( tbb::internal::atomic_backoff b;;b.pause() ) { - uintptr_t tmp = *(volatile uintptr_t *)operand; - uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp); - if( result==tmp ) break; - } -} -#endif - -#ifndef __TBB_AtomicAND -inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) { - for( tbb::internal::atomic_backoff b;;b.pause() ) { - uintptr_t tmp = *(volatile uintptr_t *)operand; - uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp); - if( result==tmp ) break; - } -} -#endif - -#if __TBB_PREFETCHING -#ifndef __TBB_cl_prefetch -#error This platform does not define cache management primitives required for __TBB_PREFETCHING -#endif - -#ifndef __TBB_cl_evict -#define __TBB_cl_evict(p) -#endif -#endif - -#ifndef __TBB_Flag -typedef unsigned char __TBB_Flag; -#endif -typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; - -#ifndef __TBB_TryLockByte -inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) { - return __TBB_machine_cmpswp1(&flag,1,0)==0; -} -#endif - -#ifndef __TBB_LockByte -inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) { - tbb::internal::atomic_backoff backoff; - while( !__TBB_TryLockByte(flag) ) backoff.pause(); - return 0; -} -#endif - -#ifndef __TBB_UnlockByte -#define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0) -#endif - -// lock primitives with Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) -#if ( __TBB_x86_32 || __TBB_x86_64 ) /* only on ia32/intel64 */ -inline void __TBB_TryLockByteElidedCancel() { __TBB_machine_try_lock_elided_cancel(); } - -inline bool __TBB_TryLockByteElided( __TBB_atomic_flag& flag ) { - bool res = __TBB_machine_try_lock_elided( &flag )!=0; - // to avoid the "lemming" effect, we need to abort the transaction - // if __TBB_machine_try_lock_elided returns false (i.e., someone else - // has acquired the mutex non-speculatively). - if( !res ) __TBB_TryLockByteElidedCancel(); - return res; -} - -inline void __TBB_LockByteElided( __TBB_atomic_flag& flag ) -{ - for(;;) { - tbb::internal::spin_wait_while_eq( flag, 1 ); - if( __TBB_machine_try_lock_elided( &flag ) ) - return; - // Another thread acquired the lock "for real". - // To avoid the "lemming" effect, we abort the transaction. - __TBB_TryLockByteElidedCancel(); - } -} - -inline void __TBB_UnlockByteElided( __TBB_atomic_flag& flag ) { - __TBB_machine_unlock_elided( &flag ); -} -#endif - -#ifndef __TBB_ReverseByte -inline unsigned char __TBB_ReverseByte(unsigned char src) { - return tbb::internal::reverse::byte_table[src]; -} -#endif - -template -T __TBB_ReverseBits(T src) { - T dst; - unsigned char *original = (unsigned char *) &src; - unsigned char *reversed = (unsigned char *) &dst; - - for( int i = sizeof(T)-1; i >= 0; i-- ) - reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] ); - - return dst; -} - -#endif /* __TBB_machine_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbb_profiling.h b/lib/3rdParty/tbb/include/tbb/tbb_profiling.h deleted file mode 100644 index ffaf98b9..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbb_profiling.h +++ /dev/null @@ -1,284 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
-
-
-
-*/
-
-#ifndef __TBB_profiling_H
-#define __TBB_profiling_H
-
-namespace tbb {
- namespace internal {
-
- //
- // This is not under __TBB_ITT_STRUCTURE_API because these values are used directly in flow_graph.h.
- //
-
- // include list of index names
- #define TBB_STRING_RESOURCE(index_name,str) index_name,
- enum string_index {
- #include "internal/_tbb_strings.h"
- NUM_STRINGS
- };
- #undef TBB_STRING_RESOURCE
-
- enum itt_relation
- {
- __itt_relation_is_unknown = 0,
- __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */
- __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
- __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */
- __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */
- __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
- __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
- __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
- };
-
- }
-}
-
-// Check if the tools support is enabled
-#if (_WIN32||_WIN64||__linux__) && !__MINGW32__ && TBB_USE_THREADING_TOOLS
-
-#if _WIN32||_WIN64
-#include <stdlib.h> /* mbstowcs_s */
-#endif
-#include "tbb_stddef.h"
-
-namespace tbb {
- namespace internal {
-
-#if _WIN32||_WIN64
- void __TBB_EXPORTED_FUNC itt_set_sync_name_v3( void *obj, const wchar_t* name );
- inline size_t multibyte_to_widechar( wchar_t* wcs, const char* mbs, size_t bufsize) {
-#if _MSC_VER>=1400
- size_t len;
- mbstowcs_s( &len, wcs, bufsize, mbs, _TRUNCATE );
- return len; // mbstowcs_s counts null terminator
-#else
- size_t len = mbstowcs( wcs, mbs, bufsize );
- if(wcs && len!=size_t(-1) )
- wcs[len<bufsize-1? len: bufsize-1] = wchar_t('\0');
- return len+1; // mbstowcs does not count the null terminator
-#endif
- }
-#endif /* _WIN32||_WIN64 */
- } // namespace internal
-} // namespace tbb
-
-#endif /* TBB_USE_THREADING_TOOLS */
-
-namespace tbb {
- namespace internal {
- enum notify_type {prepare=0, cancel, acquired, releasing};
-
- void __TBB_EXPORTED_FUNC call_itt_notify_v5(int t, void *ptr);
- void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3(void *dst, void *src);
- void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3(const void *src);
- void* __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src );
-
- template <typename T, typename U>
- inline void itt_store_word_with_release(tbb::atomic<T>& dst, U src) {
-#if TBB_USE_THREADING_TOOLS
- // This assertion should be replaced with static_assert
- __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
- itt_store_pointer_with_release_v3(&dst, (void *)uintptr_t(src));
-#else
- dst = src;
-#endif // TBB_USE_THREADING_TOOLS
- }
-
- template <typename T>
- inline T itt_load_word_with_acquire(const tbb::atomic<T>& src) {
-#if TBB_USE_THREADING_TOOLS
- // This assertion should be replaced with static_assert
- __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
- // Workaround for overzealous compiler warnings
- #pragma warning (push)
- #pragma warning (disable: 4311)
-#endif
- T result = (T)itt_load_pointer_with_acquire_v3(&src);
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
- #pragma warning (pop)
-#endif
- return result;
-#else
- return src;
-#endif // TBB_USE_THREADING_TOOLS
- }
-
- template <typename T>
- inline void itt_store_word_with_release(T& dst, T src) {
-#if TBB_USE_THREADING_TOOLS
- // This assertion should be replaced with static_assert
- __TBB_ASSERT(sizeof(T) == sizeof(void *),
"Type must be word-sized."); - itt_store_pointer_with_release_v3(&dst, (void *)src); -#else - __TBB_store_with_release(dst, src); -#endif // TBB_USE_THREADING_TOOLS - } - - template - inline T itt_load_word_with_acquire(const T& src) { -#if TBB_USE_THREADING_TOOLS - // This assertion should be replaced with static_assert - __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized"); - return (T)itt_load_pointer_with_acquire_v3(&src); -#else - return __TBB_load_with_acquire(src); -#endif // TBB_USE_THREADING_TOOLS - } - - template - inline void itt_hide_store_word(T& dst, T src) { -#if TBB_USE_THREADING_TOOLS - //TODO: This assertion should be replaced with static_assert - __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized"); - itt_store_pointer_with_release_v3(&dst, (void *)src); -#else - dst = src; -#endif - } - - //TODO: rename to itt_hide_load_word_relaxed - template - inline T itt_hide_load_word(const T& src) { -#if TBB_USE_THREADING_TOOLS - //TODO: This assertion should be replaced with static_assert - __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized."); - return (T)itt_load_pointer_v3(&src); -#else - return src; -#endif - } - -#if TBB_USE_THREADING_TOOLS - inline void call_itt_notify(notify_type t, void *ptr) { - call_itt_notify_v5((int)t, ptr); - } - -#else - inline void call_itt_notify(notify_type /*t*/, void * /*ptr*/) {} - -#endif // TBB_USE_THREADING_TOOLS - -#if __TBB_ITT_STRUCTURE_API - inline void itt_make_task_group( itt_domain_enum domain, void *group, unsigned long long group_extra, - void *parent, unsigned long long parent_extra, string_index name_index ) { - itt_make_task_group_v7( domain, group, group_extra, parent, parent_extra, name_index ); - } - - inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_index key, const char *value ) { - itt_metadata_str_add_v7( domain, addr, addr_extra, key, value ); - } - - inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, - itt_relation relation, void *addr1, unsigned long long addr1_extra ) { - itt_relation_add_v7( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); - } - - inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, - void *parent, unsigned long long parent_extra, string_index name_index ) { - itt_task_begin_v7( domain, task, task_extra, parent, parent_extra, name_index ); - } - - inline void itt_task_end( itt_domain_enum domain ) { - itt_task_end_v7( domain ); - } - - inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, - void *parent, unsigned long long parent_extra, string_index name_index ) { - itt_region_begin_v9( domain, region, region_extra, parent, parent_extra, name_index ); - } - - inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { - itt_region_end_v9( domain, region, region_extra ); - } -#endif // __TBB_ITT_STRUCTURE_API - - } // namespace internal -} // namespace tbb - -#endif /* __TBB_profiling_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbb_stddef.h b/lib/3rdParty/tbb/include/tbb/tbb_stddef.h deleted file mode 100644 index 236f3d83..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbb_stddef.h +++ /dev/null @@ -1,542 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_tbb_stddef_H -#define __TBB_tbb_stddef_H - -// Marketing-driven product version -#define TBB_VERSION_MAJOR 2017 -#define TBB_VERSION_MINOR 0 - -// Engineering-focused interface version -#define TBB_INTERFACE_VERSION 9107 -#define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 - -// The oldest major interface version still supported -// To be used in SONAME, manifests, etc. -#define TBB_COMPATIBLE_INTERFACE_VERSION 2 - -#define __TBB_STRING_AUX(x) #x -#define __TBB_STRING(x) __TBB_STRING_AUX(x) - -// We do not need defines below for resource processing on windows -#if !defined RC_INVOKED - -// Define groups for Doxygen documentation -/** - * @defgroup algorithms Algorithms - * @defgroup containers Containers - * @defgroup memory_allocation Memory Allocation - * @defgroup synchronization Synchronization - * @defgroup timing Timing - * @defgroup task_scheduling Task Scheduling - */ - -// Simple text that is displayed on the main page of Doxygen documentation. -/** - * \mainpage Main Page - * - * Click the tabs above for information about the - * - Modules (groups of functionality) implemented by the library - * - Classes provided by the library - * - Files constituting the library. - * . - * Please note that significant part of TBB functionality is implemented in the form of - * template functions, descriptions of which are not accessible on the Classes - * tab. Use Modules or Namespace/Namespace Members - * tabs to find them. - * - * Additional pieces of information can be found here - * - \subpage concepts - * . - */ - -/** \page concepts TBB concepts - - A concept is a set of requirements to a type, which are necessary and sufficient - for the type to model a particular behavior or a set of behaviors. Some concepts - are specific to a particular algorithm (e.g. algorithm body), while other ones - are common to several algorithms (e.g. range concept). - - All TBB algorithms make use of different classes implementing various concepts. - Implementation classes are supplied by the user as type arguments of template - parameters and/or as objects passed as function call arguments. The library - provides predefined implementations of some concepts (e.g. several kinds of - \ref range_req "ranges"), while other ones must always be implemented by the user. - - TBB defines a set of minimal requirements each concept must conform to. 
Here is - the list of different concepts hyperlinked to the corresponding requirements specifications: - - \subpage range_req - - \subpage parallel_do_body_req - - \subpage parallel_for_body_req - - \subpage parallel_reduce_body_req - - \subpage parallel_scan_body_req - - \subpage parallel_sort_iter_req -**/ - -// tbb_config.h should be included the first since it contains macro definitions used in other headers -#include "tbb_config.h" - -#if _MSC_VER >=1400 - #define __TBB_EXPORTED_FUNC __cdecl - #define __TBB_EXPORTED_METHOD __thiscall -#else - #define __TBB_EXPORTED_FUNC - #define __TBB_EXPORTED_METHOD -#endif - -#if __INTEL_COMPILER || _MSC_VER -#define __TBB_NOINLINE(decl) __declspec(noinline) decl -#elif __GNUC__ -#define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) -#else -#define __TBB_NOINLINE(decl) decl -#endif - -#if __TBB_NOEXCEPT_PRESENT -#define __TBB_NOEXCEPT(expression) noexcept(expression) -#else -#define __TBB_NOEXCEPT(expression) -#endif - -#include /* Need size_t and ptrdiff_t */ - -#if _MSC_VER - #define __TBB_tbb_windef_H - #include "internal/_tbb_windef.h" - #undef __TBB_tbb_windef_H -#endif -#if !defined(_MSC_VER) || _MSC_VER>=1600 - #include -#endif - -//! Type for an assertion handler -typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment ); - -#if __TBBMALLOC_BUILD -namespace rml { namespace internal { - #define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : rml::internal::assertion_failure(__FILE__,__LINE__,#predicate,message)) -#else -namespace tbb { - #define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::assertion_failure(__FILE__,__LINE__,#predicate,message)) -#endif - - //! Set assertion handler and return previous value of it. - assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( assertion_handler_type new_handler ); - - //! Process an assertion failure. - /** Normally called from __TBB_ASSERT macro. - If assertion handler is null, print message for assertion failure and abort. - Otherwise call the assertion handler. */ - void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int line, const char* expression, const char* comment ); - -#if __TBBMALLOC_BUILD -}} // namespace rml::internal -#else -} // namespace tbb -#endif - -#if TBB_USE_ASSERT - - //! Assert that x is true. - /** If x is false, print assertion failure message. - If the comment argument is not NULL, it is printed as part of the failure message. - The comment argument has no other effect. */ - #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) - - #define __TBB_ASSERT_EX __TBB_ASSERT - -#else /* !TBB_USE_ASSERT */ - - //! No-op version of __TBB_ASSERT. - #define __TBB_ASSERT(predicate,comment) ((void)0) - //! "Extended" version is useful to suppress warnings if a variable is only used with an assert - #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) - -#endif /* !TBB_USE_ASSERT */ - -//! The namespace tbb contains all components of the library. 
-namespace tbb {
-
- namespace internal {
-#if _MSC_VER && _MSC_VER<1600
- typedef __int8 int8_t;
- typedef __int16 int16_t;
- typedef __int32 int32_t;
- typedef __int64 int64_t;
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int16 uint16_t;
- typedef unsigned __int32 uint32_t;
- typedef unsigned __int64 uint64_t;
-#else /* Posix */
- using ::int8_t;
- using ::int16_t;
- using ::int32_t;
- using ::int64_t;
- using ::uint8_t;
- using ::uint16_t;
- using ::uint32_t;
- using ::uint64_t;
-#endif /* Posix */
- } // namespace internal
-
- using std::size_t;
- using std::ptrdiff_t;
-
-//! The function returns the interface version of the TBB shared library being used.
-/**
- * The version it returns is determined at runtime, not at compile/link time.
- * So it can be different than the value of TBB_INTERFACE_VERSION obtained at compile time.
- */
-extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version();
-
-/**
- * @cond INTERNAL
- * @brief Identifiers declared inside namespace internal should never be used directly by client code.
- */
-namespace internal {
-
-//! Compile-time constant that is upper bound on cache line/sector size.
-/** It should be used only in situations where having a compile-time upper
- bound is more useful than a run-time exact answer.
- @ingroup memory_allocation */
-const size_t NFS_MaxLineSize = 128;
-
-/** Label for data that may be accessed from different threads, and that may eventually become wrapped
- in a formal atomic type.
-
- Note that no problems have yet been observed relating to the definition currently being empty,
- even if at least "volatile" would seem to be in order to avoid data sometimes temporarily hiding
- in a register (although "volatile" as a "poor man's atomic" lacks several other features of a proper
- atomic, some of which are now provided instead through specialized functions).
-
- Note that usage is intentionally compatible with a definition as qualifier "volatile",
- both as a way to have the compiler help enforce use of the label and to quickly rule out
- one potential issue.
-
- Note however that, with some architecture/compiler combinations, e.g. on IA-64 architecture, "volatile"
- also has non-portable memory semantics that are needlessly expensive for "relaxed" operations.
-
- Note that this must only be applied to data that will not change bit patterns when cast to/from
- an integral type of the same length; tbb::atomic must be used instead for, e.g., floating-point types.
-
- TODO: apply wherever relevant **/
-#define __TBB_atomic // intentionally empty, see above
-
-#if __TBB_OVERRIDE_PRESENT
-#define __TBB_override override
-#else
-#define __TBB_override // formal comment only
-#endif
-
-template<class T, size_t S, size_t R>
-struct padded_base : T {
- char pad[S - R];
-};
-template<class T, size_t S> struct padded_base<T, S, 0> : T {};
-
-//! Pads type T to fill out to a multiple of cache line size.
-template<class T, size_t S = NFS_MaxLineSize>
-struct padded : padded_base<T, S, sizeof(T) % S> {};
-
-//! Extended variant of the standard offsetof macro
-/** The standard offsetof macro is not sufficient for TBB as it can be used for
- POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/
-#define __TBB_offsetof(class_name, member_name) \
- ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000)
-
-//! Returns address of the object containing a member with the given name and address
-#define __TBB_get_object_ref(class_name, member_name, member_addr) \
- (*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name)))
-
-//!
Throws std::runtime_error with what() returning error_code description prefixed with aux_info -void __TBB_EXPORTED_FUNC handle_perror( int error_code, const char* aux_info ); - -#if TBB_USE_EXCEPTIONS - #define __TBB_TRY try - #define __TBB_CATCH(e) catch(e) - #define __TBB_THROW(e) throw e - #define __TBB_RETHROW() throw -#else /* !TBB_USE_EXCEPTIONS */ - inline bool __TBB_false() { return false; } - #define __TBB_TRY - #define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() ) - #define __TBB_THROW(e) ((void)0) - #define __TBB_RETHROW() ((void)0) -#endif /* !TBB_USE_EXCEPTIONS */ - -//! Report a runtime warning. -void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... ); - -#if TBB_USE_ASSERT -static void* const poisoned_ptr = reinterpret_cast(-1); - -//! Set p to invalid pointer value. -// Also works for regular (non-__TBB_atomic) pointers. -template -inline void poison_pointer( T* __TBB_atomic & p ) { p = reinterpret_cast(poisoned_ptr); } - -/** Expected to be used in assertions only, thus no empty form is defined. **/ -template -inline bool is_poisoned( T* p ) { return p == reinterpret_cast(poisoned_ptr); } -#else -template -inline void poison_pointer( T* __TBB_atomic & ) {/*do nothing*/} -#endif /* !TBB_USE_ASSERT */ - -//! Cast between unrelated pointer types. -/** This method should be used sparingly as a last resort for dealing with - situations that inherently break strict ISO C++ aliasing rules. */ -// T is a pointer type because it will be explicitly provided by the programmer as a template argument; -// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. -template -inline T punned_cast( U* ptr ) { - uintptr_t x = reinterpret_cast(ptr); - return reinterpret_cast(x); -} - -//! Base class for types that should not be assigned. -class no_assign { - // Deny assignment - void operator=( const no_assign& ); -public: -#if __GNUC__ - //! Explicitly define default construction, because otherwise gcc issues gratuitous warning. - no_assign() {} -#endif /* __GNUC__ */ -}; - -//! Base class for types that should not be copied or assigned. -class no_copy: no_assign { - //! Deny copy construction - no_copy( const no_copy& ); -public: - //! Allow default construction - no_copy() {} -}; - -#if TBB_DEPRECATED_MUTEX_COPYING -class mutex_copy_deprecated_and_disabled {}; -#else -// By default various implementations of mutexes are not copy constructible -// and not copy assignable. -class mutex_copy_deprecated_and_disabled : no_copy {}; -#endif - -//! A function to check if passed in pointer is aligned on a specific border -template -inline bool is_aligned(T* pointer, uintptr_t alignment) { - return 0==((uintptr_t)pointer & (alignment-1)); -} - -//! A function to check if passed integer is a power of 2 -template -inline bool is_power_of_two(integer_type arg) { - return arg && (0 == (arg & (arg - 1))); -} - -//! A function to compute arg modulo divisor where divisor is a power of 2. -template -inline argument_integer_type modulo_power_of_two(argument_integer_type arg, divisor_integer_type divisor) { - __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); - return (arg & (divisor - 1)); -} - - -//! A function to determine if arg is a power of 2 at least as big as another power of 2. -// i.e. 
for strictly positive i and j, with j being a power of 2,
-// determines whether i==j<<k for some nonnegative k.
-template<typename argument_integer_type, typename power2_integer_type>
-inline bool is_power_of_two_at_least(argument_integer_type arg, power2_integer_type power2) {
- __TBB_ASSERT( is_power_of_two(power2), "Divisor should be a power of two" );
- return 0 == (arg & (arg - power2));
-}
-
-//! Utility template function to prevent "unused" warnings by various compilers.
-template<typename T1> void suppress_unused_warning( const T1& ) {}
-template<typename T1, typename T2> void suppress_unused_warning( const T1&, const T2& ) {}
-template<typename T1, typename T2, typename T3> void suppress_unused_warning( const T1&, const T2&, const T3& ) {}
-
-// Struct to be used as a version tag for inline functions.
-/** Version tag can be necessary to prevent loader on Linux from using the wrong
- symbol in debug builds (when inline functions are compiled as out-of-line). **/
-struct version_tag_v3 {};
-
-typedef version_tag_v3 version_tag;
-
-} // internal
-
-//! Dummy type that distinguishes splitting constructor from copy constructor.
-/**
- * See description of parallel_for and parallel_reduce for example usages.
- * @ingroup algorithms
- */
-class split {
-};
-
-//! Type enables transmission of splitting proportion from partitioners to range objects
-/**
- * In order to make use of such facility Range objects must implement
- * splitting constructor with this type passed and initialize static
- * constant boolean field 'is_splittable_in_proportion' with the value
- * of 'true'
- */
-class proportional_split: internal::no_assign {
-public:
- proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { }
-
- size_t left() const { return my_left; }
- size_t right() const { return my_right; }
-
- // used when range does not support proportional split
- operator split() const { return split(); }
-
-#if __TBB_ENABLE_RANGE_FEEDBACK
- void set_proportion(size_t _left, size_t _right) {
- my_left = _left;
- my_right = _right;
- }
-#endif
-private:
- size_t my_left, my_right;
-};
-
-} // tbb
-
-// Following is a set of classes and functions typically used in compile-time "metaprogramming".
-// TODO: move all that to a separate header
-
-#if __TBB_ALLOCATOR_TRAITS_PRESENT
-#include <memory> //for allocator_traits
-#endif
-
-#if __TBB_CPP11_RVALUE_REF_PRESENT || _LIBCPP_VERSION
-#include <utility> // for std::move
-#endif
-
-namespace tbb {
-namespace internal {
-
-//! Class for determining type of std::allocator<T>::value_type.
-template<typename T>
-struct allocator_type {
- typedef T value_type;
-};
-
-#if _MSC_VER
-//! Microsoft std::allocator has non-standard extension that strips const from a type.
-template<typename T>
-struct allocator_type<const T> {
- typedef T value_type;
-};
-#endif
-
-// Ad-hoc implementation of true_type & false_type
-// Intended strictly for internal use! For public APIs (traits etc), use C++11 analogues.
-template<bool v>
-struct bool_constant {
- static /*constexpr*/ const bool value = v;
-};
-typedef bool_constant<true> true_type;
-typedef bool_constant<false> false_type;
-
-#if __TBB_ALLOCATOR_TRAITS_PRESENT
-using std::allocator_traits;
-#else
-template<typename allocator>
-struct allocator_traits{
- typedef tbb::internal::false_type propagate_on_container_move_assignment;
-};
-#endif
-
-//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size.
-template<unsigned u, unsigned long long ull>
-struct select_size_t_constant {
- //Explicit cast is needed to avoid compiler warnings about possible truncation.
- //The value of the right size, which is selected by ?:, is anyway not truncated or promoted.
- static const size_t value = (size_t)((sizeof(size_t)==sizeof(u)) ? u : ull);
-};
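select_size_t_constant picks the 32-bit or the 64-bit literal at compile time based on sizeof(size_t). A small self-contained usage sketch (the constant chosen here is hypothetical, not from TBB; the template is re-declared so the snippet compiles on its own):

#include <cstddef>

// Re-declaration of the template above, so this sketch is standalone.
template<unsigned u, unsigned long long ull>
struct select_size_t_constant {
    static const std::size_t value =
        (std::size_t)((sizeof(std::size_t) == sizeof(u)) ? u : ull);
};

// Yields 0x80000000u when size_t is 32-bit, 0x8000000000000000ull when 64-bit.
static const std::size_t high_bit =
    select_size_t_constant<0x80000000u, 0x8000000000000000ull>::value;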
-
-#if __TBB_CPP11_RVALUE_REF_PRESENT
-using std::move;
-using std::forward;
-#elif defined(_LIBCPP_NAMESPACE)
-// libc++ defines "pre-C++11 move and forward" similarly to ours; use it to avoid name conflicts in some cases.
-using std::_LIBCPP_NAMESPACE::move;
-using std::_LIBCPP_NAMESPACE::forward;
-#else
-// It is assumed that cv qualifiers, if any, are part of the deduced type.
-template<typename T>
-T& move( T& x ) { return x; }
-template<typename T>
-T& forward( T& x ) { return x; }
-#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
-
-// Helper macros to simplify writing templates working with both C++03 and C++11.
-#if __TBB_CPP11_RVALUE_REF_PRESENT
-#define __TBB_FORWARDING_REF(A) A&&
-#else
-// It is assumed that cv qualifiers, if any, are part of a deduced type.
-// Thus this macro should not be used in public interfaces.
-#define __TBB_FORWARDING_REF(A) A&
-#endif
-#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
-#define __TBB_PARAMETER_PACK ...
-#define __TBB_PACK_EXPANSION(A) A...
-#else
-#define __TBB_PARAMETER_PACK
-#define __TBB_PACK_EXPANSION(A) A
-#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
-
-#if __TBB_CPP11_DECLTYPE_PRESENT
-#if __TBB_CPP11_DECLVAL_BROKEN
-// Ad-hoc implementation of std::declval
-template<typename T> __TBB_FORWARDING_REF(T) declval() /*noexcept*/;
-#else
-using std::declval;
-#endif
-#endif
-
-template<bool condition>
-struct STATIC_ASSERTION_FAILED;
-
-template <>
-struct STATIC_ASSERTION_FAILED<false> { enum {value=1};};
-
-template<>
-struct STATIC_ASSERTION_FAILED<true>; //intentionally left undefined to cause compile time error
-
-//! @endcond
-}} // namespace tbb::internal
-
-#if __TBB_STATIC_ASSERT_PRESENT
-#define __TBB_STATIC_ASSERT(condition,msg) static_assert(condition,msg)
-#else
-//please note condition is intentionally inverted to get a bit more understandable error msg
-#define __TBB_STATIC_ASSERT_IMPL1(condition,msg,line) \
- enum {static_assert_on_line_##line = tbb::internal::STATIC_ASSERTION_FAILED<!(condition)>::value}
-
-#define __TBB_STATIC_ASSERT_IMPL(condition,msg,line) __TBB_STATIC_ASSERT_IMPL1(condition,msg,line)
-//! Verify condition, at compile time
-#define __TBB_STATIC_ASSERT(condition,msg) __TBB_STATIC_ASSERT_IMPL(condition,msg,__LINE__)
-#endif
-
-#endif /* RC_INVOKED */
-#endif /* __TBB_tbb_stddef_H */
diff --git a/lib/3rdParty/tbb/include/tbb/tbb_thread.h b/lib/3rdParty/tbb/include/tbb/tbb_thread.h
deleted file mode 100644
index d1cafd65..00000000
--- a/lib/3rdParty/tbb/include/tbb/tbb_thread.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- Copyright (c) 2005-2017 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- - - - -*/ - -#ifndef __TBB_tbb_thread_H -#define __TBB_tbb_thread_H - -#include "tbb_stddef.h" - -#if _WIN32||_WIN64 -#include "machine/windows_api.h" -#define __TBB_NATIVE_THREAD_ROUTINE unsigned WINAPI -#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) unsigned (WINAPI* r)( void* ) -namespace tbb { namespace internal { -#if __TBB_WIN8UI_SUPPORT - typedef size_t thread_id_type; -#else // __TBB_WIN8UI_SUPPORT - typedef DWORD thread_id_type; -#endif // __TBB_WIN8UI_SUPPORT -}} //namespace tbb::internal -#else -#define __TBB_NATIVE_THREAD_ROUTINE void* -#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) void* (*r)( void* ) -#include -namespace tbb { namespace internal { - typedef pthread_t thread_id_type; -}} //namespace tbb::internal -#endif // _WIN32||_WIN64 - -#include "atomic.h" -#include "internal/_tbb_hash_compare_impl.h" -#include "tick_count.h" - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers - #pragma warning (push) - #pragma warning (disable: 4530) -#endif - -#include __TBB_STD_SWAP_HEADER -#include - -#if !TBB_USE_EXCEPTIONS && _MSC_VER - #pragma warning (pop) -#endif - -namespace tbb { - -namespace internal { - class tbb_thread_v3; -} - -inline void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ) __TBB_NOEXCEPT(true); - -namespace internal { - - //! Allocate a closure - void* __TBB_EXPORTED_FUNC allocate_closure_v3( size_t size ); - //! Free a closure allocated by allocate_closure_v3 - void __TBB_EXPORTED_FUNC free_closure_v3( void* ); - - struct thread_closure_base { - void* operator new( size_t size ) {return allocate_closure_v3(size);} - void operator delete( void* ptr ) {free_closure_v3(ptr);} - }; - - template struct thread_closure_0: thread_closure_base { - F function; - - static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) { - thread_closure_0 *self = static_cast(c); - self->function(); - delete self; - return 0; - } - thread_closure_0( const F& f ) : function(f) {} - }; - //! Structure used to pass user function with 1 argument to thread. - template struct thread_closure_1: thread_closure_base { - F function; - X arg1; - //! Routine passed to Windows's _beginthreadex by thread::internal_start() inside tbb.dll - static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) { - thread_closure_1 *self = static_cast(c); - self->function(self->arg1); - delete self; - return 0; - } - thread_closure_1( const F& f, const X& x ) : function(f), arg1(x) {} - }; - template struct thread_closure_2: thread_closure_base { - F function; - X arg1; - Y arg2; - //! Routine passed to Windows's _beginthreadex by thread::internal_start() inside tbb.dll - static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) { - thread_closure_2 *self = static_cast(c); - self->function(self->arg1, self->arg2); - delete self; - return 0; - } - thread_closure_2( const F& f, const X& x, const Y& y ) : function(f), arg1(x), arg2(y) {} - }; - - //! Versioned thread class. - class tbb_thread_v3 { -#if __TBB_IF_NO_COPY_CTOR_MOVE_SEMANTICS_BROKEN - // Workaround for a compiler bug: declaring the copy constructor as public - // enables use of the moving constructor. - // The definition is not provided in order to prohibit copying. - public: -#endif - tbb_thread_v3(const tbb_thread_v3&); // = delete; // Deny access - public: -#if _WIN32||_WIN64 - typedef HANDLE native_handle_type; -#else - typedef pthread_t native_handle_type; -#endif // _WIN32||_WIN64 - - class id; - //! 
Constructs a thread object that does not represent a thread of execution. - tbb_thread_v3() __TBB_NOEXCEPT(true) : my_handle(0) -#if _WIN32||_WIN64 - , my_thread_id(0) -#endif // _WIN32||_WIN64 - {} - - //! Constructs an object and executes f() in a new thread - template explicit tbb_thread_v3(F f) { - typedef internal::thread_closure_0 closure_type; - internal_start(closure_type::start_routine, new closure_type(f)); - } - //! Constructs an object and executes f(x) in a new thread - template tbb_thread_v3(F f, X x) { - typedef internal::thread_closure_1 closure_type; - internal_start(closure_type::start_routine, new closure_type(f,x)); - } - //! Constructs an object and executes f(x,y) in a new thread - template tbb_thread_v3(F f, X x, Y y) { - typedef internal::thread_closure_2 closure_type; - internal_start(closure_type::start_routine, new closure_type(f,x,y)); - } - -#if __TBB_CPP11_RVALUE_REF_PRESENT - tbb_thread_v3(tbb_thread_v3&& x) __TBB_NOEXCEPT(true) - : my_handle(x.my_handle) -#if _WIN32||_WIN64 - , my_thread_id(x.my_thread_id) -#endif - { - x.internal_wipe(); - } - tbb_thread_v3& operator=(tbb_thread_v3&& x) __TBB_NOEXCEPT(true) { - internal_move(x); - return *this; - } - private: - tbb_thread_v3& operator=(const tbb_thread_v3& x); // = delete; - public: -#else // __TBB_CPP11_RVALUE_REF_PRESENT - tbb_thread_v3& operator=(tbb_thread_v3& x) { - internal_move(x); - return *this; - } -#endif // __TBB_CPP11_RVALUE_REF_PRESENT - - void swap( tbb_thread_v3& t ) __TBB_NOEXCEPT(true) {tbb::swap( *this, t );} - bool joinable() const __TBB_NOEXCEPT(true) {return my_handle!=0; } - //! The completion of the thread represented by *this happens before join() returns. - void __TBB_EXPORTED_METHOD join(); - //! When detach() returns, *this no longer represents the possibly continuing thread of execution. - void __TBB_EXPORTED_METHOD detach(); - ~tbb_thread_v3() {if( joinable() ) detach();} - inline id get_id() const __TBB_NOEXCEPT(true); - native_handle_type native_handle() { return my_handle; } - - //! The number of hardware thread contexts. - /** Before TBB 3.0 U4 this methods returned the number of logical CPU in - the system. Currently on Windows, Linux and FreeBSD it returns the - number of logical CPUs available to the current process in accordance - with its affinity mask. - - NOTE: The return value of this method never changes after its first - invocation. This means that changes in the process affinity mask that - took place after this method was first invoked will not affect the - number of worker threads in the TBB worker threads pool. **/ - static unsigned __TBB_EXPORTED_FUNC hardware_concurrency() __TBB_NOEXCEPT(true); - private: - native_handle_type my_handle; -#if _WIN32||_WIN64 - thread_id_type my_thread_id; -#endif // _WIN32||_WIN64 - - void internal_wipe() __TBB_NOEXCEPT(true) { - my_handle = 0; -#if _WIN32||_WIN64 - my_thread_id = 0; -#endif - } - void internal_move(tbb_thread_v3& x) __TBB_NOEXCEPT(true) { - if (joinable()) detach(); - my_handle = x.my_handle; -#if _WIN32||_WIN64 - my_thread_id = x.my_thread_id; -#endif // _WIN32||_WIN64 - x.internal_wipe(); - } - - /** Runs start_routine(closure) on another thread and sets my_handle to the handle of the created thread. 
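
A quick usage sketch of the tbb_thread interface deleted above: the templated constructors run f(), f(x), or f(x,y) on a new thread, and join() waits for completion. The greet function is invented for illustration:

    #include <iostream>
    #include "tbb/tbb_thread.h"

    static void greet(const char* who, int times) {
        for (int i = 0; i < times; ++i)
            std::cout << "hello, " << who << "\n";
    }

    int main() {
        tbb::tbb_thread t(greet, "tbb", 3); // runs greet("tbb", 3) on a new thread
        if (t.joinable())
            t.join();                       // block until the thread finishes
        return 0;
    }
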
*/ - void __TBB_EXPORTED_METHOD internal_start( __TBB_NATIVE_THREAD_ROUTINE_PTR(start_routine), - void* closure ); - friend void __TBB_EXPORTED_FUNC move_v3( tbb_thread_v3& t1, tbb_thread_v3& t2 ); - friend void tbb::swap( tbb_thread_v3& t1, tbb_thread_v3& t2 ) __TBB_NOEXCEPT(true); - }; - - class tbb_thread_v3::id { - thread_id_type my_id; - id( thread_id_type id_ ) : my_id(id_) {} - - friend class tbb_thread_v3; - public: - id() __TBB_NOEXCEPT(true) : my_id(0) {} - - friend bool operator==( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true); - friend bool operator!=( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true); - friend bool operator<( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true); - friend bool operator<=( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true); - friend bool operator>( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true); - friend bool operator>=( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true); - - template - friend std::basic_ostream& - operator<< (std::basic_ostream &out, - tbb_thread_v3::id id) - { - out << id.my_id; - return out; - } - friend tbb_thread_v3::id __TBB_EXPORTED_FUNC thread_get_id_v3(); - - friend inline size_t tbb_hasher( const tbb_thread_v3::id& id ) { - __TBB_STATIC_ASSERT(sizeof(id.my_id) <= sizeof(size_t), "Implementaion assumes that thread_id_type fits into machine word"); - return tbb::tbb_hasher(id.my_id); - } - - // A workaround for lack of tbb::atomic (which would require id to be POD in C++03). - friend id atomic_compare_and_swap(id& location, const id& value, const id& comparand){ - return as_atomic(location.my_id).compare_and_swap(value.my_id, comparand.my_id); - } - }; // tbb_thread_v3::id - - tbb_thread_v3::id tbb_thread_v3::get_id() const __TBB_NOEXCEPT(true) { -#if _WIN32||_WIN64 - return id(my_thread_id); -#else - return id(my_handle); -#endif // _WIN32||_WIN64 - } - - void __TBB_EXPORTED_FUNC move_v3( tbb_thread_v3& t1, tbb_thread_v3& t2 ); - tbb_thread_v3::id __TBB_EXPORTED_FUNC thread_get_id_v3(); - void __TBB_EXPORTED_FUNC thread_yield_v3(); - void __TBB_EXPORTED_FUNC thread_sleep_v3(const tick_count::interval_t &i); - - inline bool operator==(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true) - { - return x.my_id == y.my_id; - } - inline bool operator!=(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true) - { - return x.my_id != y.my_id; - } - inline bool operator<(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true) - { - return x.my_id < y.my_id; - } - inline bool operator<=(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true) - { - return x.my_id <= y.my_id; - } - inline bool operator>(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true) - { - return x.my_id > y.my_id; - } - inline bool operator>=(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true) - { - return x.my_id >= y.my_id; - } - -} // namespace internal; - -//! 
Users reference thread class by name tbb_thread -typedef internal::tbb_thread_v3 tbb_thread; - -using internal::operator==; -using internal::operator!=; -using internal::operator<; -using internal::operator>; -using internal::operator<=; -using internal::operator>=; - -inline void move( tbb_thread& t1, tbb_thread& t2 ) { - internal::move_v3(t1, t2); -} - -inline void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ) __TBB_NOEXCEPT(true) { - std::swap(t1.my_handle, t2.my_handle); -#if _WIN32||_WIN64 - std::swap(t1.my_thread_id, t2.my_thread_id); -#endif /* _WIN32||_WIN64 */ -} - -namespace this_tbb_thread { - inline tbb_thread::id get_id() { return internal::thread_get_id_v3(); } - //! Offers the operating system the opportunity to schedule another thread. - inline void yield() { internal::thread_yield_v3(); } - //! The current thread blocks at least until the time specified. - inline void sleep(const tick_count::interval_t &i) { - internal::thread_sleep_v3(i); - } -} // namespace this_tbb_thread - -} // namespace tbb - -#endif /* __TBB_tbb_thread_H */ diff --git a/lib/3rdParty/tbb/include/tbb/tbbmalloc_proxy.h b/lib/3rdParty/tbb/include/tbb/tbbmalloc_proxy.h deleted file mode 100644 index 76cbd6d7..00000000 --- a/lib/3rdParty/tbb/include/tbb/tbbmalloc_proxy.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -/* -Replacing the standard memory allocation routines in Microsoft* C/C++ RTL -(malloc/free, global new/delete, etc.) with the TBB memory allocator. - -Include the following header to a source of any binary which is loaded during -application startup - -#include "tbb/tbbmalloc_proxy.h" - -or add following parameters to the linker options for the binary which is -loaded during application startup. It can be either exe-file or dll. - -For win32 -tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" -win64 -tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" -*/ - -#ifndef __TBB_tbbmalloc_proxy_H -#define __TBB_tbbmalloc_proxy_H - -#if _MSC_VER - -#ifdef _DEBUG - #pragma comment(lib, "tbbmalloc_proxy_debug.lib") -#else - #pragma comment(lib, "tbbmalloc_proxy.lib") -#endif - -#if defined(_WIN64) - #pragma comment(linker, "/include:__TBB_malloc_proxy") -#else - #pragma comment(linker, "/include:___TBB_malloc_proxy") -#endif - -#else -/* Primarily to support MinGW */ - -extern "C" void __TBB_malloc_proxy(); -struct __TBB_malloc_proxy_caller { - __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } -} volatile __TBB_malloc_proxy_helper_object; - -#endif // _MSC_VER - -#endif //__TBB_tbbmalloc_proxy_H diff --git a/lib/3rdParty/tbb/include/tbb/tick_count.h b/lib/3rdParty/tbb/include/tbb/tick_count.h deleted file mode 100644 index a7f4e0f3..00000000 --- a/lib/3rdParty/tbb/include/tbb/tick_count.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - Copyright (c) 2005-2017 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
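
The tbbmalloc_proxy header deleted above documents two ways to swap the CRT allocator for the TBB one; the header-based route is a one-liner. A minimal sketch, assuming the proxy libraries are present at link and run time:

    // On MSVC, the deleted header's #pragma comment directives pull in
    // tbbmalloc_proxy.lib and force the __TBB_malloc_proxy symbol, so merely
    // including it redirects malloc/free and global new/delete.
    #include "tbb/tbbmalloc_proxy.h"

    #include <cstdlib>

    int main() {
        void* p = std::malloc(64); // serviced by the TBB allocator via the proxy
        std::free(p);
        return 0;
    }
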
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - - -*/ - -#ifndef __TBB_tick_count_H -#define __TBB_tick_count_H - -#include "tbb_stddef.h" - -#if _WIN32||_WIN64 -#include "machine/windows_api.h" -#elif __linux__ -#include -#else /* generic Unix */ -#include -#endif /* (choice of OS) */ - -namespace tbb { - -//! Absolute timestamp -/** @ingroup timing */ -class tick_count { -public: - //! Relative time interval. - class interval_t { - long long value; - explicit interval_t( long long value_ ) : value(value_) {} - public: - //! Construct a time interval representing zero time duration - interval_t() : value(0) {}; - - //! Construct a time interval representing sec seconds time duration - explicit interval_t( double sec ); - - //! Return the length of a time interval in seconds - double seconds() const; - - friend class tbb::tick_count; - - //! Extract the intervals from the tick_counts and subtract them. - friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); - - //! Add two intervals. - friend interval_t operator+( const interval_t& i, const interval_t& j ) { - return interval_t(i.value+j.value); - } - - //! Subtract two intervals. - friend interval_t operator-( const interval_t& i, const interval_t& j ) { - return interval_t(i.value-j.value); - } - - //! Accumulation operator - interval_t& operator+=( const interval_t& i ) {value += i.value; return *this;} - - //! Subtraction operator - interval_t& operator-=( const interval_t& i ) {value -= i.value; return *this;} - private: - static long long ticks_per_second(){ -#if _WIN32||_WIN64 - LARGE_INTEGER qpfreq; - int rval = QueryPerformanceFrequency(&qpfreq); - __TBB_ASSERT_EX(rval, "QueryPerformanceFrequency returned zero"); - return static_cast(qpfreq.QuadPart); -#elif __linux__ - return static_cast(1E9); -#else /* generic Unix */ - return static_cast(1E6); -#endif /* (choice of OS) */ - } - }; - - //! Construct an absolute timestamp initialized to zero. - tick_count() : my_count(0) {}; - - //! Return current time. - static tick_count now(); - - //! Subtract two timestamps to get the time interval between - friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); - - //! Return the resolution of the clock in seconds per tick. 
- static double resolution() { return 1.0 / interval_t::ticks_per_second(); } - -private: - long long my_count; -}; - -inline tick_count tick_count::now() { - tick_count result; -#if _WIN32||_WIN64 - LARGE_INTEGER qpcnt; - int rval = QueryPerformanceCounter(&qpcnt); - __TBB_ASSERT_EX(rval, "QueryPerformanceCounter failed"); - result.my_count = qpcnt.QuadPart; -#elif __linux__ - struct timespec ts; - int status = clock_gettime( CLOCK_REALTIME, &ts ); - __TBB_ASSERT_EX( status==0, "CLOCK_REALTIME not supported" ); - result.my_count = static_cast(1000000000UL)*static_cast(ts.tv_sec) + static_cast(ts.tv_nsec); -#else /* generic Unix */ - struct timeval tv; - int status = gettimeofday(&tv, NULL); - __TBB_ASSERT_EX( status==0, "gettimeofday failed" ); - result.my_count = static_cast(1000000)*static_cast(tv.tv_sec) + static_cast(tv.tv_usec); -#endif /*(choice of OS) */ - return result; -} - -inline tick_count::interval_t::interval_t( double sec ) { - value = static_cast(sec*interval_t::ticks_per_second()); -} - -inline tick_count::interval_t operator-( const tick_count& t1, const tick_count& t0 ) { - return tick_count::interval_t( t1.my_count-t0.my_count ); -} - -inline double tick_count::interval_t::seconds() const { - return value*tick_count::resolution(); -} - -} // namespace tbb - -#endif /* __TBB_tick_count_H */ diff --git a/lib/3rdParty/tbb/index.html b/lib/3rdParty/tbb/index.html deleted file mode 100644 index 13f29bc8..00000000 --- a/lib/3rdParty/tbb/index.html +++ /dev/null @@ -1,50 +0,0 @@ - - -
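
The tick_count header deleted above exposes now(), timestamp subtraction yielding an interval_t, and seconds(); a minimal timing sketch using only those (the loop is placeholder work):

    #include <iostream>
    #include "tbb/tick_count.h"

    int main() {
        tbb::tick_count t0 = tbb::tick_count::now();
        volatile double sink = 0;
        for (int i = 0; i < 1000000; ++i)
            sink += i * 0.5;               // placeholder work being timed
        tbb::tick_count t1 = tbb::tick_count::now();
        // t1 - t0 is an interval_t; seconds() converts it to double seconds.
        std::cout << "elapsed: " << (t1 - t0).seconds() << " s\n";
        return 0;
    }
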

-Overview
-
-Top level directory for Intel® Threading Building Blocks (Intel® TBB).
-
-Common directories
-
-  doc        Documentation for the library.
-  include    Include files required for compiling code that uses the library.
-  examples   Examples of how to use the library.
-  python     Python* API for Intel TBB.
-
-Intel TBB source package
-
-To build Intel TBB, use the top-level Makefile; see also the build directions.
-To port Intel TBB to a new platform, operating system or architecture, see the porting directions.
-
-Files
-
-  Makefile   Top-level Makefile for Intel TBB. See also the build directions.
-
-Directories
-
-  src        Source code for the library.
-  build, jni Internal Makefile infrastructure for Intel TBB. Do not use directly; see the build directions.
-
-Intel TBB binary package
-
-Directories
-
-  bin        Start-up scripts for sourcing the library for Linux* OS and macOS*. For Windows* OS: start-up scripts and dynamic-link libraries.
-  lib        Platform-specific binary files for the library.
-
-Copyright © 2005-2017 Intel Corporation. All Rights Reserved.
-
-Intel is a registered trademark or trademark of Intel Corporation
-or its subsidiaries in the United States and other countries.
-
-* Other names and brands may be claimed as the property of others. - - diff --git a/lib/3rdParty/tbb/lib/x64/v140/irml/irml.lib b/lib/3rdParty/tbb/lib/x64/v140/irml/irml.lib deleted file mode 100644 index 8228932b..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/irml/irml.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/irml/irml_debug.lib b/lib/3rdParty/tbb/lib/x64/v140/irml/irml_debug.lib deleted file mode 100644 index 143199cd..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/irml/irml_debug.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbb.def b/lib/3rdParty/tbb/lib/x64/v140/tbb.def deleted file mode 100644 index 62145ca1..00000000 --- a/lib/3rdParty/tbb/lib/x64/v140/tbb.def +++ /dev/null @@ -1,1126 +0,0 @@ -; Copyright (c) 2005-2017 Intel Corporation -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; -; -; - - -; This file is organized with a section for each .cpp file. -; Each of these sections is in alphabetical order. - -EXPORTS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -__TBB_machine_cmpswp1 -__TBB_machine_fetchadd1 -__TBB_machine_fetchstore1 -__TBB_machine_cmpswp2 -__TBB_machine_fetchadd2 -__TBB_machine_fetchstore2 -__TBB_machine_pause -__TBB_machine_try_lock_elided -__TBB_machine_unlock_elided -__TBB_machine_is_in_transaction - - -?NFS_Allocate@internal@tbb@@YAPEAX_K0PEAX@Z -?NFS_GetLineSize@internal@tbb@@YA_KXZ -?NFS_Free@internal@tbb@@YAXPEAX@Z -?allocate_via_handler_v3@internal@tbb@@YAPEAX_K@Z 
-?deallocate_via_handler_v3@internal@tbb@@YAXPEAX@Z -?is_malloc_used_v3@internal@tbb@@YA_NXZ - - - -?resize@affinity_partitioner_base_v3@internal@tbb@@AEAAXI@Z -?allocate@allocate_additional_child_of_proxy@internal@tbb@@QEBAAEAVtask@3@_K@Z -?allocate@allocate_child_proxy@internal@tbb@@QEBAAEAVtask@3@_K@Z -?allocate@allocate_continuation_proxy@internal@tbb@@QEBAAEAVtask@3@_K@Z -?allocate@allocate_root_proxy@internal@tbb@@SAAEAVtask@3@_K@Z -?destroy@task_base@internal@interface5@tbb@@SAXAEAVtask@4@@Z -?free@allocate_additional_child_of_proxy@internal@tbb@@QEBAXAEAVtask@3@@Z -?free@allocate_child_proxy@internal@tbb@@QEBAXAEAVtask@3@@Z -?free@allocate_continuation_proxy@internal@tbb@@QEBAXAEAVtask@3@@Z -?free@allocate_root_proxy@internal@tbb@@SAXAEAVtask@3@@Z -?internal_set_ref_count@task@tbb@@AEAAXH@Z -?internal_decrement_ref_count@task@tbb@@AEAA_JXZ -?is_owned_by_current_thread@task@tbb@@QEBA_NXZ -?note_affinity@task@tbb@@UEAAXG@Z -?self@task@tbb@@SAAEAV12@XZ -?spawn_and_wait_for_all@task@tbb@@QEAAXAEAVtask_list@2@@Z -?default_num_threads@task_scheduler_init@tbb@@SAHXZ -?initialize@task_scheduler_init@tbb@@QEAAXH_K@Z -?initialize@task_scheduler_init@tbb@@QEAAXH@Z -?terminate@task_scheduler_init@tbb@@QEAAXXZ -?internal_blocking_terminate@task_scheduler_init@tbb@@AEAA_N_N@Z - -?observe@task_scheduler_observer_v3@internal@tbb@@QEAAX_N@Z - - - -?internal_max_concurrency@task_arena_base@internal@interface7@tbb@@KAHPEBVtask_arena@34@@Z -?internal_current_slot@task_arena_base@internal@interface7@tbb@@KAHXZ -?internal_initialize@task_arena_base@internal@interface7@tbb@@IEAAXXZ -?internal_terminate@task_arena_base@internal@interface7@tbb@@IEAAXXZ -?internal_attach@task_arena_base@internal@interface7@tbb@@IEAAXXZ -?internal_enqueue@task_arena_base@internal@interface7@tbb@@IEBAXAEAVtask@4@_J@Z -?internal_execute@task_arena_base@internal@interface7@tbb@@IEBAXAEAVdelegate_base@234@@Z -?internal_wait@task_arena_base@internal@interface7@tbb@@IEBAXXZ - - - - - -?destroy@task@tbb@@QEAAXAEAV12@@Z - - - - -?allocate@allocate_root_with_context_proxy@internal@tbb@@QEBAAEAVtask@3@_K@Z -?free@allocate_root_with_context_proxy@internal@tbb@@QEBAXAEAVtask@3@@Z -?change_group@task@tbb@@QEAAXAEAVtask_group_context@2@@Z -?is_group_execution_cancelled@task_group_context@tbb@@QEBA_NXZ -?cancel_group_execution@task_group_context@tbb@@QEAA_NXZ -?reset@task_group_context@tbb@@QEAAXXZ -?capture_fp_settings@task_group_context@tbb@@QEAAXXZ -?init@task_group_context@tbb@@IEAAXXZ -?register_pending_exception@task_group_context@tbb@@QEAAXXZ -??1task_group_context@tbb@@QEAA@XZ - -?set_priority@task_group_context@tbb@@QEAAXW4priority_t@2@@Z -?priority@task_group_context@tbb@@QEBA?AW4priority_t@2@XZ - -?name@captured_exception@tbb@@UEBAPEBDXZ -?what@captured_exception@tbb@@UEBAPEBDXZ -??1captured_exception@tbb@@UEAA@XZ -?move@captured_exception@tbb@@UEAAPEAV12@XZ -?destroy@captured_exception@tbb@@UEAAXXZ -?set@captured_exception@tbb@@QEAAXPEBD0@Z -?clear@captured_exception@tbb@@QEAAXXZ - - - -?throw_bad_last_alloc_exception_v4@internal@tbb@@YAXXZ -?throw_exception_v4@internal@tbb@@YAXW4exception_id@12@@Z -?what@bad_last_alloc@tbb@@UEBAPEBDXZ -?what@missing_wait@tbb@@UEBAPEBDXZ -?what@invalid_multiple_scheduling@tbb@@UEBAPEBDXZ -?what@improper_lock@tbb@@UEBAPEBDXZ -?what@user_abort@tbb@@UEBAPEBDXZ - - -?assertion_failure@tbb@@YAXPEBDH00@Z -?get_initial_auto_partitioner_divisor@internal@tbb@@YA_KXZ -?handle_perror@internal@tbb@@YAXHPEBD@Z -?set_assertion_handler@tbb@@YAP6AXPEBDH00@ZP6AX0H00@Z@Z -?runtime_warning@internal@tbb@@YAXPEBDZZ 
-TBB_runtime_interface_version - - -?itt_load_pointer_with_acquire_v3@internal@tbb@@YAPEAXPEBX@Z -?itt_store_pointer_with_release_v3@internal@tbb@@YAXPEAX0@Z -?call_itt_notify_v5@internal@tbb@@YAXHPEAX@Z -?itt_load_pointer_v3@internal@tbb@@YAPEAXPEBX@Z -?itt_set_sync_name_v3@internal@tbb@@YAXPEAXPEB_W@Z - - - - - - - - - - - -??_7pipeline@tbb@@6B@ -??0pipeline@tbb@@QEAA@XZ -??1filter@tbb@@UEAA@XZ -??1pipeline@tbb@@UEAA@XZ -?add_filter@pipeline@tbb@@QEAAXAEAVfilter@2@@Z -?clear@pipeline@tbb@@QEAAXXZ -?inject_token@pipeline@tbb@@AEAAXAEAVtask@2@@Z -?run@pipeline@tbb@@QEAAX_K@Z - -?run@pipeline@tbb@@QEAAX_KAEAVtask_group_context@2@@Z - -?process_item@thread_bound_filter@tbb@@QEAA?AW4result_type@12@XZ -?try_process_item@thread_bound_filter@tbb@@QEAA?AW4result_type@12@XZ -?set_end_of_input@filter@tbb@@IEAAXXZ - - -?internal_construct@queuing_rw_mutex@tbb@@QEAAXXZ -?acquire@scoped_lock@queuing_rw_mutex@tbb@@QEAAXAEAV23@_N@Z -?downgrade_to_reader@scoped_lock@queuing_rw_mutex@tbb@@QEAA_NXZ -?release@scoped_lock@queuing_rw_mutex@tbb@@QEAAXXZ -?upgrade_to_writer@scoped_lock@queuing_rw_mutex@tbb@@QEAA_NXZ -?try_acquire@scoped_lock@queuing_rw_mutex@tbb@@QEAA_NAEAV23@_N@Z - - -?try_lock_read@reader_writer_lock@interface5@tbb@@QEAA_NXZ -?try_lock@reader_writer_lock@interface5@tbb@@QEAA_NXZ -?unlock@reader_writer_lock@interface5@tbb@@QEAAXXZ -?lock_read@reader_writer_lock@interface5@tbb@@QEAAXXZ -?lock@reader_writer_lock@interface5@tbb@@QEAAXXZ -?internal_construct@reader_writer_lock@interface5@tbb@@AEAAXXZ -?internal_destroy@reader_writer_lock@interface5@tbb@@AEAAXXZ -?internal_construct@scoped_lock@reader_writer_lock@interface5@tbb@@AEAAXAEAV234@@Z -?internal_destroy@scoped_lock@reader_writer_lock@interface5@tbb@@AEAAXXZ -?internal_construct@scoped_lock_read@reader_writer_lock@interface5@tbb@@AEAAXAEAV234@@Z -?internal_destroy@scoped_lock_read@reader_writer_lock@interface5@tbb@@AEAAXXZ - - - -?internal_itt_releasing@spin_rw_mutex@tbb@@CAXPEAV12@@Z -?internal_acquire_writer@spin_rw_mutex@tbb@@CA_NPEAV12@@Z -?internal_acquire_reader@spin_rw_mutex@tbb@@CAXPEAV12@@Z -?internal_downgrade@spin_rw_mutex@tbb@@CAXPEAV12@@Z -?internal_upgrade@spin_rw_mutex@tbb@@CA_NPEAV12@@Z -?internal_release_reader@spin_rw_mutex@tbb@@CAXPEAV12@@Z -?internal_release_writer@spin_rw_mutex@tbb@@CAXPEAV12@@Z -?internal_try_acquire_writer@spin_rw_mutex@tbb@@CA_NPEAV12@@Z -?internal_try_acquire_reader@spin_rw_mutex@tbb@@CA_NPEAV12@@Z - - - -?internal_construct@spin_rw_mutex_v3@tbb@@AEAAXXZ -?internal_upgrade@spin_rw_mutex_v3@tbb@@AEAA_NXZ -?internal_downgrade@spin_rw_mutex_v3@tbb@@AEAAXXZ -?internal_acquire_reader@spin_rw_mutex_v3@tbb@@AEAAXXZ -?internal_acquire_writer@spin_rw_mutex_v3@tbb@@AEAA_NXZ -?internal_release_reader@spin_rw_mutex_v3@tbb@@AEAAXXZ -?internal_release_writer@spin_rw_mutex_v3@tbb@@AEAAXXZ -?internal_try_acquire_reader@spin_rw_mutex_v3@tbb@@AEAA_NXZ -?internal_try_acquire_writer@spin_rw_mutex_v3@tbb@@AEAA_NXZ - - -?internal_acquire_writer@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAAXAEAVscoped_lock@1234@_N@Z -?internal_acquire_reader@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAAXAEAVscoped_lock@1234@_N@Z -?internal_upgrade@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAA_NAEAVscoped_lock@1234@@Z -?internal_downgrade@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAA_NAEAVscoped_lock@1234@@Z -?internal_try_acquire_writer@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAA_NAEAVscoped_lock@1234@@Z -?internal_release@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAAXAEAVscoped_lock@1234@@Z 
-?internal_construct@x86_rtm_rw_mutex@internal@interface8@tbb@@AEAAXXZ - - -?internal_construct@spin_mutex@tbb@@QEAAXXZ -?internal_acquire@scoped_lock@spin_mutex@tbb@@AEAAXAEAV23@@Z -?internal_release@scoped_lock@spin_mutex@tbb@@AEAAXXZ -?internal_try_acquire@scoped_lock@spin_mutex@tbb@@AEAA_NAEAV23@@Z - - -?internal_acquire@scoped_lock@mutex@tbb@@AEAAXAEAV23@@Z -?internal_release@scoped_lock@mutex@tbb@@AEAAXXZ -?internal_try_acquire@scoped_lock@mutex@tbb@@AEAA_NAEAV23@@Z -?internal_construct@mutex@tbb@@AEAAXXZ -?internal_destroy@mutex@tbb@@AEAAXXZ - - -?internal_construct@recursive_mutex@tbb@@AEAAXXZ -?internal_destroy@recursive_mutex@tbb@@AEAAXXZ -?internal_acquire@scoped_lock@recursive_mutex@tbb@@AEAAXAEAV23@@Z -?internal_try_acquire@scoped_lock@recursive_mutex@tbb@@AEAA_NAEAV23@@Z -?internal_release@scoped_lock@recursive_mutex@tbb@@AEAAXXZ - - -?internal_construct@queuing_mutex@tbb@@QEAAXXZ -?acquire@scoped_lock@queuing_mutex@tbb@@QEAAXAEAV23@@Z -?release@scoped_lock@queuing_mutex@tbb@@QEAAXXZ -?try_acquire@scoped_lock@queuing_mutex@tbb@@QEAA_NAEAV23@@Z - - -?internal_construct@critical_section_v4@internal@tbb@@QEAAXXZ - - - -?internal_grow_predicate@hash_map_segment_base@internal@tbb@@QEBA_NXZ - - -??0concurrent_queue_base@internal@tbb@@IEAA@_K@Z -??0concurrent_queue_iterator_base@internal@tbb@@IEAA@AEBVconcurrent_queue_base@12@@Z -??1concurrent_queue_base@internal@tbb@@MEAA@XZ -??1concurrent_queue_iterator_base@internal@tbb@@IEAA@XZ -?advance@concurrent_queue_iterator_base@internal@tbb@@IEAAXXZ -?assign@concurrent_queue_iterator_base@internal@tbb@@IEAAXAEBV123@@Z -?internal_pop@concurrent_queue_base@internal@tbb@@IEAAXPEAX@Z -?internal_pop_if_present@concurrent_queue_base@internal@tbb@@IEAA_NPEAX@Z -?internal_push@concurrent_queue_base@internal@tbb@@IEAAXPEBX@Z -?internal_push_if_not_full@concurrent_queue_base@internal@tbb@@IEAA_NPEBX@Z -?internal_set_capacity@concurrent_queue_base@internal@tbb@@IEAAX_J_K@Z -?internal_size@concurrent_queue_base@internal@tbb@@IEBA_JXZ - - - -??0concurrent_queue_iterator_base_v3@internal@tbb@@IEAA@AEBVconcurrent_queue_base_v3@12@@Z -??0concurrent_queue_iterator_base_v3@internal@tbb@@IEAA@AEBVconcurrent_queue_base_v3@12@_K@Z -??1concurrent_queue_iterator_base_v3@internal@tbb@@IEAA@XZ -?assign@concurrent_queue_iterator_base_v3@internal@tbb@@IEAAXAEBV123@@Z -?advance@concurrent_queue_iterator_base_v3@internal@tbb@@IEAAXXZ -??0concurrent_queue_base_v3@internal@tbb@@IEAA@_K@Z -??1concurrent_queue_base_v3@internal@tbb@@MEAA@XZ -?internal_push@concurrent_queue_base_v3@internal@tbb@@IEAAXPEBX@Z -?internal_push_move@concurrent_queue_base_v8@internal@tbb@@IEAAXPEBX@Z -?internal_push_if_not_full@concurrent_queue_base_v3@internal@tbb@@IEAA_NPEBX@Z -?internal_push_move_if_not_full@concurrent_queue_base_v8@internal@tbb@@IEAA_NPEBX@Z -?internal_pop@concurrent_queue_base_v3@internal@tbb@@IEAAXPEAX@Z -?internal_pop_if_present@concurrent_queue_base_v3@internal@tbb@@IEAA_NPEAX@Z -?internal_abort@concurrent_queue_base_v3@internal@tbb@@IEAAXXZ -?internal_size@concurrent_queue_base_v3@internal@tbb@@IEBA_JXZ -?internal_empty@concurrent_queue_base_v3@internal@tbb@@IEBA_NXZ -?internal_finish_clear@concurrent_queue_base_v3@internal@tbb@@IEAAXXZ -?internal_set_capacity@concurrent_queue_base_v3@internal@tbb@@IEAAX_J_K@Z -?internal_throw_exception@concurrent_queue_base_v3@internal@tbb@@IEBAXXZ -?assign@concurrent_queue_base_v3@internal@tbb@@IEAAXAEBV123@@Z -?move_content@concurrent_queue_base_v8@internal@tbb@@IEAAXAEAV123@@Z - - - 
-?internal_assign@concurrent_vector_base@internal@tbb@@IEAAXAEBV123@_KP6AXPEAX1@ZP6AX2PEBX1@Z5@Z -?internal_capacity@concurrent_vector_base@internal@tbb@@IEBA_KXZ -?internal_clear@concurrent_vector_base@internal@tbb@@IEAAXP6AXPEAX_K@Z_N@Z -?internal_copy@concurrent_vector_base@internal@tbb@@IEAAXAEBV123@_KP6AXPEAXPEBX1@Z@Z -?internal_grow_by@concurrent_vector_base@internal@tbb@@IEAA_K_K0P6AXPEAX0@Z@Z -?internal_grow_to_at_least@concurrent_vector_base@internal@tbb@@IEAAX_K0P6AXPEAX0@Z@Z -?internal_push_back@concurrent_vector_base@internal@tbb@@IEAAPEAX_KAEA_K@Z -?internal_reserve@concurrent_vector_base@internal@tbb@@IEAAX_K00@Z - - - -??1concurrent_vector_base_v3@internal@tbb@@IEAA@XZ -?internal_assign@concurrent_vector_base_v3@internal@tbb@@IEAAXAEBV123@_KP6AXPEAX1@ZP6AX2PEBX1@Z5@Z -?internal_capacity@concurrent_vector_base_v3@internal@tbb@@IEBA_KXZ -?internal_clear@concurrent_vector_base_v3@internal@tbb@@IEAA_KP6AXPEAX_K@Z@Z -?internal_copy@concurrent_vector_base_v3@internal@tbb@@IEAAXAEBV123@_KP6AXPEAXPEBX1@Z@Z -?internal_grow_by@concurrent_vector_base_v3@internal@tbb@@IEAA_K_K0P6AXPEAXPEBX0@Z2@Z -?internal_grow_to_at_least@concurrent_vector_base_v3@internal@tbb@@IEAAX_K0P6AXPEAXPEBX0@Z2@Z -?internal_push_back@concurrent_vector_base_v3@internal@tbb@@IEAAPEAX_KAEA_K@Z -?internal_reserve@concurrent_vector_base_v3@internal@tbb@@IEAAX_K00@Z -?internal_compact@concurrent_vector_base_v3@internal@tbb@@IEAAPEAX_KPEAXP6AX10@ZP6AX1PEBX0@Z@Z -?internal_swap@concurrent_vector_base_v3@internal@tbb@@IEAAXAEAV123@@Z -?internal_throw_exception@concurrent_vector_base_v3@internal@tbb@@IEBAX_K@Z -?internal_resize@concurrent_vector_base_v3@internal@tbb@@IEAAX_K00PEBXP6AXPEAX0@ZP6AX210@Z@Z -?internal_grow_to_at_least_with_result@concurrent_vector_base_v3@internal@tbb@@IEAA_K_K0P6AXPEAXPEBX0@Z2@Z - - -?allocate_closure_v3@internal@tbb@@YAPEAX_K@Z -?detach@tbb_thread_v3@internal@tbb@@QEAAXXZ -?free_closure_v3@internal@tbb@@YAXPEAX@Z -?hardware_concurrency@tbb_thread_v3@internal@tbb@@SAIXZ -?internal_start@tbb_thread_v3@internal@tbb@@AEAAXP6AIPEAX@Z0@Z -?join@tbb_thread_v3@internal@tbb@@QEAAXXZ -?move_v3@internal@tbb@@YAXAEAVtbb_thread_v3@12@0@Z -?thread_get_id_v3@internal@tbb@@YA?AVid@tbb_thread_v3@12@XZ -?thread_sleep_v3@internal@tbb@@YAXAEBVinterval_t@tick_count@2@@Z -?thread_yield_v3@internal@tbb@@YAXXZ - - -?internal_initialize_condition_variable@internal@interface5@tbb@@YAXAEATcondvar_impl_t@123@@Z -?internal_condition_variable_wait@internal@interface5@tbb@@YA_NAEATcondvar_impl_t@123@PEAVmutex@3@PEBVinterval_t@tick_count@3@@Z -?internal_condition_variable_notify_one@internal@interface5@tbb@@YAXAEATcondvar_impl_t@123@@Z -?internal_condition_variable_notify_all@internal@interface5@tbb@@YAXAEATcondvar_impl_t@123@@Z -?internal_destroy_condition_variable@internal@interface5@tbb@@YAXAEATcondvar_impl_t@123@@Z - - -?active_value@global_control@interface9@tbb@@CA_KH@Z -?internal_create@global_control@interface9@tbb@@AEAAXXZ -?internal_destroy@global_control@interface9@tbb@@AEAAXXZ - - - - diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbb.lib b/lib/3rdParty/tbb/lib/x64/v140/tbb.lib deleted file mode 100644 index 7db38e1f..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/tbb.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbb_debug.lib b/lib/3rdParty/tbb/lib/x64/v140/tbb_debug.lib deleted file mode 100644 index 05734141..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/tbb_debug.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbb_preview.lib 
b/lib/3rdParty/tbb/lib/x64/v140/tbb_preview.lib deleted file mode 100644 index f3b8e82a..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/tbb_preview.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc.def b/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc.def deleted file mode 100644 index 9a998336..00000000 --- a/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc.def +++ /dev/null @@ -1,49 +0,0 @@ -; Copyright (c) 2005-2017 Intel Corporation -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; -; -; - - -EXPORTS - -; frontend.cpp -scalable_calloc -scalable_free -scalable_malloc -scalable_realloc -scalable_posix_memalign -scalable_aligned_malloc -scalable_aligned_realloc -scalable_aligned_free -scalable_msize -scalable_allocation_mode -scalable_allocation_command -__TBB_malloc_safer_free -__TBB_malloc_safer_realloc -__TBB_malloc_safer_msize -__TBB_malloc_safer_aligned_msize -__TBB_malloc_safer_aligned_realloc -; memory pool stuff -?pool_create@rml@@YAPEAVMemoryPool@1@_JPEBUMemPoolPolicy@1@@Z -?pool_create_v1@rml@@YA?AW4MemPoolError@1@_JPEBUMemPoolPolicy@1@PEAPEAVMemoryPool@1@@Z -?pool_destroy@rml@@YA_NPEAVMemoryPool@1@@Z -?pool_malloc@rml@@YAPEAXPEAVMemoryPool@1@_K@Z -?pool_free@rml@@YA_NPEAVMemoryPool@1@PEAX@Z -?pool_reset@rml@@YA_NPEAVMemoryPool@1@@Z -?pool_realloc@rml@@YAPEAXPEAVMemoryPool@1@PEAX_K@Z -?pool_aligned_realloc@rml@@YAPEAXPEAVMemoryPool@1@PEAX_K2@Z -?pool_aligned_malloc@rml@@YAPEAXPEAVMemoryPool@1@_K1@Z -?pool_identify@rml@@YAPEAVMemoryPool@1@PEAX@Z diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc.lib b/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc.lib deleted file mode 100644 index fb67b977..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc_proxy.lib b/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc_proxy.lib deleted file mode 100644 index 45c14dfa..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/tbbmalloc_proxy.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x64/v140/tbbproxy.lib b/lib/3rdParty/tbb/lib/x64/v140/tbbproxy.lib deleted file mode 100644 index 0d3d0e45..00000000 Binary files a/lib/3rdParty/tbb/lib/x64/v140/tbbproxy.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/irml/irml.lib b/lib/3rdParty/tbb/lib/x86/v140/irml/irml.lib deleted file mode 100644 index 124fc73b..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/irml/irml.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/irml/irml_debug.lib b/lib/3rdParty/tbb/lib/x86/v140/irml/irml_debug.lib deleted file mode 100644 index 46e6109b..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/irml/irml_debug.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbb.def b/lib/3rdParty/tbb/lib/x86/v140/tbb.def deleted file mode 100644 index 395056c9..00000000 --- a/lib/3rdParty/tbb/lib/x86/v140/tbb.def +++ /dev/null @@ -1,1133 +0,0 @@ -; Copyright (c) 2005-2017 Intel Corporation -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not 
use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; -; -; - - -EXPORTS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -__TBB_machine_cmpswp8 - - - -__TBB_machine_fetchadd8 - - - -__TBB_machine_fetchstore8 -__TBB_machine_store8 -__TBB_machine_load8 -__TBB_machine_trylockbyte -__TBB_machine_try_lock_elided -__TBB_machine_unlock_elided -__TBB_machine_is_in_transaction - - -?NFS_Allocate@internal@tbb@@YAPAXIIPAX@Z -?NFS_GetLineSize@internal@tbb@@YAIXZ -?NFS_Free@internal@tbb@@YAXPAX@Z -?allocate_via_handler_v3@internal@tbb@@YAPAXI@Z -?deallocate_via_handler_v3@internal@tbb@@YAXPAX@Z -?is_malloc_used_v3@internal@tbb@@YA_NXZ - - -?allocate@allocate_additional_child_of_proxy@internal@tbb@@QBEAAVtask@3@I@Z -?allocate@allocate_child_proxy@internal@tbb@@QBEAAVtask@3@I@Z -?allocate@allocate_continuation_proxy@internal@tbb@@QBEAAVtask@3@I@Z -?allocate@allocate_root_proxy@internal@tbb@@SAAAVtask@3@I@Z -?destroy@task_base@internal@interface5@tbb@@SAXAAVtask@4@@Z -?free@allocate_additional_child_of_proxy@internal@tbb@@QBEXAAVtask@3@@Z -?free@allocate_child_proxy@internal@tbb@@QBEXAAVtask@3@@Z -?free@allocate_continuation_proxy@internal@tbb@@QBEXAAVtask@3@@Z -?free@allocate_root_proxy@internal@tbb@@SAXAAVtask@3@@Z -?internal_set_ref_count@task@tbb@@AAEXH@Z -?internal_decrement_ref_count@task@tbb@@AAEHXZ -?is_owned_by_current_thread@task@tbb@@QBE_NXZ -?note_affinity@task@tbb@@UAEXG@Z -?resize@affinity_partitioner_base_v3@internal@tbb@@AAEXI@Z -?self@task@tbb@@SAAAV12@XZ -?spawn_and_wait_for_all@task@tbb@@QAEXAAVtask_list@2@@Z -?default_num_threads@task_scheduler_init@tbb@@SAHXZ 
-?initialize@task_scheduler_init@tbb@@QAEXHI@Z -?initialize@task_scheduler_init@tbb@@QAEXH@Z -?terminate@task_scheduler_init@tbb@@QAEXXZ -?internal_blocking_terminate@task_scheduler_init@tbb@@AAE_N_N@Z - -?observe@task_scheduler_observer_v3@internal@tbb@@QAEX_N@Z - - - -?internal_max_concurrency@task_arena_base@internal@interface7@tbb@@KAHPBVtask_arena@34@@Z -?internal_current_slot@task_arena_base@internal@interface7@tbb@@KAHXZ -?internal_initialize@task_arena_base@internal@interface7@tbb@@IAEXXZ -?internal_terminate@task_arena_base@internal@interface7@tbb@@IAEXXZ -?internal_attach@task_arena_base@internal@interface7@tbb@@IAEXXZ -?internal_enqueue@task_arena_base@internal@interface7@tbb@@IBEXAAVtask@4@H@Z -?internal_execute@task_arena_base@internal@interface7@tbb@@IBEXAAVdelegate_base@234@@Z -?internal_wait@task_arena_base@internal@interface7@tbb@@IBEXXZ - - - - - - -?destroy@task@tbb@@QAEXAAV12@@Z - - - - -?allocate@allocate_root_with_context_proxy@internal@tbb@@QBEAAVtask@3@I@Z -?free@allocate_root_with_context_proxy@internal@tbb@@QBEXAAVtask@3@@Z -?change_group@task@tbb@@QAEXAAVtask_group_context@2@@Z -?is_group_execution_cancelled@task_group_context@tbb@@QBE_NXZ -?cancel_group_execution@task_group_context@tbb@@QAE_NXZ -?reset@task_group_context@tbb@@QAEXXZ -?capture_fp_settings@task_group_context@tbb@@QAEXXZ -?init@task_group_context@tbb@@IAEXXZ -?register_pending_exception@task_group_context@tbb@@QAEXXZ -??1task_group_context@tbb@@QAE@XZ - -?set_priority@task_group_context@tbb@@QAEXW4priority_t@2@@Z -?priority@task_group_context@tbb@@QBE?AW4priority_t@2@XZ - -?name@captured_exception@tbb@@UBEPBDXZ -?what@captured_exception@tbb@@UBEPBDXZ -??1captured_exception@tbb@@UAE@XZ -?move@captured_exception@tbb@@UAEPAV12@XZ -?destroy@captured_exception@tbb@@UAEXXZ -?set@captured_exception@tbb@@QAEXPBD0@Z -?clear@captured_exception@tbb@@QAEXXZ - - - -?throw_bad_last_alloc_exception_v4@internal@tbb@@YAXXZ -?throw_exception_v4@internal@tbb@@YAXW4exception_id@12@@Z -?what@bad_last_alloc@tbb@@UBEPBDXZ -?what@missing_wait@tbb@@UBEPBDXZ -?what@invalid_multiple_scheduling@tbb@@UBEPBDXZ -?what@improper_lock@tbb@@UBEPBDXZ -?what@user_abort@tbb@@UBEPBDXZ - - -?assertion_failure@tbb@@YAXPBDH00@Z -?get_initial_auto_partitioner_divisor@internal@tbb@@YAIXZ -?handle_perror@internal@tbb@@YAXHPBD@Z -?set_assertion_handler@tbb@@YAP6AXPBDH00@ZP6AX0H00@Z@Z -?runtime_warning@internal@tbb@@YAXPBDZZ -TBB_runtime_interface_version - - -?itt_load_pointer_with_acquire_v3@internal@tbb@@YAPAXPBX@Z -?itt_store_pointer_with_release_v3@internal@tbb@@YAXPAX0@Z -?call_itt_notify_v5@internal@tbb@@YAXHPAX@Z -?itt_set_sync_name_v3@internal@tbb@@YAXPAXPB_W@Z -?itt_load_pointer_v3@internal@tbb@@YAPAXPBX@Z - - - - - - - - - - - -??0pipeline@tbb@@QAE@XZ -??1filter@tbb@@UAE@XZ -??1pipeline@tbb@@UAE@XZ -??_7pipeline@tbb@@6B@ -?add_filter@pipeline@tbb@@QAEXAAVfilter@2@@Z -?clear@pipeline@tbb@@QAEXXZ -?inject_token@pipeline@tbb@@AAEXAAVtask@2@@Z -?run@pipeline@tbb@@QAEXI@Z - -?run@pipeline@tbb@@QAEXIAAVtask_group_context@2@@Z - -?process_item@thread_bound_filter@tbb@@QAE?AW4result_type@12@XZ -?try_process_item@thread_bound_filter@tbb@@QAE?AW4result_type@12@XZ -?set_end_of_input@filter@tbb@@IAEXXZ - - -?internal_construct@queuing_rw_mutex@tbb@@QAEXXZ -?acquire@scoped_lock@queuing_rw_mutex@tbb@@QAEXAAV23@_N@Z -?downgrade_to_reader@scoped_lock@queuing_rw_mutex@tbb@@QAE_NXZ -?release@scoped_lock@queuing_rw_mutex@tbb@@QAEXXZ -?upgrade_to_writer@scoped_lock@queuing_rw_mutex@tbb@@QAE_NXZ 
-?try_acquire@scoped_lock@queuing_rw_mutex@tbb@@QAE_NAAV23@_N@Z - - -?try_lock_read@reader_writer_lock@interface5@tbb@@QAE_NXZ -?try_lock@reader_writer_lock@interface5@tbb@@QAE_NXZ -?unlock@reader_writer_lock@interface5@tbb@@QAEXXZ -?lock_read@reader_writer_lock@interface5@tbb@@QAEXXZ -?lock@reader_writer_lock@interface5@tbb@@QAEXXZ -?internal_construct@reader_writer_lock@interface5@tbb@@AAEXXZ -?internal_destroy@reader_writer_lock@interface5@tbb@@AAEXXZ -?internal_construct@scoped_lock@reader_writer_lock@interface5@tbb@@AAEXAAV234@@Z -?internal_destroy@scoped_lock@reader_writer_lock@interface5@tbb@@AAEXXZ -?internal_construct@scoped_lock_read@reader_writer_lock@interface5@tbb@@AAEXAAV234@@Z -?internal_destroy@scoped_lock_read@reader_writer_lock@interface5@tbb@@AAEXXZ - - - -?internal_acquire_reader@spin_rw_mutex@tbb@@CAXPAV12@@Z -?internal_acquire_writer@spin_rw_mutex@tbb@@CA_NPAV12@@Z -?internal_downgrade@spin_rw_mutex@tbb@@CAXPAV12@@Z -?internal_itt_releasing@spin_rw_mutex@tbb@@CAXPAV12@@Z -?internal_release_reader@spin_rw_mutex@tbb@@CAXPAV12@@Z -?internal_release_writer@spin_rw_mutex@tbb@@CAXPAV12@@Z -?internal_upgrade@spin_rw_mutex@tbb@@CA_NPAV12@@Z -?internal_try_acquire_writer@spin_rw_mutex@tbb@@CA_NPAV12@@Z -?internal_try_acquire_reader@spin_rw_mutex@tbb@@CA_NPAV12@@Z - - - -?internal_construct@spin_rw_mutex_v3@tbb@@AAEXXZ -?internal_upgrade@spin_rw_mutex_v3@tbb@@AAE_NXZ -?internal_downgrade@spin_rw_mutex_v3@tbb@@AAEXXZ -?internal_acquire_reader@spin_rw_mutex_v3@tbb@@AAEXXZ -?internal_acquire_writer@spin_rw_mutex_v3@tbb@@AAE_NXZ -?internal_release_reader@spin_rw_mutex_v3@tbb@@AAEXXZ -?internal_release_writer@spin_rw_mutex_v3@tbb@@AAEXXZ -?internal_try_acquire_reader@spin_rw_mutex_v3@tbb@@AAE_NXZ -?internal_try_acquire_writer@spin_rw_mutex_v3@tbb@@AAE_NXZ - - -?internal_construct@x86_rtm_rw_mutex@internal@interface8@tbb@@AAEXXZ -?internal_release@x86_rtm_rw_mutex@internal@interface8@tbb@@AAEXAAVscoped_lock@1234@@Z -?internal_acquire_writer@x86_rtm_rw_mutex@internal@interface8@tbb@@AAEXAAVscoped_lock@1234@_N@Z -?internal_acquire_reader@x86_rtm_rw_mutex@internal@interface8@tbb@@AAEXAAVscoped_lock@1234@_N@Z -?internal_upgrade@x86_rtm_rw_mutex@internal@interface8@tbb@@AAE_NAAVscoped_lock@1234@@Z -?internal_downgrade@x86_rtm_rw_mutex@internal@interface8@tbb@@AAE_NAAVscoped_lock@1234@@Z -?internal_try_acquire_writer@x86_rtm_rw_mutex@internal@interface8@tbb@@AAE_NAAVscoped_lock@1234@@Z - - -?internal_construct@spin_mutex@tbb@@QAEXXZ -?internal_acquire@scoped_lock@spin_mutex@tbb@@AAEXAAV23@@Z -?internal_release@scoped_lock@spin_mutex@tbb@@AAEXXZ -?internal_try_acquire@scoped_lock@spin_mutex@tbb@@AAE_NAAV23@@Z - - -?internal_acquire@scoped_lock@mutex@tbb@@AAEXAAV23@@Z -?internal_release@scoped_lock@mutex@tbb@@AAEXXZ -?internal_try_acquire@scoped_lock@mutex@tbb@@AAE_NAAV23@@Z -?internal_construct@mutex@tbb@@AAEXXZ -?internal_destroy@mutex@tbb@@AAEXXZ - - -?internal_acquire@scoped_lock@recursive_mutex@tbb@@AAEXAAV23@@Z -?internal_release@scoped_lock@recursive_mutex@tbb@@AAEXXZ -?internal_try_acquire@scoped_lock@recursive_mutex@tbb@@AAE_NAAV23@@Z -?internal_construct@recursive_mutex@tbb@@AAEXXZ -?internal_destroy@recursive_mutex@tbb@@AAEXXZ - - -?internal_construct@queuing_mutex@tbb@@QAEXXZ -?acquire@scoped_lock@queuing_mutex@tbb@@QAEXAAV23@@Z -?release@scoped_lock@queuing_mutex@tbb@@QAEXXZ -?try_acquire@scoped_lock@queuing_mutex@tbb@@QAE_NAAV23@@Z - - -?internal_construct@critical_section_v4@internal@tbb@@QAEXXZ - - - -?internal_grow_predicate@hash_map_segment_base@internal@tbb@@QBE_NXZ - - 
-?advance@concurrent_queue_iterator_base@internal@tbb@@IAEXXZ -?assign@concurrent_queue_iterator_base@internal@tbb@@IAEXABV123@@Z -?internal_size@concurrent_queue_base@internal@tbb@@IBEHXZ -??0concurrent_queue_base@internal@tbb@@IAE@I@Z -??0concurrent_queue_iterator_base@internal@tbb@@IAE@ABVconcurrent_queue_base@12@@Z -??1concurrent_queue_base@internal@tbb@@MAE@XZ -??1concurrent_queue_iterator_base@internal@tbb@@IAE@XZ -?internal_pop@concurrent_queue_base@internal@tbb@@IAEXPAX@Z -?internal_pop_if_present@concurrent_queue_base@internal@tbb@@IAE_NPAX@Z -?internal_push@concurrent_queue_base@internal@tbb@@IAEXPBX@Z -?internal_push_if_not_full@concurrent_queue_base@internal@tbb@@IAE_NPBX@Z -?internal_set_capacity@concurrent_queue_base@internal@tbb@@IAEXHI@Z - - - -??1concurrent_queue_iterator_base_v3@internal@tbb@@IAE@XZ -??0concurrent_queue_iterator_base_v3@internal@tbb@@IAE@ABVconcurrent_queue_base_v3@12@@Z -??0concurrent_queue_iterator_base_v3@internal@tbb@@IAE@ABVconcurrent_queue_base_v3@12@I@Z -?advance@concurrent_queue_iterator_base_v3@internal@tbb@@IAEXXZ -?assign@concurrent_queue_iterator_base_v3@internal@tbb@@IAEXABV123@@Z -??0concurrent_queue_base_v3@internal@tbb@@IAE@I@Z -??1concurrent_queue_base_v3@internal@tbb@@MAE@XZ -?internal_pop@concurrent_queue_base_v3@internal@tbb@@IAEXPAX@Z -?internal_pop_if_present@concurrent_queue_base_v3@internal@tbb@@IAE_NPAX@Z -?internal_abort@concurrent_queue_base_v3@internal@tbb@@IAEXXZ -?internal_push@concurrent_queue_base_v3@internal@tbb@@IAEXPBX@Z -?internal_push_move@concurrent_queue_base_v8@internal@tbb@@IAEXPBX@Z -?internal_push_if_not_full@concurrent_queue_base_v3@internal@tbb@@IAE_NPBX@Z -?internal_push_move_if_not_full@concurrent_queue_base_v8@internal@tbb@@IAE_NPBX@Z -?internal_size@concurrent_queue_base_v3@internal@tbb@@IBEHXZ -?internal_empty@concurrent_queue_base_v3@internal@tbb@@IBE_NXZ -?internal_set_capacity@concurrent_queue_base_v3@internal@tbb@@IAEXHI@Z -?internal_finish_clear@concurrent_queue_base_v3@internal@tbb@@IAEXXZ -?internal_throw_exception@concurrent_queue_base_v3@internal@tbb@@IBEXXZ -?assign@concurrent_queue_base_v3@internal@tbb@@IAEXABV123@@Z -?move_content@concurrent_queue_base_v8@internal@tbb@@IAEXAAV123@@Z - - - -?internal_assign@concurrent_vector_base@internal@tbb@@IAEXABV123@IP6AXPAXI@ZP6AX1PBXI@Z4@Z -?internal_capacity@concurrent_vector_base@internal@tbb@@IBEIXZ -?internal_clear@concurrent_vector_base@internal@tbb@@IAEXP6AXPAXI@Z_N@Z -?internal_copy@concurrent_vector_base@internal@tbb@@IAEXABV123@IP6AXPAXPBXI@Z@Z -?internal_grow_by@concurrent_vector_base@internal@tbb@@IAEIIIP6AXPAXI@Z@Z -?internal_grow_to_at_least@concurrent_vector_base@internal@tbb@@IAEXIIP6AXPAXI@Z@Z -?internal_push_back@concurrent_vector_base@internal@tbb@@IAEPAXIAAI@Z -?internal_reserve@concurrent_vector_base@internal@tbb@@IAEXIII@Z - - - -??1concurrent_vector_base_v3@internal@tbb@@IAE@XZ -?internal_assign@concurrent_vector_base_v3@internal@tbb@@IAEXABV123@IP6AXPAXI@ZP6AX1PBXI@Z4@Z -?internal_capacity@concurrent_vector_base_v3@internal@tbb@@IBEIXZ -?internal_clear@concurrent_vector_base_v3@internal@tbb@@IAEIP6AXPAXI@Z@Z -?internal_copy@concurrent_vector_base_v3@internal@tbb@@IAEXABV123@IP6AXPAXPBXI@Z@Z -?internal_grow_by@concurrent_vector_base_v3@internal@tbb@@IAEIIIP6AXPAXPBXI@Z1@Z -?internal_grow_to_at_least@concurrent_vector_base_v3@internal@tbb@@IAEXIIP6AXPAXPBXI@Z1@Z -?internal_push_back@concurrent_vector_base_v3@internal@tbb@@IAEPAXIAAI@Z -?internal_reserve@concurrent_vector_base_v3@internal@tbb@@IAEXIII@Z 
-?internal_compact@concurrent_vector_base_v3@internal@tbb@@IAEPAXIPAXP6AX0I@ZP6AX0PBXI@Z@Z -?internal_swap@concurrent_vector_base_v3@internal@tbb@@IAEXAAV123@@Z -?internal_throw_exception@concurrent_vector_base_v3@internal@tbb@@IBEXI@Z -?internal_resize@concurrent_vector_base_v3@internal@tbb@@IAEXIIIPBXP6AXPAXI@ZP6AX10I@Z@Z -?internal_grow_to_at_least_with_result@concurrent_vector_base_v3@internal@tbb@@IAEIIIP6AXPAXPBXI@Z1@Z - - -?join@tbb_thread_v3@internal@tbb@@QAEXXZ -?detach@tbb_thread_v3@internal@tbb@@QAEXXZ -?internal_start@tbb_thread_v3@internal@tbb@@AAEXP6GIPAX@Z0@Z -?allocate_closure_v3@internal@tbb@@YAPAXI@Z -?free_closure_v3@internal@tbb@@YAXPAX@Z -?hardware_concurrency@tbb_thread_v3@internal@tbb@@SAIXZ -?thread_yield_v3@internal@tbb@@YAXXZ -?thread_sleep_v3@internal@tbb@@YAXABVinterval_t@tick_count@2@@Z -?move_v3@internal@tbb@@YAXAAVtbb_thread_v3@12@0@Z -?thread_get_id_v3@internal@tbb@@YA?AVid@tbb_thread_v3@12@XZ - - -?internal_initialize_condition_variable@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z -?internal_condition_variable_wait@internal@interface5@tbb@@YA_NAATcondvar_impl_t@123@PAVmutex@3@PBVinterval_t@tick_count@3@@Z -?internal_condition_variable_notify_one@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z -?internal_condition_variable_notify_all@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z -?internal_destroy_condition_variable@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z - - -?active_value@global_control@interface9@tbb@@CAIH@Z -?internal_create@global_control@interface9@tbb@@AAEXXZ -?internal_destroy@global_control@interface9@tbb@@AAEXXZ - - - - - - diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbb.lib b/lib/3rdParty/tbb/lib/x86/v140/tbb.lib deleted file mode 100644 index 07aca347..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/tbb.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbb_debug.lib b/lib/3rdParty/tbb/lib/x86/v140/tbb_debug.lib deleted file mode 100644 index 1dee6789..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/tbb_debug.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbb_preview.lib b/lib/3rdParty/tbb/lib/x86/v140/tbb_preview.lib deleted file mode 100644 index da1cfbf8..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/tbb_preview.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc.def b/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc.def deleted file mode 100644 index ac00e3f2..00000000 --- a/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc.def +++ /dev/null @@ -1,48 +0,0 @@ -; Copyright (c) 2005-2017 Intel Corporation -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. 
-; -; -; -; - - -EXPORTS - -; frontend.cpp -scalable_calloc -scalable_free -scalable_malloc -scalable_realloc -scalable_posix_memalign -scalable_aligned_malloc -scalable_aligned_realloc -scalable_aligned_free -scalable_msize -scalable_allocation_mode -scalable_allocation_command -__TBB_malloc_safer_free -__TBB_malloc_safer_realloc -__TBB_malloc_safer_msize -__TBB_malloc_safer_aligned_msize -__TBB_malloc_safer_aligned_realloc -?pool_create@rml@@YAPAVMemoryPool@1@HPBUMemPoolPolicy@1@@Z -?pool_create_v1@rml@@YA?AW4MemPoolError@1@HPBUMemPoolPolicy@1@PAPAVMemoryPool@1@@Z -?pool_destroy@rml@@YA_NPAVMemoryPool@1@@Z -?pool_malloc@rml@@YAPAXPAVMemoryPool@1@I@Z -?pool_free@rml@@YA_NPAVMemoryPool@1@PAX@Z -?pool_reset@rml@@YA_NPAVMemoryPool@1@@Z -?pool_realloc@rml@@YAPAXPAVMemoryPool@1@PAXI@Z -?pool_aligned_realloc@rml@@YAPAXPAVMemoryPool@1@PAXII@Z -?pool_aligned_malloc@rml@@YAPAXPAVMemoryPool@1@II@Z -?pool_identify@rml@@YAPAVMemoryPool@1@PAX@Z diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc.lib b/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc.lib deleted file mode 100644 index d293df95..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc_proxy.lib b/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc_proxy.lib deleted file mode 100644 index e76e38e5..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/tbbmalloc_proxy.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/lib/x86/v140/tbbproxy.lib b/lib/3rdParty/tbb/lib/x86/v140/tbbproxy.lib deleted file mode 100644 index bb6c5da4..00000000 Binary files a/lib/3rdParty/tbb/lib/x86/v140/tbbproxy.lib and /dev/null differ diff --git a/lib/3rdParty/tbb/tbb.props b/lib/3rdParty/tbb/tbb.props deleted file mode 100644 index 6d0e22c5..00000000 --- a/lib/3rdParty/tbb/tbb.props +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - $(SolutionDir)lib\3rdParty\tbb\include;%(AdditionalIncludeDirectories) - - - $(SolutionDir)lib\3rdParty\tbb\lib\$(PlatformShortName)\$(PlatformToolset);%(AdditionalLibraryDirectories) - tbb.lib;%(AdditionalDependencies) - - - xcopy /I /E /Y /D /C "$(SolutionDir)lib\3rdParty\tbb\bin\$(PlatformShortName)\$(PlatformToolset)\tbb.dll" "$(OutDir)" - - - - \ No newline at end of file diff --git a/lib/3rdParty/tbb/tbb_d.props b/lib/3rdParty/tbb/tbb_d.props deleted file mode 100644 index 4b300f96..00000000 --- a/lib/3rdParty/tbb/tbb_d.props +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - $(SolutionDir)lib\3rdParty\tbb\include;%(AdditionalIncludeDirectories) - - - $(SolutionDir)lib\3rdParty\tbb\lib\$(PlatformShortName)\$(PlatformToolset);%(AdditionalLibraryDirectories) - tbb_debug.lib;%(AdditionalDependencies) - - - xcopy /I /E /Y /D /C "$(SolutionDir)lib\3rdParty\tbb\bin\$(PlatformShortName)\$(PlatformToolset)\tbb_debug.dll" "$(OutDir)" - - - - \ No newline at end of file diff --git a/lib/local/CppInerop/CppInerop.vcxproj b/lib/local/CppInerop/CppInerop.vcxproj index 8b13fa5e..e5f9d092 100644 --- a/lib/local/CppInerop/CppInerop.vcxproj +++ b/lib/local/CppInerop/CppInerop.vcxproj @@ -64,33 +64,29 @@ - - + - - + - - + - - + diff --git a/lib/local/FaceAnalyser/CMakeLists.txt b/lib/local/FaceAnalyser/CMakeLists.txt index ef2b7d57..a55f4dc0 100644 --- a/lib/local/FaceAnalyser/CMakeLists.txt +++ b/lib/local/FaceAnalyser/CMakeLists.txt @@ -33,12 +33,11 @@ target_include_directories(FaceAnalyser PUBLIC $/include/OpenFace>) target_include_directories(FaceAnalyser PRIVATE ${FACEANALYSER_SOURCE_DIR}/include) -target_include_directories(FaceAnalyser PUBLIC ${TBB_ROOT_DIR}/include) 
target_include_directories(FaceAnalyser PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories(FaceAnalyser PUBLIC ${Boost_INCLUDE_DIRS}/boost) target_include_directories(FaceAnalyser PUBLIC ${OpenCV_INCLUDE_DIRS}) -target_link_libraries(FaceAnalyser PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${TBB_LIBRARIES} ${OpenBLAS_LIB}) +target_link_libraries(FaceAnalyser PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${OpenBLAS_LIB}) target_link_libraries(FaceAnalyser PUBLIC dlib::dlib) target_include_directories(FaceAnalyser PRIVATE ${OpenBLAS_INCLUDE_DIR}) diff --git a/lib/local/FaceAnalyser/FaceAnalyser.vcxproj b/lib/local/FaceAnalyser/FaceAnalyser.vcxproj index 87f344c9..6c5c774d 100644 --- a/lib/local/FaceAnalyser/FaceAnalyser.vcxproj +++ b/lib/local/FaceAnalyser/FaceAnalyser.vcxproj @@ -56,34 +56,30 @@ - - + - - + - - + - - + diff --git a/lib/local/GazeAnalyser/CMakeLists.txt b/lib/local/GazeAnalyser/CMakeLists.txt index d98974a2..144ba046 100644 --- a/lib/local/GazeAnalyser/CMakeLists.txt +++ b/lib/local/GazeAnalyser/CMakeLists.txt @@ -20,12 +20,11 @@ target_include_directories(GazeAnalyser PUBLIC $/include/OpenFace>) target_include_directories(GazeAnalyser PRIVATE ${GAZEANALYSER_SOURCE_DIR}/include) -target_include_directories(GazeAnalyser PUBLIC ${TBB_ROOT_DIR}/include) target_include_directories(GazeAnalyser PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories(GazeAnalyser PUBLIC ${Boost_INCLUDE_DIRS}/boost) target_include_directories(GazeAnalyser PUBLIC ${OpenCV_INCLUDE_DIRS}) -target_link_libraries(GazeAnalyser PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${TBB_LIBRARIES} ${OpenBLAS_LIB}) +target_link_libraries(GazeAnalyser PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${OpenBLAS_LIB}) target_link_libraries(GazeAnalyser PUBLIC dlib::dlib) target_include_directories(GazeAnalyser PRIVATE ${OpenBLAS_INCLUDE_DIR}) diff --git a/lib/local/GazeAnalyser/GazeAnalyser.vcxproj b/lib/local/GazeAnalyser/GazeAnalyser.vcxproj index 9a964dfd..7c5c45b6 100644 --- a/lib/local/GazeAnalyser/GazeAnalyser.vcxproj +++ b/lib/local/GazeAnalyser/GazeAnalyser.vcxproj @@ -57,31 +57,27 @@ - - + - - + - - + - - + diff --git a/lib/local/LandmarkDetector/CMakeLists.txt b/lib/local/LandmarkDetector/CMakeLists.txt index 21d900c9..adc26239 100644 --- a/lib/local/LandmarkDetector/CMakeLists.txt +++ b/lib/local/LandmarkDetector/CMakeLists.txt @@ -45,12 +45,11 @@ target_include_directories(LandmarkDetector PUBLIC $/include/OpenFace>) target_include_directories(LandmarkDetector PRIVATE ${LandmarkDetector_SOURCE_DIR}/include) -target_include_directories(LandmarkDetector PUBLIC ${TBB_ROOT_DIR}/include) target_include_directories(LandmarkDetector PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories(LandmarkDetector PUBLIC ${Boost_INCLUDE_DIRS}/boost) target_include_directories(LandmarkDetector PUBLIC ${OpenCV_INCLUDE_DIRS}) -target_link_libraries(LandmarkDetector PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${TBB_LIBRARIES} ${OpenBLAS_LIB}) +target_link_libraries(LandmarkDetector PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${OpenBLAS_LIB}) target_link_libraries(LandmarkDetector PUBLIC dlib::dlib) target_include_directories(LandmarkDetector PRIVATE ${OpenBLAS_INCLUDE_DIR}) diff --git a/lib/local/LandmarkDetector/LandmarkDetector.vcxproj b/lib/local/LandmarkDetector/LandmarkDetector.vcxproj index cf50f08b..8ddfae86 100644 --- a/lib/local/LandmarkDetector/LandmarkDetector.vcxproj +++ b/lib/local/LandmarkDetector/LandmarkDetector.vcxproj @@ -54,33 +54,29 @@ - - + - - + - - + - - + diff --git 
a/lib/local/LandmarkDetector/include/LandmarkDetectorParameters.h b/lib/local/LandmarkDetector/include/LandmarkDetectorParameters.h index 4ed5b24b..71f0b32f 100644 --- a/lib/local/LandmarkDetector/include/LandmarkDetectorParameters.h +++ b/lib/local/LandmarkDetector/include/LandmarkDetectorParameters.h @@ -99,9 +99,6 @@ struct FaceModelParameters string mtcnn_face_detector_location; FaceDetector curr_face_detector; - // Should the results be visualised and reported to console - bool quiet_mode; - // Should the model be refined hierarchically (if available) bool refine_hierarchical; diff --git a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp index 886d5a88..5ba1c3eb 100644 --- a/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp +++ b/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp @@ -40,9 +40,6 @@ #include #include -// TBB includes -#include - // System includes #include @@ -681,7 +678,6 @@ bool FaceDetectorMTCNN::DetectFaces(vector >& o_regions, const vector > scores_cross_scale(num_scales); vector > > proposal_corrections_cross_scale(num_scales); - //tbb::parallel_for(0, (int)num_scales, [&](int i) { for (int i = 0; i < num_scales; ++i) { double scale = ((double)face_support / (double)min_face_size)*cv::pow(pyramid_factor, i); @@ -746,7 +742,7 @@ bool FaceDetectorMTCNN::DetectFaces(vector >& o_regions, const // Creating proposal images from previous step detections vector above_thresh; above_thresh.resize(proposal_boxes_all.size(), false); - //tbb::parallel_for(0, (int)proposal_boxes_all.size(), [&](int k) { + for (size_t k = 0; k < proposal_boxes_all.size(); ++k) { float width_target = proposal_boxes_all[k].width + 1; @@ -819,7 +815,7 @@ bool FaceDetectorMTCNN::DetectFaces(vector >& o_regions, const // Preparing for the ONet stage above_thresh.clear(); above_thresh.resize(proposal_boxes_all.size()); - //tbb::parallel_for(0, (int)proposal_boxes_all.size(), [&](int k) { + for (size_t k = 0; k < proposal_boxes_all.size(); ++k) { float width_target = proposal_boxes_all[k].width + 1; diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectionValidator.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectionValidator.cpp index 13f2e6f0..9e30b3be 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectionValidator.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectionValidator.cpp @@ -40,9 +40,6 @@ #include #include -// TBB includes -#include - // System includes #include diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp index 136fd591..5416dd88 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp @@ -40,9 +40,6 @@ #include #include -// TBB includes -#include - // Local includes #include #include @@ -646,41 +643,43 @@ bool CLNF::DetectLandmarks(const cv::Mat_ &image, FaceModelParameters& pa bool parts_used = false; // Do the hierarchical models in parallel - tbb::parallel_for(0, (int)hierarchical_models.size(), [&](int part_model){ - { - - int n_part_points = hierarchical_models[part_model].pdm.NumberOfPoints(); - - vector> mappings = this->hierarchical_mapping[part_model]; - - cv::Mat_ part_model_locs(n_part_points * 2, 1, 0.0f); - - // Extract the corresponding landmarks - for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind) + parallel_for_(cv::Range(0, hierarchical_models.size()), [&](const cv::Range& range) { + for (int part_model = 
range.start; part_model < range.end; part_model++)
 		{
-			part_model_locs.at<float>(mappings[mapping_ind].second) = detected_landmarks.at<float>(mappings[mapping_ind].first);
-			part_model_locs.at<float>(mappings[mapping_ind].second + n_part_points) = detected_landmarks.at<float>(mappings[mapping_ind].first + this->pdm.NumberOfPoints());
+
+			int n_part_points = hierarchical_models[part_model].pdm.NumberOfPoints();
+
+			vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
+
+			cv::Mat_<float> part_model_locs(n_part_points * 2, 1, 0.0f);
+
+			// Extract the corresponding landmarks
+			for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
+			{
+				part_model_locs.at<float>(mappings[mapping_ind].second) = detected_landmarks.at<float>(mappings[mapping_ind].first);
+				part_model_locs.at<float>(mappings[mapping_ind].second + n_part_points) = detected_landmarks.at<float>(mappings[mapping_ind].first + this->pdm.NumberOfPoints());
+			}
+
+			// Fit the part based model PDM
+			hierarchical_models[part_model].pdm.CalcParams(hierarchical_models[part_model].params_global, hierarchical_models[part_model].params_local, part_model_locs);
+
+			// Only do this if we don't need to upsample
+			if (params_global[0] > 0.9 * hierarchical_models[part_model].patch_experts.patch_scaling[0])
+			{
+				parts_used = true;
+
+				this->hierarchical_params[part_model].window_sizes_current = this->hierarchical_params[part_model].window_sizes_init;
+
+				// Do the actual landmark detection
+				hierarchical_models[part_model].DetectLandmarks(image, hierarchical_params[part_model]);
+
+			}
+			else
+			{
+				hierarchical_models[part_model].pdm.CalcShape2D(hierarchical_models[part_model].detected_landmarks, hierarchical_models[part_model].params_local, hierarchical_models[part_model].params_global);
+			}
+
+		}
-
-		// Fit the part based model PDM
-		hierarchical_models[part_model].pdm.CalcParams(hierarchical_models[part_model].params_global, hierarchical_models[part_model].params_local, part_model_locs);
-
-		// Only do this if we don't need to upsample
-		if (params_global[0] > 0.9 * hierarchical_models[part_model].patch_experts.patch_scaling[0])
-		{
-			parts_used = true;
-
-			this->hierarchical_params[part_model].window_sizes_current = this->hierarchical_params[part_model].window_sizes_init;
-
-			// Do the actual landmark detection
-			hierarchical_models[part_model].DetectLandmarks(image, hierarchical_params[part_model]);
-
-		}
-		else
-		{
-			hierarchical_models[part_model].pdm.CalcShape2D(hierarchical_models[part_model].detected_landmarks, hierarchical_models[part_model].params_local, hierarchical_models[part_model].params_global);
-		}
-	}
 	});

 	// Recompute main model based on the fit part models
diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorParameters.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorParameters.cpp
index 95233d56..97245808 100644
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorParameters.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorParameters.cpp
@@ -150,13 +150,6 @@ FaceModelParameters::FaceModelParameters(vector<string> &arguments)
 			valid[i + 1] = false;
 			i++;
 		}
-		else if (arguments[i].compare("-q") == 0)
-		{
-
-			quiet_mode = true;
-
-			valid[i] = false;
-		}
 		else if (arguments[i].compare("-wild") == 0)
 		{
 			// For in the wild fitting these parameters are suitable
@@ -345,7 +338,6 @@ void FaceModelParameters::init()
 	// Face detection
 	haar_face_detector_location = "classifiers/haarcascade_frontalface_alt.xml";
 	mtcnn_face_detector_location = "model/mtcnn_detector/MTCNN_detector.txt";
-	quiet_mode = false;

 	// By default use MTCNN
 	curr_face_detector = MTCNN_DETECTOR;
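Note: the hunks above and in Patch_experts.cpp below all apply the same migration used to drop the TBB dependency: tbb::parallel_for over an index range becomes OpenCV's cv::parallel_for_, whose callback receives a cv::Range chunk and must loop over it itself. A minimal sketch of the pattern, assuming only OpenCV core (DoWork is a hypothetical per-index function, not part of this codebase; OpenCV 3.2+ accepts a lambda directly):

    #include <opencv2/core.hpp>

    void DoWork(int i); // hypothetical per-index work item

    void RunAll(int count)
    {
        // Before (TBB): tbb::parallel_for(0, count, [&](int i) { DoWork(i); });
        // After (OpenCV): the body receives a sub-range of indices and
        // iterates over it explicitly; OpenCV decides the chunking.
        cv::parallel_for_(cv::Range(0, count), [&](const cv::Range& range) {
            for (int i = range.start; i < range.end; i++)
            {
                DoWork(i);
            }
        });
    }

diff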
--git a/lib/local/LandmarkDetector/src/Patch_experts.cpp b/lib/local/LandmarkDetector/src/Patch_experts.cpp index 9ceb3c75..58ee67e4 100644 --- a/lib/local/LandmarkDetector/src/Patch_experts.cpp +++ b/lib/local/LandmarkDetector/src/Patch_experts.cpp @@ -38,9 +38,6 @@ #include "RotationHelpers.h" -// TBB includes -#include - // Math includes #define _USE_MATH_DEFINES #include @@ -212,133 +209,129 @@ void Patch_experts::Response(vector >& patch_expert_responses, c // We do not want to create threads for invisible landmarks, so construct an index of visible ones std::vector vis_lmk = Collect_visible_landmarks(visibilities, scale, view_id, n); - // calculate the patch responses for every landmark, Actual work happens here. If openMP is turned on it is possible to do this in parallel, - // this might work well on some machines, while potentially have an adverse effect on others - //#ifdef _OPENMP - //#pragma omp parallel for - //#endif - tbb::parallel_for(0, (int)vis_lmk.size(), [&](int i) { - //for (int i = 0; i < vis_lmk.size(); i++) - { - - // Work out how big the area of interest has to be to get a response of window size - int area_of_interest_width; - int area_of_interest_height; - int ind = vis_lmk.at(i); - - if (use_cen) - { - area_of_interest_width = window_size + cen_expert_intensity[scale][view_id][ind].width_support - 1; - area_of_interest_height = window_size + cen_expert_intensity[scale][view_id][ind].height_support - 1; - } - else if (use_ccnf) - { - area_of_interest_width = window_size + ccnf_expert_intensity[scale][view_id][ind].width - 1; - area_of_interest_height = window_size + ccnf_expert_intensity[scale][view_id][ind].height - 1; - } - else - { - area_of_interest_width = window_size + svr_expert_intensity[scale][view_id][ind].width - 1; - area_of_interest_height = window_size + svr_expert_intensity[scale][view_id][ind].height - 1; - } - - // scale and rotate to mean shape to reference frame - cv::Mat sim = (cv::Mat_(2, 3) << a1, -b1, landmark_locations.at(ind, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at(ind + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f); - - // Extract the region of interest around the current landmark location - cv::Mat_ area_of_interest(area_of_interest_height, area_of_interest_width, 0.0f); - - cv::warpAffine(grayscale_image, area_of_interest, sim, area_of_interest.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR); - - // Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far) - if (!cen_expert_intensity.empty()) + // calculate the patch responses for every landmark (this is the heavy lifting of landmark detection) + parallel_for_(cv::Range(0, vis_lmk.size()), [&](const cv::Range& range) { + for (int i = range.start; i < range.end; i++) { - int im2col_size = (area_of_interest_width * area_of_interest_height - 1) / 2; + // Work out how big the area of interest has to be to get a response of window size + int area_of_interest_width; + int area_of_interest_height; + int ind = vis_lmk.at(i); - cv::Mat_ prealloc_mat = preallocated_im2col[ind][im2col_size]; - - // If frontal view we can do mirrored landmarks together - if (view_id == 0) + if (use_cen) { - // If the patch expert does not have values, means it's a mirrored version and will be done in another part of a loop - if (!cen_expert_intensity[scale][view_id][ind].biases.empty()) - { - // No mirrored expert, 
so do normally - int mirror_id = mirror_inds.at(ind); - if (mirror_id == ind) - { - cv::Mat_ empty(0,0,0.0f); - cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty); - } - else - { - - // Grab mirrored area of interest - - // scale and rotate to mean shape to reference frame - cv::Mat sim_r = (cv::Mat_(2, 3) << a1, -b1, landmark_locations.at(mirror_id, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at(mirror_id + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f); - - // Extract the region of interest around the current landmark location - cv::Mat_ area_of_interest_r(area_of_interest_height, area_of_interest_width, 0.0f); - - cv::warpAffine(grayscale_image, area_of_interest_r, sim_r, area_of_interest_r.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR); - - cv::Mat_ prealloc_mat_right = preallocated_im2col[mirror_id][im2col_size]; - - cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, area_of_interest_r, patch_expert_responses[ind], patch_expert_responses[mirror_id], interp_mat, prealloc_mat, prealloc_mat_right); - - preallocated_im2col[mirror_id][im2col_size] = prealloc_mat_right; - - } - } + area_of_interest_width = window_size + cen_expert_intensity[scale][view_id][ind].width_support - 1; + area_of_interest_height = window_size + cen_expert_intensity[scale][view_id][ind].height_support - 1; + } + else if (use_ccnf) + { + area_of_interest_width = window_size + ccnf_expert_intensity[scale][view_id][ind].width - 1; + area_of_interest_height = window_size + ccnf_expert_intensity[scale][view_id][ind].height - 1; } else { - // For space and memory saving use a mirrored patch expert - if (!cen_expert_intensity[scale][view_id][ind].biases.empty()) + area_of_interest_width = window_size + svr_expert_intensity[scale][view_id][ind].width - 1; + area_of_interest_height = window_size + svr_expert_intensity[scale][view_id][ind].height - 1; + } + + // scale and rotate to mean shape to reference frame + cv::Mat sim = (cv::Mat_(2, 3) << a1, -b1, landmark_locations.at(ind, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at(ind + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f); + + // Extract the region of interest around the current landmark location + cv::Mat_ area_of_interest(area_of_interest_height, area_of_interest_width, 0.0f); + + cv::warpAffine(grayscale_image, area_of_interest, sim, area_of_interest.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR); + + // Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far) + if (!cen_expert_intensity.empty()) + { + + int im2col_size = (area_of_interest_width * area_of_interest_height - 1) / 2; + + cv::Mat_ prealloc_mat = preallocated_im2col[ind][im2col_size]; + + // If frontal view we can do mirrored landmarks together + if (view_id == 0) { - cv::Mat_ empty(0, 0, 0.0f); - cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty); - - // A slower, but slightly more accurate version - //cen_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]); + // If the patch expert does not have values, means it's a 
mirrored version and will be done in another part of a loop + if (!cen_expert_intensity[scale][view_id][ind].biases.empty()) + { + // No mirrored expert, so do normally + int mirror_id = mirror_inds.at(ind); + if (mirror_id == ind) + { + cv::Mat_ empty(0, 0, 0.0f); + cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty); + } + else + { + + // Grab mirrored area of interest + + // scale and rotate to mean shape to reference frame + cv::Mat sim_r = (cv::Mat_(2, 3) << a1, -b1, landmark_locations.at(mirror_id, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at(mirror_id + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f); + + // Extract the region of interest around the current landmark location + cv::Mat_ area_of_interest_r(area_of_interest_height, area_of_interest_width, 0.0f); + + cv::warpAffine(grayscale_image, area_of_interest_r, sim_r, area_of_interest_r.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR); + + cv::Mat_ prealloc_mat_right = preallocated_im2col[mirror_id][im2col_size]; + + cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, area_of_interest_r, patch_expert_responses[ind], patch_expert_responses[mirror_id], interp_mat, prealloc_mat, prealloc_mat_right); + + preallocated_im2col[mirror_id][im2col_size] = prealloc_mat_right; + + } + } } else { - cv::Mat_ empty(0, 0, 0.0f); - cen_expert_intensity[scale][mirror_views.at(view_id)][mirror_inds.at(ind)].ResponseSparse(empty, area_of_interest, empty, patch_expert_responses[ind], interp_mat, empty, prealloc_mat); + // For space and memory saving use a mirrored patch expert + if (!cen_expert_intensity[scale][view_id][ind].biases.empty()) + { + cv::Mat_ empty(0, 0, 0.0f); + cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty); + + // A slower, but slightly more accurate version + //cen_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]); + } + else + { + cv::Mat_ empty(0, 0, 0.0f); + cen_expert_intensity[scale][mirror_views.at(view_id)][mirror_inds.at(ind)].ResponseSparse(empty, area_of_interest, empty, patch_expert_responses[ind], interp_mat, empty, prealloc_mat); + } } + + preallocated_im2col[ind][im2col_size] = prealloc_mat; + } + else if (!ccnf_expert_intensity.empty()) + { + // get the correct size response window + patch_expert_responses[ind] = cv::Mat_(window_size, window_size); - preallocated_im2col[ind][im2col_size] = prealloc_mat; + int im2col_size = area_of_interest_width * area_of_interest_height; + cv::Mat_ prealloc_mat = preallocated_im2col[ind][im2col_size]; + + ccnf_expert_intensity[scale][view_id][ind].ResponseOpenBlas(area_of_interest, patch_expert_responses[ind], prealloc_mat); + + preallocated_im2col[ind][im2col_size] = prealloc_mat; + + // Below is an alternative way to compute the same, but that uses FFT instead of OpenBLAS + // ccnf_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]); + + } + else + { + // get the correct size response window + patch_expert_responses[ind] = cv::Mat_(window_size, window_size); + + svr_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]); + } } - else if (!ccnf_expert_intensity.empty()) - { - // get the correct size response 
window - patch_expert_responses[ind] = cv::Mat_(window_size, window_size); - - int im2col_size = area_of_interest_width * area_of_interest_height; - - cv::Mat_ prealloc_mat = preallocated_im2col[ind][im2col_size]; - - ccnf_expert_intensity[scale][view_id][ind].ResponseOpenBlas(area_of_interest, patch_expert_responses[ind], prealloc_mat); - - preallocated_im2col[ind][im2col_size] = prealloc_mat; - - // Below is an alternative way to compute the same, but that uses FFT instead of OpenBLAS - // ccnf_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]); - - } - else - { - // get the correct size response window - patch_expert_responses[ind] = cv::Mat_(window_size, window_size); - - svr_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]); - } - } }); } diff --git a/lib/local/Utilities/CMakeLists.txt b/lib/local/Utilities/CMakeLists.txt index a53cb2e2..3940e000 100644 --- a/lib/local/Utilities/CMakeLists.txt +++ b/lib/local/Utilities/CMakeLists.txt @@ -28,12 +28,11 @@ target_include_directories(Utilities PUBLIC $/include/OpenFace>) target_include_directories(Utilities PRIVATE ${UTILITIES_SOURCE_DIR}/include) -target_include_directories(Utilities PUBLIC ${TBB_ROOT_DIR}/include) target_include_directories(Utilities PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories(Utilities PUBLIC ${Boost_INCLUDE_DIRS}/boost) target_include_directories(Utilities PUBLIC ${OpenCV_INCLUDE_DIRS}) -target_link_libraries(Utilities PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES} ${TBB_LIBRARIES}) +target_link_libraries(Utilities PUBLIC ${OpenCV_LIBS} ${Boost_LIBRARIES}) target_link_libraries(Utilities PUBLIC dlib::dlib) install (TARGETS Utilities EXPORT OpenFaceTargets LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) diff --git a/lib/local/Utilities/Utilities.vcxproj b/lib/local/Utilities/Utilities.vcxproj index 80b47e8e..6da57589 100644 --- a/lib/local/Utilities/Utilities.vcxproj +++ b/lib/local/Utilities/Utilities.vcxproj @@ -61,33 +61,29 @@ - - + - - + - - + - - + @@ -174,6 +170,7 @@ + diff --git a/lib/local/Utilities/Utilities.vcxproj.filters b/lib/local/Utilities/Utilities.vcxproj.filters index eadc61ce..01173dad 100644 --- a/lib/local/Utilities/Utilities.vcxproj.filters +++ b/lib/local/Utilities/Utilities.vcxproj.filters @@ -71,5 +71,8 @@ Header Files + + Header Files + \ No newline at end of file diff --git a/lib/local/Utilities/include/ConcurrentQueue.h b/lib/local/Utilities/include/ConcurrentQueue.h new file mode 100644 index 00000000..a5a44836 --- /dev/null +++ b/lib/local/Utilities/include/ConcurrentQueue.h @@ -0,0 +1,101 @@ +// +// Copyright (c) 2013 Juan Palacios juan.palacios.puyana@gmail.com +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met : +// +// 1. Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONCURRENT_QUEUE_
+#define CONCURRENT_QUEUE_
+
+#include <queue>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+template <typename T>
+class ConcurrentQueue
+{
+public:
+
+	T pop()
+	{
+		std::unique_lock<std::mutex> mlock(mutex_);
+		while (queue_.empty())
+		{
+			cond_empty_.wait(mlock);
+		}
+		auto val = queue_.front();
+		queue_.pop();
+		mlock.unlock();
+		cond_full_.notify_one();
+		return val;
+	}
+
+	void pop(T& item)
+	{
+		std::unique_lock<std::mutex> mlock(mutex_);
+		while (queue_.empty())
+		{
+			cond_empty_.wait(mlock);
+		}
+		item = queue_.front();
+		queue_.pop();
+		mlock.unlock();
+		cond_full_.notify_one();
+	}
+
+	void push(const T& item)
+	{
+		std::unique_lock<std::mutex> mlock(mutex_);
+
+		while (capacity_ > 0 && queue_.size() >= capacity_)
+		{
+			cond_full_.wait(mlock);
+		}
+		queue_.push(item);
+		mlock.unlock();
+		cond_empty_.notify_one();
+	}
+
+	void set_capacity(int capacity)
+	{
+		std::unique_lock<std::mutex> mlock(mutex_);
+		capacity_ = capacity;
+	}
+
+	bool empty()
+	{
+		std::unique_lock<std::mutex> mlock(mutex_);
+		return queue_.empty();
+	}
+
+	ConcurrentQueue() = default;
+	ConcurrentQueue(const ConcurrentQueue&) = delete;            // disable copying
+	ConcurrentQueue& operator=(const ConcurrentQueue&) = delete; // disable assignment
+
+private:
+	std::queue<T> queue_;
+	std::mutex mutex_;
+	std::condition_variable cond_empty_;
+	std::condition_variable cond_full_;
+	// If capacity is greater than zero, the queue will block on push if there are too many elements in it
+	int capacity_ = 0;
+};
+
+#endif
\ No newline at end of file
diff --git a/lib/local/Utilities/include/RecorderOpenFace.h b/lib/local/Utilities/include/RecorderOpenFace.h
index 524889a7..07805dc6 100644
--- a/lib/local/Utilities/include/RecorderOpenFace.h
+++ b/lib/local/Utilities/include/RecorderOpenFace.h
@@ -45,14 +45,9 @@
 #include
 #include
 
-#include "tbb/concurrent_queue.h"
+#include <ConcurrentQueue.h>
 
-#ifdef _WIN32
-	// For speeding up writing
-	#include "tbb/task_group.h"
-#else
-	#include <thread>
-#endif
+#include <thread>
 
 namespace Utilities
 {
@@ -126,6 +121,10 @@ namespace Utilities
 
 	void PrepareRecording(const std::string& in_filename);
 
+	// A thread that will write image and video output (the slowest parts of output)
+	void VideoWritingTask(bool is_sequence);
+	void AlignedImageWritingTask();
+
 	// Keeping track of what to output and how to output it
 	const RecorderOpenFaceParameters params;
 
@@ -177,22 +176,16 @@ namespace Utilities
 	const int TRACKED_QUEUE_CAPACITY = 100;
 	bool tracked_writing_thread_started;
 	cv::Mat vis_to_out;
-	tbb::concurrent_bounded_queue<std::pair<std::string, cv::Mat> > vis_to_out_queue;
+	ConcurrentQueue<std::pair<std::string, cv::Mat> > vis_to_out_queue;
 
 	// For aligned face writing
 	const int ALIGNED_QUEUE_CAPACITY = 100;
 	bool aligned_writing_thread_started;
 	cv::Mat aligned_face;
-	tbb::concurrent_bounded_queue<std::pair<std::string, cv::Mat> > aligned_face_queue;
+	ConcurrentQueue<std::pair<std::string, cv::Mat> > aligned_face_queue;
 
-#ifdef _WIN32
-	// For keeping track of tasks
-	tbb::task_group writing_threads;
-#else
 	std::thread video_writing_thread;
 	std::thread aligned_writing_thread;
-#endif
-
 	};
 }
 
diff --git a/lib/local/Utilities/include/RecorderOpenFaceParameters.h b/lib/local/Utilities/include/RecorderOpenFaceParameters.h
index f27c2caf..e6c44196 100644
--- a/lib/local/Utilities/include/RecorderOpenFaceParameters.h
+++ b/lib/local/Utilities/include/RecorderOpenFaceParameters.h
@@ -70,6 +70,8 @@ namespace Utilities
 		bool outputTracked() const { return output_tracked; }
 		bool outputAlignedFaces() const { return output_aligned_faces; }
 		std::string outputCodec() const { return output_codec; }
+		std::string imageFormatAligned() const { return image_format_aligned; }
+		std::string imageFormatVisualization() const { return image_format_visualization; }
 		double outputFps() const { return fps_vid_out; }
 
 		bool outputBadAligned() const { return record_aligned_bad; }
@@ -107,6 +109,10 @@ namespace Utilities
 		std::string output_codec;
 		double fps_vid_out;
 
+		// Image recording parameters
+		std::string image_format_aligned;
+		std::string image_format_visualization;
+
 		// Camera parameters for recording in the meta file;
 		float fx, fy, cx, cy;
 
diff --git a/lib/local/Utilities/include/SequenceCapture.h b/lib/local/Utilities/include/SequenceCapture.h
index ee59a02f..56adccd0 100644
--- a/lib/local/Utilities/include/SequenceCapture.h
+++ b/lib/local/Utilities/include/SequenceCapture.h
@@ -39,14 +39,14 @@
 #include
 #include
 
-// For speeding up capture
-#include "tbb/concurrent_queue.h"
-#include "tbb/task_group.h"
+#include <thread>
 
 // OpenCV includes
 #include
 #include
 
+#include <ConcurrentQueue.h>
+
 namespace Utilities
 {
 
@@ -110,6 +110,8 @@ namespace Utilities
 		// Allows to differentiate if failed because no input specified or if failed to open a specified input
 		bool no_input_specified;
 
+		// Storing the captured data queue
+		static const int CAPTURE_CAPACITY = 200; // 200 MB
 
 	private:
 
@@ -119,7 +121,7 @@ namespace Utilities
 		bool capturing;
 
 		// For keeping track of tasks
-		tbb::task_group capture_threads;
+		std::thread capture_thread;
 
 		// A thread that will write video output, so that the rest of the application does not block on it
 		void CaptureThread();
 
@@ -137,11 +139,8 @@ namespace Utilities
 		cv::Mat latest_frame;
 		cv::Mat_<uchar> latest_gray_frame;
-
-		// Storing the captured data queue
-		const int CAPTURE_CAPACITY = 200; // 200 MB
 
 		// Storing capture timestamp, RGB image, gray image
-		tbb::concurrent_bounded_queue<std::tuple<double, cv::Mat, cv::Mat_<uchar> > > capture_queue;
+		ConcurrentQueue<std::tuple<double, cv::Mat, cv::Mat_<uchar> > > capture_queue;
 
 		// Keeping track of frame number and the files in the image sequence
 		size_t frame_num;
 
diff --git a/lib/local/Utilities/src/RecorderOpenFace.cpp b/lib/local/Utilities/src/RecorderOpenFace.cpp
index 132bcf33..633fbb1e 100644
--- a/lib/local/Utilities/src/RecorderOpenFace.cpp
+++ b/lib/local/Utilities/src/RecorderOpenFace.cpp
@@ -79,14 +79,14 @@ void CreateDirectory(std::string output_path)
 	}
 }
 
-void VideoWritingTask(tbb::concurrent_bounded_queue<std::pair<std::string, cv::Mat> > *writing_queue, bool is_sequence, cv::VideoWriter *video_writer)
+void RecorderOpenFace::VideoWritingTask(bool is_sequence)
 {
 	std::pair<std::string, cv::Mat> tracked_data;
 
 	while (true)
 	{
-		writing_queue->pop(tracked_data);
+		vis_to_out_queue.pop(tracked_data);
 
 		// Indicate that the thread should complete
 		if (tracked_data.second.empty())
@@ -96,9 +96,9 @@ void VideoWritingTask(tbb::concurrent_bounded_queue<std::pair<std::string, cv::Mat> > *writing_queue, bool is_sequence, cv::VideoWriter *video_writer)
-			if (video_writer->isOpened())
+			if (video_writer.isOpened())
 			{
-				video_writer->write(tracked_data.second);
+				video_writer.write(tracked_data.second);
 			}
 		}
 		else
@@ -112,14 +112,14 @@ void
VideoWritingTask(tbb::concurrent_bounded_queue<std::pair<std::string, cv::Mat> > *writing_queue)
+void RecorderOpenFace::AlignedImageWritingTask()
 {
 	std::pair<std::string, cv::Mat> tracked_data;
 
 	while (true)
 	{
-		writing_queue->pop(tracked_data);
+		aligned_face_queue.pop(tracked_data);
 
 		// Empty frame indicates termination
 		if (tracked_data.second.empty())
@@ -197,7 +197,7 @@ void RecorderOpenFace::PrepareRecording(const std::string& in_filename)
 	}
 	else
 	{
-		this->media_filename = out_name + ".jpg";
+		this->media_filename = out_name + "." + params.imageFormatVisualization();
 		metadata_file << "Output image:" << this->media_filename << endl;
 		this->media_filename = (path(record_root) / this->media_filename).string();
 	}
@@ -373,29 +373,23 @@ void RecorderOpenFace::WriteObservation()
 			aligned_face_queue.set_capacity(capacity);
 
 			// Start the alignment output thread
-#ifdef _WIN32
-			// For keeping track of tasks
-			writing_threads.run([&] {AlignedImageWritingTask(&aligned_face_queue); });
-#else
-			// Start the alignment output thread
-			aligned_writing_thread = std::thread(&AlignedImageWritingTask, &aligned_face_queue);
-#endif
+			aligned_writing_thread = std::thread(&RecorderOpenFace::AlignedImageWritingTask, this);
 		}
 
 		char name[100];
 
 		// Filename is based on frame number (TODO stringstream this)
 		if(params.isSequence())
-			std::sprintf(name, "frame_det_%02d_%06d.bmp", face_id, frame_number);
+			std::sprintf(name, "frame_det_%02d_%06d.", face_id, frame_number);
 		else
-			std::sprintf(name, "face_det_%06d.bmp", face_id);
+			std::sprintf(name, "face_det_%06d.", face_id);
 
 		// Construct the output filename
 		boost::filesystem::path slash("/");
 		std::string preferredSlash = slash.make_preferred().string();
-		string out_file = aligned_output_directory + preferredSlash + string(name);
+		string out_file = aligned_output_directory + preferredSlash + string(name) + params.imageFormatAligned();
 
 		if(params.outputBadAligned() || landmark_detection_success)
 		{
@@ -442,13 +436,7 @@ void RecorderOpenFace::WriteObservationTracked()
 		}
 
 		// Start the video and tracked image writing thread
-#ifdef _WIN32
-		// For keeping track of tasks
-		writing_threads.run([&] {VideoWritingTask(&vis_to_out_queue, params.isSequence(), &video_writer); });
-#else
-		video_writing_thread = std::thread(&VideoWritingTask, &vis_to_out_queue, params.isSequence(), &video_writer);
-#endif
-
+		video_writing_thread = std::thread(&RecorderOpenFace::VideoWritingTask, this, params.isSequence());
 	}
 
@@ -541,14 +529,10 @@ void RecorderOpenFace::Close()
 	aligned_face_queue.push(std::pair<std::string, cv::Mat>("", cv::Mat()));
 
 	// Make sure the recording threads complete
-#ifdef _WIN32
-	writing_threads.wait();
-#else
 	if (video_writing_thread.joinable())
 		video_writing_thread.join();
 	if (aligned_writing_thread.joinable())
 		aligned_writing_thread.join();
-#endif
 
 	tracked_writing_thread_started = false;
 	aligned_writing_thread_started = false;
 
diff --git a/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp b/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp
index 11786764..a00289e1 100644
--- a/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp
+++ b/lib/local/Utilities/src/RecorderOpenFaceParameters.cpp
@@ -58,6 +58,9 @@ RecorderOpenFaceParameters::RecorderOpenFaceParameters(std::vector
 	// Default output code
 	this->output_codec = "DIVX";
 
+	this->image_format_aligned = "bmp";
+	this->image_format_visualization = "jpg";
+
 	bool output_set = false;
 
 	this->output_2D_landmarks = false;
@@ -74,6 +77,16 @@ RecorderOpenFaceParameters::RecorderOpenFaceParameters(std::vector
 	for (size_t i = 0; i < arguments.size(); ++i)
 	{
+		if (arguments[i].compare("-format_aligned") == 0)
+		{
+			this->image_format_aligned = arguments[i+1];
+			i++;
+		}
+		if (arguments[i].compare("-format_vis_image") == 0)
+		{
+			this->image_format_visualization = arguments[i + 1];
+			i++;
+		}
 		if (arguments[i].compare("-nobadaligned") == 0)
 		{
 			this->record_aligned_bad = false;
 
diff --git a/lib/local/Utilities/src/SequenceCapture.cpp b/lib/local/Utilities/src/SequenceCapture.cpp
index 46b2ffac..11a3295a 100644
--- a/lib/local/Utilities/src/SequenceCapture.cpp
+++ b/lib/local/Utilities/src/SequenceCapture.cpp
@@ -279,15 +279,15 @@ void SequenceCapture::Close()
 	// Close the capturing threads
 	capturing = false;
 
-	// In case the queue is full and the thread is blocking, free one element so it can finish
-	std::tuple<double, cv::Mat, cv::Mat_<uchar> > data;
-	capture_queue.try_pop(data);
+	// If the queue is full it will be blocked, so need to empty it
+	while (!capture_queue.empty())
+	{
+		capture_queue.pop();
+	}
 
-	capture_threads.wait();
+	if (capture_thread.joinable())
+		capture_thread.join();
 
-	// Empty the capture queue (in case a capture was cancelled and we still have frames in the queue)
-	capture_queue.clear();
-
 	// Release the capture objects
 	if (capture.isOpened())
 		capture.release();
@@ -340,7 +340,8 @@ bool SequenceCapture::OpenVideoFile(std::string video_file, float fx, float fy,
 	this->name = video_file;
 
 	capturing = true;
-	capture_threads.run([&] {CaptureThread(); });
+
+	capture_thread = std::thread(&SequenceCapture::CaptureThread, this);
 
 	return true;
 
@@ -405,8 +406,9 @@ bool SequenceCapture::OpenImageSequence(std::string directory, float fx, float f
 	is_image_seq = true;
 	vid_length = image_files.size();
 	capturing = true;
-	capture_threads.run([&] {CaptureThread(); });
+	capture_thread = std::thread(&SequenceCapture::CaptureThread, this);
+
 	return true;
 }
 
@@ -444,6 +446,7 @@ void SequenceCapture::CaptureThread()
 {
 	int capacity = (CAPTURE_CAPACITY * 1024 * 1024) / (4 * frame_width * frame_height);
 	capture_queue.set_capacity(capacity);
+
 	int frame_num_int = 0;
 
 	while(capturing)
@@ -486,6 +489,7 @@ void SequenceCapture::CaptureThread()
 			ConvertToGrayscale_8bit(tmp_frame, tmp_gray_frame);
 
 			capture_queue.push(std::make_tuple(timestamp_curr, tmp_frame, tmp_gray_frame));
+
 		}
 	}
 
@@ -495,10 +499,12 @@ cv::Mat SequenceCapture::GetNextFrame()
 	{
 		std::tuple<double, cv::Mat, cv::Mat_<uchar> > data;
-		capture_queue.pop(data);
+		data = capture_queue.pop();
+
 		time_stamp = std::get<0>(data);
 		latest_frame = std::get<1>(data);
 		latest_gray_frame = std::get<2>(data);
+
 	}
 	else
 	{
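Note: taken together, the ConcurrentQueue, std::thread, and empty-sentinel changes above implement a classic bounded producer/consumer pipeline. A minimal self-contained sketch of that pattern, assuming the new ConcurrentQueue.h header is on the include path (the item type, frame size, and memory budget are illustrative, not from this diff):

    #include <string>
    #include <thread>
    #include <ConcurrentQueue.h>

    int main()
    {
        ConcurrentQueue<std::string> queue;

        // Bound the queue by a memory budget rather than an item count, as
        // SequenceCapture::CaptureThread does: with a 200 MB budget and
        // ~4 bytes/pixel 640x480 frames, (200 * 1024 * 1024) / (4 * 640 * 480)
        // allows roughly 170 items in flight before push() blocks.
        queue.set_capacity((200 * 1024 * 1024) / (4 * 640 * 480));

        // The consumer exits when it pops an empty sentinel, mirroring
        // RecorderOpenFace::Close() pushing an empty cv::Mat to stop its writers.
        std::thread worker([&queue]() {
            while (true)
            {
                std::string item = queue.pop();
                if (item.empty())
                    break; // sentinel: producer has finished
                // ... write item to disk here ...
            }
        });

        queue.push("frame_000001");
        queue.push("frame_000002");
        queue.push(""); // sentinel terminates the worker
        worker.join();
        return 0;
    }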