Is CUDA GoodFeaturesToTrackDetector not thread-safe?
Hello, I use OpenCV 4.2 with CUDA 10.2 and Visual Studio 2019 on Windows 10. While using the OpenCV CUDA GoodFeaturesToTrackDetector in a parallel loop, I noticed that I systematically get the exception "merge_sort: failed to synchronize", even though each thread runs it on a different cuda::GpuMat, in a separate cuda::Stream, and with a separate Algorithm instance.
I reduced my code to a minimal reproducible example:
#include <iostream>
#include <list>
#include <thread>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/cvconfig.h>
#include <opencv2/opencv.hpp>
#if defined(HAVE_CUDA)
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda/common.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudaoptflow.hpp>
#include <opencv2/cudaarithm.hpp>
#endif
#include <thread>
#include <vector>
using namespace std; using namespace cv;
// Stress test for cv::cuda::GoodFeaturesToTrackDetector under concurrent use.
//
// Loads one image, converts it to grayscale, and gives each worker thread its
// own host copy. Then, for many iterations, it spawns NBThread threads; every
// thread creates its OWN detector, its OWN cuda::Stream and its OWN GpuMat, so
// no OpenCV object is shared between threads. All workers are joined before the
// next iteration starts.
//
// Returns 0 on completion, 1 if the input image cannot be read.
int main() {
    const int NBThread = 5;

    Mat frames = imread("C:\\Users\\alex\\Desktop\\test.png");
    // Fail early with a clear message instead of letting cvtColor throw on an
    // empty matrix when the file is missing or unreadable.
    if (frames.empty()) {
        std::cerr << "Could not read input image" << std::endl;
        return 1;
    }
    cvtColor(frames, frames, COLOR_BGR2GRAY);

    // One independent host image per worker thread.
    vector<Mat> vectImg;
    vectImg.reserve(NBThread);
    for (int u = 0; u < NBThread; u++)
        vectImg.push_back(frames.clone());

    for (int i = 0; i < 100000; i++)
    {
        vector<thread> workers;
        workers.reserve(NBThread);
        for (int id = 0; id < NBThread; ++id)
            workers.emplace_back([&, id]()
            {
                // NOTE: an exception escaping this lambda terminates the
                // process; that is how the reported failure shows up.
                Size frameSize = vectImg[id].size();

                // Create the detector (one instance per thread).
                int srcType = CV_8UC1;
                int maxCorners = /*1000*/ 4000;
                double qualityLevel = /*0.01*/ 0.01;
                double minDistance = /*0.0*/ 0.0;
                int blockSize = /*3*/ 3;
                bool useHarrisDetector = /*false*/ false;
                double harrisK = /*0.04*/ 0.04;
                auto m_CudaDetector = cv::cuda::createGoodFeaturesToTrackDetector(
                    srcType, maxCorners, qualityLevel, minDistance,
                    blockSize, useHarrisDetector, harrisK);

                // Per-thread stream and device buffers.
                cuda::Stream stream;
                cuda::GpuMat gpuFrame(frameSize, CV_8UC1);
                gpuFrame.upload(vectImg[id], stream);

                cv::cuda::GpuMat d_prevRef;
                m_CudaDetector->detect(gpuFrame, d_prevRef, cuda::GpuMat(), stream);
                // Block until everything queued on this thread's stream is done.
                stream.waitForCompletion();

                std::cout << " Nombre de points detect = " << d_prevRef.size() << " thread : " << id << std::endl;
            });

        // Wait for every worker before starting the next iteration.
        for (auto& worker : workers) worker.join();
    }
    return 0;
}
After many loop iterations I get an exception with the following call stack:
- opencv_cudaimgproc420d.dll!thrust::cuda_cub::throw_on_error(cudaError status, const char * msg) Line 227 C++
- opencv_cudaimgproc420d.dll!thrust::cuda_cub::__merge_sort::merge_sort .... Line 1318 C++
- opencv_cudaimgproc420d.dll!thrust::cuda_cub::__smart_sort::smart_sort ... Line 1552 C++
- opencv_cudaimgproc420d.dll!thrust::cuda_cub::sort ... Line 1631 C++
- opencv_cudaimgproc420d.dll!thrust::sort ... Line 57 C++
- opencv_cudaimgproc420d.dll!cv::cuda::device::gfft::sortCorners_gpu(cv::cuda::PtrStepSz<float> eig, float2 * corners,int count, CUstream_st * stream) Line 139 C++
- opencv_cudaimgproc420d.dll!`anonymous namespace'::GoodFeaturesToTrackDetector::detect(const cv::debug_build_guard::_InputArray & _image, const cv::debug_build_guard::_OutputArray & _corners, const cv::debug_build_guard::_InputArray & _mask, cv::cuda::Stream & stream) Line 125 C++
Must I conclude that the OpenCV CUDA GoodFeaturesToTrackDetector is not thread-safe, despite the use of separate Streams?