I am working on a barcode recognition project that includes filtering, geometric image transformation, and a CNN implementation.
I wrote the code so that it works correctly on both the CPU and the GPU, and implemented the same filtering with CUDA (using the cuda namespace) and with the core (CPU) functions. According to the performance criteria of my project, the code runs 2x to 3x faster on the CPU than on the GPU. I researched how CPU and GPU memory relate to each other and created two different programs.
Following my research, I checked the implementations for splitting image memory onto the GPU and looked for the reason why the GPU runs slowly. I was advised to run at high resolution to see the GPU work faster than the CPU, but unfortunately the GPU was still 2x slower than the CPU. I would like to share part of the code here, with the same filtering on the GPU and on the CPU, so that you can point me in the right direction. I just would like to improve the performance of my program using the GPU implementation.
IDE : Visual Studio 2015(x64), release mode in 64 bit
NVIDIA graphics card : NVIDIA GeForce GTX 550M, Fermi
CUDA version : CUDA 8, compatible with the given graphics card
Thank you in advance,
Sample CPU Sobel filtering code:
// Computes a combined Sobel edge response for `gray_image` on the CPU, shows it
// in the "Sobel_operations" window, and returns it.
//
// Pipeline (3x3 kernel, output depth taken from Sobel_variables::ddepth):
//   Gx   = Sobel derivative along x
//   Gy   = Sobel derivative along y
//   diff = Gx - Gy
//   sum  = Gy + Gx
//   out  = alpha * diff + beta * sum      (alpha = 1, beta = 0.9)
//
// gray_image: input image. It is released before returning, so the caller's
//             Mat no longer refers to the image data after the call.
// Returns the weighted image (Out_Image).
Mat Morphology::Sobel_operations(Mat& gray_image){
    // Value-initialized bundle holding the weights, the depth and all images.
    Sobel_variables vars{};
    vars.alpha = 1;
    vars.beta = 0.9;

    // ddepth is not assigned in this function; it keeps whatever value
    // Sobel_variables gives it on construction.
    Sobel(gray_image, vars.Gradx, vars.ddepth, 1, 0, 3);
    Sobel(gray_image, vars.Grady, vars.ddepth, 0, 1, 3);

    // Difference and sum of the two derivative images. No normalize or
    // convertScaleAbs step is applied to either intermediate.
    subtract(vars.Gradx, vars.Grady, vars.Gradient);
    add(vars.Grady, vars.Gradx, vars.Gradient1);

    // Blend: Out_Image = alpha * Gradient + beta * Gradient1 + 0.
    addWeighted(vars.Gradient, vars.alpha, vars.Gradient1, vars.beta, 0, vars.Out_Image);
    imshow("Sobel_operations", vars.Out_Image);

    // The input is handed back released, which callers can observe. The
    // intermediate images are members of `vars` and are cleaned up by their
    // destructors when it goes out of scope, so they need no manual release.
    gray_image.release();
    return vars.Out_Image;
}
The output result
Approximately 110 ms on the CPU at high resolution
The same Sobel filtering on the GPU:
// Shared CUDA filter handle, declared outside the function below.
// Sobel_operations(cuda::GpuMat&) assigns it twice per call: first the
// x-derivative Sobel filter, then the y-derivative one.
Ptr<cuda::Filter> filter;
// Computes a combined Sobel edge response for `gray_image` with the cv::cuda
// API, shows it in the "Sobel_operations" window, and returns the GPU image.
//
// Pipeline (3x3 kernel, output depth taken from Sobel_variables::ddepth):
//   Gx   = Sobel derivative along x
//   Gy   = Sobel derivative along y
//   diff = Gx - Gy
//   sum  = Gy + Gx
//   out  = alpha * diff + beta * sum      (alpha = 1, beta = 0.9)
//
// gray_image: input GPU image. It is released before returning, so the
//             caller's GpuMat no longer refers to the image data afterwards.
// Returns the weighted image (Out_Image) as a cuda::GpuMat.
//
// NOTE(review): every call builds two new Sobel filter objects and downloads
// the result to host memory for imshow. Any timing taken around this function
// includes that work, not only the filtering itself. Confirm with a profiler
// before concluding which part dominates.
cuda::GpuMat Morphology::Sobel_operations(cuda::GpuMat& gray_image){
    // Value-initialized bundle holding the weights, the depth and all images.
    Sobel_variables vars{};
    vars.alpha = 1;
    vars.beta = 0.9;

    // x-derivative. ddepth is not assigned in this function; it keeps whatever
    // value Sobel_variables gives it on construction.
    filter = cuda::createSobelFilter(gray_image.type(), vars.ddepth, 1, 0, 3);
    filter->apply(gray_image, vars.Gradx);

    // y-derivative. The shared `filter` handle is overwritten with the new filter.
    filter = cuda::createSobelFilter(gray_image.type(), vars.ddepth, 0, 1, 3);
    filter->apply(gray_image, vars.Grady);

    // Difference and sum of the two derivative images. No normalize or
    // convertScaleAbs step is applied to either intermediate.
    cuda::subtract(vars.Gradx, vars.Grady, vars.Gradient);
    cuda::add(vars.Grady, vars.Gradx, vars.Gradient1);

    // Blend: Out_Image = alpha * Gradient + beta * Gradient1 + 0.
    cuda::addWeighted(vars.Gradient, vars.alpha, vars.Gradient1, vars.beta, 0, vars.Out_Image);

    // imshow is given a host-side image here, so the result is copied from
    // the GPU first.
    vars.Out_Image.download(cpu_sobeloutimage);
    imshow("Sobel_operations", cpu_sobeloutimage);

    // The input and the host-side display buffer are both declared outside this
    // function, so they are released explicitly. The intermediate images are
    // members of `vars` and are cleaned up by their destructors when it goes
    // out of scope.
    gray_image.release();
    cpu_sobeloutimage.release();
    return vars.Out_Image;
}
The output result for GPU
Approximately 250 ms on the GPU at high resolution