GPU Code Not Working Question
Hi,
(This is a continuation from the post "Pseudocode for custom GPU computation")
Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:
//The size of the video frame is 480 * 640
//funcam.cpp
using namespace std;
using namespace cv;
using namespace cv::gpu;
void callKernel(const GpuMat& src, const GpuMat& dst)
{
uchar* p = src.data;
uchar* p2 = dst.data;
func(p, p2, src.step, src.cols, src.rows, dst.step);
}
int main(int, char**)
{
VideoCapture cap(0);
if(!cap.isOpened()) return -1;
int frameH = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);
int frameW = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);
cout << frameH << " " << frameW << endl;
CvSize size = cvSize(frameH, frameW);
Mat frame;
Mat input;
Mat output;
GpuMat d_frame;
GpuMat d_output;
for(;;)
{
cap >> frame;
if (frame.empty())
break;
//convert to grayscale
cvtColor(frame, input, CV_BGR2GRAY);
// memory Copy from Host to Device
d_frame.upload(input);
// Call CUDA kernel
d_output.create(size, CV_8UC1);
callKernel(d_frame, d_output);
// memory Copy from Device to Host
d_output.download(output);
imshow("output", output);
if(waitKey(30) >= 0)
break;
}
return 0;
}
//funcam_cuda.cu
__global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)
{
int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
int colInd = blockIdx.x * blockDim.x + threadIdx.x;
uchar* rowsrcPtr = srcptr + rowInd*step;
uchar* rowdstPtr = dstptr + rowInd*dststep;
uchar pixVal = rowsrcPtr[colInd];
// rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);
rowdstPtr[colInd] = 0;
}
extern "C"
void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)
{
dim3 grDim (16, 12);
dim3 blDim (40, 40);
funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);
cudaThreadSynchronize(); //Is this reqd?
}