1 | initial version |
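Use the following pattern to upload each captured frame to the GPU, run your CUDA kernel on it, and download the result back to the host: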
using namespace cv;
using namespace cv::gpu;
void callKernel(const GpuMat& src, GpuMat& dst)
{
// you can use
// src.cols - width
// src.rows - height
// src.step - step in bytes between image rows
// src.ptr<T>() - device pointer to data, T - element type
func(src.ptr<uchar3>(), src.step, src.cols, src.rows);
}
// Per-frame processing loop: grab a frame, upload it to the GPU, run the
// kernel wrapper, download the result and display it.
int main()
{
    // Setup elided in the original pattern (cap, size and type are expected
    // to be defined here).
    ...

    // Host-side images.
    Mat frame;
    Mat output;

    // Device-side images; declared outside the loop so the same objects are
    // used for every frame.
    GpuMat d_frame;
    GpuMat d_output;

    while (true)
    {
        // Grab the next frame; an empty frame ends the loop.
        cap >> frame;
        if (frame.empty())
        {
            break;
        }

        // Host -> device copy.
        d_frame.upload(frame);

        // Prepare the output buffer, then run the kernel wrapper.
        d_output.create(size, type);
        callKernel(d_frame, d_output);

        // Device -> host copy, then display.
        d_output.download(output);
        imshow("output", output);

        // Leave the loop when waitKey reports a non-negative value.
        const bool stopRequested = (waitKey(30) >= 0);
        if (stopRequested)
        {
            break;
        }
    }

    // Remaining code elided in the original pattern.
    ...
}