1 | initial version |
Hello!
Your main error is:
CvSize size = cvSize(frameH, frameW);
The signature of cvSize() is cvSize(int width, int height), so the first parameter must be the width:
CvSize size = cvSize(frameW, frameH);
Also, dim3 blDim(40, 40); is not a good block size: 40 x 40 = 1600 threads exceeds the limit of 1024 threads per block, so the kernel launch fails. Good block sizes are 16x16 or 32x8.
cudaThreadSynchronize is deprecated; use cudaDeviceSynchronize instead.
// .cu file
#include <cstdio>

#include <cuda_runtime.h>
// Binary threshold kernel: every byte of the source is treated as one pixel.
// A destination byte is set to 255 when the matching source byte is greater
// than 60, and to 0 otherwise.
//
// Launch layout: 2D grid of 2D blocks; the x dimension indexes columns and the
// y dimension indexes rows. Threads that land outside cols x rows do nothing,
// so the grid may overshoot the image.
//
//   srcptr, dstptr   - first byte of the source / destination image; both are
//                      dereferenced by the kernel.
//   srcstep, dststep - offset in bytes between the starts of consecutive rows.
//   cols, rows       - image width and height in pixels.
__global__ void funcKernel(const unsigned char* srcptr, unsigned char* dstptr,
                           size_t srcstep, size_t dststep, int cols, int rows)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < cols && y < rows)
    {
        // The row offset is evaluated in size_t because the step is a size_t.
        const unsigned char in = (srcptr + y * srcstep)[x];

        unsigned char out = 0;
        if (in > 60)
            out = 255;

        (dstptr + y * dststep)[x] = out;
    }
}
// Integer division of a by b, rounded up for non-negative a and positive b.
// Used to work out how many blocks of size b are needed to cover a elements.
int divUp(int a, int b)
{
    const int padded = a + b - 1;  // bump a up to just below the next multiple of b
    return padded / b;
}
// Host-side wrapper: launches funcKernel over a cols x rows image using
// 32x8 thread blocks and blocks until the kernel has finished.
//
//   srcptr, dstptr   - source / destination image pointers, forwarded to the
//                      kernel unchanged.
//   srcstep, dststep - offset in bytes between the starts of consecutive rows.
//   cols, rows       - image width and height in pixels.
//
// An empty image (cols <= 0 or rows <= 0) is a no-op. Launch and execution
// failures are reported on stderr; the function returns void, so callers
// cannot observe them through the return value.
void func(const unsigned char* srcptr, unsigned char* dstptr,
          size_t srcstep, size_t dststep, int cols, int rows)
{
    // A zero-sized grid is an invalid launch configuration, so bail out early.
    if (cols <= 0 || rows <= 0)
        return;

    const dim3 blDim(32, 8);
    const dim3 grDim(divUp(cols, blDim.x), divUp(rows, blDim.y));

    funcKernel<<<grDim, blDim>>>(srcptr, dstptr, srcstep, dststep, cols, rows);

    // A kernel launch returns nothing; configuration errors must be queried.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::fprintf(stderr, "func: kernel launch failed: %s\n",
                     cudaGetErrorString(err));
        return;
    }

    // Faults raised while the kernel runs only surface at a synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        std::fprintf(stderr, "func: kernel execution failed: %s\n",
                     cudaGetErrorString(err));
    }
}