Consider the following sample program:
#include <iostream>
using namespace std;
// Global pointer declared with __device__ __constant__ qualifiers.
// It is not referenced anywhere else in this sample.
__device__ __constant__ float* data;
// Requests device memory for numElem elements of type T via cudaMalloc.
//
// NOTE: deviceDest is a pointer passed BY VALUE, so &deviceDest is the address
// of this function's local copy. cudaMalloc writes the new device address into
// that local copy; the caller's pointer is left unchanged, and the address is
// lost when the function returns.
template<class T> void allocOnly(T* deviceDest, size_t numElem)
{
// cudaMalloc stores the allocated address through its first (void**) argument.
cudaError_t errCode = cudaMalloc((void**)&deviceDest, numElem*sizeof(T));
// On failure, print the numeric error code; execution continues afterwards.
if(errCode != cudaSuccess)
cout << "Got error with code " << errCode << endl;
}
// Compares two ways of allocating a 10-float device buffer and prints the
// resulting host-side pointer values: first through the allocOnly helper,
// then by calling cudaMalloc directly.
int main()
{
    const size_t count = 10;

    // Case 1: allocation requested through the template helper.
    float* test = 0;
    allocOnly<float>(test, count);
    cout << "test = " << test << endl;

    // Case 2: cudaMalloc receives the address of the local pointer itself.
    float* test2 = 0;
    const cudaError_t status = cudaMalloc((void**)&test2, count * sizeof(float));
    if (cudaSuccess != status)
    {
        cout << "Got error with code " << status << endl;
    }
    cout << "test2 = " << test2 << endl;

    return 0;
}
When compiled with `nvcc test.cu -o testBin`, the program prints:
test = 0
test2 = 0x310100
Why is `test` not modified when cudaMalloc is called through the template function? cudaMalloc is supposed to modify it so that it points to the newly allocated device memory!
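The pointer is not being modified because
`cudaMalloc` in the function `allocOnly` is allocating memory to the argument `deviceDest`, which is local to the function `allocOnly` (the pointer is passed by value, so only the local copy receives the device address). You can modify the function `allocOnly` to allocate memory as follows: take a pointer to the pointer, i.e. `template<class T> void allocOnly(T** deviceDest, size_t numElem)`, and call `cudaMalloc((void**)deviceDest, numElem*sizeof(T));`. Inside the main function: pass the address of the pointer, i.e. `allocOnly<float>(&test, 10);`.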