Please look at the below code which does a simple char assignment
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){
char* temp;
bool found = false;
for(int i = 0 ; i < 11; i++){
if(gpuHello[i] == ' '){
temp = &gpuHello[i+1];
found = true;
break;
}
}
bool sth = found;
finalPoint = temp;
}
int main()
{
// Testing one concept;
string hello = "Hello World";
char* gpuHello;
cudaMalloc((void**)&gpuHello, 11 * sizeof(char));
cudaMemcpy(gpuHello, hello.c_str(), 11 * sizeof(char), cudaMemcpyHostToDevice);
char* didItFind;
char* whatIsIt = (char*)malloc(5 * sizeof(char));
seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
cudaMemcpy(whatIsIt,didItFind, 5 * sizeof(char), cudaMemcpyDeviceToHost);
cout<<"The pointer points to : " << whatIsIt;
return 0;
}
I really dont understand that when i print whatIsIt, why does it not print “World” as the answer but just prints some random string.
EDIT
Update version after accouting for null characters as pointed out
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){
char* temp;
bool found = false;
for(int i = 0 ; i < 11; i++){
if(gpuHello[i] == ' '){
temp = gpuHello;
found = true;
break;
}
}
bool sth = found;
finalPoint = temp;
}
int main()
{
// Testing one concept;
string hello = "Hello World";
char* gpuHello;
cudaMalloc((void**)&gpuHello, 12 * sizeof(char));
cudaMemcpy(gpuHello, hello.c_str(), 12 * sizeof(char), cudaMemcpyHostToDevice);
char* didItFind;
char* whatIsIt = (char*)malloc(6 * sizeof(char));
seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
cudaMemcpy(whatIsIt,didItFind, 6 * sizeof(char), cudaMemcpyDeviceToHost);
cout<<"The pointer points to : " << whatIsIt;
return 0;
}
You must pass
finalPointby reference, not by value if you want to have the kernel operate the way you have defined it. Perhaps something like this:When compiled and run, this version produces:
As it stands, the device to host copy will be failing, because
didItFindis not a valid device pointer – you passed it by value to the kernel, so its value on the host cannot be modified by the kernel. The code above contains sufficient error checking to find this sort of problem – you should always check the return status of every API call.