I have this code and it crashes every 1-2th launch.
I have tried use malloc/cudaMallocHost/cudeMalloc but it was useless. It think it happens due to manual cufftComplex initialization but prove it a can’t because without data I can’t get fft. Could you help me eliminate this crashes?
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <cuda.h>
#include <cufft.h>
using namespace std;
int main(int argc, char **argv)
{
cufftHandle plan;
cufftComplex *data;
cufftComplex *digits;
cufftComplex *h_data;
cudaMallocHost((void**)&digits, sizeof(cufftComplex)*8);
digits[0].x = 12.5f; digits[0].y = 0.0f;
digits[1].x = 66.23f; digits[1].y = 0.0f;
digits[2].x = 35.1f; digits[2].y = 0.0f;
digits[3].x = 16.7f; digits[3].y = 0.0f;
digits[4].x = 14.83f; digits[4].y = 0.0f;
digits[5].x = 55.1f; digits[5].y = 0.0f;
digits[6].x = 11.7f; digits[6].y = 0.0f;
digits[7].x = 18.83f; digits[7].y = 0.0f;
cudaMalloc((void**)&data, sizeof(cufftComplex)*8);
cudaMemcpy(data, digits, sizeof(cufftComplex)*8, cudaMemcpyHostToDevice);
if (cufftPlan1d(&plan, 8, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
fprintf(stderr, "Cuda: cufftPlan1d CUFFT_C2C failed\n");
return 1;
}
if (cufftExecC2C(plan, data, data, CUFFT_FORWARD) != CUFFT_SUCCESS) {
fprintf(stderr, "Cuda: cufftExecC2C CUFFT_FORWARD failed\n");
return 1;
}
if (cudaMalloc((void**)&h_data, sizeof(cufftComplex)*8) != cudaSuccess) {
fprintf(stderr, "Cuda: cudaMalloc((void**)&h_data failed\n");
return 1;
}
cudaMemcpy(h_data, data, sizeof(cufftComplex)*8, cudaMemcpyDeviceToHost);
printf("\nOriginal:\n");
for(int i = 0; i < 8; ++i){
printf("\nRe:%2.5f Im:%2.5f", digits[i].x, digits[i].y);
}
printf("\n\n1D-FFT:\n");
for(int i = 0; i < 8; ++i){
printf("\nRe:%2.5f Im:%2.5f", h_data[i].x, h_data[i].y);
}
cudaFree(digits);
cudaFree(data);
cudaFree(h_data);
cufftDestroy(plan);
}
You basic problem is improper mixing of host and device memory pointers. You have assigned the address of a device memory allocation (using cudaMalloc) to
h_data, but are trying to use it as a pointer to an address in host memory. That won’t work and is producing the host segmentation fault you are seeing. Your example should look something like:Note that you should use plain
mallocor the C++newoperator to allocate host side memory rather thancudaMallocHost, unless you understand very well what the latter API does and why you are using it.