// Creates one batched 3D complex-to-complex cuFFT plan per GPU and attaches
// each plan to that GPU's stream.
//
// Reads the file-level nPerCall, nGPU, totalFFT, dim and streams[], and writes
// fftPlanMany[0..nGPU-1]. Precision (CUFFT_C2C vs CUFFT_Z2Z) is chosen from
// sizeof(Real). Calls exit(1) when the batch configuration is rejected, a
// device cannot be selected, or a plan cannot be created. A cufftSetStream
// failure is only reported. Device 0 is selected again before returning.
void inline initFFTs()
{
    // Each plan is batched over nPerCall transforms and there is one plan per
    // GPU; reject configurations where that product exceeds totalFFT.
    if ((nPerCall * nGPU) > totalFFT) {
        cerr << "nPerCall*nGPU must not exceed totalFFT" << endl;
        exit(1);
    }

    // Same plan shape for every device; only the transform type depends on Real.
    const cufftType fftType = (sizeof(Real) == sizeof(float)) ? CUFFT_C2C : CUFFT_Z2Z;

    // Create a batched 3D plan on each device.
    for (int sid = 0; sid < nGPU; sid++) {
        // The plan is created on the current device, so a failed device switch
        // must not be ignored.
        cudaError_t devErr = cudaSetDevice(sid);
        if (devErr != cudaSuccess) {
            cerr << "cudaSetDevice(" << sid << ") failed: "
                 << cudaGetErrorString(devErr) << endl;
            exit(1);
        }

        // NULL inembed/onembed: stride/dist arguments are ignored by cuFFT and
        // the default contiguous layout is used.
        int ret = cufftPlanMany(&fftPlanMany[sid], 3, dim, NULL, 1, 0, NULL, 1, 0,
                                fftType, nPerCall);
        if (ret != CUFFT_SUCCESS) {
            cerr << "cufftPlanMany failed on device " << sid
                 << "! ret code " << ret << endl;
            exit(1);
        }

        if (cufftSetStream(fftPlanMany[sid], streams[sid])) {
            cerr << "cufftSetStream failed!" << endl;
        }
    }

    // Restore device 0 as the current device for the caller.
    cudaError_t restoreErr = cudaSetDevice(0);
    if (restoreErr != cudaSuccess) {
        cerr << "cudaSetDevice(0) failed: " << cudaGetErrorString(restoreErr) << endl;
    }
}
// Prints a symbolic name for a cuFFT status code to cerr.
//
// ret: a cufftResult value passed as int. Codes without a case here
// (including CUFFT_SUCCESS) are reported as "UNKNOWN ret code <n>".
inline void _FFTerror(int ret) {
    switch (ret) {
        case CUFFT_SETUP_FAILED:   cerr << "SETUP_FAILED" << endl; break;
        case CUFFT_INVALID_PLAN:   cerr << "INVALID_PLAN" << endl; break;
        case CUFFT_INVALID_VALUE:  cerr << "INVALID_VALUE" << endl; break;
        case CUFFT_EXEC_FAILED:    cerr << "EXEC_FAILED" << endl; break;
        // Additional cufftResult codes that previously fell through to UNKNOWN.
        case CUFFT_ALLOC_FAILED:   cerr << "ALLOC_FAILED" << endl; break;
        case CUFFT_INVALID_TYPE:   cerr << "INVALID_TYPE" << endl; break;
        case CUFFT_INTERNAL_ERROR: cerr << "INTERNAL_ERROR" << endl; break;
        case CUFFT_INVALID_SIZE:   cerr << "INVALID_SIZE" << endl; break;
        default: cerr << "UNKNOWN ret code " << ret << endl;
    }
}
// Function overloads (not template specializations) that pick the single- or double-precision cuFFT call from the pointer type (float, double)
// Single-precision inverse complex-to-complex FFT.
//
// myFFTplan: cuFFT plan handle to execute.
// A: input buffer, reinterpreted as cufftComplex.
// B: output buffer, reinterpreted as cufftComplex.
// On any status other than CUFFT_SUCCESS the code is printed and the process
// exits with status 1.
inline void cinverseFFT_(cufftHandle myFFTplan, float* A, float* B ) {
    cufftComplex* src = reinterpret_cast<cufftComplex*>(A);
    cufftComplex* dst = reinterpret_cast<cufftComplex*>(B);

    int status = cufftExecC2C(myFFTplan, src, dst, CUFFT_INVERSE);
    if (status == CUFFT_SUCCESS) {
        return;
    }

    cerr << "C2C FFT failed! ret code " << status << endl;
    _FFTerror(status);
    exit(1);
}
// Double-precision inverse complex-to-complex FFT.
//
// myFFTplan: cuFFT plan handle to execute.
// A: input buffer, reinterpreted as cufftDoubleComplex.
// B: output buffer, reinterpreted as cufftDoubleComplex.
// On any status other than CUFFT_SUCCESS the code is printed and the process
// exits with status 1.
inline void cinverseFFT_(cufftHandle myFFTplan, double *A, double *B) {
    cufftDoubleComplex* src = reinterpret_cast<cufftDoubleComplex*>(A);
    cufftDoubleComplex* dst = reinterpret_cast<cufftDoubleComplex*>(B);

    int status = cufftExecZ2Z(myFFTplan, src, dst, CUFFT_INVERSE);
    if (status == CUFFT_SUCCESS) {
        return;
    }

    cerr << "Z2Z FFT failed! ret code " << status << endl;
    _FFTerror(status);
    exit(1);
}
// Single-precision forward complex-to-complex FFT.
//
// myFFTplan: cuFFT plan handle to execute.
// A: input buffer, reinterpreted as cufftComplex.
// B: output buffer, reinterpreted as cufftComplex.
// On any status other than CUFFT_SUCCESS the numeric code and its symbolic
// name are printed and the process exits with status 1.
inline void cforwardFFT_(cufftHandle myFFTplan, float* A, float* B ) {
    int ret = cufftExecC2C(myFFTplan, (cufftComplex*)A, (cufftComplex*)B, CUFFT_FORWARD);
    if (ret != CUFFT_SUCCESS) {
        // Same report format as the inverse overload: include the raw code.
        cerr << "C2C FFT failed! ret code " << ret << endl;
        _FFTerror(ret);
        exit(1);
    }
}
// Double-precision forward complex-to-complex FFT.
//
// myFFTplan: cuFFT plan handle to execute.
// A: input buffer, reinterpreted as cufftDoubleComplex.
// B: output buffer, reinterpreted as cufftDoubleComplex.
// On any status other than CUFFT_SUCCESS the numeric code and its symbolic
// name are printed and the process exits with status 1.
inline void cforwardFFT_(cufftHandle myFFTplan, double *A, double *B) {
    int ret = cufftExecZ2Z(myFFTplan, (cufftDoubleComplex*)A, (cufftDoubleComplex*)B, CUFFT_FORWARD);
    if (ret != CUFFT_SUCCESS) {
        // Same report format as the inverse overload: include the raw code.
        cerr << "Z2Z FFT failed! ret code " << ret << endl;
        _FFTerror(ret);
        exit(1);
    }
}