1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3
4 This file is part of JDFTx.
5
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19
20 #include <core/LoopMacros.h>
21 #include <core/GpuKernelUtils.h>
22 #include <fluid/TranslationOperator_internal.h>
23
// GPU kernel wrapper around constantSplineTaxpy_calc.
//   zBlock     : index of the z-block handled by this launch (supplied by the host loop)
//   S          : integer 3-vector forwarded to the calc routine (presumably the grid
//                dimensions -- confirm against the calc routine's declaration)
//   alpha,x,y  : scale factor and input / output arrays forwarded unchanged
//   Tint       : integer 3-vector forwarded unchanged
__global__
void constantSplineTaxpy_kernel(
	int zBlock,
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint)
{
	// The macro supplies the per-thread names i and iv used in the call below.
	// NOTE(review): it is expected to read zBlock and S by name; its definition
	// is not in this file, so confirm before renaming either parameter.
	COMPUTE_rIndices
	constantSplineTaxpy_calc(i, iv, S, alpha, x, y, Tint);
}
// Host-side launcher for constantSplineTaxpy_kernel.
// Builds a GpuLaunchConfig3D from the kernel and S, issues one kernel launch per
// z-block index in [0, zBlockMax), then calls gpuErrorCheck() once after all launches.
// x and y are passed straight through to the kernel (presumably device pointers --
// confirm with callers).
void constantSplineTaxpy_gpu(
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint)
{
	GpuLaunchConfig3D launchCfg(constantSplineTaxpy_kernel, S);
	for(int iBlock = 0; iBlock < launchCfg.zBlockMax; ++iBlock)
	{
		// No stream argument is given, so every launch goes to the default stream.
		constantSplineTaxpy_kernel<<<launchCfg.nBlocks, launchCfg.nPerBlock>>>(
			iBlock, S, alpha, x, y, Tint);
	}
	gpuErrorCheck();
}
37
// GPU kernel wrapper around linearSplineTaxpy_calc.
//   zBlock     : index of the z-block handled by this launch (supplied by the host loop)
//   S          : integer 3-vector forwarded to the calc routine (presumably the grid
//                dimensions -- confirm against the calc routine's declaration)
//   alpha,x,y  : scale factor and input / output arrays forwarded unchanged
//   Tint,Tfrac : integer and real 3-vectors forwarded unchanged
__global__
void linearSplineTaxpy_kernel(
	int zBlock,
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint,
	const vector3<> Tfrac)
{
	// The macro supplies the per-thread names i and iv used in the call below.
	// NOTE(review): it is expected to read zBlock and S by name; its definition
	// is not in this file, so confirm before renaming either parameter.
	COMPUTE_rIndices
	linearSplineTaxpy_calc(i, iv, S, alpha, x, y, Tint, Tfrac);
}
// Host-side launcher for linearSplineTaxpy_kernel.
// Builds a GpuLaunchConfig3D from the kernel and S, issues one kernel launch per
// z-block index in [0, zBlockMax), then calls gpuErrorCheck() once after all launches.
// x and y are passed straight through to the kernel (presumably device pointers --
// confirm with callers).
void linearSplineTaxpy_gpu(
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint,
	const vector3<> Tfrac)
{
	GpuLaunchConfig3D launchCfg(linearSplineTaxpy_kernel, S);
	for(int iBlock = 0; iBlock < launchCfg.zBlockMax; ++iBlock)
	{
		// No stream argument is given, so every launch goes to the default stream.
		linearSplineTaxpy_kernel<<<launchCfg.nBlocks, launchCfg.nPerBlock>>>(
			iBlock, S, alpha, x, y, Tint, Tfrac);
	}
	gpuErrorCheck();
}
51
52
53
// GPU kernel wrapper around fourierTranslate_calc.
//   zBlock : index of the z-block handled by this launch (supplied by the host loop)
//   S      : integer 3-vector forwarded to the calc routine (presumably the grid
//            dimensions -- confirm against the calc routine's declaration)
//   Gt     : real 3-vector forwarded unchanged
//   xTilde : complex array forwarded to the calc routine; it is passed as a
//            non-const pointer, so it may be modified there
__global__
void fourierTranslate_kernel(
	int zBlock,
	const vector3<int> S,
	const vector3<> Gt,
	complex* xTilde)
{
	// The macro supplies the per-thread names i and iG used in the call below.
	// NOTE(review): it is expected to read zBlock and S by name; its definition
	// is not in this file, so confirm before renaming either parameter.
	COMPUTE_halfGindices
	fourierTranslate_calc(i, iG, S, Gt, xTilde);
}
// Host-side launcher for fourierTranslate_kernel.
// Builds a GpuLaunchConfigHalf3D from the kernel and S, issues one kernel launch per
// z-block index in [0, zBlockMax), then calls gpuErrorCheck() once after all launches.
// xTilde is passed straight through to the kernel (presumably a device pointer --
// confirm with callers).
void fourierTranslate_gpu(const vector3<int> S, const vector3<> Gt, complex* xTilde)
{
	GpuLaunchConfigHalf3D glc(fourierTranslate_kernel, S);
	for(int zBlock = 0; zBlock < glc.zBlockMax; zBlock++)
	{
		// No stream argument is given, so every launch goes to the default stream.
		fourierTranslate_kernel<<<glc.nBlocks, glc.nPerBlock>>>(zBlock, S, Gt, xTilde);
	}
	// Kernel launches do not return an error code themselves. Check after the
	// launches, as the other *_gpu launchers in this file do, so that a failure
	// is reported here instead of at some later, unrelated call.
	gpuErrorCheck();
}
64