1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #include <core/LoopMacros.h>
21 #include <core/GpuKernelUtils.h>
22 #include <fluid/TranslationOperator_internal.h>
23 
// GPU kernel wrapper around constantSplineTaxpy_calc.
// COMPUTE_rIndices is a macro defined outside this file; it is expected to
// supply the `i` and `iv` used in the call below (presumably derived from
// zBlock, S and the thread/block indices -- confirm against its definition).
// Every other argument is forwarded unchanged to the per-point routine.
__global__
void constantSplineTaxpy_kernel(
	int zBlock,
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint)
{
	COMPUTE_rIndices
	constantSplineTaxpy_calc(i, iv, S, alpha, x, y, Tint);
}
// Host-side launcher for constantSplineTaxpy_kernel.
// Builds a GpuLaunchConfig3D from the kernel and S, issues one kernel launch
// per z-block index in [0, zBlockMax), and finally calls gpuErrorCheck().
// x and y are handed straight to the kernel (presumably device pointers --
// confirm with callers).
void constantSplineTaxpy_gpu(
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint)
{
	GpuLaunchConfig3D launchCfg(constantSplineTaxpy_kernel, S);
	int iBlock = 0;
	while(iBlock < launchCfg.zBlockMax)
	{
		constantSplineTaxpy_kernel<<<launchCfg.nBlocks, launchCfg.nPerBlock>>>(iBlock, S, alpha, x, y, Tint);
		++iBlock;
	}
	gpuErrorCheck();
}
37 
// GPU kernel wrapper around linearSplineTaxpy_calc.
// COMPUTE_rIndices is a macro defined outside this file; it is expected to
// supply the `i` and `iv` used in the call below (presumably derived from
// zBlock, S and the thread/block indices -- confirm against its definition).
// Tint and Tfrac, along with the remaining arguments, are forwarded unchanged
// to the per-point routine.
__global__
void linearSplineTaxpy_kernel(
	int zBlock,
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint,
	const vector3<> Tfrac)
{
	COMPUTE_rIndices
	linearSplineTaxpy_calc(i, iv, S, alpha, x, y, Tint, Tfrac);
}
// Host-side launcher for linearSplineTaxpy_kernel.
// Builds a GpuLaunchConfig3D from the kernel and S, issues one kernel launch
// per z-block index in [0, zBlockMax), and finally calls gpuErrorCheck().
// x and y are handed straight to the kernel (presumably device pointers --
// confirm with callers).
void linearSplineTaxpy_gpu(
	const vector3<int> S,
	double alpha,
	const double* x,
	double* y,
	const vector3<int> Tint,
	const vector3<> Tfrac)
{
	GpuLaunchConfig3D launchCfg(linearSplineTaxpy_kernel, S);
	int iBlock = 0;
	while(iBlock < launchCfg.zBlockMax)
	{
		linearSplineTaxpy_kernel<<<launchCfg.nBlocks, launchCfg.nPerBlock>>>(iBlock, S, alpha, x, y, Tint, Tfrac);
		++iBlock;
	}
	gpuErrorCheck();
}
51 
52 
53 
// GPU kernel wrapper around fourierTranslate_calc.
// COMPUTE_halfGindices is a macro defined outside this file; it is expected
// to supply the `i` and `iG` used in the call below (presumably derived from
// zBlock, S and the thread/block indices -- confirm against its definition).
// S, Gt and xTilde are forwarded unchanged to the per-point routine.
__global__
void fourierTranslate_kernel(
	int zBlock,
	const vector3<int> S,
	const vector3<> Gt,
	complex* xTilde)
{
	COMPUTE_halfGindices
	fourierTranslate_calc(i, iG, S, Gt, xTilde);
}
// Host-side launcher for fourierTranslate_kernel.
// Builds a GpuLaunchConfigHalf3D from the kernel and S and issues one kernel
// launch per z-block index in [0, zBlockMax). xTilde is handed straight to
// the kernel (presumably a device pointer -- confirm with callers).
void fourierTranslate_gpu(const vector3<int> S, const vector3<> Gt, complex* xTilde)
{	GpuLaunchConfigHalf3D glc(fourierTranslate_kernel, S);
	for(int zBlock=0; zBlock<glc.zBlockMax; zBlock++)
		fourierTranslate_kernel<<<glc.nBlocks,glc.nPerBlock>>>(zBlock, S, Gt, xTilde);
	// Kernel launches do not report failures by themselves; check here, as the
	// other launchers in this file do, so that errors are not silently dropped.
	gpuErrorCheck();
}
64