1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_LOOPMACROS_H
21 #define JDFTX_CORE_LOOPMACROS_H
22 
23 //! @addtogroup Utilities
24 //! @{
25 
26 //! @file LoopMacros.h
27 
28 
29 //! One thread of a loop over real space (see applyFunc_r_sub for example)
30 #define THREAD_rLoop(code) \
31 	size_t i=iStart; \
32 	vector3<int> iv( \
33 		i / (S[2]*S[1]), \
34 		(i/S[2]) % S[1], \
35 		i % S[2] ); \
36 	while(i<iStop) \
37 	{	\
38 		code \
39 		\
40 		i++; if(i==iStop) break; \
41 		iv[2]++; \
42 		if(iv[2]==S[2]) \
43 		{	iv[2]=0; \
44 			iv[1]++; \
45 			if(iv[1]==S[1]) \
46 			{	iv[1] = 0; \
47 				iv[0]++; \
48 			} \
49 		} \
50 	}
51 
52 //! The GPU equivalent of THREAD_rLoop
53 #define COMPUTE_rIndices \
54 	vector3<int> iv( \
55 		threadIdx.z + zBlock * blockDim.z, \
56 		kernelIndex(y), \
57 		kernelIndex(x) ); \
58 	if(iv[0]>=S[0] || iv[1]>=S[1] || iv[2]>=S[2]) return; \
59 	size_t i = iv[2] + S[2]*size_t(iv[1] + S[1]*iv[0]);
60 
61 
62 //! One thread of a loop over G-space (see L_sub in PHLO.cpp for example)
63 #define THREAD_fullGspaceLoop(code) \
64 	size_t i=iStart; \
65 	vector3<int> iG( i / (S[2]*S[1]), (i/S[2]) % S[1], i % S[2] ); \
66 	for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j]; \
67 	while(true) \
68 	{	\
69 		code \
70 		\
71 		i++; if(i==iStop) break; \
72 		iG[2]++; \
73 		if(2*iG[2]>S[2]) iG[2]-=S[2]; \
74 		if(iG[2]==0) \
75 		{	iG[1]++; \
76 			if(2*iG[1]>S[1]) iG[1]-=S[1]; \
77 			if(iG[1]==0) \
78 			{	iG[0]++; \
79 				if(2*iG[0]>S[0]) iG[0]-=S[0]; \
80 			} \
81 		} \
82 	}
83 
84 //! The GPU equivalent of THREAD_fullGspaceLoop
85 //! NOTE: x and z are swapped in kernel indices since x is contiguous on GPU
86 #define COMPUTE_fullGindices \
87 	vector3<int> iG( \
88 		zBlock * blockDim.z + threadIdx.z, \
89 		kernelIndex(y), \
90 		kernelIndex(x) ); \
91 	if(iG[0]>=S[0] || iG[1]>=S[1] || iG[2]>=S[2]) return; \
92 	size_t i = iG[2] + S[2]*size_t(iG[1] + S[1]*iG[0]); \
93 	for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j];
94 
95 
96 //! One thread of a loop over symmetry-reduced G-space (see applyFuncGsq_sub in Operators.h for example)
97 #define THREAD_halfGspaceLoop(code) \
98 	int size2 = S[2]/2+1; \
99 	size_t i=iStart; \
100 	vector3<int> iG( i / (size2*S[1]), (i/size2) % S[1], i % size2 ); \
101 	for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j]; \
102 	while(i<iStop) \
103 	{	\
104 		code \
105 		\
106 		i++; if(i==iStop) break; \
107 		iG[2]++; \
108 		if(iG[2]==size2) \
109 		{	iG[2]=0; \
110 			iG[1]++; \
111 			if(2*iG[1]>S[1]) iG[1]-=S[1]; \
112 			if(iG[1]==0) \
113 			{	iG[0]++; \
114 				if(2*iG[0]>S[0]) iG[0]-=S[0]; \
115 			} \
116 		} \
117 	}
118 
119 //! The GPU equivalent of THREAD_halfGspaceLoop
120 //! NOTE: x and z are swapped in kernel indices since x is contiguous on GPU
121 #define COMPUTE_halfGindices \
122 	int size2 = S[2]/2+1; \
123 	vector3<int> iG( \
124 		threadIdx.z + zBlock * blockDim.z, \
125 		kernelIndex(y), \
126 		kernelIndex(x) ); \
127 	if(iG[0]>=S[0] || iG[1]>=S[1] || iG[2]>=size2) return; \
128 	size_t i = iG[2] + size2*size_t(iG[1] + S[1]*iG[0]); \
129 	for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j];
130 
131 //! Determine if this is a nyquist component
132 //! NOTE: no component is nyquist for odd sizes in this convention
133 #define IS_NYQUIST ( (!(2*iG[0]-S[0])) | (!(2*iG[1]-S[1])) | (!(2*iG[2]-S[2])) )
134 
135 //! @}
136 #endif // JDFTX_CORE_LOOPMACROS_H
137