1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #pragma once
19 #if !defined( CLFFT_TESTCONSTANTS_H )
20 #define CLFFT_TESTCONSTANTS_H
21 
22 #include "clFFT.h"
23 #include <string>
24 #include <stdexcept>
25 
26 //Pre-callback function strings
// OpenCL C source text for the pre-callback mulval_pre operating on float2 data.
// The callback reads one float from userdata at index `offset`, multiplies the
// float2 element at in[offset] by it and returns the product.
// Presumably turned into a string with STRINGIFY(PRE_MULVAL); the bare \n tokens
// then become newline escapes inside the resulting literal.
#define PRE_MULVAL float2 mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
				{ \n \
				float scalar = *((__global float*)userdata + offset); \n \
				float2 ret = *((__global float2*)in + offset) * scalar; \n \
				return ret; \n \
				}
33 
// OpenCL C source text for a pre-callback whose user data is a struct.
// The text first declares USER_DATA (two floats, scalar1 and scalar2) inside the
// kernel source, then defines mulval_pre, which takes the USER_DATA element at
// userdata[offset], multiplies scalar1 by scalar2 and scales the float2 element
// at in[offset] by that product.
// The host-side USER_DATA typedef in this header has the same two float fields.
#define PRE_MULVAL_UDT typedef struct USER_DATA  \
					   {  \
						float scalar1;  \
						float scalar2;  \
						} USER_DATA; \n \
					float2 mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
					{ \n \
					__global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
					float scalar = data->scalar1 * data->scalar2; \n \
					float2 ret = *((__global float2*)in + offset) * scalar; \n \
					return ret; \n \
					}
46 
// OpenCL C source text for the double-precision variant of the pre-callback.
// mulval_pre reads one double from userdata at index `offset`, multiplies the
// double2 element at in[offset] by it and returns the product.
#define PRE_MULVAL_DP double2 mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
				{ \n \
				double scalar = *((__global double*)userdata + offset); \n \
				double2 ret = *((__global double2*)in + offset) * scalar; \n \
				return ret; \n \
				}
53 
// OpenCL C source text for a pre-callback that takes two separate float input
// buffers (inRe and inIm). mulval_pre reads one float scalar from userdata at
// index `offset`, scales inRe[offset] into ret.x and inIm[offset] into ret.y,
// and returns the float2.
#define PRE_MULVAL_PLANAR float2 mulval_pre(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
				{ \n \
				float scalar = *((__global float*)userdata + offset); \n \
				float2 ret; \n \
				ret.x = *((__global float*)inRe + offset) * scalar; \n \
				ret.y = *((__global float*)inIm + offset) * scalar; \n \
				return ret; \n \
				}
62 
// OpenCL C source text for the double-precision pre-callback with two separate
// input buffers (inRe and inIm). mulval_pre reads one double scalar from userdata
// at index `offset`, scales inRe[offset] into ret.x and inIm[offset] into ret.y,
// and returns the double2.
#define PRE_MULVAL_PLANAR_DP double2 mulval_pre(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
				{ \n \
				double scalar = *((__global double*)userdata + offset); \n \
				double2 ret; \n \
				ret.x = *((__global double*)inRe + offset) * scalar; \n \
				ret.y = *((__global double*)inIm + offset) * scalar; \n \
				return ret; \n \
				}
71 
// OpenCL C source text for a pre-callback on scalar float input.
// mulval_pre reads one float from userdata at index `offset`, multiplies the
// float at in[offset] by it and returns the product.
#define PRE_MULVAL_REAL float mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
				{ \n \
				float scalar = *((__global float*)userdata + offset); \n \
				float ret = *((__global float*)in + offset) * scalar; \n \
				return ret; \n \
				}
78 
// OpenCL C source text for a pre-callback on scalar double input.
// mulval_pre reads one double from userdata at index `offset`, multiplies the
// double at in[offset] by it and returns the product.
#define PRE_MULVAL_REAL_DP double mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
				{ \n \
				double scalar = *((__global double*)userdata + offset); \n \
				double ret = *((__global double*)in + offset) * scalar; \n \
				return ret; \n \
				}
85 
//Precallback test for LDS - works when 1 WI works on one input element
// OpenCL C source text for a pre-callback that also receives a __local buffer.
// Each work-item stores userdata[offset] into local memory at its local id,
// waits on a local-memory barrier, then averages its own value with the values
// stored just before and just after it (0 is used when offset <= 0, or when
// offset >= get_global_size(0)). The float2 element at in[offset] is scaled by
// that average and returned.
// NOTE(review): the upper bound here is `offset >= get_global_size(0)`, whereas
// POST_MULVAL_LDS uses `get_global_size(0) - 1`; with this bound the work-item at
// the last offset reads lds + 1. Confirm against the host-side reference before
// changing the kernel text.
#define PRE_MULVAL_LDS float2 mulval_pre(__global void* in, uint offset, __global void* userdata, __local void* localmem)\n \
				{ \n \
				uint lid = get_local_id(0); \n \
				__local float* lds = (__local float*)localmem + lid; \n \
				lds[0] = *((__global float*)userdata + offset); \n \
				barrier(CLK_LOCAL_MEM_FENCE); \n \
				float prev = offset <= 0 ? 0 : *(lds - 1); \n \
				float next = offset >= get_global_size(0) ? 0 : *(lds + 1); \n \
				float avg = (prev + *lds + next)/3.0f;\n \
				float2 ret = *((__global float2*)in + offset) * avg; \n \
				return ret; \n \
				}
99 
100 //Post-callback function strings
// OpenCL C source text for the post-callback mulval_post operating on float2 data.
// The callback reads one float from userdata at index `outoffset` and stores
// fftoutput multiplied by that scalar into the float2 element output[outoffset].
// Presumably turned into a string with STRINGIFY(POST_MULVAL); the bare \n tokens
// then become newline escapes inside the resulting literal.
#define POST_MULVAL void mulval_post(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput )\n \
				{ \n \
				float scalar = *((__global float*)userdata + outoffset); \n \
				*((__global float2*)output + outoffset) = fftoutput * scalar; \n \
				}
106 
// OpenCL C source text for the double-precision post-callback.
// mulval_post reads one double from userdata at index `outoffset` and stores
// fftoutput multiplied by that scalar into the double2 element output[outoffset].
#define POST_MULVAL_DP void mulval_post(__global void *output, uint outoffset, __global void *userdata, double2 fftoutput )\n \
				{ \n \
				double scalar = *((__global double*)userdata + outoffset); \n \
				*((__global double2*)output + outoffset) = fftoutput * scalar; \n \
				}
112 
// OpenCL C source text for a post-callback that writes to two separate float
// output buffers (outputRe and outputIm). mulval_post reads one float scalar from
// userdata at index `outoffset` and stores fftoutputRe * scalar and
// fftoutputIm * scalar at that index of the respective buffers.
// Note: outoffset is declared size_t here, while the non-planar post-callbacks
// in this header declare it uint.
#define POST_MULVAL_PLANAR void mulval_post(__global void *outputRe, __global void *outputIm, size_t outoffset, __global void *userdata, float fftoutputRe, float fftoutputIm )\n \
				{ \n \
				float scalar = *((__global float*)userdata + outoffset); \n \
				*((__global float*)outputRe + outoffset) = fftoutputRe * scalar; \n \
				*((__global float*)outputIm + outoffset) = fftoutputIm * scalar; \n \
				}
119 
// OpenCL C source text for the double-precision post-callback with two separate
// output buffers (outputRe and outputIm). mulval_post reads one double scalar
// from userdata at index `outoffset` and stores fftoutputRe * scalar and
// fftoutputIm * scalar at that index of the respective buffers.
// Note: outoffset is declared size_t here, while the non-planar post-callbacks
// in this header declare it uint.
#define POST_MULVAL_PLANAR_DP void mulval_post(__global void *outputRe, __global void *outputIm, size_t outoffset, __global void *userdata, double fftoutputRe, double fftoutputIm )\n \
				{ \n \
				double scalar = *((__global double*)userdata + outoffset); \n \
				*((__global double*)outputRe + outoffset) = fftoutputRe * scalar; \n \
				*((__global double*)outputIm + outoffset) = fftoutputIm * scalar; \n \
				}
126 
//Postcallback test for LDS - works when 1 WI works on one element.
//Assumes 1D FFT of length 64.
// OpenCL C source text for a post-callback that also receives a __local buffer.
// Work-items whose outoffset is below 16 each copy four consecutive floats from
// userdata (starting at lid*4) into local memory at the same position, i.e.
// 16 * 4 = 64 values in total. The barrier sits outside the `if`, so every
// work-item reaches it. Afterwards each work-item averages the local value at
// outoffset with its two neighbours (0 is used when outoffset <= 0, or when
// outoffset >= get_global_size(0) - 1) and stores fftoutput * avg into the float2
// element output[outoffset].
#define POST_MULVAL_LDS void mulval_post(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput, __local void* localmem)\n \
				{ \n \
				uint lid = get_local_id(0); \n \
				__local float* lds; \n \
				if (outoffset < 16) \n \
				{ \n \
				lds  = (__local float*)localmem + lid*4; \n \
				lds[0] = *((__global float*)userdata + lid*4); \n \
				lds[1] = *((__global float*)userdata + lid*4 + 1); \n \
				lds[2] = *((__global float*)userdata + lid*4 + 2); \n \
				lds[3] = *((__global float*)userdata + lid*4 + 3); \n \
				} \n \
				barrier(CLK_LOCAL_MEM_FENCE); \n \
				lds  = (__local float*)localmem + outoffset; \n \
				float prev = outoffset <= 0 ? 0 : *(lds - 1); \n \
				float next = outoffset >= (get_global_size(0) - 1) ? 0 : *(lds + 1); \n \
				float avg = (prev + *lds + next)/3.0f; \n \
				*((__global float2*)output + outoffset) = fftoutput * avg; \n \
				}
148 
// OpenCL C source text for a post-callback on scalar float output.
// mulval_post reads one float from userdata at index `outoffset` and stores
// fftoutput multiplied by that scalar into the float element output[outoffset].
#define POST_MULVAL_REAL void mulval_post(__global void *output, uint outoffset, __global void *userdata, float fftoutput )\n \
				{ \n \
				float scalar = *((__global float*)userdata + outoffset); \n \
				*((__global float*)output + outoffset) = fftoutput * scalar; \n \
				}
154 
// OpenCL C source text for a post-callback on scalar double output.
// mulval_post reads one double from userdata at index `outoffset` and stores
// fftoutput multiplied by that scalar into the double element output[outoffset].
#define POST_MULVAL_REAL_DP void mulval_post(__global void *output, uint outoffset, __global void *userdata, double fftoutput )\n \
				{ \n \
				double scalar = *((__global double*)userdata + outoffset); \n \
				*((__global double*)output + outoffset) = fftoutput * scalar; \n \
				}
160 
// Host-side user-data record with two float scalars.
// It has the same two fields as the USER_DATA struct declared inside the
// PRE_MULVAL_UDT kernel text, whose callback multiplies scalar1 by scalar2.
typedef struct USER_DATA
				{
				float scalar1;
				float scalar2;
				} USER_DATA;
166 
// CALLBCKSTR turns its arguments into a string literal exactly as written.
// STRINGIFY adds one level of indirection so that macro arguments (such as the
// callback macros above) are expanded first and the expansion is stringified.
#define CALLBCKSTR(...) #__VA_ARGS__
#define STRINGIFY(...) 	CALLBCKSTR(__VA_ARGS__)
169 
// Named indices; presumably REAL/IMAG select the real and imaginary parts and
// dimx/dimy/dimz select a dimension -- confirm against the callers.
enum { REAL=0, IMAG=1 };
enum { dimx=0, dimy=1, dimz=2 };
// Number of dimensions: one_d = 1, two_d = 2, three_d = 3.
enum fftw_dim { one_d=1, two_d=2, three_d=3 };
// Buffer counts: 1 for a single interleaved buffer, 2 for separate real and
// imaginary buffers.
enum { one_interleaved_buffer=1, separate_real_and_imaginary_buffers=2 };
// Readable names for boolean values (true / false) of an intermediate-buffer choice.
const bool use_explicit_intermediate_buffer = true;
const bool autogenerate_intermediate_buffer = false;
// Readable names for boolean values; presumably the two values that
// comparison_type can take -- confirm against its definition.
const bool pointwise_compare = true;
const bool root_mean_square = false;
// Defined in another translation unit.
extern bool comparison_type;
extern bool suppress_output;

// this thing is horrible. horrible! i am not proud.
extern size_t super_duper_global_seed;
183 
// Lengths that are powers of 2: 2^5, 2^10, 2^13 and 2^12.
// The dlarge* values in each group are the ones returned by MaxLength2D<double>.
const size_t small2 = 32;
const size_t normal2 = 1024;
const size_t large2 = 8192;
const size_t dlarge2 = 4096;

// Lengths that are powers of 3: 3^2, 3^6, 3^8 and 3^7.
const size_t small3 = 9;
const size_t normal3 = 729;
const size_t large3 = 6561;
const size_t dlarge3 = 2187;

// Lengths that are powers of 5: 5^2, 5^4, 5^6 and 5^5.
const size_t small5 = 25;
const size_t normal5 = 625;
const size_t large5 = 15625;
const size_t dlarge5 = 3125;

// Lengths that are powers of 7: 7^2, 7^3, 7^5 and 7^4.
const size_t small7 = 49;
const size_t normal7 = 343;
const size_t large7 = 16807;
const size_t dlarge7 = 2401;

// Miscellaneous named sizes and counts.
const size_t large_batch_size = 2048;
const size_t do_not_output_any_mismatches = 0;
const size_t default_number_of_mismatches_to_output = 10;
const size_t max_dimension = 3;
208 
// Very small positive magnitude (1e-100); presumably a floor below which values
// are treated as zero when comparing results -- confirm against its users.
const double magnitude_lower_limit = 1.0E-100;

// Comparison tolerances; defined in another translation unit.
extern float tolerance;
extern double rmse_tolerance;
213 
214 extern cl_device_type g_device_type;
215 extern cl_int g_device_id;
216 extern cl_int g_platform_id;
217 
218 extern size_t number_of_random_tests;
219 extern time_t random_test_parameter_seed;
220 extern bool verbose;
221 
222 void handle_exception( const std::exception& except );
223 size_t max_mem_available_on_cl_device(size_t device_index);
224 
// Creating this template function and specializations to control the length inputs to the tests;
// these should be removed once the size restriction on transform lengths (SP 2^24 and DP 2^22)
// is removed; the dlarge* constants can then be removed

// Primary template: returns 0 for every element type that has no explicit
// specialization. The radix argument is ignored here.
template <typename T>
inline size_t MaxLength2D(size_t rad)
{
	(void)rad;	// unused in the generic fallback
	return 0;
}
234 
235 template <>
236 inline size_t MaxLength2D<float>(size_t rad)
237 {
238 	switch(rad)
239 	{
240 	case 2:		return large2;
241 	case 3:		return large3;
242 	case 5:		return large5;
243 	case 7:		return large7;
244 	default:	return 0;
245 	}
246 }
247 
248 template <>
249 inline size_t MaxLength2D<double>(size_t rad)
250 {
251 	switch(rad)
252 	{
253 	case 2:		return dlarge2;
254 	case 3:		return dlarge3;
255 	case 5:		return dlarge5;
256 	case 7:		return dlarge7;
257 	default:	return 0;
258 	}
259 }
260 
261 
262 #endif
263