/* ************************************************************************
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ************************************************************************/
16
17
18 #pragma once
19 #if !defined( CLFFT_TESTCONSTANTS_H )
20 #define CLFFT_TESTCONSTANTS_H
21
#include "clFFT.h"

#include <cstddef>
#include <ctime>
#include <stdexcept>
#include <string>
25
//Pre-callback function strings
//
// PRE_MULVAL: OpenCL C source for a pre-callback named mulval_pre operating on
// interleaved single-precision complex input. It reads one float from userdata
// at index `offset`, reads one float2 from `in` at the same index, and returns
// their product.
//
// The macro body is source text, not host code: it is meant to be turned into a
// string literal (see STRINGIFY in this file). The `\n` that precedes each line
// continuation survives preprocessing as the two characters backslash + n and
// becomes a newline escape once the text is stringified, so do not remove it
// and do not change the token spelling.
#define PRE_MULVAL float2 mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
    { \n \
    float scalar = *((__global float*)userdata + offset); \n \
    float2 ret = *((__global float2*)in + offset) * scalar; \n \
    return ret; \n \
    }
33
// PRE_MULVAL_UDT: OpenCL C source for a pre-callback named mulval_pre whose
// userdata is an array of a user-defined struct instead of plain floats.
// The kernel text declares its own USER_DATA type (two floats), indexes
// userdata by `offset`, multiplies scalar1 by scalar2 and scales the float2
// read from `in` at `offset` by that product.
//
// The struct declared inside the kernel text has the same two float fields as
// the host-side USER_DATA typedef in this file; keep the two in step.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_UDT typedef struct USER_DATA  \
    {  \
    float scalar1;  \
    float scalar2;  \
    } USER_DATA; \n \
    float2 mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
    { \n \
    __global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
    float scalar = data->scalar1 * data->scalar2; \n \
    float2 ret = *((__global float2*)in + offset) * scalar; \n \
    return ret; \n \
    }
46
// PRE_MULVAL_DP: double-precision counterpart of the interleaved pre-callback.
// OpenCL C source for mulval_pre that reads one double from userdata at
// `offset`, one double2 from `in` at `offset`, and returns their product.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_DP double2 mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
    { \n \
    double scalar = *((__global double*)userdata + offset); \n \
    double2 ret = *((__global double2*)in + offset) * scalar; \n \
    return ret; \n \
    }
53
// PRE_MULVAL_PLANAR: single-precision pre-callback for planar input, i.e. the
// real and imaginary parts arrive through two separate pointers (inRe, inIm).
// OpenCL C source for mulval_pre that scales both parts at `offset` by the
// float found in userdata at `offset` and returns them packed as a float2
// (x = real, y = imaginary).
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_PLANAR float2 mulval_pre(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
    { \n \
    float scalar = *((__global float*)userdata + offset); \n \
    float2 ret; \n \
    ret.x = *((__global float*)inRe + offset) * scalar; \n \
    ret.y = *((__global float*)inIm + offset) * scalar; \n \
    return ret; \n \
    }
62
// PRE_MULVAL_PLANAR_DP: double-precision pre-callback for planar input.
// OpenCL C source for mulval_pre that scales the doubles read from inRe and
// inIm at `offset` by the double found in userdata at `offset`, returning the
// pair as a double2 (x = real, y = imaginary).
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_PLANAR_DP double2 mulval_pre(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
    { \n \
    double scalar = *((__global double*)userdata + offset); \n \
    double2 ret; \n \
    ret.x = *((__global double*)inRe + offset) * scalar; \n \
    ret.y = *((__global double*)inIm + offset) * scalar; \n \
    return ret; \n \
    }
71
// PRE_MULVAL_REAL: single-precision pre-callback for real-valued input.
// OpenCL C source for mulval_pre that returns the float read from `in` at
// `offset` multiplied by the float read from userdata at `offset`.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_REAL float mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
    { \n \
    float scalar = *((__global float*)userdata + offset); \n \
    float ret = *((__global float*)in + offset) * scalar; \n \
    return ret; \n \
    }
78
// PRE_MULVAL_REAL_DP: double-precision pre-callback for real-valued input.
// OpenCL C source for mulval_pre that returns the double read from `in` at
// `offset` multiplied by the double read from userdata at `offset`.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_REAL_DP double mulval_pre(__global void* in, uint offset, __global void* userdata)\n \
    { \n \
    double scalar = *((__global double*)userdata + offset); \n \
    double ret = *((__global double*)in + offset) * scalar; \n \
    return ret; \n \
    }
85
//Precallback test for LDS - works when 1 WI works on one input element
//
// PRE_MULVAL_LDS: OpenCL C source for a pre-callback named mulval_pre that
// takes an extra __local scratch pointer. Each work-item:
//   1. stores userdata[offset] (a float) into localmem at its local id,
//   2. waits on a local-memory barrier,
//   3. averages its own slot with the slots immediately before and after it
//      (a neighbour is replaced by 0 when the bound test on `offset` fires),
//   4. returns the float2 read from `in` at `offset` scaled by that average.
//
// NOTE(review): the upper bound test is `offset >= get_global_size(0)`, whereas
// POST_MULVAL_LDS compares against `get_global_size(0) - 1`. As written, the
// work-item with the highest offset still reads `lds + 1`. The kernel text is
// left unchanged here because any host-side reference computation must match
// it exactly - confirm the intent before altering either side.
//
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define PRE_MULVAL_LDS float2 mulval_pre(__global void* in, uint offset, __global void* userdata, __local void* localmem)\n \
    { \n \
    uint lid = get_local_id(0); \n \
    __local float* lds = (__local float*)localmem + lid; \n \
    lds[0] = *((__global float*)userdata + offset); \n \
    barrier(CLK_LOCAL_MEM_FENCE); \n \
    float prev = offset <= 0 ? 0 : *(lds - 1); \n \
    float next = offset >= get_global_size(0) ? 0 : *(lds + 1); \n \
    float avg = (prev + *lds + next)/3.0f;\n \
    float2 ret = *((__global float2*)in + offset) * avg; \n \
    return ret; \n \
    }
99
//Post-callback function strings
//
// POST_MULVAL: OpenCL C source for a post-callback named mulval_post operating
// on interleaved single-precision complex output. It multiplies the FFT result
// passed in `fftoutput` by the float read from userdata at `outoffset` and
// stores the product into `output` at `outoffset`. The function returns void;
// the store is its only effect.
//
// The body is source text intended for stringification (see STRINGIFY in this
// file); the `\n` tokens become newline escapes in the resulting string.
#define POST_MULVAL void mulval_post(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput )\n \
    { \n \
    float scalar = *((__global float*)userdata + outoffset); \n \
    *((__global float2*)output + outoffset) = fftoutput * scalar; \n \
    }
106
// POST_MULVAL_DP: double-precision counterpart of the interleaved post-callback.
// OpenCL C source for mulval_post that multiplies the double2 `fftoutput` by
// the double read from userdata at `outoffset` and stores the product into
// `output` at `outoffset`.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define POST_MULVAL_DP void mulval_post(__global void *output, uint outoffset, __global void *userdata, double2 fftoutput )\n \
    { \n \
    double scalar = *((__global double*)userdata + outoffset); \n \
    *((__global double2*)output + outoffset) = fftoutput * scalar; \n \
    }
112
// POST_MULVAL_PLANAR: single-precision post-callback for planar output, i.e.
// real and imaginary parts are written through two separate pointers.
// OpenCL C source for mulval_post that scales fftoutputRe and fftoutputIm by
// the float read from userdata at `outoffset` and stores them into outputRe
// and outputIm at `outoffset`.
// Note that `outoffset` is declared size_t here, while the interleaved and
// real post-callbacks in this file declare it uint.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define POST_MULVAL_PLANAR void mulval_post(__global void *outputRe, __global void *outputIm, size_t outoffset, __global void *userdata, float fftoutputRe, float fftoutputIm )\n \
    { \n \
    float scalar = *((__global float*)userdata + outoffset); \n \
    *((__global float*)outputRe + outoffset) = fftoutputRe * scalar; \n \
    *((__global float*)outputIm + outoffset) = fftoutputIm * scalar; \n \
    }
119
// POST_MULVAL_PLANAR_DP: double-precision post-callback for planar output.
// OpenCL C source for mulval_post that scales fftoutputRe and fftoutputIm by
// the double read from userdata at `outoffset` and stores them into outputRe
// and outputIm at `outoffset`.
// As in the single-precision planar variant, `outoffset` is declared size_t.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define POST_MULVAL_PLANAR_DP void mulval_post(__global void *outputRe, __global void *outputIm, size_t outoffset, __global void *userdata, double fftoutputRe, double fftoutputIm )\n \
    { \n \
    double scalar = *((__global double*)userdata + outoffset); \n \
    *((__global double*)outputRe + outoffset) = fftoutputRe * scalar; \n \
    *((__global double*)outputIm + outoffset) = fftoutputIm * scalar; \n \
    }
126
//Postcallback test for LDS - works when 1 WI works on one element.
//Assumes 1D FFT of length 64.
//
// POST_MULVAL_LDS: OpenCL C source for a post-callback named mulval_post that
// takes an extra __local scratch pointer. Steps performed by each work-item:
//   1. if outoffset < 16, copy four consecutive floats from userdata, starting
//      at index lid*4, into the same positions of localmem (16 work-items x 4
//      floats = 64 values, matching the length-64 assumption above),
//   2. wait on a local-memory barrier,
//   3. average localmem[outoffset] with its two neighbours; the previous
//      neighbour is taken as 0 when outoffset <= 0 and the next neighbour as 0
//      when outoffset >= get_global_size(0) - 1,
//   4. store fftoutput scaled by that average into `output` at `outoffset`.
//
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define POST_MULVAL_LDS void mulval_post(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput, __local void* localmem)\n \
    { \n \
    uint lid = get_local_id(0); \n \
    __local float* lds; \n \
    if (outoffset < 16) \n \
    { \n \
    lds = (__local float*)localmem + lid*4; \n \
    lds[0] = *((__global float*)userdata + lid*4); \n \
    lds[1] = *((__global float*)userdata + lid*4 + 1); \n \
    lds[2] = *((__global float*)userdata + lid*4 + 2); \n \
    lds[3] = *((__global float*)userdata + lid*4 + 3); \n \
    } \n \
    barrier(CLK_LOCAL_MEM_FENCE); \n \
    lds = (__local float*)localmem + outoffset; \n \
    float prev = outoffset <= 0 ? 0 : *(lds - 1); \n \
    float next = outoffset >= (get_global_size(0) - 1) ? 0 : *(lds + 1); \n \
    float avg = (prev + *lds + next)/3.0f; \n \
    *((__global float2*)output + outoffset) = fftoutput * avg; \n \
    }
148
// POST_MULVAL_REAL: single-precision post-callback for real-valued output.
// OpenCL C source for mulval_post that multiplies the float `fftoutput` by the
// float read from userdata at `outoffset` and stores the product into `output`
// at `outoffset`.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define POST_MULVAL_REAL void mulval_post(__global void *output, uint outoffset, __global void *userdata, float fftoutput )\n \
    { \n \
    float scalar = *((__global float*)userdata + outoffset); \n \
    *((__global float*)output + outoffset) = fftoutput * scalar; \n \
    }
154
// POST_MULVAL_REAL_DP: double-precision post-callback for real-valued output.
// OpenCL C source for mulval_post that multiplies the double `fftoutput` by the
// double read from userdata at `outoffset` and stores the product into
// `output` at `outoffset`.
// The body is source text intended for stringification; the `\n` tokens become
// newline escapes in the resulting string.
#define POST_MULVAL_REAL_DP void mulval_post(__global void *output, uint outoffset, __global void *userdata, double fftoutput )\n \
    { \n \
    double scalar = *((__global double*)userdata + outoffset); \n \
    *((__global double*)output + outoffset) = fftoutput * scalar; \n \
    }
160
// Host-side user-data record for the user-defined-type pre-callback test.
// It has the same two float fields, in the same order, as the USER_DATA struct
// declared inside the PRE_MULVAL_UDT kernel text, which multiplies scalar1 by
// scalar2 to obtain the scale factor. Keep both declarations in step.
typedef struct USER_DATA
{
    float scalar1;
    float scalar2;
} USER_DATA;
166
// Two-level stringification helpers.
// CALLBCKSTR turns its arguments into a string literal exactly as written
// (macro names are NOT expanded). STRINGIFY adds one level of indirection so
// that its arguments are macro-expanded first and the expansion is what gets
// stringified - e.g. STRINGIFY(PRE_MULVAL) yields the callback source text,
// whereas CALLBCKSTR(PRE_MULVAL) would yield just "PRE_MULVAL".
// Both are variadic so that commas in the expanded text are preserved.
#define CALLBCKSTR(...) #__VA_ARGS__
#define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__)
169
// Index of the real / imaginary component within a two-element complex value.
enum { REAL=0, IMAG=1 };
// Index of each axis within a per-dimension array (x first, z last).
enum { dimx=0, dimy=1, dimz=2 };
// Dimensionality of a transform; the enumerator value equals the number of dimensions.
enum fftw_dim { one_d=1, two_d=2, three_d=3 };
// Buffer layouts; the enumerator value equals the number of buffers involved
// (one interleaved buffer vs. separate real and imaginary buffers).
enum { one_interleaved_buffer=1, separate_real_and_imaginary_buffers=2 };
// Named values for a boolean "intermediate buffer" choice, so call sites can
// pass a self-describing name instead of a bare true/false.
const bool use_explicit_intermediate_buffer = true;
const bool autogenerate_intermediate_buffer = false;

// Named values for a boolean "comparison method" choice.
// Presumably these are the two values comparison_type may hold - confirm
// against the translation unit that defines it.
const bool pointwise_compare = true;
const bool root_mean_square = false;

// Runtime-configurable switches; declared here, defined in another translation unit.
extern bool comparison_type;
extern bool suppress_output;

// Global, mutable seed shared by the tests. The original author flagged this
// global as an unfortunate hack; it is declared here and defined elsewhere.
extern size_t super_duper_global_seed;
183
// Transform lengths used by the tests, grouped by radix. For every radix there
// is a small, a normal and a large length, plus a "dlarge" length; the dlarge
// values are the ones MaxLength2D<double> returns, while MaxLength2D<float>
// returns the large values.

// Radix 2
const size_t small2 = 32;       // 2^5
const size_t normal2 = 1024;    // 2^10
const size_t large2 = 8192;     // 2^13
const size_t dlarge2 = 4096;    // 2^12

// Radix 3
const size_t small3 = 9;        // 3^2
const size_t normal3 = 729;     // 3^6
const size_t large3 = 6561;     // 3^8
const size_t dlarge3 = 2187;    // 3^7

// Radix 5
const size_t small5 = 25;       // 5^2
const size_t normal5 = 625;     // 5^4
const size_t large5 = 15625;    // 5^6
const size_t dlarge5 = 3125;    // 5^5

// Radix 7
const size_t small7 = 49;       // 7^2
const size_t normal7 = 343;     // 7^3
const size_t large7 = 16807;    // 7^5
const size_t dlarge7 = 2401;    // 7^4

// Miscellaneous test limits.
const size_t large_batch_size = 2048;
const size_t do_not_output_any_mismatches = 0;
const size_t default_number_of_mismatches_to_output = 10;
const size_t max_dimension = 3;
208
// Lower limit on magnitudes considered by the tests.
// NOTE(review): presumably values below this are treated as zero during
// comparison - confirm against the code that uses it.
const double magnitude_lower_limit = 1.0E-100;

// Comparison tolerances; declared here, defined in another translation unit.
// NOTE(review): by name, `tolerance` would pair with the point-wise comparison
// and `rmse_tolerance` with the root-mean-square one - confirm at the use sites.
extern float tolerance;
extern double rmse_tolerance;
213
214 extern cl_device_type g_device_type;
215 extern cl_int g_device_id;
216 extern cl_int g_platform_id;
217
218 extern size_t number_of_random_tests;
219 extern time_t random_test_parameter_seed;
220 extern bool verbose;
221
// Shared handler for a caught exception; takes the exception by const
// reference. Defined in another translation unit.
void handle_exception( const std::exception& except );

// Returns a memory figure for the OpenCL device selected by `device_index`.
// NOTE(review): by name this is the maximum memory available on that device;
// the unit and exact meaning are not visible here - see the definition.
size_t max_mem_available_on_cl_device(size_t device_index);
224
// Creating this template function and specializations to control the length inputs to the tests;
// these should be removed once the size restriction on transform lengths (SP 2^24 and DP 2^22)
// is removed; the dlarge* constants can then be removed

// Primary template: fallback for element types that have no explicit
// specialization. It ignores the radix and always returns 0; the float and
// double specializations in this file supply the real limits.
// The parameter is left unnamed because it is unused here.
template <typename T>
inline size_t MaxLength2D(size_t /*rad*/)
{
    return 0;
}
234
235 template <>
236 inline size_t MaxLength2D<float>(size_t rad)
237 {
238 switch(rad)
239 {
240 case 2: return large2;
241 case 3: return large3;
242 case 5: return large5;
243 case 7: return large7;
244 default: return 0;
245 }
246 }
247
248 template <>
249 inline size_t MaxLength2D<double>(size_t rad)
250 {
251 switch(rad)
252 {
253 case 2: return dlarge2;
254 case 3: return dlarge3;
255 case 5: return dlarge5;
256 case 7: return dlarge7;
257 default: return 0;
258 }
259 }
260
261
262 #endif
263