1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #pragma once
19 #if !defined( CLFFT_BUFFER_H )
20 #define CLFFT_BUFFER_H
21
22 #include <cmath>
23 #include <complex>
24 #include <stdexcept>
25 #include <memory>
26 #include <vector>
27 #include <utility>
28 #include <sstream>
29 #include "../include/clFFT.h"
30 #include "test_constants.h"
31 #include <boost/random.hpp>
32 #include <stdint.h>
33 #include "buffer_memory.h"
34
35 /*****************************************************/
36 /*****************************************************/
37 template< typename T >
floats_are_about_equal(T a,T b)38 bool floats_are_about_equal( T a, T b) {
39 // explicit check to see if a and b are both zero-ish . . .
40 if( fabs(a) < 0.00001f && fabs(b) < 0.00001f) return true;
41 // . . . and if not, we'll see if they're the same-ish
42 return ( fabs(a-b) > fabs(a*tolerance) ) ? false : true;
43 }
44
45 /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
46 /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
47 struct index_t {
48 size_t x, y, z, batch;
49
index_tindex_t50 index_t( size_t inx, size_t iny, size_t inz, size_t inbatch )
51 : x(inx)
52 , y(iny)
53 , z(inz)
54 , batch(inbatch)
55 {}
56 };
57
58 namespace layout
59 {
60 // buffer_layout_t will be used to let class buffer know how many instances of buffer_memory to make and their sizes
61 enum buffer_layout_t
62 {
63 real,
64 complex_interleaved,
65 complex_planar,
66 hermitian_interleaved,
67 hermitian_planar
68 };
69 }
70
71 /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
72 /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
73 template <class T>
74 class buffer {
75 private:
76 // we need to save the requested length x, because
77 // if we change the buffer from complex to real,
78 // (as in a round-trip test) we need to be able to
79 // get back to the original length of x. in the case
80 // of an odd transform length, that's not possible
81 // due to round-off error unless we explicitly save it
82 size_t _requested_length_x;
83 size_t _number_of_dimensions;
84 size_t _batch_size;
85 size_t _distance;
86 layout::buffer_layout_t _layout;
87 clfftResultLocation _placeness;
88
89 std::vector< size_t > _lengths;
90 std::vector< size_t > _strides;
91 bool _tightly_packed_strides;
92 bool _tightly_packed_distance;
93
94 static const size_t tightly_packed = 0;
95
96 // if real or planar:
97 // _the_buffers[re] will hold the real portion
98 // _the_buffers[im] will hold the imaginary portion (planar only)
99 // if interleaved:
100 // _the_buffers[interleaved] will hold the whole banana
101 std::vector< buffer_memory< T > > _the_buffers;
102
103 enum
104 {
105 interleaved = 0,
106 re = 0, // real
107 im = 1 // imaginary
108 };
109
110 public:
111 /*****************************************************/
buffer(const size_t dimensions_in,const size_t * lengths_in,const size_t * strides_in,const size_t batch_size_in,const size_t distance_in,const layout::buffer_layout_t layout_in,const clfftResultLocation placeness_in)112 buffer( const size_t dimensions_in,
113 const size_t* lengths_in,
114 const size_t* strides_in,
115 const size_t batch_size_in,
116 const size_t distance_in,
117 const layout::buffer_layout_t layout_in,
118 const clfftResultLocation placeness_in
119 )
120 : _number_of_dimensions( dimensions_in )
121 , _batch_size( batch_size_in )
122 , _distance( distance_in )
123 , _layout( layout_in )
124 , _placeness( placeness_in )
125 , _lengths()
126 , _strides()
127 , _the_buffers()
128 {
129 initialize_lengths(lengths_in);
130 initialize_strides(strides_in);
131 initialize_distance(distance_in);
132 create_buffer_memory();
133 clear();
134 }
135
136 /*****************************************************/
~buffer()137 ~buffer()
138 {}
139
140 /*****************************************************/
141 // this assignment operator only copies _data_.
142 // it does not change the rest of the buffer information
143 // and in fact, it requires that the buffer sizes be the same going in
144 buffer<T> & operator=( buffer<T> & that )
145 {
146 if( this->is_real() != that.is_real() ||
147 this->is_hermitian() != that.is_hermitian() ||
148 this->is_complex() != that.is_complex() )
149 {
150 throw std::runtime_error( "Buffers must be the same layout type for assignment operator" );
151 }
152
153 if( this->_number_of_dimensions != that._number_of_dimensions ||
154 this->_batch_size != that._batch_size ||
155 this->_lengths != that._lengths )
156 {
157 throw std::runtime_error( "Buffers must be the same size for assignment operator" );
158 }
159
160 if( this->is_real() )
161 {
162 for( size_t batch = 0; batch < batch_size(); batch++ ) {
163 for( size_t z = 0; z < length(dimz); z++ ) {
164 for( size_t y = 0; y < length(dimy); y++ ) {
165 for( size_t x = 0; x < length(dimx); x++ ) {
166 this->set_one_data_point( that.real(x,y,z,batch), x, y, z, batch );
167 }
168 }
169 }
170 }
171 }
172 else
173 {
174 for( size_t batch = 0; batch < batch_size(); batch++ ) {
175 for( size_t z = 0; z < length(dimz); z++ ) {
176 for( size_t y = 0; y < length(dimy); y++ ) {
177 for( size_t x = 0; x < length(dimx); x++ ) {
178 this->set_one_data_point( that.real(x,y,z,batch), that.imag(x,y,z,batch), x, y, z, batch );
179 }
180 }
181 }
182 }
183 }
184
185 return *this;
186 }
187
188 private:
189 /*****************************************************/
preinitialize_lengths_to_1_1_1()190 void preinitialize_lengths_to_1_1_1()
191 {
192 _lengths.clear();
193
194 for( int i = 0; i < max_dimension; ++i ) {
195 _lengths.push_back(1);
196 }
197 }
198
199 /*****************************************************/
initialize_lengths(const size_t * lengths_in)200 void initialize_lengths(const size_t* lengths_in)
201 {
202 preinitialize_lengths_to_1_1_1();
203
204 for( size_t i = 0; i < _number_of_dimensions; ++i )
205 {
206 _lengths[i] = lengths_in[i];
207 }
208
209 _requested_length_x = _lengths[dimx];
210 adjust_length_x_for_hermitian_buffers();
211 }
212
213 /*****************************************************/
adjust_length_x_for_hermitian_buffers()214 void adjust_length_x_for_hermitian_buffers()
215 {
216 // complex-to-complex transforms do not require any change
217 // to the number of points in the buffer
218
219 // real buffers also never require a change to the number of
220 // points in the buffer
221
222 // a hermitian buffer with a length of "X" will actually
223 // have X/2 + 1 points (the other half-ish are conjugates
224 // and do not need to be stored). lenY and lenZ are never
225 // modified
226 if( is_hermitian() )
227 {
228 _lengths[dimx] = _lengths[dimx] / 2 + 1;
229 }
230 }
231
232 /*****************************************************/
preinitialize_strides_to_1_1_1()233 void preinitialize_strides_to_1_1_1()
234 {
235 _strides.clear();
236
237 for( int i = 0; i < max_dimension; ++i ) {
238 _strides.push_back(1);
239 }
240 }
241
242 /*****************************************************/
initialize_strides(const size_t * strides_in)243 void initialize_strides(const size_t* strides_in)
244 {
245 preinitialize_strides_to_1_1_1();
246
247 // we need to calculate the strides if tightly packed
248 if( strides_in == nullptr ) {
249 _strides[dimx] = 1;
250 for( size_t i = 1; i < _number_of_dimensions; ++i )
251 {
252 _strides[i] = _strides[i-1]*_lengths[i-1];
253 }
254
255 _tightly_packed_strides = true;
256 }
257 // we do not need to calculate anything if the user specifies strides
258 // we just copy the input strides into place
259 else
260 {
261 for( size_t i = 0; i < _number_of_dimensions; ++i )
262 {
263 _strides[i] = strides_in[i];
264 }
265
266 _tightly_packed_strides = false;
267 }
268 }
269
270 /*****************************************************/
initialize_distance(const size_t distance_in)271 void initialize_distance(const size_t distance_in)
272 {
273 if( distance_in == tightly_packed )
274 {
275 // calculate distance if not passed in
276 _distance = _lengths[_number_of_dimensions-1] * _strides[_number_of_dimensions-1];
277
278 _tightly_packed_distance = true;
279 }
280 else
281 {
282 // or copy it if passed in
283 _distance = distance_in;
284
285 _tightly_packed_distance = false;
286 }
287 }
288
289 /*****************************************************/
create_buffer_memory()290 void create_buffer_memory()
291 {
292 if( is_real() )
293 {
294 // just one real buffer
295 _the_buffers.push_back( buffer_memory< T >( total_number_of_points_including_data_and_intervening() ) );
296
297 increase_memory_allocation_for_real_in_place_buffers();
298 }
299 else if( is_planar() )
300 {
301 // one real buffer
302 _the_buffers.push_back( buffer_memory< T >( total_number_of_points_including_data_and_intervening() ) );
303 // and one imaginary buffer
304 _the_buffers.push_back( buffer_memory< T >( total_number_of_points_including_data_and_intervening() ) );
305 }
306 else if( is_interleaved() )
307 {
308 // one double-wide interleaved buffer
309 _the_buffers.push_back( buffer_memory< T >( 2 * total_number_of_points_including_data_and_intervening() ) );
310 }
311 }
312
313 /*****************************************************/
amount_of_extra_padding_per_x()314 size_t amount_of_extra_padding_per_x()
315 {
316 if( length(dimx) % 2 == 0 ) // even lengths of x add 2 per row
317 return 2;
318 else // odd lengths of x add 1 per row
319 return 1;
320 }
321
322 /*****************************************************/
adjust_strides_and_distance_for_in_place_real_buffer()323 void adjust_strides_and_distance_for_in_place_real_buffer()
324 {
325 if( is_real() )
326 {
327 if( is_in_place() )
328 {
329 size_t amount_to_add_for_this_dimension = stride(dimx) * amount_of_extra_padding_per_x();
330
331 // strides first
332 if( number_of_dimensions() >= 2 )
333 {
334 _strides[dimy] += amount_to_add_for_this_dimension;
335 }
336
337 if( number_of_dimensions() == 3 )
338 {
339 amount_to_add_for_this_dimension *= length(dimy);
340 _strides[dimz] += amount_to_add_for_this_dimension;
341 }
342
343 // distance next
344 if( number_of_dimensions() == 1 )
345 {
346 _distance += amount_to_add_for_this_dimension;
347 }
348 else if( number_of_dimensions() == 2 )
349 {
350 _distance += ( amount_to_add_for_this_dimension * length(dimy) );
351 }
352 else if( number_of_dimensions() == 3 )
353 {
354 _distance += ( amount_to_add_for_this_dimension * length(dimz) );
355 }
356 else throw std::runtime_error( "invalid dimensions in adjust_strides_and_distance_for_in_place_real_buffer()" );
357 }
358 else throw std::runtime_error( "this buffer is out of place and shouldn't be adjusting strides" );
359 }
360 else throw std::runtime_error( "this buffer is unreal and shouldn't be adjusting strides" );
361 }
362
363 /*****************************************************/
increase_memory_allocation_for_real_in_place_buffers()364 void increase_memory_allocation_for_real_in_place_buffers()
365 {
366 // when performing an in-place, real-to-hermitian transform,
367 // we want a little extra space to account for the larger size
368 // of the hermitian output.
369
370 // each row in the X dimension should have enough space for 2 extra reals
371 // (to account for the one extra complex number that will be put
372 // into the buffer after the transform)
373
374 // we don't want to change the length, because the number of points
375 // in the transform isn't changing. we only want to change the
376 // amount of memory reserved
377 if( is_real() )
378 {
379 if( is_in_place() )
380 {
381 if( _tightly_packed_strides && _tightly_packed_distance )
382 {
383 // request extra memory
384 _the_buffers[re].increase_allocated_memory( amount_of_extra_padding_per_x() * stride(dimx) * length(dimy) * length(dimz) * batch_size() );
385
386 // adjust strides/distances so that the padding is at the end of each row in the Xth dimension
387 adjust_strides_and_distance_for_in_place_real_buffer();
388 }
389 }
390 }
391 }
392
393 /*****************************************************/
394 size_t index( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0)
395 {
396 size_t interleaved_offset = 1;
397
398 // if this buffer is interleaved, the index should actually be double what it appears.
399 // interleaved_offset will accomplish this magical doubling.
400 if( is_interleaved() )
401 interleaved_offset = 2;
402
403 size_t the_index = ( stride(dimx) * x + stride(dimy) * y + stride(dimz) * z + distance() * batch ) * interleaved_offset;
404
405 return the_index;
406 }
407
408 /*****************************************************/
409 size_t next_index( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0)
410 {
411 if( x+1 < length(dimx))
412 return index( x+1, y, z, batch );
413 else if( y+1 < length(dimy) )
414 return index( 0, y+1, z, batch );
415 else if( z+1 < length(dimz) )
416 return index( 0, 0, z+1, batch );
417 else if( batch+1 < batch_size() )
418 return index( 0, 0, 0, batch+1 );
419 else
420 // we are at the last point
421 // return the location immediately after the last point
422 return index( 0, 0, 0, batch+1 );
423 }
424
425 /*****************************************************/
points_are_about_equal(buffer<T> & other_buffer,size_t x,size_t y,size_t z,size_t batch)426 bool points_are_about_equal( buffer<T> & other_buffer, size_t x, size_t y, size_t z, size_t batch )
427 {
428 if( is_real() )
429 return floats_are_about_equal<T>( real(x, y, z, batch), other_buffer.real(x, y, z, batch) );
430 else if( is_complex() || is_hermitian() )
431 return ( floats_are_about_equal<T>( real(x, y, z, batch), other_buffer.real(x, y, z, batch) ) &&
432 floats_are_about_equal<T>( imag(x, y, z, batch), other_buffer.imag(x, y, z, batch) ) );
433 else
434 throw std::runtime_error( "invalid layout in points_are_about_equal()" );
435 }
436
437 /*****************************************************/
buffer_mismatches(buffer<T> & other_buffer,bool compare_method)438 size_t buffer_mismatches( buffer<T> & other_buffer, bool compare_method)
439 {
440 std::vector< index_t > mismatched_point_indices;
441
442 if (compare_method == pointwise_compare)
443 {
444 for( size_t batch = 0; batch < batch_size(); batch++ )
445 for( size_t z = 0; z < length(dimz); z++ )
446 for( size_t y = 0; y < length(dimy); y++ )
447 for( size_t x = 0; x < length(dimx); x++ )
448 if( !points_are_about_equal( other_buffer, x, y, z, batch ) )
449 {
450 mismatched_point_indices.push_back( index_t(x, y, z, batch));
451 }
452
453 const size_t max_mismatches_output = default_number_of_mismatches_to_output;
454
455 if( mismatched_point_indices.size() != 0 && max_mismatches_output != 0 && suppress_output == false) {
456 std::cout << std::endl << std::dec << mismatched_point_indices.size() << " of " << batch_size() * number_of_data_points_single_batch()
457 <<" data points did not match. The first " << max_mismatches_output << " (max) mismatching points follow:" << std::endl;
458
459 std::cout << std::endl << "(array index)(index) ";
460 std::cout << "[test value (dec)] / [expected value (dec)]";
461 std::cout << std::endl;
462 for( size_t i = 0; i < max_mismatches_output && i < mismatched_point_indices.size(); i++ )
463 {
464 index_t mismatch = mismatched_point_indices[i];
465
466 std::cout
467 << std::dec << "(" << mismatched_point_indices.at(i).batch << ")"
468 << std::dec << "(" << mismatched_point_indices.at(i).x << "," << mismatched_point_indices.at(i).y << "," << mismatched_point_indices.at(i).z << ") ";
469 std::cout
470 << real( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
471
472 if( is_complex() || is_hermitian() )
473 {
474 std::cout << "+i*" << imag( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
475 }
476 std::cout
477 << " / " << other_buffer.real( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
478
479 if( is_complex() || is_hermitian() )
480 {
481 std::cout << "+i*" << other_buffer.imag( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
482 }
483 std::cout << std::endl;
484 }
485 std::cout << std::endl;
486 }
487 return mismatched_point_indices.size();
488 }
489 else
490 {
491 //RMS accuracy judgement
492
493 size_t problem_size_per_transform = length(dimx) * length(dimy) * length(dimz);
494 double rmse_tolerance_this = rmse_tolerance * sqrt((double)problem_size_per_transform / 4096.0);
495
496 for (size_t batch = 0; batch < batch_size(); batch++) {
497
498 double maxMag = 0.0, maxMagInv = 1.0;
499
500 // Compute RMS error relative to maximum magnitude
501 double rms = 0;
502
503 for (size_t z = 0; z < length(dimz); z++) {
504 for (size_t y = 0; y < length(dimy); y++) {
505 for (size_t x = 0; x < length(dimx); x++) {
506 double ex_r, ex_i, ac_r, ac_i;
507 double mag;
508
509 ex_r = other_buffer.real(x, y, z, batch);
510 ac_r = real(x, y, z, batch);
511
512 if (other_buffer.is_complex() || other_buffer.is_hermitian())
513 ex_i = other_buffer.imag(x, y, z, batch);
514 else
515 ex_i = 0;
516
517 if (other_buffer.is_complex() || other_buffer.is_hermitian())
518 ac_i = imag(x, y, z, batch);
519 else
520 ac_i = 0;
521
522 // find maximum magnitude
523 mag = ex_r*ex_r + ex_i*ex_i;
524 maxMag = (mag > maxMag) ? mag : maxMag;
525
526 // compute square error
527 rms += ((ex_r - ac_r)*(ex_r - ac_r) + (ex_i - ac_i)*(ex_i - ac_i));
528 }
529 }
530 }
531
532 if (maxMag > magnitude_lower_limit)
533 {
534 maxMagInv = 1.0 / maxMag;
535 }
536
537 rms = sqrt(rms*maxMagInv);
538
539 if (fabs(rms) > rmse_tolerance_this)
540 {
541 if (suppress_output == false)
542 std::cout << std::endl << "RMSE accuracy judgement failure -- RMSE = " << std::dec << rms <<
543 ", maximum allowed RMSE = " << std::dec << rmse_tolerance_this << std::endl;
544 return 1;
545 }
546 }
547
548 return 0;
549 }
550 }
551
552 public:
553 /*****************************************************/
554 bool operator==( buffer<T> & other_buffer )
555 {
556 // complexity of each dimension must be the same
557 if( ( is_real() && !other_buffer.is_real() ) || ( !is_real() && other_buffer.is_real() ) ||
558 ( is_hermitian() && !other_buffer.is_hermitian() ) || ( !is_hermitian() && other_buffer.is_hermitian() ) ||
559 ( is_complex() && !other_buffer.is_complex() ) || ( !is_complex() && other_buffer.is_complex() ) )
560 {
561 return false;
562 }
563
564 // batch_size of the data must be the same
565 if( batch_size() != other_buffer.batch_size() )
566 {
567 return false;
568 }
569
570 // dimensionality of the data must be the same
571 if( number_of_dimensions() != other_buffer.number_of_dimensions() )
572 {
573 return false;
574 }
575
576 // size of each dimension must be the same
577 for( size_t i = 0; i < number_of_dimensions(); ++i )
578 {
579 if( length(i) != other_buffer.length(i)) return false;
580 }
581
582 size_t number_deaths = 0;
583 number_deaths += buffer_mismatches( other_buffer, comparison_type);
584
585 if( number_deaths == 0 ) return true;
586 else return false;
587 }
588
589 /*****************************************************/
590 bool operator!=( buffer<T> & other_buffer )
591 {
592 return !( *this == other_buffer );
593 }
594
595 void operator*=( buffer<T> & other_buffer )
596 {
597 size_t the_index;
598 T* base_ptr;
599 T* real_ptr;
600 T* imag_ptr;
601
602 if( is_interleaved() )
603 {
604 base_ptr = _the_buffers[interleaved].ptr();
605 }
606 else if ( is_planar() )
607 {
608 real_ptr = _the_buffers[re].ptr();
609 imag_ptr = _the_buffers[im].ptr();
610 }
611 else if ( is_real() )
612 {
613 base_ptr = _the_buffers[re].ptr();
614 }
615
616 for( size_t batch = 0; batch < batch_size(); batch++ )
617 for( size_t z = 0; z < length(dimz); z++ )
618 for( size_t y = 0; y < length(dimy); y++ )
619 for( size_t x = 0; x < length(dimx); x++ )
620 {
621 the_index = index(x, y, z, batch);
622 if( is_interleaved() )
623 {
624 *( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
625
626 the_index = the_index + 1; // the imaginary component immediately follows the real
627 if (other_buffer.is_real())
628 {
629 *( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
630 }
631 else
632 {
633 *( base_ptr + the_index ) *= other_buffer.imag(x, y, z, batch);
634 }
635 }
636 else if ( is_planar() )
637 {
638 *( real_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
639
640 if (other_buffer.is_real())
641 {
642 *( imag_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
643 }
644 else
645 {
646 *( imag_ptr + the_index ) *= other_buffer.imag(x, y, z, batch);
647 }
648 }
649 else if ( is_real() )
650 {
651 *( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
652 }
653 }
654 }
655
656 //Calculates a 3 point average of other_buffer and
657 //multiplies with buffer
658 //only real layout is supported for other_buffer currently
multiply_3pt_average(buffer<T> & other_buffer)659 void multiply_3pt_average( buffer<T> & other_buffer )
660 {
661 if (!other_buffer.is_real())
662 {
663 throw std::runtime_error( "only real layout is supported currently for other_buffer" );
664 }
665
666 size_t the_index, o_the_index;
667 T *base_ptr, *o_base_ptr;
668 T *real_ptr;
669 T *imag_ptr;
670 T o_prev_val, o_next_val;
671 T average;
672
673 if( is_interleaved() )
674 {
675 base_ptr = _the_buffers[interleaved].ptr();
676 }
677 else if ( is_planar() )
678 {
679 real_ptr = _the_buffers[re].ptr();
680 imag_ptr = _the_buffers[im].ptr();
681 }
682 else if ( is_real() )
683 {
684 base_ptr = _the_buffers[re].ptr();
685 }
686 o_base_ptr = other_buffer.real_ptr();
687
688 for( size_t batch = 0; batch < batch_size(); batch++ )
689 for( size_t z = 0; z < length(dimz); z++ )
690 for( size_t y = 0; y < length(dimy); y++ )
691 for( size_t x = 0; x < length(dimx); x++ )
692 {
693 the_index = index(x, y, z, batch);
694 o_the_index = other_buffer.index(x, y, z, batch);
695 o_prev_val = o_the_index <= 0 ? 0 : *(o_base_ptr + o_the_index - 1);
696 o_next_val = o_the_index >= (other_buffer.total_number_of_points_including_data_and_intervening() - 1) ? 0 : *(o_base_ptr + o_the_index + 1);
697
698 average = (o_prev_val + *(o_base_ptr + o_the_index) + o_next_val)/ 3.0f ;
699
700 if( is_interleaved() )
701 {
702 *( base_ptr + the_index ) *= average;
703
704 the_index = the_index + 1; // the imaginary component immediately follows the real
705 *( base_ptr + the_index ) *= average;
706 }
707 else if ( is_planar() )
708 {
709 *( real_ptr + the_index ) *= average;
710
711 *( imag_ptr + the_index ) *= average;
712 }
713 else if ( is_real() )
714 {
715 *( base_ptr + the_index ) *= average;
716 }
717 }
718 }
719
720 /*****************************************************/
721 // strides and distance are those of the output (that is, the new hermitian buffer)
change_real_to_hermitian(const size_t * strides_in,const size_t distance_in)722 void change_real_to_hermitian( const size_t* strides_in, const size_t distance_in )
723 {
724 if( !is_real() || !is_in_place() )
725 {
726 throw std::runtime_error( "can only change a real buffer used in an in-place transform to a hermitian one" );
727 }
728
729 // we currently only support hermitian interleaved for in-place transforms
730 _layout = layout::hermitian_interleaved;
731 adjust_length_x_for_hermitian_buffers();
732 initialize_strides(strides_in);
733 initialize_distance(distance_in);
734 }
735
736 /*****************************************************/
737 // strides and distance are those of the output (that is, the new real buffer)
change_hermitian_to_real(const size_t * strides_in,const size_t distance_in)738 void change_hermitian_to_real( const size_t* strides_in, const size_t distance_in )
739 {
740 // we currently only support hermitian interleaved for in-place transforms
741 if( _layout != layout::hermitian_interleaved || !is_in_place() )
742 {
743 throw std::runtime_error( "can only change a hermitian interleaved buffer used in an in-place transform to a real one" );
744 }
745
746 _layout = layout::real;
747 _lengths[dimx] = _requested_length_x;
748 initialize_strides(strides_in);
749 initialize_distance(distance_in);
750 }
751
752 /*****************************************************/
is_real()753 bool is_real()
754 {
755 return _layout == layout::real;
756 }
757
758 /*****************************************************/
is_complex()759 bool is_complex()
760 {
761 return _layout == layout::complex_interleaved || _layout == layout::complex_planar;
762 }
763
764 /*****************************************************/
is_hermitian()765 bool is_hermitian()
766 {
767 return _layout == layout::hermitian_interleaved || _layout == layout::hermitian_planar;
768 }
769
770 /*****************************************************/
is_planar()771 bool is_planar()
772 {
773 return _layout == layout::complex_planar || _layout == layout::hermitian_planar;
774 }
775
776 /*****************************************************/
is_interleaved()777 bool is_interleaved()
778 {
779 return _layout == layout::complex_interleaved || _layout == layout::hermitian_interleaved;
780 }
781
782 /*****************************************************/
is_in_place()783 bool is_in_place()
784 {
785 if( _placeness == CLFFT_INPLACE ) return true;
786 else if( _placeness == CLFFT_OUTOFPLACE) return false;
787 else throw std::runtime_error( "invalid placeness value in is_in_place()" );
788 }
789
790 /*****************************************************/
interleaved_ptr()791 T* interleaved_ptr()
792 {
793 if( is_interleaved() )
794 return _the_buffers[interleaved].ptr();
795 else
796 throw std::runtime_error( "interleaved_ptr() is only available on interleaved buffers" );
797 }
798
799 /*****************************************************/
real_ptr()800 T* real_ptr()
801 {
802 if( is_planar() || is_real() )
803 return _the_buffers[re].ptr();
804 else
805 throw std::runtime_error( "real() is only available on real and planar buffers" );
806 }
807
808 /*****************************************************/
imag_ptr()809 T* imag_ptr()
810 {
811 if( is_planar() )
812 return _the_buffers[im].ptr();
813 else
814 throw std::runtime_error( "imag_ptr() is only available on planar buffers" );
815 }
816
817 /*****************************************************/
818 T real( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0 )
819 {
820 size_t this_index = index( x, y, z, batch );
821
822 // all layouts will have a real component
823 // using [re] will catch the real component for
824 // layout::interleaved as well
825 T this_value = _the_buffers[re][this_index];
826 return this_value;
827 }
828
829 /*****************************************************/
830 T imag( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0 )
831 {
832 size_t this_index = index( x, y, z, batch );
833
834 if( is_real() )
835 throw std::runtime_error( "imag() is not available for this real buffer" );
836 else if( is_planar() )
837 return _the_buffers[im][this_index];
838 else if( is_interleaved() )
839 // index always points to the real component of an interleaved number
840 // the following memory location is the imaginary component
841 return _the_buffers[interleaved][this_index + 1];
842 else
843 throw std::runtime_error( "invalid layout type in imag()" );
844 }
845
846 /*****************************************************/
847 std::complex<T> complex( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0 )
848 {
849 if( is_real() )
850 throw std::runtime_error( "complex() is not available for this real buffer" );
851 else if( is_complex() || is_hermitian() )
852 {
853 std::complex<T> this_complex( real( x, y, z, batch ), imag( x, y, z, batch ) );
854 return this_complex;
855 }
856 else
857 throw std::runtime_error( "invalid layout type in complex()" );
858 }
859
860 /*****************************************************/
number_of_dimensions()861 size_t number_of_dimensions()
862 {
863 return _number_of_dimensions;
864 }
865
866 /*****************************************************/
number_of_data_points_single_batch()867 size_t number_of_data_points_single_batch()
868 {
869 size_t number_of_points = 1;
870 for( size_t i = 0; i < _number_of_dimensions; ++i )
871 {
872 number_of_points *= length(i);
873 }
874 return number_of_points;
875 }
876
877 /*****************************************************/
number_of_data_points()878 size_t number_of_data_points()
879 {
880 return number_of_data_points_single_batch() * batch_size();
881 }
882
883 /*****************************************************/
884 // note that this returns the size in number of points and
885 // does not take layout into consideration. this will yield
886 // the same number for real, interleaved, and planar layouts.
887 // whomever uses this information will need to know if they
888 // want 1x buffer of this size (real), 2x buffer of this
889 // size (planar), or 1x double-wide buffer (interleaved)
total_number_of_points_including_data_and_intervening()890 size_t total_number_of_points_including_data_and_intervening()
891 {
892 return distance() * batch_size();
893 }
894
895 /*****************************************************/
896 // note that this will return the size of ONE BUFFER in bytes
897 // for real and interleaved, that doesn't change anything
898 // for planar, you will get the size of the real _or_ the imaginary
899 // (which should always be the same)
size_in_bytes()900 size_t size_in_bytes()
901 {
902 return _the_buffers[0].size_in_bytes();
903 }
904
905 /*****************************************************/
length(size_t dim)906 size_t length(size_t dim)
907 {
908 return _lengths[dim];
909 }
910
911 /*****************************************************/
stride(size_t dim)912 size_t stride(size_t dim)
913 {
914 return _strides[dim];
915 }
916
917 /*****************************************************/
lengths()918 size_t* lengths()
919 {
920 return &_lengths[0];
921 }
922
923 /*****************************************************/
strides()924 size_t* strides()
925 {
926 return &_strides[0];
927 }
928
929 /*****************************************************/
batch_size()930 size_t batch_size()
931 {
932 return _batch_size;
933 }
934
935 /*****************************************************/
distance()936 size_t distance()
937 {
938 return _distance;
939 }
940
941 /*****************************************************/
clear()942 void clear()
943 {
944 // for all batches
945
946 if( is_real() )
947 set_all_to_value( 0.0f );
948 else
949 set_all_to_value( 0.0f, 0.0f );
950 }
951
952 /*****************************************************/
set_one_data_point(T real,const size_t x,const size_t y,const size_t z,const size_t batch)953 void set_one_data_point( T real, const size_t x, const size_t y, const size_t z, const size_t batch )
954 {
955 if( is_real() )
956 {
957 T* base_ptr = _the_buffers[re].ptr();
958 size_t real_index = index(x, y, z, batch);
959
960 *( base_ptr + real_index ) = real;
961 }
962 else
963 throw std::runtime_error( "attempting to use real data point setter for complex or hermitian buffer" );
964 }
965
966 /*****************************************************/
set_one_data_point(T real,T imag,const size_t x,const size_t y,const size_t z,const size_t batch)967 void set_one_data_point( T real, T imag, const size_t x, const size_t y, const size_t z, const size_t batch )
968 {
969 if( is_real() )
970 throw std::runtime_error( "attempting to use complex data point setter for real buffer" );
971 else if( is_interleaved() )
972 {
973 T* base_ptr = _the_buffers[interleaved].ptr();
974 size_t real_index = index(x, y, z, batch);
975 size_t imag_index = real_index + 1; // the imaginary component immediately follows the real
976
977 *( base_ptr + real_index ) = real;
978 *( base_ptr + imag_index ) = imag;
979 }
980 else // planar
981 {
982 T* real_ptr = _the_buffers[re].ptr();
983 T* imag_ptr = _the_buffers[im].ptr();
984 size_t the_index = index(x, y, z, batch);
985
986 *( real_ptr + the_index ) = real;
987 *( imag_ptr + the_index ) = imag;
988 }
989 }
990
991 /*****************************************************/
set_all_to_value(T real)992 void set_all_to_value( T real )
993 {
994 // for all batches
995
996 for( size_t batch = 0; batch < batch_size(); batch++ ) {
997 for( size_t z = 0; z < length(dimz); z++ ) {
998 for( size_t y = 0; y < length(dimy); y++ ) {
999 for( size_t x = 0; x < length(dimx); x++ ) {
1000 set_one_data_point( real, x, y, z, batch );
1001 }
1002 }
1003 }
1004 }
1005 }
1006
1007 /*****************************************************/
set_all_to_value(T real,T imag)1008 void set_all_to_value( T real, T imag )
1009 {
1010 // for all batches
1011
1012 for( size_t batch = 0; batch < batch_size(); batch++ ) {
1013 for( size_t z = 0; z < length(dimz); z++ ) {
1014 for( size_t y = 0; y < length(dimy); y++ ) {
1015 for( size_t x = 0; x < length(dimx); x++ ) {
1016 set_one_data_point( real, imag, x, y, z, batch );
1017 }
1018 }
1019 }
1020 }
1021 }
1022
1023 /*****************************************************/
set_all_to_linear_increase()1024 void set_all_to_linear_increase()
1025 {
1026 // for all batches
1027
1028 size_t val = 1;
1029 for( size_t batch = 0; batch < batch_size(); batch++ ) {
1030 for( size_t z = 0; z < length(dimz); z++ ) {
1031 for( size_t y = 0; y < length(dimy); y++ ) {
1032 for( size_t x = 0; x < length(dimx); x++ ) {
1033 if( is_real() )
1034 {
1035 set_one_data_point( static_cast<T>(val), x, y, z, batch );
1036 }
1037
1038 else
1039 {
1040 set_one_data_point( static_cast<T>(val), static_cast<T>(val) + 0.5f, x, y, z, batch );
1041 }
1042
1043 ++val;
1044 }
1045 }
1046 }
1047 }
1048 }
1049
1050 /*****************************************************/
set_all_to_sawtooth(T amplitude)1051 void set_all_to_sawtooth( T amplitude )
1052 {
1053 // for all batches
1054
1055 for( size_t batch = 0; batch < batch_size(); batch++ )
1056 {
1057 for( size_t z = 0; z < length(dimz); z++ )
1058 {
1059 for( size_t y = 0; y < length(dimy); y++ )
1060 {
1061 // waveform will be 1 period of sawtooth
1062 size_t number_of_points_in_one_period = length(dimx);
1063 size_t number_of_points_on_one_line = number_of_points_in_one_period / 2;
1064
1065 // the sawtooth will start at 0 and increase to amplitude at T/2
1066 // at T/2, value will change to -amplitude and increase back up to 0 at T
1067 // if there are an odd number of points in the whole period,
1068 // we'll make a stop at 0 in the middle of the jump
1069 T value = 0.0f;
1070 T per_point_delta = amplitude / (number_of_points_on_one_line - 1);
1071
1072 for( size_t x = 0; x < number_of_points_in_one_period; x++) {
1073 if( is_real() )
1074 {
1075 set_one_data_point( value, x, y, z, batch);
1076 }
1077 else
1078 {
1079 // for the real value, we want the sawtooth as described above
1080 // for the imaginary value, we want the 2 times the inverse
1081 // (so that real and imaginary don't match, possibly obscuring errors)
1082 set_one_data_point( value, -2.0f * value, x, y, z, batch);
1083 }
1084
1085 // if we're at T/2, we want to saw on down to the negative amplitude . . .
1086 if( floats_are_about_equal( value, amplitude ) )
1087 {
1088 if( number_of_points_in_one_period % 2 != 0 ) // odd, we need to add the 0
1089 {
1090 x++;
1091 if( is_real() )
1092 {
1093 set_one_data_point( 0.0f, x, y, z, batch);
1094 }
1095 else
1096 {
1097 set_one_data_point( 0.0f, 0.0f, x, y, z, batch);
1098 }
1099 }
1100 value = -1 * amplitude;
1101 }
1102 // . . . otherwise, keep going up
1103 else value += per_point_delta;
1104 }
1105 }
1106 }
1107 }
1108 }
1109
1110 /*****************************************************/
set_all_to_random_data(size_t max_value,size_t seed)1111 void set_all_to_random_data( size_t max_value, size_t seed ) {
1112 // for all batches
1113
1114 boost::mt19937 random_data_generator;
1115 boost::uniform_int<> distribution(1, INT_MAX);
1116 boost::variate_generator<boost::mt19937&, boost::uniform_int<> >
1117 random_value(random_data_generator, distribution);
1118 random_data_generator.seed( static_cast<boost::uint32_t>( seed ) );
1119
1120 for( size_t batch = 0; batch < batch_size(); batch++) {
1121 for( size_t z = 0; z < length(dimz); z++) {
1122 for( size_t y = 0; y < length(dimy); y++) {
1123 for( size_t x = 0; x < length(dimx); x++) {
1124 int val = random_value() % (max_value + 1); // pluck a random value
1125 if( random_value() % 2 ) val *= -1; // make it negative about 50% of the time
1126
1127 if( is_real() )
1128 {
1129 set_one_data_point( static_cast<T>(val), x, y, z, batch );
1130 }
1131
1132 else
1133 {
1134 set_one_data_point( static_cast<T>(val), static_cast<T>(val), x, y, z, batch );
1135 }
1136 }
1137 }
1138 }
1139 }
1140 }
1141
1142 /*****************************************************/
set_all_to_impulse()1143 void set_all_to_impulse()
1144 {
1145 // for all batches
1146 clear();
1147
1148 for( size_t batch = 0; batch < batch_size(); batch++ )
1149 {
1150 if( is_real() )
1151 set_one_data_point( static_cast<T>(number_of_data_points_single_batch()), 0, 0, 0, batch);
1152 else
1153 set_one_data_point( static_cast<T>(number_of_data_points_single_batch()), 0.0f, 0, 0, 0, batch);
1154 }
1155 }
1156
1157
1158
1159 /*****************************************************/
scale_data(T scale)1160 void scale_data( T scale) {
1161 // for all batches
1162
1163 for( size_t batch = 0; batch < batch_size(); batch++ )
1164 {
1165 for( size_t z = 0; z < length(dimz); z++ )
1166 {
1167 for( size_t y = 0; y < length(dimy); y++ )
1168 {
1169 for( size_t x = 0; x < length(dimx); x++ )
1170 {
1171 if( is_real() )
1172 {
1173 T this_value = real(x, y, z, batch);
1174 T scaled_value = this_value * scale;
1175 set_one_data_point( scaled_value, x, y, z, batch );
1176 }
1177 else
1178 {
1179 T this_real = real(x, y, z, batch);
1180 T this_imag = imag(x, y, z, batch);
1181
1182 T scaled_real = this_real * scale;
1183 T scaled_imag = this_imag * scale;
1184 set_one_data_point( scaled_real, scaled_imag, x, y, z, batch );
1185 }
1186 }
1187 }
1188 }
1189 }
1190 }
1191
1192 /*****************************************************/
make_sure_padding_was_not_overwritten()1193 void make_sure_padding_was_not_overwritten()
1194 {
1195 // check before and after memory first
1196 for( size_t i = 0; i < _the_buffers.size(); i++ )
1197 {
1198 _the_buffers[i].check_memory_boundaries();
1199 }
1200
1201 if( _tightly_packed_strides && _tightly_packed_distance) return; // nothing worth checking
1202
1203 size_t intervening_point_touched = 0;
1204
1205 for( size_t batch = 0; batch < batch_size(); batch++)
1206 {
1207 for( size_t z = 0; z < length(dimz); z++)
1208 {
1209 for( size_t y = 0; y < length(dimy); y++)
1210 {
1211 for( size_t x = 0; x < length(dimx); x++)
1212 {
1213 size_t this_point = index(x, y, z, batch);
1214 size_t next_point = next_index(x, y, z, batch);
1215
1216 if( is_planar() )
1217 {
1218 if( this_point < _the_buffers[re].size() && this_point + 1 != next_point)
1219 {
1220 for( size_t i = this_point+1; i < next_point; i++)
1221 {
1222 T this_real = _the_buffers[re][i];
1223 T this_imag = _the_buffers[im][i];
1224
1225 if( nan_as_hex(this_real) != float_as_hex(this_real)
1226 || nan_as_hex(this_imag) != float_as_hex(this_imag) )
1227 {
1228 ++intervening_point_touched;
1229 }
1230 }
1231 }
1232 }
1233 else if( is_real() )
1234 {
1235 if( this_point < _the_buffers[re].size() && this_point + 1 != next_point)
1236 {
1237 for( size_t i = this_point+1; i < next_point; i++)
1238 {
1239 T this_real = _the_buffers[re][i];
1240
1241 if( nan_as_hex(this_real) != float_as_hex(this_real) )
1242 {
1243 ++intervening_point_touched;
1244 }
1245 }
1246 }
1247 }
1248 else if( is_interleaved() )
1249 {
1250 if( this_point < _the_buffers[re].size() && this_point + 1 != next_point)
1251 {
1252 // NOTE whereas real and planar initialize i = this_point+1,
1253 // we want this_point+2 for interleaved so that we skip the
1254 // imaginary value of the point
1255 for( size_t i = this_point+2; i < next_point; i++)
1256 {
1257 T this_real = _the_buffers[interleaved][i];
1258
1259 if( nan_as_hex(this_real) != float_as_hex(this_real) )
1260 {
1261 ++intervening_point_touched;
1262 }
1263 }
1264 }
1265 }
1266 else
1267 throw std::runtime_error( "invalid layout in make_sure_memory_between_data_points_was_not_touched()" );
1268 }
1269 }
1270 }
1271 }
1272
1273 EXPECT_EQ( 0, intervening_point_touched );
1274 }
1275 };
1276
1277 #endif
1278