1 /* ************************************************************************ 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * ************************************************************************/ 16 17 18 #pragma once 19 #if !defined( CLIENT_H ) 20 #define CLIENT_H 21 22 // Boost headers that we want to use 23 // #define BOOST_PROGRAM_OPTIONS_DYN_LINK 24 #include <boost/program_options.hpp> 25 #include "stdafx.h" 26 #include "../statTimer/statisticalTimer.extern.h" 27 #include "../include/unicode.compatibility.h" 28 29 #include <fftw3.h> 30 31 typedef unsigned char uint24_t[3]; 32 33 #define CALLBCKSTR(...) #__VA_ARGS__ 34 #define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__) 35 36 #define BATCH_LENGTH 1024 37 38 #define ConvertToFloat typedef unsigned char uint24_t[3]; \n \ 39 float convert24To32bit(__global void* in, uint inoffset, __global void* userdata) \n \ 40 { \n \ 41 __global uint24_t* inData = (__global uint24_t*)in; \n \ 42 float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \ 43 return val; \n \ 44 } 45 46 #define ConvertToFloat_KERNEL typedef unsigned char uint24_t[3]; \n \ 47 __kernel void convert24To32bit (__global void *input, __global void *output) \n \ 48 { \n \ 49 uint inoffset = get_global_id(0); \n \ 50 __global uint24_t* inData = (__global uint24_t*)input; \n \ 51 float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \ 52 *((__global float*)output + inoffset) = val; \n \ 53 } \n 54 55 #define MagnitudeExtraction void extractMagnitude(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput) \n \ 56 { \n \ 57 float magnitude = sqrt(fftoutput.x * fftoutput.x + fftoutput.y * fftoutput.y); \n \ 58 *((__global float*)output + outoffset) = magnitude; \n \ 59 } \n 60 61 #define MagnitudeExtraction_KERNEL __kernel void extractMagnitude(__global float2 *output, __global float *magoutput) \n \ 62 { \n \ 63 uint outoffset = get_global_id(0); \n \ 64 float magnitude = sqrt(output[outoffset].x * output[outoffset].x + output[outoffset].y * output[outoffset].y); \n \ 65 *(magoutput + outoffset) = magnitude; \n \ 66 } \n 67 68 template < typename T > 69 void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 70 clfftDim dim, clfftPrecision precision, cl_uint profile_count); 71 72 template < typename T > 73 void runR2C_FFT_WithCallback(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue, 74 size_t* inlengths, clfftDim dim, clfftPrecision precision, 75 size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count); 76 77 template < typename T > 78 void runR2C_FFT_PreAndPostprocessKernel(std::auto_ptr< clfftSetupData > setupData, cl_context context, 79 cl_command_queue commandQueue, cl_device_id device_id, 80 size_t* inlengths, clfftDim dim, clfftPrecision precision, 81 size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count); 82 83 fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size, 84 clfftLayout in_layout, clfftDim dim); 85 86 template < typename T1, typename T2> 87 bool compare(T1 *refData, std::vector< T2 > data, 88 size_t length, const float epsilon = 1e-6f); 89 90 #ifdef WIN32 91 92 struct Timer 93 { 94 LARGE_INTEGER start, stop, freq; 95 96 public: TimerTimer97 Timer() { QueryPerformanceFrequency( &freq ); } 98 StartTimer99 void Start() { QueryPerformanceCounter(&start); } SampleTimer100 double Sample() 101 { 102 QueryPerformanceCounter ( &stop ); 103 double time = (double)(stop.QuadPart-start.QuadPart) / (double)(freq.QuadPart); 104 return time; 105 } 106 }; 107 108 #elif defined(__APPLE__) || defined(__MACOSX) 109 110 #include <mach/clock.h> 111 #include <mach/mach.h> 112 113 struct Timer 114 { 115 clock_serv_t clock; 116 mach_timespec_t start, end; 117 118 public: TimerTimer119 Timer() { host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock); } ~TimerTimer120 ~Timer() { mach_port_deallocate(mach_task_self(), clock); } 121 StartTimer122 void Start() { clock_get_time(clock, &start); } SampleTimer123 double Sample() 124 { 125 clock_get_time(clock, &end); 126 double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; 127 return time * 1E-9; 128 } 129 }; 130 131 #else 132 133 #include <time.h> 134 #include <math.h> 135 136 struct Timer 137 { 138 struct timespec start, end; 139 140 public: TimerTimer141 Timer() { } 142 StartTimer143 void Start() { clock_gettime(CLOCK_MONOTONIC, &start); } SampleTimer144 double Sample() 145 { 146 clock_gettime(CLOCK_MONOTONIC, &end); 147 double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; 148 return time * 1E-9; 149 } 150 }; 151 152 #endif 153 154 #endif 155