1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #pragma once
19 #if !defined( CLIENT_H )
20 #define CLIENT_H
21 
22 //	Boost headers that we want to use
23 //	#define BOOST_PROGRAM_OPTIONS_DYN_LINK
24 #include <boost/program_options.hpp>
25 #include "stdafx.h"
26 #include "../statTimer/statisticalTimer.extern.h"
27 #include "../include/unicode.compatibility.h"
28 
29 #include <fftw3.h>
30 
// Three unsigned bytes; the conversion kernels in this header read one such
// triple as a single 24-bit value.
typedef unsigned char uint24_t[3];

// CALLBCKSTR turns its arguments into a string literal exactly as written.
// STRINGIFY adds one level of indirection so that macro arguments are expanded
// first and the expansion is what gets stringified.
#define CALLBCKSTR(...) #__VA_ARGS__
#define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__)

// NOTE(review): presumably the number of transforms per batch - confirm at the
// use sites, which are not visible in this header.
#define BATCH_LENGTH 1024
37 
// OpenCL C source for the function convert24To32bit, written as a bare token
// sequence so that STRINGIFY(ConvertToFloat) yields it as a string literal.
// The stray backslash-n tokens become newline escapes in that literal, so the
// token sequence must not be altered; only the layout is free.
//
// The function views `in` as an array of 3-byte elements, picks the element at
// index `inoffset`, combines its bytes as (b0 << 16) | (b1 << 8) | b2 and
// returns that integer converted to float. `userdata` is not read.
// NOTE(review): the signature looks like a clFFT pre-callback - confirm at the
// call site.
#define ConvertToFloat \
    typedef unsigned char uint24_t[3]; \n \
    float convert24To32bit(__global void* in, uint inoffset, __global void* userdata) \n \
    { \n \
        __global uint24_t* inData = (__global uint24_t*)in; \n \
        float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \
        return val; \n \
    }
45 
// OpenCL C source for the stand-alone kernel convert24To32bit, written as a
// bare token sequence to be stringified with STRINGIFY. The stray backslash-n
// tokens become newline escapes in the resulting literal, so the token sequence
// must stay as is.
//
// Each work-item uses get_global_id(0) as its element index, reads the 3-byte
// element at that index from `input`, combines the bytes as
// (b0 << 16) | (b1 << 8) | b2 and stores the value as a float at the same index
// of `output`.
#define ConvertToFloat_KERNEL \
    typedef unsigned char uint24_t[3]; \n \
    __kernel void convert24To32bit (__global void *input, __global void *output) \n \
    { \n \
        uint inoffset = get_global_id(0); \n \
        __global uint24_t* inData = (__global uint24_t*)input; \n \
        float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \
        *((__global float*)output + inoffset) = val; \n \
    } \n
54 
// OpenCL C source for the function extractMagnitude, written as a bare token
// sequence to be stringified with STRINGIFY (the stray backslash-n tokens turn
// into newline escapes, so the tokens themselves must not change).
//
// The function computes sqrt(x*x + y*y) of the float2 `fftoutput` and stores it
// as a float at index `outoffset` of `output`. `userdata` is not read.
// NOTE(review): the signature looks like a clFFT post-callback - confirm at the
// call site.
#define MagnitudeExtraction \
    void extractMagnitude(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput) \n \
    { \n \
        float magnitude = sqrt(fftoutput.x * fftoutput.x + fftoutput.y * fftoutput.y); \n \
        *((__global float*)output + outoffset) = magnitude; \n \
    } \n
60 
// OpenCL C source for the stand-alone kernel extractMagnitude, written as a
// bare token sequence to be stringified with STRINGIFY (the stray backslash-n
// tokens turn into newline escapes, so the tokens themselves must not change).
//
// Each work-item uses get_global_id(0) as its index, computes sqrt(x*x + y*y)
// of the float2 at that index of `output` and writes the result to the same
// index of `magoutput`.
#define MagnitudeExtraction_KERNEL \
    __kernel void extractMagnitude(__global float2 *output, __global float *magoutput) \n \
    { \n \
        uint outoffset = get_global_id(0); \n \
        float magnitude = sqrt(output[outoffset].x * output[outoffset].x + output[outoffset].y * output[outoffset].y); \n \
        *(magoutput + outoffset) = magnitude; \n \
    } \n
67 
68 template < typename T >
69 void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize,
70 				   clfftDim dim, clfftPrecision precision,  cl_uint profile_count);
71 
72 template < typename T >
73 void runR2C_FFT_WithCallback(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
74 						size_t* inlengths, clfftDim dim, clfftPrecision precision,
75 						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
76 
77 template < typename T >
78 void runR2C_FFT_PreAndPostprocessKernel(std::auto_ptr< clfftSetupData > setupData, cl_context context,
79 							cl_command_queue commandQueue, cl_device_id device_id,
80 							size_t* inlengths, clfftDim dim, clfftPrecision precision,
81 							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
82 
83 fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
84 									clfftLayout in_layout, clfftDim dim);
85 
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably checks `length` elements of `data` against `refData`
// with tolerance `epsilon` - confirm the exact comparison rule against the
// implementation.
//
// `data` is taken by value, so every call copies the vector. `epsilon` defaults
// to 1e-6f when omitted.
template< typename T1, typename T2 >
bool compare(
    T1*                refData,
    std::vector< T2 >  data,
    size_t             length,
    const float        epsilon = 1e-6f );
89 
90 #ifdef WIN32
91 
92 struct Timer
93 {
94     LARGE_INTEGER start, stop, freq;
95 
96 public:
TimerTimer97     Timer() { QueryPerformanceFrequency( &freq ); }
98 
StartTimer99     void Start() { QueryPerformanceCounter(&start); }
SampleTimer100     double Sample()
101     {
102         QueryPerformanceCounter  ( &stop );
103         double time = (double)(stop.QuadPart-start.QuadPart) / (double)(freq.QuadPart);
104         return time;
105     }
106 };
107 
108 #elif defined(__APPLE__) || defined(__MACOSX)
109 
110 #include <mach/clock.h>
111 #include <mach/mach.h>
112 
113 struct Timer
114 {
115     clock_serv_t clock;
116     mach_timespec_t start, end;
117 
118 public:
TimerTimer119     Timer() { host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock); }
~TimerTimer120     ~Timer() { mach_port_deallocate(mach_task_self(), clock); }
121 
StartTimer122     void Start() { clock_get_time(clock, &start); }
SampleTimer123     double Sample()
124     {
125         clock_get_time(clock, &end);
126         double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec;
127         return time * 1E-9;
128     }
129 };
130 
131 #else
132 
133 #include <time.h>
134 #include <math.h>
135 
// Stopwatch built on the POSIX monotonic clock.
struct Timer
{
    struct timespec start, end;

public:
    // Zero both time stamps so that Sample() without a preceding Start() reads
    // defined values instead of indeterminate ones.
    Timer() : start(), end() { }

    // Stores the current CLOCK_MONOTONIC reading as the reference point.
    void Start() { clock_gettime(CLOCK_MONOTONIC, &start); }

    // Reads CLOCK_MONOTONIC again and returns the seconds elapsed since Start().
    double Sample()
    {
        clock_gettime(CLOCK_MONOTONIC, &end);

        // Do the arithmetic in double: multiplying the second difference by
        // 1000000000L in integer arithmetic overflows after roughly two seconds
        // on targets where long is 32 bits wide.
        const double wholeSeconds = static_cast<double>(end.tv_sec - start.tv_sec);
        const double nanoseconds  = static_cast<double>(end.tv_nsec - start.tv_nsec);
        return wholeSeconds + nanoseconds * 1E-9;
    }
};
151 
152 #endif
153 
154 #endif
155