1 /* =========================================================================
2    Copyright (c) 2010-2016, Institute for Microelectronics,
3                             Institute for Analysis and Scientific Computing,
4                             TU Wien.
5    Portions of this software are copyright by UChicago Argonne, LLC.
6 
7                             -----------------
8                   ViennaCL - The Vienna Computing Library
9                             -----------------
10 
11    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
12 
13    (A list of authors and contributors can be found in the PDF manual)
14 
15    License:         MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
19 /** \file tests/src/libviennacl_blas1.cpp  Testing the BLAS level 1 routines in the ViennaCL BLAS-like shared library
20 *   \test Testing the BLAS level 1 routines in the ViennaCL BLAS-like shared library
21 **/
22 
23 
24 // include necessary system headers
25 #include <iostream>
26 #include <vector>
27 #include <cmath>
28 
// Interface of the ViennaCL BLAS-like shared library under test:
30 #include "viennacl.hpp"
31 
32 #include "viennacl/vector.hpp"
33 
/** Signed relative difference of two scalars.
 *
 *  Returns (s1 - s2) / max(|s1|, |s2|), or zero when both values compare equal.
 *
 *  If the two values are not equal but the quotient is not a non-zero number
 *  (integer division truncated it to zero, or it evaluated to NaN because an
 *  operand is NaN or infinite), +1 or -1 is returned instead, so that callers
 *  comparing the magnitude against a tolerance still see a mismatch.
 *
 *  @param s1  First value
 *  @param s2  Second value
 *  @return    Signed relative difference; zero only if s1 and s2 compare equal
 */
template<typename ScalarType>
ScalarType diff(ScalarType const & s1, ScalarType const & s2)
{
   // Equality is expressed via ordering comparisons (as in the rest of this file,
   // which avoids '==' on floating point types). A NaN operand fails this test
   // and is therefore treated as a mismatch.
   if (s1 >= s2 && s1 <= s2)
      return ScalarType(0);

   ScalarType const scale = std::max(static_cast<ScalarType>(std::fabs(static_cast<double>(s1))),
                                     static_cast<ScalarType>(std::fabs(static_cast<double>(s2))));
   ScalarType const rel = (s1 - s2) / scale;
   if (rel > 0 || rel < 0)
      return rel;

   // Values differ, but the quotient collapsed to zero (integral types) or is NaN:
   // report a full-size error carrying the sign of (s1 - s2) where it is defined.
   return (s1 > s2) ? ScalarType(1) : ScalarType(-1);
}
42 
43 template<typename ScalarType, typename ViennaCLVectorType>
44 ScalarType diff(std::vector<ScalarType> const & v1, ViennaCLVectorType const & vcl_vec)
45 {
46    std::vector<ScalarType> v2_cpu(vcl_vec.size());
47    viennacl::backend::finish();
48    viennacl::copy(vcl_vec, v2_cpu);
49 
50    ScalarType inf_norm = 0;
51    for (unsigned int i=0;i<v1.size(); ++i)
52    {
53       if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
54          v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
55       else
56          v2_cpu[i] = 0.0;
57 
58       if (v2_cpu[i] > inf_norm)
59         inf_norm = v2_cpu[i];
60    }
61 
62    return inf_norm;
63 }
64 
/** Compares two quantities via diff() and terminates the test on mismatch.
 *
 *  The magnitude of diff(t, u), converted to EpsilonT, is compared against eps.
 *  If it is not within the tolerance, the error is written to std::cerr and the
 *  process exits with EXIT_FAILURE. Otherwise "SUCCESS " is written to std::cout.
 *
 *  @param t    Reference quantity
 *  @param u    Quantity to be verified
 *  @param eps  Largest admissible relative error
 */
template<typename T, typename U, typename EpsilonT>
void check(T const & t, U const & u, EpsilonT eps)
{
  EpsilonT rel_error = std::fabs(static_cast<EpsilonT>(diff(t,u)));
  // Written as a negated '<=' so that a NaN error (which fails every comparison) counts as failure.
  if ( !(rel_error <= eps) )
  {
    std::cerr << "Relative error: " << rel_error << std::endl;
    std::cerr << "Aborting!" << std::endl;
    exit(EXIT_FAILURE);
  }
  std::cout << "SUCCESS ";
}
77 
main()78 int main()
79 {
80   std::size_t size  = 10; // at least 7
81   float  eps_float  = 1e-5f;
82   double eps_double = 1e-12;
83 
84   float  ref_float_alpha;
85   double ref_double_alpha;
86 
87   std::vector<float> ref_float_x(size, 1.0f);
88   std::vector<float> ref_float_y(size, 2.0f);
89 
90   std::vector<double> ref_double_x(size, 1.0);
91   std::vector<double> ref_double_y(size, 2.0);
92 
93   ViennaCLBackend my_backend;
94   ViennaCLBackendCreate(&my_backend);
95 
96   // Host setup
97   float host_float_alpha = 0;
98   viennacl::vector<float> host_float_x = viennacl::scalar_vector<float>(size, 1.0f, viennacl::context(viennacl::MAIN_MEMORY));
99   viennacl::vector<float> host_float_y = viennacl::scalar_vector<float>(size, 2.0f, viennacl::context(viennacl::MAIN_MEMORY));
100 
101   double host_double_alpha = 0;
102   viennacl::vector<double> host_double_x = viennacl::scalar_vector<double>(size, 1.0, viennacl::context(viennacl::MAIN_MEMORY));
103   viennacl::vector<double> host_double_y = viennacl::scalar_vector<double>(size, 2.0, viennacl::context(viennacl::MAIN_MEMORY));
104 
105   // CUDA setup
106 #ifdef VIENNACL_WITH_CUDA
107   float cuda_float_alpha = 0;
108   viennacl::vector<float> cuda_float_x = viennacl::scalar_vector<float>(size, 1.0f, viennacl::context(viennacl::CUDA_MEMORY));
109   viennacl::vector<float> cuda_float_y = viennacl::scalar_vector<float>(size, 2.0f, viennacl::context(viennacl::CUDA_MEMORY));
110 
111   double cuda_double_alpha = 0;
112   viennacl::vector<double> cuda_double_x = viennacl::scalar_vector<double>(size, 1.0, viennacl::context(viennacl::CUDA_MEMORY));
113   viennacl::vector<double> cuda_double_y = viennacl::scalar_vector<double>(size, 2.0, viennacl::context(viennacl::CUDA_MEMORY));
114 #endif
115 
116   // OpenCL setup
117 #ifdef VIENNACL_WITH_OPENCL
118   ViennaCLInt context_id = 0;
119   float opencl_float_alpha = 0;
120   viennacl::vector<float> opencl_float_x = viennacl::scalar_vector<float>(size, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id)));
121   viennacl::vector<float> opencl_float_y = viennacl::scalar_vector<float>(size, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id)));
122 
123   double opencl_double_alpha = 0;
124   viennacl::vector<double> *opencl_double_x = NULL;
125   viennacl::vector<double> *opencl_double_y = NULL;
126   if ( viennacl::ocl::current_device().double_support() )
127   {
128     opencl_double_x = new viennacl::vector<double>(viennacl::scalar_vector<double>(size, 1.0, viennacl::context(viennacl::ocl::get_context(context_id))));
129     opencl_double_y = new viennacl::vector<double>(viennacl::scalar_vector<double>(size, 2.0, viennacl::context(viennacl::ocl::get_context(context_id))));
130   }
131 
132   ViennaCLBackendSetOpenCLContextID(my_backend, context_id);
133 #endif
134 
135   // consistency checks:
136   check(ref_float_x, host_float_x, eps_float);
137   check(ref_float_y, host_float_y, eps_float);
138   check(ref_double_x, host_double_x, eps_double);
139   check(ref_double_y, host_double_y, eps_double);
140 #ifdef VIENNACL_WITH_CUDA
141   check(ref_float_x, cuda_float_x, eps_float);
142   check(ref_float_y, cuda_float_y, eps_float);
143   check(ref_double_x, cuda_double_x, eps_double);
144   check(ref_double_y, cuda_double_y, eps_double);
145 #endif
146 #ifdef VIENNACL_WITH_OPENCL
147   check(ref_float_x, opencl_float_x, eps_float);
148   check(ref_float_y, opencl_float_y, eps_float);
149   if ( viennacl::ocl::current_device().double_support() )
150   {
151     check(ref_double_x, *opencl_double_x, eps_double);
152     check(ref_double_y, *opencl_double_y, eps_double);
153   }
154 #endif
155 
156   // ASUM
157   std::cout << std::endl << "-- Testing xASUM...";
158   ref_float_alpha  = 0;
159   ref_double_alpha = 0;
160   for (std::size_t i=0; i<size/4; ++i)
161   {
162     ref_float_alpha  += std::fabs(ref_float_x[2 + 3*i]);
163     ref_double_alpha += std::fabs(ref_double_x[2 + 3*i]);
164   }
165 
166   std::cout << std::endl << "Host: ";
167   ViennaCLHostSasum(my_backend, ViennaCLInt(size/4),
168                     &host_float_alpha,
169                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 3);
170   check(ref_float_alpha, host_float_alpha, eps_float);
171   ViennaCLHostDasum(my_backend, ViennaCLInt(size/4),
172                     &host_double_alpha,
173                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 3);
174   check(ref_double_alpha, host_double_alpha, eps_double);
175 
176 
177 #ifdef VIENNACL_WITH_CUDA
178   std::cout << std::endl << "CUDA: ";
179   ViennaCLCUDASasum(my_backend, ViennaCLInt(size/4),
180                     &cuda_float_alpha,
181                     viennacl::cuda_arg(cuda_float_x), 2, 3);
182   check(ref_float_alpha, cuda_float_alpha, eps_float);
183   ViennaCLCUDADasum(my_backend, ViennaCLInt(size/4),
184                     &cuda_double_alpha,
185                     viennacl::cuda_arg(cuda_double_x), 2, 3);
186   check(ref_double_alpha, cuda_double_alpha, eps_double);
187 #endif
188 
189 #ifdef VIENNACL_WITH_OPENCL
190   std::cout << std::endl << "OpenCL: ";
191   ViennaCLOpenCLSasum(my_backend, ViennaCLInt(size/4),
192                       &opencl_float_alpha,
193                       viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3);
194   check(ref_float_alpha, opencl_float_alpha, eps_float);
195   if ( viennacl::ocl::current_device().double_support() )
196   {
197     ViennaCLOpenCLDasum(my_backend, ViennaCLInt(size/4),
198                         &opencl_double_alpha,
199                         viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3);
200     check(ref_double_alpha, opencl_double_alpha, eps_double);
201   }
202 #endif
203 
204 
205 
206   // AXPY
207   std::cout << std::endl << "-- Testing xAXPY...";
208   for (std::size_t i=0; i<size/3; ++i)
209   {
210     ref_float_y[1 + 2*i]  += 2.0f * ref_float_x[0 + 2*i];
211     ref_double_y[1 + 2*i] += 2.0  * ref_double_x[0 + 2*i];
212   }
213 
214   std::cout << std::endl << "Host: ";
215   ViennaCLHostSaxpy(my_backend, ViennaCLInt(size/3),
216                     2.0f,
217                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 0, 2,
218                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 2);
219   check(ref_float_x, host_float_x, eps_float);
220   check(ref_float_y, host_float_y, eps_float);
221   ViennaCLHostDaxpy(my_backend, ViennaCLInt(size/3),
222                     2.0,
223                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 0, 2,
224                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 2);
225   check(ref_double_x, host_double_x, eps_double);
226   check(ref_double_y, host_double_y, eps_double);
227 
228 
229 #ifdef VIENNACL_WITH_CUDA
230   std::cout << std::endl << "CUDA: ";
231   ViennaCLCUDASaxpy(my_backend, ViennaCLInt(size/3),
232                     2.0f,
233                     viennacl::cuda_arg(cuda_float_x), 0, 2,
234                     viennacl::cuda_arg(cuda_float_y), 1, 2);
235   check(ref_float_x, cuda_float_x, eps_float);
236   check(ref_float_y, cuda_float_y, eps_float);
237   ViennaCLCUDADaxpy(my_backend, ViennaCLInt(size/3),
238                     2.0,
239                     viennacl::cuda_arg(cuda_double_x), 0, 2,
240                     viennacl::cuda_arg(cuda_double_y), 1, 2);
241   check(ref_double_x, cuda_double_x, eps_double);
242   check(ref_double_y, cuda_double_y, eps_double);
243 #endif
244 
245 #ifdef VIENNACL_WITH_OPENCL
246   std::cout << std::endl << "OpenCL: ";
247   ViennaCLOpenCLSaxpy(my_backend, ViennaCLInt(size/3),
248                       2.0f,
249                       viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2,
250                       viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2);
251   check(ref_float_x, opencl_float_x, eps_float);
252   check(ref_float_y, opencl_float_y, eps_float);
253   if ( viennacl::ocl::current_device().double_support() )
254   {
255     ViennaCLOpenCLDaxpy(my_backend, ViennaCLInt(size/3),
256                         2.0,
257                         viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2,
258                         viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2);
259     check(ref_double_x, *opencl_double_x, eps_double);
260     check(ref_double_y, *opencl_double_y, eps_double);
261   }
262 #endif
263 
264 
265 
266   // COPY
267   std::cout << std::endl << "-- Testing xCOPY...";
268   for (std::size_t i=0; i<size/3; ++i)
269   {
270     ref_float_y[0 + 2*i]  = ref_float_x[1 + 2*i];
271     ref_double_y[0 + 2*i] = ref_double_x[1 + 2*i];
272   }
273 
274   std::cout << std::endl << "Host: ";
275   ViennaCLHostScopy(my_backend, ViennaCLInt(size/3),
276                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2,
277                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 0, 2);
278   check(ref_float_x, host_float_x, eps_float);
279   check(ref_float_y, host_float_y, eps_float);
280   ViennaCLHostDcopy(my_backend, ViennaCLInt(size/3),
281                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2,
282                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 0, 2);
283   check(ref_double_x, host_double_x, eps_double);
284   check(ref_double_y, host_double_y, eps_double);
285 
286 
287 #ifdef VIENNACL_WITH_CUDA
288   std::cout << std::endl << "CUDA: ";
289   ViennaCLCUDAScopy(my_backend, ViennaCLInt(size/3),
290                     viennacl::cuda_arg(cuda_float_x), 1, 2,
291                     viennacl::cuda_arg(cuda_float_y), 0, 2);
292   check(ref_float_x, cuda_float_x, eps_float);
293   check(ref_float_y, cuda_float_y, eps_float);
294   ViennaCLCUDADcopy(my_backend, ViennaCLInt(size/3),
295                     viennacl::cuda_arg(cuda_double_x), 1, 2,
296                     viennacl::cuda_arg(cuda_double_y), 0, 2);
297   check(ref_double_x, cuda_double_x, eps_double);
298   check(ref_double_y, cuda_double_y, eps_double);
299 #endif
300 
301 #ifdef VIENNACL_WITH_OPENCL
302   std::cout << std::endl << "OpenCL: ";
303   ViennaCLOpenCLScopy(my_backend, ViennaCLInt(size/3),
304                       viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2,
305                       viennacl::traits::opencl_handle(opencl_float_y).get(), 0, 2);
306   check(ref_float_x, opencl_float_x, eps_float);
307   check(ref_float_y, opencl_float_y, eps_float);
308   if ( viennacl::ocl::current_device().double_support() )
309   {
310     ViennaCLOpenCLDcopy(my_backend, ViennaCLInt(size/3),
311                         viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2,
312                         viennacl::traits::opencl_handle(*opencl_double_y).get(), 0, 2);
313     check(ref_double_x, *opencl_double_x, eps_double);
314     check(ref_double_y, *opencl_double_y, eps_double);
315   }
316 #endif
317 
318 
319 
320   // DOT
321   std::cout << std::endl << "-- Testing xDOT...";
322   ref_float_alpha  = 0;
323   ref_double_alpha = 0;
324   for (std::size_t i=0; i<size/2; ++i)
325   {
326     ref_float_alpha  += ref_float_y[3 + i]  * ref_float_x[2 + i];
327     ref_double_alpha += ref_double_y[3 + i] * ref_double_x[2 + i];
328   }
329 
330   std::cout << std::endl << "Host: ";
331   ViennaCLHostSdot(my_backend, ViennaCLInt(size/2),
332                    &host_float_alpha,
333                    viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 1,
334                    viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 3, 1);
335   check(ref_float_alpha, host_float_alpha, eps_float);
336   ViennaCLHostDdot(my_backend, ViennaCLInt(size/2),
337                    &host_double_alpha,
338                    viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 1,
339                    viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 3, 1);
340   check(ref_double_alpha, host_double_alpha, eps_double);
341 
342 
343 #ifdef VIENNACL_WITH_CUDA
344   std::cout << std::endl << "CUDA: ";
345   ViennaCLCUDASdot(my_backend, ViennaCLInt(size/2),
346                    &cuda_float_alpha,
347                    viennacl::cuda_arg(cuda_float_x), 2, 1,
348                    viennacl::cuda_arg(cuda_float_y), 3, 1);
349   check(ref_float_alpha, cuda_float_alpha, eps_float);
350   ViennaCLCUDADdot(my_backend, ViennaCLInt(size/2),
351                    &cuda_double_alpha,
352                    viennacl::cuda_arg(cuda_double_x), 2, 1,
353                    viennacl::cuda_arg(cuda_double_y), 3, 1);
354   check(ref_double_alpha, cuda_double_alpha, eps_double);
355 #endif
356 
357 #ifdef VIENNACL_WITH_OPENCL
358   std::cout << std::endl << "OpenCL: ";
359   ViennaCLOpenCLSdot(my_backend, ViennaCLInt(size/2),
360                      &opencl_float_alpha,
361                      viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 1,
362                      viennacl::traits::opencl_handle(opencl_float_y).get(), 3, 1);
363   check(ref_float_alpha, opencl_float_alpha, eps_float);
364   if ( viennacl::ocl::current_device().double_support() )
365   {
366     ViennaCLOpenCLDdot(my_backend, ViennaCLInt(size/2),
367                        &opencl_double_alpha,
368                        viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 1,
369                        viennacl::traits::opencl_handle(*opencl_double_y).get(), 3, 1);
370     check(ref_double_alpha, opencl_double_alpha, eps_double);
371   }
372 #endif
373 
374 
375 
376   // NRM2
377   std::cout << std::endl << "-- Testing xNRM2...";
378   ref_float_alpha  = 0;
379   ref_double_alpha = 0;
380   for (std::size_t i=0; i<size/3; ++i)
381   {
382     ref_float_alpha  += ref_float_x[1 + 2*i]  * ref_float_x[1 + 2*i];
383     ref_double_alpha += ref_double_x[1 + 2*i] * ref_double_x[1 + 2*i];
384   }
385   ref_float_alpha = std::sqrt(ref_float_alpha);
386   ref_double_alpha = std::sqrt(ref_double_alpha);
387 
388   std::cout << std::endl << "Host: ";
389   ViennaCLHostSnrm2(my_backend, ViennaCLInt(size/3),
390                     &host_float_alpha,
391                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2);
392   check(ref_float_alpha, host_float_alpha, eps_float);
393   ViennaCLHostDnrm2(my_backend, ViennaCLInt(size/3),
394                     &host_double_alpha,
395                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2);
396   check(ref_double_alpha, host_double_alpha, eps_double);
397 
398 
399 #ifdef VIENNACL_WITH_CUDA
400   std::cout << std::endl << "CUDA: ";
401   ViennaCLCUDASnrm2(my_backend, ViennaCLInt(size/3),
402                     &cuda_float_alpha,
403                     viennacl::cuda_arg(cuda_float_x), 1, 2);
404   check(ref_float_alpha, cuda_float_alpha, eps_float);
405   ViennaCLCUDADnrm2(my_backend, ViennaCLInt(size/3),
406                     &cuda_double_alpha,
407                     viennacl::cuda_arg(cuda_double_x), 1, 2);
408   check(ref_double_alpha, cuda_double_alpha, eps_double);
409 #endif
410 
411 #ifdef VIENNACL_WITH_OPENCL
412   std::cout << std::endl << "OpenCL: ";
413   ViennaCLOpenCLSnrm2(my_backend, ViennaCLInt(size/3),
414                       &opencl_float_alpha,
415                       viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2);
416   check(ref_float_alpha, opencl_float_alpha, eps_float);
417   if ( viennacl::ocl::current_device().double_support() )
418   {
419     ViennaCLOpenCLDnrm2(my_backend, ViennaCLInt(size/3),
420                         &opencl_double_alpha,
421                         viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2);
422     check(ref_double_alpha, opencl_double_alpha, eps_double);
423   }
424 #endif
425 
426 
427 
428 
429   // ROT
430   std::cout << std::endl << "-- Testing xROT...";
431   for (std::size_t i=0; i<size/4; ++i)
432   {
433     float tmp            =  0.6f * ref_float_x[2 + 3*i] + 0.8f * ref_float_y[1 + 2*i];
434     ref_float_y[1 + 2*i] = -0.8f * ref_float_x[2 + 3*i] + 0.6f * ref_float_y[1 + 2*i];;
435     ref_float_x[2 + 3*i] = tmp;
436 
437     double tmp2           =  0.6 * ref_double_x[2 + 3*i] + 0.8 * ref_double_y[1 + 2*i];
438     ref_double_y[1 + 2*i] = -0.8 * ref_double_x[2 + 3*i] + 0.6 * ref_double_y[1 + 2*i];;
439     ref_double_x[2 + 3*i] = tmp2;
440   }
441 
442   std::cout << std::endl << "Host: ";
443   ViennaCLHostSrot(my_backend, ViennaCLInt(size/4),
444                    viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 3,
445                    viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 2,
446                    0.6f, 0.8f);
447   check(ref_float_x, host_float_x, eps_float);
448   check(ref_float_y, host_float_y, eps_float);
449   ViennaCLHostDrot(my_backend, ViennaCLInt(size/4),
450                    viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 3,
451                    viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 2,
452                    0.6, 0.8);
453   check(ref_double_x, host_double_x, eps_double);
454   check(ref_double_y, host_double_y, eps_double);
455 
456 
457 #ifdef VIENNACL_WITH_CUDA
458   std::cout << std::endl << "CUDA: ";
459   ViennaCLCUDASrot(my_backend, ViennaCLInt(size/4),
460                    viennacl::cuda_arg(cuda_float_x), 2, 3,
461                    viennacl::cuda_arg(cuda_float_y), 1, 2,
462                    0.6f, 0.8f);
463   check(ref_float_x, cuda_float_x, eps_float);
464   check(ref_float_y, cuda_float_y, eps_float);
465   ViennaCLCUDADrot(my_backend, ViennaCLInt(size/4),
466                    viennacl::cuda_arg(cuda_double_x), 2, 3,
467                    viennacl::cuda_arg(cuda_double_y), 1, 2,
468                    0.6, 0.8);
469   check(ref_double_x, cuda_double_x, eps_double);
470   check(ref_double_y, cuda_double_y, eps_double);
471 #endif
472 
473 #ifdef VIENNACL_WITH_OPENCL
474   std::cout << std::endl << "OpenCL: ";
475   ViennaCLOpenCLSrot(my_backend, ViennaCLInt(size/4),
476                      viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3,
477                      viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2,
478                      0.6f, 0.8f);
479   check(ref_float_x, opencl_float_x, eps_float);
480   check(ref_float_y, opencl_float_y, eps_float);
481   if ( viennacl::ocl::current_device().double_support() )
482   {
483     ViennaCLOpenCLDrot(my_backend, ViennaCLInt(size/4),
484                        viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3,
485                        viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2,
486                        0.6, 0.8);
487     check(ref_double_x, *opencl_double_x, eps_double);
488     check(ref_double_y, *opencl_double_y, eps_double);
489   }
490 #endif
491 
492 
493 
494   // SCAL
495   std::cout << std::endl << "-- Testing xSCAL...";
496   for (std::size_t i=0; i<size/4; ++i)
497   {
498     ref_float_x[1 + 3*i]  *= 2.0f;
499     ref_double_x[1 + 3*i] *= 2.0;
500   }
501 
502   std::cout << std::endl << "Host: ";
503   ViennaCLHostSscal(my_backend, ViennaCLInt(size/4),
504                     2.0f,
505                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 3);
506   check(ref_float_x, host_float_x, eps_float);
507   ViennaCLHostDscal(my_backend, ViennaCLInt(size/4),
508                     2.0,
509                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 3);
510   check(ref_double_x, host_double_x, eps_double);
511 
512 #ifdef VIENNACL_WITH_CUDA
513   std::cout << std::endl << "CUDA: ";
514   ViennaCLCUDASscal(my_backend, ViennaCLInt(size/4),
515                     2.0f,
516                     viennacl::cuda_arg(cuda_float_x), 1, 3);
517   check(ref_float_x, cuda_float_x, eps_float);
518   ViennaCLCUDADscal(my_backend, ViennaCLInt(size/4),
519                     2.0,
520                     viennacl::cuda_arg(cuda_double_x), 1, 3);
521   check(ref_double_x, cuda_double_x, eps_double);
522 #endif
523 
524 #ifdef VIENNACL_WITH_OPENCL
525   std::cout << std::endl << "OpenCL: ";
526   ViennaCLOpenCLSscal(my_backend, ViennaCLInt(size/4),
527                       2.0f,
528                       viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 3);
529   check(ref_float_x, opencl_float_x, eps_float);
530   if ( viennacl::ocl::current_device().double_support() )
531   {
532     ViennaCLOpenCLDscal(my_backend, ViennaCLInt(size/4),
533                         2.0,
534                         viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 3);
535     check(ref_double_x, *opencl_double_x, eps_double);
536   }
537 #endif
538 
539 
540   // SWAP
541   std::cout << std::endl << "-- Testing xSWAP...";
542   for (std::size_t i=0; i<size/3; ++i)
543   {
544     float tmp = ref_float_x[2 + 2*i];
545     ref_float_x[2 + 2*i] = ref_float_y[1 + 2*i];
546     ref_float_y[1 + 2*i] = tmp;
547 
548     double tmp2 = ref_double_x[2 + 2*i];
549     ref_double_x[2 + 2*i] = ref_double_y[1 + 2*i];
550     ref_double_y[1 + 2*i] = tmp2;
551   }
552 
553   std::cout << std::endl << "Host: ";
554   ViennaCLHostSswap(my_backend, ViennaCLInt(size/3),
555                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 2,
556                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 2);
557   check(ref_float_y, host_float_y, eps_float);
558   ViennaCLHostDswap(my_backend, ViennaCLInt(size/3),
559                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 2,
560                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 2);
561   check(ref_double_y, host_double_y, eps_double);
562 
563 
564 #ifdef VIENNACL_WITH_CUDA
565   std::cout << std::endl << "CUDA: ";
566   ViennaCLCUDASswap(my_backend, ViennaCLInt(size/3),
567                     viennacl::cuda_arg(cuda_float_x), 2, 2,
568                     viennacl::cuda_arg(cuda_float_y), 1, 2);
569   check(ref_float_y, cuda_float_y, eps_float);
570   ViennaCLCUDADswap(my_backend, ViennaCLInt(size/3),
571                     viennacl::cuda_arg(cuda_double_x), 2, 2,
572                     viennacl::cuda_arg(cuda_double_y), 1, 2);
573   check(ref_double_y, cuda_double_y, eps_double);
574 #endif
575 
576 #ifdef VIENNACL_WITH_OPENCL
577   std::cout << std::endl << "OpenCL: ";
578   ViennaCLOpenCLSswap(my_backend, ViennaCLInt(size/3),
579                       viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 2,
580                       viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2);
581   check(ref_float_y, opencl_float_y, eps_float);
582   if ( viennacl::ocl::current_device().double_support() )
583   {
584     ViennaCLOpenCLDswap(my_backend, ViennaCLInt(size/3),
585                         viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 2,
586                         viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2);
587     check(ref_double_y, *opencl_double_y, eps_double);
588   }
589 #endif
590 
591 
592   // IAMAX
593   std::cout << std::endl << "-- Testing IxASUM...";
594   ViennaCLInt ref_index = 0;
595   ref_float_alpha = 0;
596   for (std::size_t i=0; i<size/3; ++i)
597   {
598     if (ref_float_x[0 + 2*i] > std::fabs(ref_float_alpha))
599     {
600       ref_index = ViennaCLInt(i);
601       ref_float_alpha = std::fabs(ref_float_x[0 + 2*i]);
602     }
603   }
604 
605   std::cout << std::endl << "Host: ";
606   ViennaCLInt idx = 0;
607   ViennaCLHostiSamax(my_backend, ViennaCLInt(size/3),
608                      &idx,
609                      viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 0, 2);
610   check(static_cast<float>(ref_index), static_cast<float>(idx), eps_float);
611   idx = 0;
612   ViennaCLHostiDamax(my_backend, ViennaCLInt(size/3),
613                      &idx,
614                      viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 0, 2);
615   check(ref_index, idx, eps_double);
616 
617 #ifdef VIENNACL_WITH_CUDA
618   std::cout << std::endl << "CUDA: ";
619   idx = 0;
620   ViennaCLCUDAiSamax(my_backend, ViennaCLInt(size/3),
621                      &idx,
622                      viennacl::cuda_arg(cuda_float_x), 0, 2);
623   check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float);
624   idx = 0;
625   ViennaCLCUDAiDamax(my_backend, ViennaCLInt(size/3),
626                      &idx,
627                      viennacl::cuda_arg(cuda_double_x), 0, 2);
628   check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double);
629 #endif
630 
631 #ifdef VIENNACL_WITH_OPENCL
632   std::cout << std::endl << "OpenCL: ";
633   idx = 0;
634   ViennaCLOpenCLiSamax(my_backend, ViennaCLInt(size/3),
635                        &idx,
636                        viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2);
637   check(ref_float_x[2*static_cast<std::size_t>(ref_index)], ref_float_x[2*static_cast<std::size_t>(idx)], eps_float);
638   idx = 0;
639   if ( viennacl::ocl::current_device().double_support() )
640   {
641     ViennaCLOpenCLiDamax(my_backend, ViennaCLInt(size/3),
642                          &idx,
643                          viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2);
644     check(ref_double_x[2*static_cast<std::size_t>(ref_index)], ref_double_x[2*static_cast<std::size_t>(idx)], eps_double);
645   }
646 #endif
647 
648 #ifdef VIENNACL_WITH_OPENCL
649   //cleanup
650   if ( viennacl::ocl::current_device().double_support() )
651   {
652     delete opencl_double_x;
653     delete opencl_double_y;
654   }
655 #endif
656 
657   ViennaCLBackendDestroy(&my_backend);
658 
659   //
660   //  That's it.
661   //
662   std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl;
663 
664   return EXIT_SUCCESS;
665 }
666 
667