1 
2 // =================================================================================================
3 // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
4 // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
5 // width of 100 characters per line.
6 //
7 // Author(s):
8 //   Cedric Nugteren <www.cedricnugteren.nl>
9 //
10 // This file implements the Xamax class (see the header for information about the class).
11 //
12 // =================================================================================================
13 
14 #include "routines/level1/xamax.hpp"
15 
16 #include <string>
17 #include <vector>
18 
19 namespace clblast {
20 // =================================================================================================
21 
22 // Constructor: forwards to base class constructor
23 template <typename T>
Xamax(Queue & queue,EventPointer event,const std::string & name)24 Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name):
25     Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
26     #include "../../kernels/level1/xamax.opencl"
27     }) {
28 }
29 
30 // =================================================================================================
31 
32 // The main routine
33 template <typename T>
DoAmax(const size_t n,const Buffer<unsigned int> & imax_buffer,const size_t imax_offset,const Buffer<T> & x_buffer,const size_t x_offset,const size_t x_inc)34 void Xamax<T>::DoAmax(const size_t n,
35                       const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
36                       const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
37 
38   // Makes sure all dimensions are larger than zero
39   if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
40 
41   // Tests the vectors for validity
42   TestVectorX(n, x_buffer, x_offset, x_inc);
43   TestVectorIndex(1, imax_buffer, imax_offset);
44 
45   // Retrieves the Xamax kernels from the compiled binary
46   auto kernel1 = Kernel(program_, "Xamax");
47   auto kernel2 = Kernel(program_, "XamaxEpilogue");
48 
49   // Creates the buffer for intermediate values
50   auto temp_size = 2*db_["WGS2"];
51   auto temp_buffer1 = Buffer<T>(context_, temp_size);
52   auto temp_buffer2 = Buffer<unsigned int>(context_, temp_size);
53 
54   // Sets the kernel arguments
55   kernel1.SetArgument(0, static_cast<int>(n));
56   kernel1.SetArgument(1, x_buffer());
57   kernel1.SetArgument(2, static_cast<int>(x_offset));
58   kernel1.SetArgument(3, static_cast<int>(x_inc));
59   kernel1.SetArgument(4, temp_buffer1());
60   kernel1.SetArgument(5, temp_buffer2());
61 
62   // Event waiting list
63   auto eventWaitList = std::vector<Event>();
64 
65   // Launches the main kernel
66   auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
67   auto local1 = std::vector<size_t>{db_["WGS1"]};
68   auto kernelEvent = Event();
69   RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
70   eventWaitList.push_back(kernelEvent);
71 
72   // Sets the arguments for the epilogue kernel
73   kernel2.SetArgument(0, temp_buffer1());
74   kernel2.SetArgument(1, temp_buffer2());
75   kernel2.SetArgument(2, imax_buffer());
76   kernel2.SetArgument(3, static_cast<int>(imax_offset));
77 
78   // Launches the epilogue kernel
79   auto global2 = std::vector<size_t>{db_["WGS2"]};
80   auto local2 = std::vector<size_t>{db_["WGS2"]};
81   RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
82 }
83 
84 // =================================================================================================
85 
86 // Compiles the templated class
87 template class Xamax<half>;
88 template class Xamax<float>;
89 template class Xamax<double>;
90 template class Xamax<float2>;
91 template class Xamax<double2>;
92 
93 // =================================================================================================
94 } // namespace clblast
95