1
2 // =================================================================================================
3 // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
4 // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
5 // width of 100 characters per line.
6 //
7 // Author(s):
8 // Cedric Nugteren <www.cedricnugteren.nl>
9 //
10 // This file implements the Xamax class (see the header for information about the class).
11 //
12 // =================================================================================================
13
14 #include "routines/level1/xamax.hpp"
15
16 #include <string>
17 #include <vector>
18
19 namespace clblast {
20 // =================================================================================================
21
22 // Constructor: forwards to base class constructor
23 template <typename T>
Xamax(Queue & queue,EventPointer event,const std::string & name)24 Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name):
25 Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
26 #include "../../kernels/level1/xamax.opencl"
27 }) {
28 }
29
30 // =================================================================================================
31
32 // The main routine
33 template <typename T>
DoAmax(const size_t n,const Buffer<unsigned int> & imax_buffer,const size_t imax_offset,const Buffer<T> & x_buffer,const size_t x_offset,const size_t x_inc)34 void Xamax<T>::DoAmax(const size_t n,
35 const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
36 const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
37
38 // Makes sure all dimensions are larger than zero
39 if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
40
41 // Tests the vectors for validity
42 TestVectorX(n, x_buffer, x_offset, x_inc);
43 TestVectorIndex(1, imax_buffer, imax_offset);
44
45 // Retrieves the Xamax kernels from the compiled binary
46 auto kernel1 = Kernel(program_, "Xamax");
47 auto kernel2 = Kernel(program_, "XamaxEpilogue");
48
49 // Creates the buffer for intermediate values
50 auto temp_size = 2*db_["WGS2"];
51 auto temp_buffer1 = Buffer<T>(context_, temp_size);
52 auto temp_buffer2 = Buffer<unsigned int>(context_, temp_size);
53
54 // Sets the kernel arguments
55 kernel1.SetArgument(0, static_cast<int>(n));
56 kernel1.SetArgument(1, x_buffer());
57 kernel1.SetArgument(2, static_cast<int>(x_offset));
58 kernel1.SetArgument(3, static_cast<int>(x_inc));
59 kernel1.SetArgument(4, temp_buffer1());
60 kernel1.SetArgument(5, temp_buffer2());
61
62 // Event waiting list
63 auto eventWaitList = std::vector<Event>();
64
65 // Launches the main kernel
66 auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
67 auto local1 = std::vector<size_t>{db_["WGS1"]};
68 auto kernelEvent = Event();
69 RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
70 eventWaitList.push_back(kernelEvent);
71
72 // Sets the arguments for the epilogue kernel
73 kernel2.SetArgument(0, temp_buffer1());
74 kernel2.SetArgument(1, temp_buffer2());
75 kernel2.SetArgument(2, imax_buffer());
76 kernel2.SetArgument(3, static_cast<int>(imax_offset));
77
78 // Launches the epilogue kernel
79 auto global2 = std::vector<size_t>{db_["WGS2"]};
80 auto local2 = std::vector<size_t>{db_["WGS2"]};
81 RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
82 }
83
84 // =================================================================================================
85
86 // Compiles the templated class
87 template class Xamax<half>;
88 template class Xamax<float>;
89 template class Xamax<double>;
90 template class Xamax<float2>;
91 template class Xamax<double2>;
92
93 // =================================================================================================
94 } // namespace clblast
95