1
2// =================================================================================================
3// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
4// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
5// width of 100 characters per line.
6//
7// Author(s):
8//   Cedric Nugteren <www.cedricnugteren.nl>
9//
10// This file contains common functions for matrix update kernels (Xger, Xher).
11//
12// =================================================================================================
13
14// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string
15// literal). Comment-out this line for syntax-highlighting when developing.
16R"(
17
18// =================================================================================================
19
20// Parameters set by the tuner or by the database. Here they are given a basic default value in case
21// this kernel file is used outside of the CLBlast library.
22
23#ifndef WGS1
24  #define WGS1 8    // The local work-group size in first dimension
25#endif
26#ifndef WGS2
27  #define WGS2 8    // The local work-group size in second dimension
28#endif
29#ifndef WPT
30  #define WPT 1     // The amount of work-per-thread in both dimensions
31#endif
32
33// =================================================================================================
34
35// Returns an element from a vector
36INLINE_FUNC real LoadVector(const int id, const int max,
37                            __global real* gm, const int offset, const int inc,
38                            const int do_conjugate) {
39  if (id < max) {
40    real result = gm[id*inc + offset];
41    if (do_conjugate) {
42      #if defined(ROUTINE_GERC) || defined(ROUTINE_HER) || defined(ROUTINE_HPR) || defined(ROUTINE_HER2) || defined(ROUTINE_HPR2)
43        COMPLEX_CONJUGATE(result);
44      #endif
45    }
46    return result;
47  }
48  else {
49    real default_result;
50    SetToZero(default_result);
51    return default_result;
52  }
53}
54
55// Performs the rank-1 matrix update
56INLINE_FUNC void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
57                              __global real* agm, const int a_offset, const int a_ld,
58                              const real alpha, const real xvalue, const real yvalue,
59                              const int is_upper) {
60
61  // Bounds of a regular matrix
62  if (id1 < max1 && id2 < max2) {
63
64    #if defined(ROUTINE_SPR) || defined(ROUTINE_HPR)
65      int a_index;
66      if (is_upper) {
67        a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2;
68      }
69      else {
70        a_index = (id1 >= id2) ? ((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2;
71      }
72      a_index += a_offset;
73    #else
74      const int a_index = id2*a_ld + id1 + a_offset;
75    #endif
76
77    // Loads the current value of the A matrix
78    const real avalue = agm[a_index];
79
80    // Computes result = alpha * x[i] * y[j] + a[i][j]
81    #if PRECISION == 3232 || PRECISION == 6464
82      real ax;
83      ax.x = MulReal(alpha, xvalue);
84      ax.y = MulImag(alpha, xvalue);
85      real result;
86      result.x = MulReal(ax, yvalue) + avalue.x;
87      result.y = MulImag(ax, yvalue) + avalue.y;
88    #else
89      real result = alpha * xvalue * yvalue + avalue;
90    #endif
91
92    // For hermetian matrices
93    #if defined(ROUTINE_HER) || defined(ROUTINE_HPR)
94      if (id1 == id2) { result.y = ZERO; }
95    #endif
96
97    // Stores the final result
98    agm[a_index] = result;
99  }
100}
101
102// Performs the rank-2 matrix update
103INLINE_FUNC void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2,
104                               __global real* agm, const int a_offset, const int a_ld,
105                               const real alpha1, const real xvalue, const real yvalue,
106                               const real alpha2, const real xtvalue, const real ytvalue,
107                               const int is_upper) {
108
109  // Bounds of a regular matrix
110  if (id1 < max1 && id2 < max2) {
111
112    #if defined(ROUTINE_SPR2) || defined(ROUTINE_HPR2)
113      int a_index;
114      if (is_upper) {
115        a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2;
116      }
117      else {
118        a_index = (id1 >= id2) ? ((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2;
119      }
120      a_index += a_offset;
121    #else
122      const int a_index = id2*a_ld + id1 + a_offset;
123    #endif
124
125    // Loads the current value of the A matrix
126    const real avalue = agm[a_index];
127
128    // Computes result = alpha * x[i] * y[j] + alpha * x[j] * y[i] + a[i][j]
129    #if PRECISION == 3232 || PRECISION == 6464
130      real ax;
131      ax.x = MulReal(alpha2, xvalue);
132      ax.y = MulImag(alpha2, xvalue);
133      real atx;
134      atx.x = MulReal(alpha1, xtvalue);
135      atx.y = MulImag(alpha1, xtvalue);
136      real result;
137      result.x = MulReal(ax, yvalue) + MulReal(atx, ytvalue) + avalue.x;
138      result.y = MulImag(ax, yvalue) + MulImag(atx, ytvalue) + avalue.y;
139    #else
140      real result = alpha1 * xvalue * yvalue + alpha2 * xtvalue * ytvalue + avalue;
141    #endif
142
143    // For hermetian matrices
144    #if defined(ROUTINE_HER2) || defined(ROUTINE_HPR2)
145      if (id1 == id2) { result.y = ZERO; }
146    #endif
147
148    // Stores the final result
149    agm[a_index] = result;
150  }
151}
152
153// =================================================================================================
154
155// End of the C++11 raw string literal
156)"
157
158// =================================================================================================
159