1 2// ================================================================================================= 3// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5// width of 100 characters per line. 6// 7// Author(s): 8// Cedric Nugteren <www.cedricnugteren.nl> 9// 10// This file contains common functions for matrix update kernels (Xger, Xher). 11// 12// ================================================================================================= 13 14// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string 15// literal). Comment-out this line for syntax-highlighting when developing. 16R"( 17 18// ================================================================================================= 19 20// Parameters set by the tuner or by the database. Here they are given a basic default value in case 21// this kernel file is used outside of the CLBlast library. 22 23#ifndef WGS1 24 #define WGS1 8 // The local work-group size in first dimension 25#endif 26#ifndef WGS2 27 #define WGS2 8 // The local work-group size in second dimension 28#endif 29#ifndef WPT 30 #define WPT 1 // The amount of work-per-thread in both dimensions 31#endif 32 33// ================================================================================================= 34 35// Returns an element from a vector 36INLINE_FUNC real LoadVector(const int id, const int max, 37 __global real* gm, const int offset, const int inc, 38 const int do_conjugate) { 39 if (id < max) { 40 real result = gm[id*inc + offset]; 41 if (do_conjugate) { 42 #if defined(ROUTINE_GERC) || defined(ROUTINE_HER) || defined(ROUTINE_HPR) || defined(ROUTINE_HER2) || defined(ROUTINE_HPR2) 43 COMPLEX_CONJUGATE(result); 44 #endif 45 } 46 return result; 47 } 48 else { 49 real default_result; 50 SetToZero(default_result); 51 return default_result; 52 } 53} 54 55// Performs the rank-1 matrix update 56INLINE_FUNC void MatrixUpdate(const int id1, const int id2, const int max1, const int max2, 57 __global real* agm, const int a_offset, const int a_ld, 58 const real alpha, const real xvalue, const real yvalue, 59 const int is_upper) { 60 61 // Bounds of a regular matrix 62 if (id1 < max1 && id2 < max2) { 63 64 #if defined(ROUTINE_SPR) || defined(ROUTINE_HPR) 65 int a_index; 66 if (is_upper) { 67 a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2; 68 } 69 else { 70 a_index = (id1 >= id2) ? ((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2; 71 } 72 a_index += a_offset; 73 #else 74 const int a_index = id2*a_ld + id1 + a_offset; 75 #endif 76 77 // Loads the current value of the A matrix 78 const real avalue = agm[a_index]; 79 80 // Computes result = alpha * x[i] * y[j] + a[i][j] 81 #if PRECISION == 3232 || PRECISION == 6464 82 real ax; 83 ax.x = MulReal(alpha, xvalue); 84 ax.y = MulImag(alpha, xvalue); 85 real result; 86 result.x = MulReal(ax, yvalue) + avalue.x; 87 result.y = MulImag(ax, yvalue) + avalue.y; 88 #else 89 real result = alpha * xvalue * yvalue + avalue; 90 #endif 91 92 // For hermetian matrices 93 #if defined(ROUTINE_HER) || defined(ROUTINE_HPR) 94 if (id1 == id2) { result.y = ZERO; } 95 #endif 96 97 // Stores the final result 98 agm[a_index] = result; 99 } 100} 101 102// Performs the rank-2 matrix update 103INLINE_FUNC void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2, 104 __global real* agm, const int a_offset, const int a_ld, 105 const real alpha1, const real xvalue, const real yvalue, 106 const real alpha2, const real xtvalue, const real ytvalue, 107 const int is_upper) { 108 109 // Bounds of a regular matrix 110 if (id1 < max1 && id2 < max2) { 111 112 #if defined(ROUTINE_SPR2) || defined(ROUTINE_HPR2) 113 int a_index; 114 if (is_upper) { 115 a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2; 116 } 117 else { 118 a_index = (id1 >= id2) ? ((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2; 119 } 120 a_index += a_offset; 121 #else 122 const int a_index = id2*a_ld + id1 + a_offset; 123 #endif 124 125 // Loads the current value of the A matrix 126 const real avalue = agm[a_index]; 127 128 // Computes result = alpha * x[i] * y[j] + alpha * x[j] * y[i] + a[i][j] 129 #if PRECISION == 3232 || PRECISION == 6464 130 real ax; 131 ax.x = MulReal(alpha2, xvalue); 132 ax.y = MulImag(alpha2, xvalue); 133 real atx; 134 atx.x = MulReal(alpha1, xtvalue); 135 atx.y = MulImag(alpha1, xtvalue); 136 real result; 137 result.x = MulReal(ax, yvalue) + MulReal(atx, ytvalue) + avalue.x; 138 result.y = MulImag(ax, yvalue) + MulImag(atx, ytvalue) + avalue.y; 139 #else 140 real result = alpha1 * xvalue * yvalue + alpha2 * xtvalue * ytvalue + avalue; 141 #endif 142 143 // For hermetian matrices 144 #if defined(ROUTINE_HER2) || defined(ROUTINE_HPR2) 145 if (id1 == id2) { result.y = ZERO; } 146 #endif 147 148 // Stores the final result 149 agm[a_index] = result; 150 } 151} 152 153// ================================================================================================= 154 155// End of the C++11 raw string literal 156)" 157 158// ================================================================================================= 159