1dnl ---------------------------------------------------------------------- 2dnl r_truth2 --- Dot Product 3dnl r <--- beta * r + alpha * x^T * y 4dnl r <--- beta * r + alpha * x^H * y 5dnl ---------------------------------------------------------------------- 6dnl 7dnl 8include(cblas.m4)dnl 9dnl 10dnl 11dnl Usage: 12dnl R_TRUTH2 ($1, $2, $3, $4) 13dnl R_TRUTH2_HEAD ($1, $2, $3, $4) 14dnl R_TRUTH2_NAME ($1, $2, $3, $4) 15dnl R_TRUTH2_PARAMS ($1, $2, $3, $4) 16dnl R_TRUTH2_COMMENT($1, $2, $3, $4) 17dnl 18dnl $1 -- type of alpha, beta, r. 19dnl $2 -- type of a 20dnl $3 -- type of b 21dnl $4 -- Set to `_x' for _x routines. Otherwise set to `'. 22dnl 23dnl 24define(`R_TRUTH2_NAME', `$1_r_truth2')dnl 25dnl 26dnl 27define(`R_TRUTH2_PARAMS', 28 `enum blas_conj_type conj, int n, $1_scalar alpha, 29 const $2_array x, int incx, $1_scalar beta, 30 const $3_array head_y, const $3_array tail_y, int incy, $1_array r, 31 double *head_r_true, double *tail_r_true')dnl 32dnl 33dnl ---------------------------------------------------------------------- 34dnl Usage: ZERO_RETURN(a, a_type) ... *a = 0 35dnl ---------------------------------------------------------------------- 36define(`ZERO_RETURN', `ifelse( 37 `$2',`real_E', `*HEAD($1) = *TAIL($1) = 0.0;', 38 `$2',`complex_E', `HEAD($1)[0] = TAIL($1)[0] = HEAD($1)[1] = TAIL($1)[1] = 0.0;', 39 `error')')dnl 40dnl 41dnl 42define(`R_TRUTH2_HEAD', 43 `void R_TRUTH2_NAME($1, $2, $3, $4)(R_TRUTH2_PARAMS($1, $2, $3, $4))')dnl 44dnl 45dnl 46define(`R_TRUTH2_COMMENT',` 47/* 48 * Purpose 49 * ======= 50 * 51 * This routine computes the inner product: 52 * 53 * r_true <- beta * r + alpha * SUM_{i=0, n-1} x[i] * (head_y[i] + tail_y[i]) 54 * 55 * Arguments 56 * ========= 57 * 58 * conj (input) enum blas_conj_type 59 * When x and y are complex vectors, specifies whether vector 60 * components x[i] are used unconjugated or conjugated. 61 * 62 * n (input) int 63 * The length of vectors x and y. 64 * 65 * alpha (input) $1_scalar 66 * 67 * x (input) const $2_array 68 * Array of length n. 69 * 70 * incx (input) int 71 * The stride used to access components x[i]. 72 * 73 * beta (input) $1_scalar 74 * 75 * head_y 76 * tail_y (input) const $3_array 77 * Array of length n. 78 * 79 * incy (input) int 80 * The stride used to access components y[i]. 81 * 82 * r (input) $1_array 83 * 84 * head_r_true 85 * tail_r_true (output) $1_array 86 * The truth. 87 * 88 */')dnl 89dnl 90dnl 91dnl Usage: DOT_BODY($1, $2, $3, $4, $5, $6) 92dnl Generates the main body of the product code. 93dnl $1 - type of alpha, beta, r 94dnl $2 - type of a 95dnl $3 - type of b 96dnl $4 - type of sum/prod 97dnl $5 - type of temp 98dnl $6 - [optional] String `FPU' is passed if FPU fix 99dnl is needed. Empty string is passed otherwise. 100dnl 101define(`DOT_BODY', ` 102 int i, ix = 0, iy = 0; 103 PTR_CAST(r, $1) 104 PTR_CAST(x, $2, `const') 105 PTR_CAST(head_y, $3, `const') 106 PTR_CAST(tail_y, $3, `const') 107 SCALAR_CAST(alpha, $1) 108 SCALAR_CAST(beta, $1) 109 DECLARE(x_ii, $2) 110 DECLARE(y_ii, $3) 111 DECLARE(r_v, $1) 112 DECLARE(prod, $4) 113 DECLARE(sum, $4) 114 DECLARE(tmp1, $5) 115 DECLARE(tmp2, $5) 116 ifelse(`$6', `FPU', `FPU_FIX_DECL;') 117 118 /* Immediate return */ 119 if (n < 0) { 120 ZERO_RETURN(r_true, $5) 121 return; 122 } 123 124 ifelse(`$6', `FPU', `FPU_FIX_START;') 125 126 GET_VECTOR_ELEMENT(r_v, r_i, 0, $1) 127 ZERO(sum, $4) 128 INC_ADJUST(incx, $2) 129 INC_ADJUST(incy, $3) 130 if (incx < 0) ix = (-n+1)*incx; 131 if (incy < 0) iy = (-n+1)*incy; 132dnl *** Only bother to check the value of conj if x is complex. 133IF_COMPLEX($2, ` 134 if (conj == blas_conj) { 135 for (i = 0; i < n; ++i) { 136 GET_VECTOR_ELEMENT(x_ii, x_i, ix, $2) 137 GET_VECTOR_ELEMENT(y_ii, head_y_i, iy, $3) 138 CONJ(x_ii, $2, blas_conj) 139 MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * head_y[i] */ 140 ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ 141 GET_VECTOR_ELEMENT(y_ii, tail_y_i, iy, $3) 142 MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * tail_y[i] */ 143 ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ 144 ix += incx; 145 iy += incy; 146 } /* endfor */ 147 } else { 148 /* do not conjugate */ 149') 150 for (i = 0; i < n; ++i) { 151 GET_VECTOR_ELEMENT(x_ii, x_i, ix, $2) 152 GET_VECTOR_ELEMENT(y_ii, head_y_i, iy, $3) 153 CONJ(x_ii, $2, blas_no_conj) 154 MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * head_y[i] */ 155 ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ 156 GET_VECTOR_ELEMENT(y_ii, tail_y_i, iy, $3) 157 MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * tail_y[i] */ 158 ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ 159 ix += incx; 160 iy += incy; 161 } /* endfor */ 162dnl *** Close the outer if, if x is complex 163IF_COMPLEX($2, `}') 164 165 MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */ 166 MUL(tmp2, $5, r_v, $1, beta_i, $1) /* tmp2 = r*beta */ 167 ADD(tmp1, $5, tmp1, $5, tmp2, $5) /* tmp1 = tmp1+tmp2 */ 168 ASSIGN_SCALAR_TO_PTR(r_true, $4, tmp1, $5) /* *r = tmp1 */ 169 170 ifelse(`$6', `FPU', `FPU_FIX_STOP;') 171')dnl 172dnl 173dnl 174define(`R_TRUTH2', 175 `R_TRUTH2_HEAD($1, $2, $3, $4) 176 R_TRUTH2_COMMENT($1, $2, $3, $4) 177 { 178dnl static const char routine_name[] = "R_TRUTH2_NAME($1, $2, $3, $4)"; 179 DOT_BODY($1_type, $2_type, $3_type, 180 SUM_TYPE_X($1_type, $2_type, $3_type, E), 181 TMP_TYPE_X($1_type, E), FPU) 182 }')dnl 183dnl 184dnl 185define(`PROTOTYPES', `dnl 186R_TRUTH2_HEAD(s, s, s, _x); 187R_TRUTH2_HEAD(d, d, d, _x); 188R_TRUTH2_HEAD(c, c, c, _x); 189R_TRUTH2_HEAD(z, z, z, _x); 190')dnl 191dnl 192dnl 193define(`SOURCE', `dnl 194#include "blas_extended.h" 195#include "blas_extended_private.h" 196 197R_TRUTH2(s, s, s, _x) 198R_TRUTH2(d, d, d, _x) 199R_TRUTH2(c, c, c, _x) 200R_TRUTH2(z, z, z, _x) 201')dnl 202dnl 203dnl 204ifdef(`prototypes_only', `PROTOTYPES()', `SOURCE()')dnl 205dnl 206dnl 207