1dnl ----------------------------------------------------------------------
2dnl r_truth2 --- Dot Product
3dnl   r <--- beta * r + alpha * x^T * y
4dnl   r <--- beta * r + alpha * x^H * y
5dnl ----------------------------------------------------------------------
6dnl
7dnl
8include(cblas.m4)dnl
9dnl
10dnl
11dnl  Usage:
12dnl    R_TRUTH2   ($1, $2, $3, $4)
13dnl    R_TRUTH2_HEAD   ($1, $2, $3, $4)
14dnl    R_TRUTH2_NAME   ($1, $2, $3, $4)
15dnl    R_TRUTH2_PARAMS ($1, $2, $3, $4)
16dnl    R_TRUTH2_COMMENT($1, $2, $3, $4)
17dnl
18dnl      $1 -- type of alpha, beta, r.
19dnl      $2 -- type of a
20dnl      $3 -- type of b
21dnl      $4 -- Set to `_x' for _x routines.  Otherwise set to `'.
22dnl
23dnl
24define(`R_TRUTH2_NAME', `$1_r_truth2')dnl
25dnl
26dnl
27define(`R_TRUTH2_PARAMS',
28  `enum blas_conj_type conj, int n, $1_scalar alpha,
29   const $2_array x, int incx, $1_scalar beta,
30   const $3_array head_y, const $3_array tail_y, int incy, $1_array r,
31   double *head_r_true, double *tail_r_true')dnl
32dnl
33dnl ----------------------------------------------------------------------
34dnl Usage: ZERO_RETURN(a, a_type) ... *a = 0
35dnl ----------------------------------------------------------------------
36define(`ZERO_RETURN', `ifelse(
37	`$2',`real_E', `*HEAD($1) = *TAIL($1) = 0.0;',
38	`$2',`complex_E', `HEAD($1)[0] = TAIL($1)[0] = HEAD($1)[1] = TAIL($1)[1] = 0.0;',
39	`error')')dnl
40dnl
41dnl
42define(`R_TRUTH2_HEAD',
43  `void R_TRUTH2_NAME($1, $2, $3, $4)(R_TRUTH2_PARAMS($1, $2, $3, $4))')dnl
44dnl
45dnl
46define(`R_TRUTH2_COMMENT',`
47/*
48 * Purpose
49 * =======
50 *
51 * This routine computes the inner product:
52 *
53 *   r_true <- beta * r + alpha * SUM_{i=0, n-1} x[i] * (head_y[i] + tail_y[i])
54 *
55 * Arguments
56 * =========
57 *
58 * conj   (input) enum blas_conj_type
59 *        When x and y are complex vectors, specifies whether vector
60 *        components x[i] are used unconjugated or conjugated.
61 *
62 * n      (input) int
63 *        The length of vectors x and y.
64 *
65 * alpha  (input) $1_scalar
66 *
67 * x      (input) const $2_array
68 *        Array of length n.
69 *
70 * incx   (input) int
71 *        The stride used to access components x[i].
72 *
73 * beta   (input) $1_scalar
74 *
75 * head_y
76 * tail_y (input) const $3_array
77 *        Array of length n.
78 *
79 * incy   (input) int
80 *        The stride used to access components y[i].
81 *
82 * r      (input) $1_array
83 *
84 * head_r_true
85 * tail_r_true (output) $1_array
86 *        The truth.
87 *
88 */')dnl
89dnl
90dnl
91dnl  Usage: DOT_BODY($1, $2, $3, $4, $5, $6)
92dnl  Generates the main body of the product code.
93dnl    $1 - type of alpha, beta, r
94dnl    $2 - type of a
95dnl    $3 - type of b
96dnl    $4 - type of sum/prod
97dnl    $5 - type of temp
98dnl    $6 - [optional] String `FPU' is passed if FPU fix
99dnl         is needed.  Empty string is passed otherwise.
100dnl
101define(`DOT_BODY', `
102  int i, ix = 0, iy = 0;
103  PTR_CAST(r, $1)
104  PTR_CAST(x, $2, `const')
105  PTR_CAST(head_y, $3, `const')
106  PTR_CAST(tail_y, $3, `const')
107  SCALAR_CAST(alpha, $1)
108  SCALAR_CAST(beta, $1)
109  DECLARE(x_ii, $2)
110  DECLARE(y_ii, $3)
111  DECLARE(r_v, $1)
112  DECLARE(prod, $4)
113  DECLARE(sum, $4)
114  DECLARE(tmp1, $5)
115  DECLARE(tmp2, $5)
116  ifelse(`$6', `FPU', `FPU_FIX_DECL;')
117
118  /* Immediate return */
119  if (n < 0) {
120    ZERO_RETURN(r_true, $5)
121    return;
122  }
123
124  ifelse(`$6', `FPU', `FPU_FIX_START;')
125
126  GET_VECTOR_ELEMENT(r_v, r_i, 0, $1)
127  ZERO(sum, $4)
128  INC_ADJUST(incx, $2)
129  INC_ADJUST(incy, $3)
130  if (incx < 0) ix = (-n+1)*incx;
131  if (incy < 0) iy = (-n+1)*incy;
132dnl *** Only bother to check the value of conj if x is complex.
133IF_COMPLEX($2, `
134  if (conj == blas_conj) {
135    for (i = 0; i < n; ++i) {
136      GET_VECTOR_ELEMENT(x_ii, x_i, ix, $2)
137      GET_VECTOR_ELEMENT(y_ii, head_y_i, iy, $3)
138      CONJ(x_ii, $2, blas_conj)
139      MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * head_y[i] */
140      ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */
141      GET_VECTOR_ELEMENT(y_ii, tail_y_i, iy, $3)
142      MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * tail_y[i] */
143      ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */
144      ix += incx;
145      iy += incy;
146    } /* endfor */
147  } else {
148    /* do not conjugate */
149')
150  for (i = 0; i < n; ++i) {
151    GET_VECTOR_ELEMENT(x_ii, x_i, ix, $2)
152    GET_VECTOR_ELEMENT(y_ii, head_y_i, iy, $3)
153    CONJ(x_ii, $2, blas_no_conj)
154    MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * head_y[i] */
155    ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */
156    GET_VECTOR_ELEMENT(y_ii, tail_y_i, iy, $3)
157    MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i] * tail_y[i] */
158    ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */
159    ix += incx;
160    iy += incy;
161  } /* endfor */
162dnl *** Close the outer if, if x is complex
163IF_COMPLEX($2, `}')
164
165  MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */
166  MUL(tmp2, $5, r_v, $1, beta_i, $1) /* tmp2 = r*beta */
167  ADD(tmp1, $5, tmp1, $5, tmp2, $5) /* tmp1 = tmp1+tmp2 */
168  ASSIGN_SCALAR_TO_PTR(r_true, $4, tmp1, $5)       /* *r = tmp1 */
169
170  ifelse(`$6', `FPU', `FPU_FIX_STOP;')
171')dnl
172dnl
173dnl
174define(`R_TRUTH2',
175  `R_TRUTH2_HEAD($1, $2, $3, $4)
176   R_TRUTH2_COMMENT($1, $2, $3, $4)
177   {
178dnl  static const char routine_name[] = "R_TRUTH2_NAME($1, $2, $3, $4)";
179     DOT_BODY($1_type, $2_type, $3_type,
180              SUM_TYPE_X($1_type, $2_type, $3_type, E),
181	      TMP_TYPE_X($1_type, E), FPU)
182   }')dnl
183dnl
184dnl
185define(`PROTOTYPES', `dnl
186R_TRUTH2_HEAD(s, s, s, _x);
187R_TRUTH2_HEAD(d, d, d, _x);
188R_TRUTH2_HEAD(c, c, c, _x);
189R_TRUTH2_HEAD(z, z, z, _x);
190')dnl
191dnl
192dnl
193define(`SOURCE', `dnl
194#include "blas_extended.h"
195#include "blas_extended_private.h"
196
197R_TRUTH2(s, s, s, _x)
198R_TRUTH2(d, d, d, _x)
199R_TRUTH2(c, c, c, _x)
200R_TRUTH2(z, z, z, _x)
201')dnl
202dnl
203dnl
204ifdef(`prototypes_only', `PROTOTYPES()', `SOURCE()')dnl
205dnl
206dnl
207