1 /*
2 * -- High Performance Computing Linpack Benchmark (HPL)
3 * HPL - 2.3 - December 2, 2018
4 * Antoine P. Petitet
5 * University of Tennessee, Knoxville
6 * Innovative Computing Laboratory
7 * (C) Copyright 2000-2008 All Rights Reserved
8 *
9 * -- Copyright notice and Licensing terms:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 *
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions, and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * 3. All advertising materials mentioning features or use of this
23 * software must display the following acknowledgement:
24 * This product includes software developed at the University of
25 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 *
27 * 4. The name of the University, the name of the Laboratory, or the
28 * names of its contributors may not be used to endorse or promote
29 * products derived from this software without specific written
30 * permission.
31 *
32 * -- Disclaimer:
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 * ---------------------------------------------------------------------
46 */
47 /*
48 * Include files
49 */
50 #include "hpl.h"
51
52 #ifndef HPL_dtrsv
53
54 #ifdef HPL_CALL_VSIPL
55
56 #ifdef STDC_HEADERS
HPL_dtrsvLNN(const int N,const double * A,const int LDA,double * X,const int INCX)57 static void HPL_dtrsvLNN
58 (
59 const int N,
60 const double * A,
61 const int LDA,
62 double * X,
63 const int INCX
64 )
65 #else
66 static void HPL_dtrsvLNN( N, A, LDA, X, INCX )
67 const int INCX, LDA, N;
68 const double * A;
69 double * X;
70 #endif
71 {
72 int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
73 register double t0;
74
75 for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX )
76 {
77 X[jx] /= A[jaj]; t0 = X[jx];
78 for( i = j+1, iaij = jaj+1, ix = jx + INCX;
79 i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; }
80 }
81 }
82
83 #ifdef STDC_HEADERS
HPL_dtrsvLNU(const int N,const double * A,const int LDA,double * X,const int INCX)84 static void HPL_dtrsvLNU
85 (
86 const int N,
87 const double * A,
88 const int LDA,
89 double * X,
90 const int INCX
91 )
92 #else
93 static void HPL_dtrsvLNU( N, A, LDA, X, INCX )
94 const int INCX, LDA, N;
95 const double * A;
96 double * X;
97 #endif
98 {
99 int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
100 register double t0;
101
102 for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX )
103 {
104 t0 = X[jx];
105 for( i = j+1, iaij = jaj+1, ix = jx + INCX;
106 i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; }
107 }
108 }
109
110 #ifdef STDC_HEADERS
HPL_dtrsvLTN(const int N,const double * A,const int LDA,double * X,const int INCX)111 static void HPL_dtrsvLTN
112 (
113 const int N,
114 const double * A,
115 const int LDA,
116 double * X,
117 const int INCX
118 )
119 #else
120 static void HPL_dtrsvLTN( N, A, LDA, X, INCX )
121 const int INCX, LDA, N;
122 const double * A;
123 double * X;
124 #endif
125 {
126 int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
127 register double t0;
128
129 for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX;
130 j >= 0; j--, jaj -= ldap1, jx -= INCX )
131 {
132 t0 = X[jx];
133 for( i = j+1, iaij = 1+jaj, ix = jx + INCX;
134 i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; }
135 t0 /= A[jaj]; X[jx] = t0;
136 }
137 }
138
139 #ifdef STDC_HEADERS
HPL_dtrsvLTU(const int N,const double * A,const int LDA,double * X,const int INCX)140 static void HPL_dtrsvLTU
141 (
142 const int N,
143 const double * A,
144 const int LDA,
145 double * X,
146 const int INCX
147 )
148 #else
149 static void HPL_dtrsvLTU( N, A, LDA, X, INCX )
150 const int INCX, LDA, N;
151 const double * A;
152 double * X;
153 #endif
154 {
155 int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
156 register double t0;
157
158 for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX;
159 j >= 0; j--, jaj -= ldap1, jx -= INCX )
160 {
161 t0 = X[jx];
162 for( i = j+1, iaij = 1+jaj, ix = jx + INCX;
163 i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; }
164 X[jx] = t0;
165 }
166 }
167
168
169 #ifdef STDC_HEADERS
HPL_dtrsvUNN(const int N,const double * A,const int LDA,double * X,const int INCX)170 static void HPL_dtrsvUNN
171 (
172 const int N,
173 const double * A,
174 const int LDA,
175 double * X,
176 const int INCX
177 )
178 #else
179 static void HPL_dtrsvUNN( N, A, LDA, X, INCX )
180 const int INCX, LDA, N;
181 const double * A;
182 double * X;
183 #endif
184 {
185 int i, iaij, ix, j, jaj, jx;
186 register double t0;
187
188 for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX;
189 j >= 0; j--, jaj -= LDA, jx -= INCX )
190 {
191 X[jx] /= A[j+jaj]; t0 = X[jx];
192 for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
193 { X[ix] -= t0 * A[iaij]; }
194 }
195 }
196
197
198 #ifdef STDC_HEADERS
HPL_dtrsvUNU(const int N,const double * A,const int LDA,double * X,const int INCX)199 static void HPL_dtrsvUNU
200 (
201 const int N,
202 const double * A,
203 const int LDA,
204 double * X,
205 const int INCX
206 )
207 #else
208 static void HPL_dtrsvUNU( N, A, LDA, X, INCX )
209 const int INCX, LDA, N;
210 const double * A;
211 double * X;
212 #endif
213 {
214 int i, iaij, ix, j, jaj, jx;
215 register double t0;
216
217 for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX;
218 j >= 0; j--, jaj -= LDA, jx -= INCX )
219 {
220 t0 = X[jx];
221 for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
222 { X[ix] -= t0 * A[iaij]; }
223 }
224 }
225
226
227 #ifdef STDC_HEADERS
HPL_dtrsvUTN(const int N,const double * A,const int LDA,double * X,const int INCX)228 static void HPL_dtrsvUTN
229 (
230 const int N,
231 const double * A,
232 const int LDA,
233 double * X,
234 const int INCX
235 )
236 #else
237 static void HPL_dtrsvUTN( N, A, LDA, X, INCX )
238 const int INCX, LDA, N;
239 const double * A;
240 double * X;
241 #endif
242 {
243 int i, iaij, ix, j, jaj, jx;
244 register double t0;
245
246 for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX )
247 {
248 t0 = X[jx];
249 for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
250 { t0 -= A[iaij] * X[ix]; }
251 t0 /= A[iaij]; X[jx] = t0;
252 }
253 }
254
255 #ifdef STDC_HEADERS
HPL_dtrsvUTU(const int N,const double * A,const int LDA,double * X,const int INCX)256 static void HPL_dtrsvUTU
257 (
258 const int N,
259 const double * A,
260 const int LDA,
261 double * X,
262 const int INCX
263 )
264 #else
265 static void HPL_dtrsvUTU( N, A, LDA, X, INCX )
266 const int INCX, LDA, N;
267 const double * A;
268 double * X;
269 #endif
270 {
271 int i, iaij, ix, j, jaj, jx;
272 register double t0;
273
274 for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
275 {
276 t0 = X[jx];
277 for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
278 { t0 -= A[iaij] * X[ix]; }
279 X[jx] = t0;
280 }
281 }
282
283 #ifdef STDC_HEADERS
HPL_dtrsv0(const enum HPL_UPLO UPLO,const enum HPL_TRANS TRANS,const enum HPL_DIAG DIAG,const int N,const double * A,const int LDA,double * X,const int INCX)284 static void HPL_dtrsv0
285 (
286 const enum HPL_UPLO UPLO,
287 const enum HPL_TRANS TRANS,
288 const enum HPL_DIAG DIAG,
289 const int N,
290 const double * A,
291 const int LDA,
292 double * X,
293 const int INCX
294 )
295 #else
296 static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
297 const enum HPL_UPLO UPLO;
298 const enum HPL_TRANS TRANS;
299 const enum HPL_DIAG DIAG;
300 const int INCX, LDA, N;
301 const double * A;
302 double * X;
303 #endif
304 {
305 if( N == 0 ) return;
306
307 if( UPLO == HplUpper )
308 {
309 if( TRANS == HplNoTrans )
310 {
311 if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); }
312 else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); }
313 }
314 else
315 {
316 if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); }
317 else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); }
318 }
319 }
320 else
321 {
322 if( TRANS == HplNoTrans )
323 {
324 if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N, A, LDA, X, INCX ); }
325 else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); }
326 }
327 else
328 {
329 if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); }
330 else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); }
331 }
332 }
333 }
334
335 #endif
336
337 #ifdef STDC_HEADERS
HPL_dtrsv(const enum HPL_ORDER ORDER,const enum HPL_UPLO UPLO,const enum HPL_TRANS TRANS,const enum HPL_DIAG DIAG,const int N,const double * A,const int LDA,double * X,const int INCX)338 void HPL_dtrsv
339 (
340 const enum HPL_ORDER ORDER,
341 const enum HPL_UPLO UPLO,
342 const enum HPL_TRANS TRANS,
343 const enum HPL_DIAG DIAG,
344 const int N,
345 const double * A,
346 const int LDA,
347 double * X,
348 const int INCX
349 )
350 #else
351 void HPL_dtrsv
352 ( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
353 const enum HPL_ORDER ORDER;
354 const enum HPL_UPLO UPLO;
355 const enum HPL_TRANS TRANS;
356 const enum HPL_DIAG DIAG;
357 const int N;
358 const double * A;
359 const int LDA;
360 double * X;
361 const int INCX;
362 #endif
363 {
364 /*
365 * Purpose
366 * =======
367 *
368 * HPL_dtrsv solves one of the systems of equations
369 *
370 * A * x = b, or A^T * x = b,
371 *
372 * where b and x are n-element vectors and A is an n by n non-unit, or
373 * unit, upper or lower triangular matrix.
374 *
375 * No test for singularity or near-singularity is included in this
376 * routine. Such tests must be performed before calling this routine.
377 *
378 * Arguments
379 * =========
380 *
381 * ORDER (local input) const enum HPL_ORDER
382 * On entry, ORDER specifies the storage format of the operands
383 * as follows:
384 * ORDER = HplRowMajor,
385 * ORDER = HplColumnMajor.
386 *
387 * UPLO (local input) const enum HPL_UPLO
388 * On entry, UPLO specifies whether the upper or lower
389 * triangular part of the array A is to be referenced. When
390 * UPLO==HplUpper, only the upper triangular part of A is to be
391 * referenced, otherwise only the lower triangular part of A is
392 * to be referenced.
393 *
394 * TRANS (local input) const enum HPL_TRANS
395 * On entry, TRANS specifies the equations to be solved as
396 * follows:
397 * TRANS==HplNoTrans A * x = b,
398 * TRANS==HplTrans A^T * x = b.
399 *
400 * DIAG (local input) const enum HPL_DIAG
401 * On entry, DIAG specifies whether A is unit triangular or
402 * not. When DIAG==HplUnit, A is assumed to be unit triangular,
403 * and otherwise, A is not assumed to be unit triangular.
404 *
405 * N (local input) const int
406 * On entry, N specifies the order of the matrix A. N must be at
407 * least zero.
408 *
409 * A (local input) const double *
410 * On entry, A points to an array of size equal to or greater
411 * than LDA * n. Before entry with UPLO==HplUpper, the leading
412 * n by n upper triangular part of the array A must contain the
413 * upper triangular matrix and the strictly lower triangular
414 * part of A is not referenced. When UPLO==HplLower on entry,
415 * the leading n by n lower triangular part of the array A must
416 * contain the lower triangular matrix and the strictly upper
417 * triangular part of A is not referenced.
418 *
419 * Note that when DIAG==HplUnit, the diagonal elements of A
420 * not referenced either, but are assumed to be unity.
421 *
422 * LDA (local input) const int
423 * On entry, LDA specifies the leading dimension of A as
424 * declared in the calling (sub) program. LDA must be at
425 * least MAX(1,n).
426 *
427 * X (local input/output) double *
428 * On entry, X is an incremented array of dimension at least
429 * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x.
430 * Before entry, the incremented array X must contain the n
431 * element right-hand side vector b. On exit, X is overwritten
432 * with the solution vector x.
433 *
434 * INCX (local input) const int
435 * On entry, INCX specifies the increment for the elements of X.
436 * INCX must not be zero.
437 *
438 * ---------------------------------------------------------------------
439 */
440 #ifdef HPL_CALL_CBLAS
441 cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
442 #endif
443 #ifdef HPL_CALL_VSIPL
444 if( ORDER == HplColumnMajor )
445 {
446 HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
447 }
448 else
449 {
450 HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ),
451 ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
452 DIAG, N, A, LDA, X, INCX );
453 }
454 #endif
455 #ifdef HPL_CALL_FBLAS
456 #ifdef StringSunStyle
457 #ifdef HPL_USE_F77_INTEGER_DEF
458 F77_INTEGER IONE = 1;
459 #else
460 int IONE = 1;
461 #endif
462 #endif
463 #ifdef StringStructVal
464 F77_CHAR fuplo, ftran, fdiag;
465 #endif
466 #ifdef StringStructPtr
467 F77_CHAR fuplo, ftran, fdiag;
468 #endif
469 #ifdef StringCrayStyle
470 F77_CHAR fuplo, ftran, fdiag;
471 #endif
472
473 #ifdef HPL_USE_F77_INTEGER_DEF
474 const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX;
475 #else
476 #define F77N N
477 #define F77lda LDA
478 #define F77incx INCX
479 #endif
480 char cuplo, ctran, cdiag;
481
482 if( ORDER == HplColumnMajor )
483 {
484 cuplo = ( UPLO == HplUpper ? 'U' : 'L' );
485 ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
486 }
487 else
488 {
489 cuplo = ( UPLO == HplUpper ? 'L' : 'U' );
490 ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
491 }
492 cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
493
494 #ifdef StringSunStyle
495 F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx,
496 IONE, IONE, IONE );
497 #endif
498 #ifdef StringCrayStyle
499 ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
500 fuplo = HPL_C2F_CHAR( cuplo );
501 F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx );
502 #endif
503 #ifdef StringStructVal
504 fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
505 fdiag.len = 1; fdiag.cp = &cdiag;
506 F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx );
507 #endif
508 #ifdef StringStructPtr
509 fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
510 fdiag.len = 1; fdiag.cp = &cdiag;
511 F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx );
512 #endif
513
514 #endif
515 /*
516 * End of HPL_dtrsv
517 */
518 }
519
520 #endif
521