1 /*
2  * -- High Performance Computing Linpack Benchmark (HPL)
3  *    HPL - 2.3 - December 2, 2018
4  *    Antoine P. Petitet
5  *    University of Tennessee, Knoxville
6  *    Innovative Computing Laboratory
7  *    (C) Copyright 2000-2008 All Rights Reserved
8  *
9  * -- Copyright notice and Licensing terms:
10  *
11  * Redistribution  and  use in  source and binary forms, with or without
12  * modification, are  permitted provided  that the following  conditions
13  * are met:
14  *
15  * 1. Redistributions  of  source  code  must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce  the above copyright
19  * notice, this list of conditions,  and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * 3. All  advertising  materials  mentioning  features  or  use of this
23  * software must display the following acknowledgement:
24  * This  product  includes  software  developed  at  the  University  of
25  * Tennessee, Knoxville, Innovative Computing Laboratory.
26  *
27  * 4. The name of the  University,  the name of the  Laboratory,  or the
28  * names  of  its  contributors  may  not  be used to endorse or promote
29  * products  derived   from   this  software  without  specific  written
30  * permission.
31  *
32  * -- Disclaimer:
33  *
34  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  * ---------------------------------------------------------------------
46  */
47 /*
48  * Include files
49  */
50 #include "hpl.h"
51 
52 #ifndef HPL_dtrsm
53 
54 #ifdef HPL_CALL_VSIPL
55 
56 #ifdef STDC_HEADERS
HPL_dtrsmLLNN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)57 static void HPL_dtrsmLLNN
58 (
59    const int                  M,
60    const int                  N,
61    const double               ALPHA,
62    const double               * A,
63    const int                  LDA,
64    double                     * B,
65    const int                  LDB
66 )
67 #else
68 static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB )
69    const int                  LDA, LDB, M, N;
70    const double               ALPHA;
71    const double               * A;
72    double                     * B;
73 #endif
74 {
75    int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
76 
77    for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
78    {
79       for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
80       for( k = 0, jak  = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 )
81       {
82          B[ibkj] /= A[k+jak];
83          for( i = k+1,    iaik  = k+1+jak, ibij  = k+1+jbj;
84               i < M; i++, iaik +=1,        ibij += 1 )
85          { B[ibij] -= B[ibkj] * A[iaik]; }
86       }
87    }
88 }
89 
90 #ifdef STDC_HEADERS
HPL_dtrsmLLNU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)91 static void HPL_dtrsmLLNU
92 (
93    const int                  M,
94    const int                  N,
95    const double               ALPHA,
96    const double               * A,
97    const int                  LDA,
98    double                     * B,
99    const int                  LDB
100 )
101 #else
102 static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB )
103    const int                  LDA, LDB, M, N;
104    const double               ALPHA;
105    const double               * A;
106    double                     * B;
107 #endif
108 {
109    int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
110 
111    for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
112    {
113       for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
114       for( k = 0, jak  = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 )
115       {
116          for( i = k+1,    iaik  = k+1+jak, ibij  = k+1+jbj;
117               i < M; i++, iaik +=1,        ibij += 1 )
118          { B[ibij] -= B[ibkj] * A[iaik]; }
119       }
120    }
121 }
122 
123 #ifdef STDC_HEADERS
HPL_dtrsmLLTN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)124 static void HPL_dtrsmLLTN
125 (
126    const int                  M,
127    const int                  N,
128    const double               ALPHA,
129    const double               * A,
130    const int                  LDA,
131    double                     * B,
132    const int                  LDB
133 )
134 #else
135 static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB )
136    const int                  LDA, LDB, M, N;
137    const double               ALPHA;
138    const double               * A;
139    double                     * B;
140 #endif
141 {
142    register double            t0;
143    int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
144 
145    for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
146    {
147       for( i = M-1,     jai  = (M-1)*LDA, ibij  = M-1+jbj;
148            i >= 0; i--, jai -= LDA,       ibij -= 1 )
149       {
150          t0 = ALPHA * B[ibij];
151          for( k = i+1,    iaki  = i+1+jai, ibkj  = i+1+jbj;
152               k < M; k++, iaki += 1,       ibkj += 1 )
153          { t0 -= A[iaki] * B[ibkj]; }
154          t0 /= A[i+jai];
155          B[ibij] = t0;
156       }
157    }
158 }
159 
160 #ifdef STDC_HEADERS
HPL_dtrsmLLTU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)161 static void HPL_dtrsmLLTU
162 (
163    const int                  M,
164    const int                  N,
165    const double               ALPHA,
166    const double               * A,
167    const int                  LDA,
168    double                     * B,
169    const int                  LDB
170 )
171 #else
172 static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB )
173    const int                  LDA, LDB, M, N;
174    const double               ALPHA;
175    const double               * A;
176    double                     * B;
177 #endif
178 {
179    register double            t0;
180    int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
181 
182    for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
183    {
184       for( i = M-1,     jai  = (M-1)*LDA, ibij  = M-1+jbj;
185            i >= 0; i--, jai -= LDA,       ibij -= 1 )
186       {
187          t0 = ALPHA * B[ibij];
188          for( k = i+1,    iaki  = i+1+jai, ibkj  = i+1+jbj;
189               k < M; k++, iaki += 1,       ibkj += 1 )
190          { t0 -= A[iaki] * B[ibkj]; }
191          B[ibij] = t0;
192       }
193    }
194 }
195 
196 #ifdef STDC_HEADERS
HPL_dtrsmLUNN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)197 static void HPL_dtrsmLUNN
198 (
199    const int                  M,
200    const int                  N,
201    const double               ALPHA,
202    const double               * A,
203    const int                  LDA,
204    double                     * B,
205    const int                  LDB
206 )
207 #else
208 static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB )
209    const int                  LDA, LDB, M, N;
210    const double               ALPHA;
211    const double               * A;
212    double                     * B;
213 #endif
214 {
215    int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
216 
217    for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
218    {
219       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
220       for( k = M-1,     jak  = (M-1)*LDA, ibkj  = M-1+jbj;
221            k >= 0; k--, jak -= LDA,       ibkj -= 1 )
222       {
223          B[ibkj] /= A[k+jak];
224          for( i = 0,      iaik  = jak, ibij  = jbj;
225               i < k; i++, iaik += 1,   ibij += 1 )
226          { B[ibij] -= B[ibkj] * A[iaik]; }
227       }
228    }
229 }
230 
231 
232 #ifdef STDC_HEADERS
HPL_dtrsmLUNU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)233 static void HPL_dtrsmLUNU
234 (
235    const int                  M,
236    const int                  N,
237    const double               ALPHA,
238    const double               * A,
239    const int                  LDA,
240    double                     * B,
241    const int                  LDB
242 )
243 #else
244 static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB )
245    const int                  LDA, LDB, M, N;
246    const double               ALPHA;
247    const double               * A;
248    double                     * B;
249 #endif
250 {
251    int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
252 
253    for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
254    {
255       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
256       for( k = M-1,     jak  = (M-1)*LDA, ibkj  = M-1+jbj;
257            k >= 0; k--, jak -= LDA,       ibkj -= 1 )
258       {
259          for( i = 0,      iaik  = jak, ibij  = jbj;
260               i < k; i++, iaik += 1,   ibij += 1 )
261          { B[ibij] -= B[ibkj] * A[iaik]; }
262       }
263    }
264 }
265 
266 
267 #ifdef STDC_HEADERS
HPL_dtrsmLUTN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)268 static void HPL_dtrsmLUTN
269 (
270    const int                  M,
271    const int                  N,
272    const double               ALPHA,
273    const double               * A,
274    const int                  LDA,
275    double                     * B,
276    const int                  LDB
277 )
278 #else
279 static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB )
280    const int                  LDA, LDB, M, N;
281    const double               ALPHA;
282    const double               * A;
283    double                     * B;
284 #endif
285 {
286    int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
287    register double            t0;
288 
289    for( j = 0, jbj  = 0; j < N; j++, jbj += LDB )
290    {
291       for( i = 0, jai  = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 )
292       {
293          t0 = ALPHA * B[ibij];
294          for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 )
295          { t0 -= A[iaki] * B[ibkj]; }
296          t0 /= A[i+jai];
297          B[ibij] = t0;
298       }
299    }
300 }
301 
302 
303 #ifdef STDC_HEADERS
HPL_dtrsmLUTU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)304 static void HPL_dtrsmLUTU
305 (
306    const int                  M,
307    const int                  N,
308    const double               ALPHA,
309    const double               * A,
310    const int                  LDA,
311    double                     * B,
312    const int                  LDB
313 )
314 #else
315 static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB )
316    const int                  LDA, LDB, M, N;
317    const double               ALPHA;
318    const double               * A;
319    double                     * B;
320 #endif
321 {
322    register double            t0;
323    int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
324 
325    for( j = 0, jbj  = 0; j < N; j++, jbj += LDB )
326    {
327       for( i = 0, jai  = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 )
328       {
329          t0 = ALPHA * B[ibij];
330          for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 )
331          { t0 -= A[iaki] * B[ibkj]; }
332          B[ibij] = t0;
333       }
334    }
335 }
336 
337 
338 #ifdef STDC_HEADERS
HPL_dtrsmRLNN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)339 static void HPL_dtrsmRLNN
340 (
341    const int                  M,
342    const int                  N,
343    const double               ALPHA,
344    const double               * A,
345    const int                  LDA,
346    double                     * B,
347    const int                  LDB
348 )
349 #else
350 static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB )
351    const int                  LDA, LDB, M, N;
352    const double               ALPHA;
353    const double               * A;
354    double                     * B;
355 #endif
356 {
357    int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
358 
359    for( j = N-1,      jaj  = (N-1)*LDA, jbj  = (N-1)*LDB;
360         j >= 0;  j--, jaj -= LDA,       jbj -= LDB )
361    {
362       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
363       for( k = j+1,    iakj  = j+1+jaj, jbk  = (j+1)*LDB;
364            k < N; k++, iakj += 1,       jbk += LDB )
365       {
366          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
367          { B[ibij] -= A[iakj] * B[ibik]; }
368       }
369       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; }
370    }
371 }
372 
373 
374 #ifdef STDC_HEADERS
HPL_dtrsmRLNU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)375 static void HPL_dtrsmRLNU
376 (
377    const int                  M,
378    const int                  N,
379    const double               ALPHA,
380    const double               * A,
381    const int                  LDA,
382    double                     * B,
383    const int                  LDB
384 )
385 #else
386 static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB )
387    const int                  LDA, LDB, M, N;
388    const double               ALPHA;
389    const double               * A;
390    double                     * B;
391 #endif
392 {
393    int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
394 
395    for( j = N-1,      jaj  = (N-1)*LDA, jbj  = (N-1)*LDB;
396         j >= 0;  j--, jaj -= LDA,       jbj -= LDB )
397    {
398       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
399       for( k = j+1,    iakj  = j+1+jaj, jbk  = (j+1)*LDB;
400            k < N; k++, iakj += 1,       jbk += LDB )
401       {
402          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
403          { B[ibij] -= A[iakj] * B[ibik]; }
404       }
405    }
406 }
407 
408 
409 #ifdef STDC_HEADERS
HPL_dtrsmRLTN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)410 static void HPL_dtrsmRLTN
411 (
412    const int                  M,
413    const int                  N,
414    const double               ALPHA,
415    const double               * A,
416    const int                  LDA,
417    double                     * B,
418    const int                  LDB
419 )
420 #else
421 static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB )
422    const int                  LDA, LDB, M, N;
423    const double               ALPHA;
424    const double               * A;
425    double                     * B;
426 #endif
427 {
428    register double            t0;
429    int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
430 
431    for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB )
432    {
433       for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; }
434       for( j = k+1,    iajk  = (k+1)+jak, jbj  = (k+1)*LDB;
435            j < N; j++, iajk += 1,         jbj += LDB )
436       {
437          t0 = A[iajk];
438          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
439          { B[ibij] -= t0 * B[ibik]; }
440       }
441       for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }
442    }
443 }
444 
445 
446 #ifdef STDC_HEADERS
HPL_dtrsmRLTU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)447 static void HPL_dtrsmRLTU
448 (
449    const int                  M,
450    const int                  N,
451    const double               ALPHA,
452    const double               * A,
453    const int                  LDA,
454    double                     * B,
455    const int                  LDB
456 )
457 #else
458 static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB )
459    const int                  LDA, LDB, M, N;
460    const double               ALPHA;
461    const double               * A;
462    double                     * B;
463 #endif
464 {
465    register double            t0;
466    int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
467 
468    for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB )
469    {
470       for( j = k+1,    iajk  = (k+1)+jak, jbj  = (k+1)*LDB;
471            j < N; j++, iajk += 1,         jbj += LDB )
472       {
473          t0 = A[iajk];
474          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
475          { B[ibij] -= t0 * B[ibik]; }
476       }
477       for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }
478    }
479 }
480 
481 
482 #ifdef STDC_HEADERS
HPL_dtrsmRUNN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)483 static void HPL_dtrsmRUNN
484 (
485    const int                  M,
486    const int                  N,
487    const double               ALPHA,
488    const double               * A,
489    const int                  LDA,
490    double                     * B,
491    const int                  LDB
492 )
493 #else
494 static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB )
495    const int                  LDA, LDB, M, N;
496    const double               ALPHA;
497    const double               * A;
498    double                     * B;
499 #endif
500 {
501    int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
502 
503    for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB )
504    {
505       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
506       for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB )
507       {
508          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
509          { B[ibij] -= A[iakj] * B[ibik]; }
510       }
511       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; }
512    }
513 }
514 
515 
516 #ifdef STDC_HEADERS
HPL_dtrsmRUNU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)517 static void HPL_dtrsmRUNU
518 (
519    const int                  M,
520    const int                  N,
521    const double               ALPHA,
522    const double               * A,
523    const int                  LDA,
524    double                     * B,
525    const int                  LDB
526 )
527 #else
528 static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB )
529    const int                  LDA, LDB, M, N;
530    const double               ALPHA;
531    const double               * A;
532    double                     * B;
533 #endif
534 {
535    int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
536 
537    for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB )
538    {
539       for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
540       for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB )
541       {
542          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
543          { B[ibij] -= A[iakj] * B[ibik]; }
544       }
545    }
546 }
547 
548 
549 #ifdef STDC_HEADERS
HPL_dtrsmRUTN(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)550 static void HPL_dtrsmRUTN
551 (
552    const int                  M,
553    const int                  N,
554    const double               ALPHA,
555    const double               * A,
556    const int                  LDA,
557    double                     * B,
558    const int                  LDB
559 )
560 #else
561 static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB )
562    const int                  LDA, LDB, M, N;
563    const double               ALPHA;
564    const double               * A;
565    double                     * B;
566 #endif
567 {
568    register double            t0;
569    int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
570 
571    for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;
572         k >= 0; k--, jak -= LDA,       jbk -= LDB )
573    {
574       for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; }
575       for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB )
576       {
577          t0 = A[iajk];
578          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
579          { B[ibij] -= t0 * B[ibik]; }
580       }
581       for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }
582    }
583 }
584 
585 #ifdef STDC_HEADERS
HPL_dtrsmRUTU(const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)586 static void HPL_dtrsmRUTU
587 (
588    const int                  M,
589    const int                  N,
590    const double               ALPHA,
591    const double               * A,
592    const int                  LDA,
593    double                     * B,
594    const int                  LDB
595 )
596 #else
597 static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB )
598    const int                  LDA, LDB, M, N;
599    const double               ALPHA;
600    const double               * A;
601    double                     * B;
602 #endif
603 {
604    register double            t0;
605    int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
606 
607    for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;
608         k >= 0; k--, jak -= LDA,       jbk -= LDB )
609    {
610       for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB )
611       {
612          t0 = A[iajk];
613          for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
614          { B[ibij] -= t0 * B[ibik]; }
615       }
616       for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }
617    }
618 }
619 
620 #ifdef STDC_HEADERS
HPL_dtrsm0(const enum HPL_SIDE SIDE,const enum HPL_UPLO UPLO,const enum HPL_TRANS TRANS,const enum HPL_DIAG DIAG,const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)621 static void HPL_dtrsm0
622 (
623    const enum HPL_SIDE        SIDE,
624    const enum HPL_UPLO        UPLO,
625    const enum HPL_TRANS       TRANS,
626    const enum HPL_DIAG        DIAG,
627    const int                  M,
628    const int                  N,
629    const double               ALPHA,
630    const double               * A,
631    const int                  LDA,
632    double                     * B,
633    const int                  LDB
634 )
635 #else
636 static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
637    const enum HPL_SIDE        SIDE;
638    const enum HPL_UPLO        UPLO;
639    const enum HPL_TRANS       TRANS;
640    const enum HPL_DIAG        DIAG;
641    const int                  LDA, LDB, M, N;
642    const double               ALPHA;
643    const double               * A;
644    double                     * B;
645 #endif
646 {
647    int                        i, j;
648 
649    if( ( M == 0 ) || ( N == 0 ) ) return;
650 
651    if( ALPHA == HPL_rzero )
652    {
653       for( j = 0; j < N; j++ )
654       {  for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; }
655       return;
656    }
657 
658    if( SIDE == HplLeft )
659    {
660       if( UPLO == HplUpper )
661       {
662          if( TRANS == HplNoTrans )
663          {
664             if( DIAG == HplNonUnit )
665             {      HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); }
666             else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
667          }
668          else
669          {
670             if( DIAG == HplNonUnit )
671             {      HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); }
672             else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
673          }
674       }
675       else
676       {
677          if( TRANS == HplNoTrans )
678          {
679             if( DIAG == HplNonUnit )
680             {      HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); }
681             else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
682          }
683          else
684          {
685             if( DIAG == HplNonUnit )
686             {      HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); }
687             else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
688          }
689       }
690    }
691    else
692    {
693       if( UPLO == HplUpper )
694       {
695          if( TRANS == HplNoTrans )
696          {
697             if( DIAG == HplNonUnit )
698             {      HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); }
699             else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
700          }
701          else
702          {
703             if( DIAG == HplNonUnit )
704             {      HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); }
705             else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
706          }
707       }
708       else
709       {
710          if( TRANS == HplNoTrans )
711          {
712             if( DIAG == HplNonUnit )
713             {      HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); }
714             else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
715          }
716          else
717          {
718             if( DIAG == HplNonUnit )
719             {      HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); }
720             else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
721          }
722       }
723    }
724 }
725 
726 #endif
727 
728 #ifdef STDC_HEADERS
HPL_dtrsm(const enum HPL_ORDER ORDER,const enum HPL_SIDE SIDE,const enum HPL_UPLO UPLO,const enum HPL_TRANS TRANS,const enum HPL_DIAG DIAG,const int M,const int N,const double ALPHA,const double * A,const int LDA,double * B,const int LDB)729 void HPL_dtrsm
730 (
731    const enum HPL_ORDER             ORDER,
732    const enum HPL_SIDE              SIDE,
733    const enum HPL_UPLO              UPLO,
734    const enum HPL_TRANS             TRANS,
735    const enum HPL_DIAG              DIAG,
736    const int                        M,
737    const int                        N,
738    const double                     ALPHA,
739    const double *                   A,
740    const int                        LDA,
741    double *                         B,
742    const int                        LDB
743 )
744 #else
745 void HPL_dtrsm
746 ( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
747    const enum HPL_ORDER             ORDER;
748    const enum HPL_SIDE              SIDE;
749    const enum HPL_UPLO              UPLO;
750    const enum HPL_TRANS             TRANS;
751    const enum HPL_DIAG              DIAG;
752    const int                        M;
753    const int                        N;
754    const double                     ALPHA;
755    const double *                   A;
756    const int                        LDA;
757    double *                         B;
758    const int                        LDB;
759 #endif
760 {
761 /*
762  * Purpose
763  * =======
764  *
765  * HPL_dtrsm solves one of the matrix equations
766  *
767  *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
768  *
769  * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
770  * non-unit, upper or lower triangular matrix and op(A) is one of
771  *
772  *    op( A ) = A   or   op( A ) = A^T.
773  *
774  * The matrix X is overwritten on B.
775  *
776  * No test for  singularity  or  near-singularity  is included  in  this
777  * routine. Such tests must be performed before calling this routine.
778  *
779  * Arguments
780  * =========
781  *
782  * ORDER   (local input)                 const enum HPL_ORDER
783  *         On entry, ORDER  specifies the storage format of the operands
784  *         as follows:
785  *            ORDER = HplRowMajor,
786  *            ORDER = HplColumnMajor.
787  *
788  * SIDE    (local input)                 const enum HPL_SIDE
789  *         On entry, SIDE  specifies  whether  op(A) appears on the left
790  *         or right of X as follows:
791  *            SIDE==HplLeft    op( A ) * X = alpha * B,
792  *            SIDE==HplRight   X * op( A ) = alpha * B.
793  *
794  * UPLO    (local input)                 const enum HPL_UPLO
795  *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
796  *         triangular  part  of the array  A  is to be referenced.  When
797  *         UPLO==HplUpper, only  the upper triangular part of A is to be
798  *         referenced, otherwise only the lower triangular part of A is
799  *         to be referenced.
800  *
801  * TRANS   (local input)                 const enum HPL_TRANS
802  *         On entry, TRANSA  specifies the form of  op(A)  to be used in
803  *         the matrix-matrix operation follows:
804  *            TRANSA==HplNoTrans    : op( A ) = A,
805  *            TRANSA==HplTrans      : op( A ) = A^T,
806  *            TRANSA==HplConjTrans  : op( A ) = A^T.
807  *
808  * DIAG    (local input)                 const enum HPL_DIAG
809  *         On entry,  DIAG  specifies  whether  A  is unit triangular or
810  *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
811  *         and otherwise, A is not assumed to be unit triangular.
812  *
813  * M       (local input)                 const int
814  *         On entry,  M  specifies  the number of rows of the  matrix B.
815  *         M must be at least zero.
816  *
817  * N       (local input)                 const int
818  *         On entry, N  specifies the number of columns of the matrix B.
819  *         N must be at least zero.
820  *
821  * ALPHA   (local input)                 const double
822  *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
823  *         supplied  as  zero then the elements of the matrix B need not
824  *         be set on input.
825  *
826  * A       (local input)                 const double *
827  *         On entry,  A  points  to an array of size equal to or greater
828  *         than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
829  *         otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
830  *         k by k upper triangular  part of the array A must contain the
831  *         upper triangular  matrix and the  strictly  lower  triangular
832  *         part of A is not referenced.  When  UPLO==HplLower on  entry,
833  *         the  leading k by k lower triangular part of the array A must
834  *         contain the lower triangular matrix  and  the  strictly upper
835  *         triangular part of A is not referenced.
836  *
837  *         Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
838  *         not referenced  either,  but are assumed to be unity.
839  *
840  * LDA     (local input)                 const int
841  *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
842  *         declared  in  the  calling  (sub) program.  LDA  must  be  at
843  *         least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
844  *
845  * B       (local input/output)          double *
846  *         On entry,  B  points  to an array of size equal to or greater
847  *         than LDB * n.  Before entry, the leading  m by n  part of the
848  *         array B must contain the matrix  B, except when beta is zero,
849  *         in which case B need not be set on entry.  On exit, the array
850  *         B is overwritten by the m by n solution matrix.
851  *
852  * LDB     (local input)                 const int
853  *         On entry,  LDB  specifies  the  leading  dimension  of  B  as
854  *         declared  in  the  calling  (sub) program.  LDB  must  be  at
855  *         least MAX(1,m).
856  *
857  * ---------------------------------------------------------------------
858  */
859 #ifdef HPL_CALL_CBLAS
860    cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
861 #endif
862 #ifdef HPL_CALL_VSIPL
863    if( ORDER == HplColumnMajor )
864    {
865       HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
866    }
867    else
868    {
869       HPL_dtrsm0( ( SIDE == HplRight ? HplLeft  : HplRight ),
870                   ( UPLO == HplLower ? HplUpper : HplLower ),
871                   TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
872    }
873 #endif
874 #ifdef HPL_CALL_FBLAS
875    double                    alpha = ALPHA;
876 #ifdef StringSunStyle
877 #if defined( HPL_USE_F77_INTEGER_DEF )
878    F77_INTEGER               IONE = 1;
879 #else
880    int                       IONE = 1;
881 #endif
882 #endif
883 #ifdef StringStructVal
884    F77_CHAR                  fside;
885    F77_CHAR                  fuplo;
886    F77_CHAR                  ftran;
887    F77_CHAR                  fdiag;
888 #endif
889 #ifdef StringStructPtr
890    F77_CHAR                  fside;
891    F77_CHAR                  fuplo;
892    F77_CHAR                  ftran;
893    F77_CHAR                  fdiag;
894 #endif
895 #ifdef StringCrayStyle
896    F77_CHAR                  fside;
897    F77_CHAR                  fuplo;
898    F77_CHAR                  ftran;
899    F77_CHAR                  fdiag;
900 #endif
901 #ifdef HPL_USE_F77_INTEGER_DEF
902    const F77_INTEGER         F77M   = M,   F77N   = N,
903                              F77lda = LDA, F77ldb = LDB;
904 #else
905 #define  F77M                M
906 #define  F77N                N
907 #define  F77lda              LDA
908 #define  F77ldb              LDB
909 #endif
910    char                      cside, cuplo, ctran, cdiag;
911 
912    if(      TRANS == HplNoTrans ) ctran = 'N';
913    else if( TRANS == HplTrans   ) ctran = 'T';
914    else                           ctran = 'C';
915    cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
916 
917    if( ORDER == HplColumnMajor )
918    {
919       cside = ( SIDE == HplRight ? 'R' : 'L' );
920       cuplo = ( UPLO == HplLower ? 'L' : 'U' );
921 #ifdef StringSunStyle
922       F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha,
923                 A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
924 #endif
925 #ifdef StringCrayStyle
926       fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
927       ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
928       F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
929                 A, &F77lda, B, &F77ldb );
930 #endif
931 #ifdef StringStructVal
932       fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
933       ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
934       F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
935                 A, &F77lda, B, &F77ldb );
936 #endif
937 #ifdef StringStructPtr
938       fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
939       ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
940       F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha,
941                 A, &F77lda, B, &F77ldb );
942 #endif
943    }
944    else
945    {
946       cside = ( SIDE == HplRight ? 'L' : 'R' );
947       cuplo = ( UPLO == HplLower ? 'U' : 'L' );
948 #ifdef StringSunStyle
949       F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha,
950                 A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
951 #endif
952 #ifdef StringCrayStyle
953       fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
954       ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
955       F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
956                 A, &F77lda, B, &F77ldb );
957 #endif
958 #ifdef StringStructVal
959       fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
960       ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
961       F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
962                 A, &F77lda, B, &F77ldb );
963 #endif
964 #ifdef StringStructPtr
965       fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
966       ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
967       F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha,
968                 A, &F77lda, B, &F77ldb );
969 #endif
970    }
971 #endif
972 /*
973  * End of HPL_dtrsm
974  */
975 }
976 
977 #endif
978