1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include "common.h"
41 
42 #ifdef UNIT
43 #define TRTI2	TRTI2_LU
44 #define TRMM	TRMM_LNLU
45 #define TRSM	TRSM_RNLU
46 #else
47 #define TRTI2	TRTI2_LN
48 #define TRMM	TRMM_LNLN
49 #define TRSM	TRSM_RNLN
50 #endif
51 
CNAME(blas_arg_t * args,BLASLONG * range_m,BLASLONG * range_n,FLOAT * sa,FLOAT * sb,BLASLONG mypos)52 blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
53 
54   BLASLONG n, info;
55   BLASLONG bk, i, blocking, start_i;
56   int mode;
57   BLASLONG lda; // , range_N[2];
58   blas_arg_t newarg;
59   FLOAT *a;
60   FLOAT alpha[2] = { ONE, ZERO};
61   FLOAT beta [2] = {-ONE, ZERO};
62 
63 #ifndef COMPLEX
64 #ifdef XDOUBLE
65   mode  =  BLAS_XDOUBLE | BLAS_REAL;
66 #elif defined(DOUBLE)
67   mode  =  BLAS_DOUBLE  | BLAS_REAL;
68 #else
69   mode  =  BLAS_SINGLE  | BLAS_REAL;
70 #endif
71 #else
72 #ifdef XDOUBLE
73   mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
74 #elif defined(DOUBLE)
75   mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
76 #else
77   mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
78 #endif
79 #endif
80 
81   n = args -> n;
82   a  = (FLOAT *)args -> a;
83   lda = args -> lda;
84 
85   if (range_n)  n = range_n[1] - range_n[0];
86 
87   if (n <= DTB_ENTRIES) {
88     info = TRTI2(args, NULL, range_n, sa, sb, 0);
89     return info;
90   }
91 
92   blocking = GEMM_Q;
93   if (n < 4 * GEMM_Q) blocking = (n + 3) / 4;
94 
95   start_i = 0;
96   while (start_i < n) start_i += blocking;
97   start_i -= blocking;
98 
99   for (i = start_i; i >= 0; i -= blocking) {
100     bk = n - i;
101     if (bk > blocking) bk = blocking;
102 
103     /* range_N[0] = i;
104     range_N[1] = i + bk; */
105 
106     newarg.lda = lda;
107     newarg.ldb = lda;
108     newarg.ldc = lda;
109     newarg.alpha = alpha;
110 
111     newarg.m = n - bk - i;
112     newarg.n = bk;
113     newarg.a = a + ( i       + i * lda) * COMPSIZE;
114     newarg.b = a + ((i + bk) + i * lda) * COMPSIZE;
115 
116     newarg.beta  = beta;
117     newarg.nthreads = args -> nthreads;
118 
119     gemm_thread_m(mode, &newarg, NULL, NULL, TRSM, sa, sb, args -> nthreads);
120 
121     newarg.m = bk;
122     newarg.n = bk;
123 
124     newarg.a = a + (i + i * lda) * COMPSIZE;
125 
126     CNAME  (&newarg, NULL, NULL, sa, sb, 0);
127 
128     newarg.m = n - bk - i;
129     newarg.n = i;
130     newarg.k = bk;
131 
132     newarg.a = a + (i + bk + i * lda) * COMPSIZE;
133     newarg.b = a + (i               ) * COMPSIZE;
134     newarg.c = a + (i + bk          ) * COMPSIZE;
135 
136     newarg.beta  = NULL;
137 
138     gemm_thread_n(mode, &newarg, NULL, NULL, GEMM_NN, sa, sb, args -> nthreads);
139 
140     newarg.a = a + (i      + i * lda) * COMPSIZE;
141     newarg.b = a + (i               ) * COMPSIZE;
142 
143     newarg.m = bk;
144     newarg.n = i;
145 
146     gemm_thread_n(mode, &newarg, NULL, NULL, TRMM, sa, sb, args -> nthreads);
147   }
148 
149 
150   return 0;
151 }
152