1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include "common.h"
41 
42 #ifndef CONJA
43 #ifndef CONJB
44 #define GEMM_KERNEL	GEMM_KERNEL_N
45 #else
46 #define GEMM_KERNEL	GEMM_KERNEL_R
47 #endif
48 #else
49 #ifndef CONJB
50 #define GEMM_KERNEL	GEMM_KERNEL_L
51 #else
52 #define GEMM_KERNEL	GEMM_KERNEL_B
53 #endif
54 #endif
55 
CNAME(BLASLONG m,BLASLONG n,BLASLONG k,FLOAT alpha_r,FLOAT alpha_i,FLOAT * a,FLOAT * b,FLOAT * c,BLASLONG ldc,BLASLONG offset)56 int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
57 #ifdef COMPLEX
58 	   FLOAT alpha_i,
59 #endif
60 	   FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset){
61 
62   BLASLONG i, j;
63   BLASLONG loop;
64   FLOAT *cc, *ss;
65   FLOAT subbuffer[GEMM_UNROLL_MN * (GEMM_UNROLL_MN + 1) * COMPSIZE];
66 
67   if (m + offset < 0) {
68 #ifndef LOWER
69     GEMM_KERNEL(m, n, k,
70 		alpha_r,
71 #ifdef COMPLEX
72 		alpha_i,
73 #endif
74 		a, b, c, ldc);
75 #endif
76     return 0;
77   }
78 
79   if (n < offset) {
80 #ifdef LOWER
81     GEMM_KERNEL(m, n, k,
82 		alpha_r,
83 #ifdef COMPLEX
84 		alpha_i,
85 #endif
86 		a, b, c, ldc);
87 #endif
88     return 0;
89   }
90 
91   if (offset > 0) {
92 #ifdef LOWER
93     GEMM_KERNEL(m, offset, k,
94 		alpha_r,
95 #ifdef COMPLEX
96 		alpha_i,
97 #endif
98 		a, b, c, ldc);
99 #endif
100     b += offset * k   * COMPSIZE;
101     c += offset * ldc * COMPSIZE;
102     n -= offset;
103     offset = 0;
104 
105     if (n <= 0) return 0;
106   }
107 
108   if (n > m + offset) {
109 #ifndef LOWER
110       GEMM_KERNEL(m, n - m - offset, k,
111 		  alpha_r,
112 #ifdef COMPLEX
113 		  alpha_i,
114 #endif
115 		  a,
116 		  b + (m + offset) * k   * COMPSIZE,
117 		  c + (m + offset) * ldc * COMPSIZE, ldc);
118 #endif
119 
120     n = m + offset;
121     if (n <= 0) return 0;
122   }
123 
124   if (offset < 0) {
125 #ifndef LOWER
126     GEMM_KERNEL(-offset, n, k,
127 		alpha_r,
128 #ifdef COMPLEX
129 		alpha_i,
130 #endif
131 		a, b, c, ldc);
132 #endif
133     a -= offset * k   * COMPSIZE;
134     c -= offset       * COMPSIZE;
135     m += offset;
136     offset = 0;
137 
138   if (m <= 0) return 0;
139   }
140 
141   if (m > n - offset) {
142 #ifdef LOWER
143     GEMM_KERNEL(m - n + offset, n, k,
144 		alpha_r,
145 #ifdef COMPLEX
146 		alpha_i,
147 #endif
148 		a + (n - offset) * k * COMPSIZE,
149 		b,
150 		c + (n - offset)     * COMPSIZE, ldc);
151 #endif
152     m = n + offset;
153 
154   if (m <= 0) return 0;
155   }
156 
157   for (loop = 0; loop < n; loop += GEMM_UNROLL_MN) {
158 
159     int mm, nn;
160 
161     mm = (loop & ~(GEMM_UNROLL_MN - 1));
162     nn = MIN(GEMM_UNROLL_MN, n - loop);
163 
164 #ifndef LOWER
165     GEMM_KERNEL(mm, nn, k,
166 		  alpha_r,
167 #ifdef COMPLEX
168 		  alpha_i,
169 #endif
170 		  a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
171 #endif
172 
173     GEMM_BETA(nn, nn, 0, ZERO,
174 #ifdef COMPLEX
175 	      ZERO,
176 #endif
177 	      NULL, 0, NULL, 0, subbuffer, nn);
178 
179     GEMM_KERNEL(nn, nn, k,
180 		alpha_r,
181 #ifdef COMPLEX
182 		alpha_i,
183 #endif
184 		a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
185 
186     cc = c + (loop + loop * ldc) * COMPSIZE;
187     ss = subbuffer;
188 
189 #ifndef LOWER
190  for (j = 0; j < nn; j ++) {
191     for (i = 0; i <= j; i ++) {
192 #ifndef COMPLEX
193       cc[i] += ss[i];
194 #else
195       cc[i * 2 + 0] += ss[i * 2 + 0];
196       cc[i * 2 + 1] += ss[i * 2 + 1];
197 #endif
198     }
199     ss += nn  * COMPSIZE;
200     cc += ldc * COMPSIZE;
201   }
202 #else
203   for (j = 0; j < nn; j ++) {
204     for (i = j; i < nn; i ++) {
205 #ifndef COMPLEX
206       cc[i] += ss[i];
207 #else
208       cc[i * 2 + 0] += ss[i * 2 + 0];
209       cc[i * 2 + 1] += ss[i * 2 + 1];
210 #endif
211     }
212     ss += nn  * COMPSIZE;
213     cc += ldc * COMPSIZE;
214   }
215 #endif
216 
217 #ifdef LOWER
218     GEMM_KERNEL(m - mm - nn, nn, k,
219 		  alpha_r,
220 #ifdef COMPLEX
221 		  alpha_i,
222 #endif
223 		  a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
224 		  c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
225 #endif
226 
227   }
228 
229     return 0;
230 }
231