1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include "common.h"
41 
42 #ifndef CONJ
43 #define GEMM_KERNEL	GEMM_KERNEL_R
44 #define GEMM_KERNEL_B0	GEMM_KERNEL_R_B0
45 #else
46 #define GEMM_KERNEL	GEMM_KERNEL_L
47 #define GEMM_KERNEL_B0	GEMM_KERNEL_L_B0
48 #endif
49 
CNAME(BLASLONG m,BLASLONG n,BLASLONG k,FLOAT alpha_r,FLOAT alpha_i,FLOAT * a,FLOAT * b,FLOAT * c,BLASLONG ldc,BLASLONG offset,int flag)50 int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
51 	   FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset, int flag){
52 
53   BLASLONG i, j;
54   BLASLONG loop;
55   FLOAT subbuffer[GEMM_UNROLL_MN * GEMM_UNROLL_MN * COMPSIZE];
56 
57   if (m + offset < 0) {
58 #ifndef LOWER
59     GEMM_KERNEL(m, n, k,
60 		alpha_r,
61 #ifdef COMPLEX
62 		alpha_i,
63 #endif
64 		a, b, c, ldc);
65 #endif
66     return 0;
67   }
68 
69   if (n < offset) {
70 #ifdef LOWER
71     GEMM_KERNEL(m, n, k,
72 		alpha_r,
73 #ifdef COMPLEX
74 		alpha_i,
75 #endif
76 		a, b, c, ldc);
77 #endif
78     return 0;
79   }
80 
81 
82   if (offset > 0) {
83 #ifdef LOWER
84     GEMM_KERNEL(m, offset, k,
85 		alpha_r,
86 #ifdef COMPLEX
87 		alpha_i,
88 #endif
89 		a, b, c, ldc);
90 #endif
91     b += offset * k   * COMPSIZE;
92     c += offset * ldc * COMPSIZE;
93     n -= offset;
94     offset = 0;
95 
96     if (n <= 0) return 0;
97   }
98 
99   if (n > m + offset) {
100 #ifndef LOWER
101       GEMM_KERNEL(m, n - m - offset, k,
102 		  alpha_r,
103 #ifdef COMPLEX
104 		  alpha_i,
105 #endif
106 		  a,
107 		  b + (m + offset) * k   * COMPSIZE,
108 		  c + (m + offset) * ldc * COMPSIZE, ldc);
109 #endif
110 
111     n = m + offset;
112     if (n <= 0) return 0;
113   }
114 
115 
116   if (offset < 0) {
117 #ifndef LOWER
118     GEMM_KERNEL(-offset, n, k,
119 		alpha_r,
120 #ifdef COMPLEX
121 		alpha_i,
122 #endif
123 		a, b, c, ldc);
124 #endif
125     a -= offset * k   * COMPSIZE;
126     c -= offset       * COMPSIZE;
127     m += offset;
128     offset = 0;
129 
130   if (m <= 0) return 0;
131   }
132 
133   if (m > n - offset) {
134 #ifdef LOWER
135     GEMM_KERNEL(m - n + offset, n, k,
136 		alpha_r,
137 #ifdef COMPLEX
138 		alpha_i,
139 #endif
140 		a + (n - offset) * k * COMPSIZE,
141 		b,
142 		c + (n - offset)     * COMPSIZE, ldc);
143 #endif
144     m = n + offset;
145   if (m <= 0) return 0;
146   }
147 
148   for (loop = 0; loop < n; loop += GEMM_UNROLL_MN) {
149 
150     int mm, nn;
151 
152     mm = (loop & ~(GEMM_UNROLL_MN - 1));
153     nn = MIN(GEMM_UNROLL_MN, n - loop);
154 
155 #ifndef LOWER
156     GEMM_KERNEL(mm, nn, k,
157 		  alpha_r,
158 #ifdef COMPLEX
159 		  alpha_i,
160 #endif
161 		  a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
162 #endif
163 
164     if (flag) {
165       GEMM_BETA(nn, nn, 0, ZERO,
166 #ifdef COMPLEX
167 		ZERO,
168 #endif
169 		NULL, 0, NULL, 0, subbuffer, nn);
170 
171       GEMM_KERNEL(nn, nn, k,
172 		    alpha_r,
173 #ifdef COMPLEX
174 		    alpha_i,
175 #endif
176 		    a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
177 
178 
179 #ifndef LOWER
180 
181       for (j = 0; j < nn; j ++) {
182 	for (i = 0; i <= j; i ++) {
183 	  c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
184 	    subbuffer[(i + j * nn) * 2 + 0] + subbuffer[(j + i * nn) * 2 + 0];
185 	  if (i != j) {
186 	    c[(i + loop + (j + loop) * ldc) * 2 + 1] +=
187 	      subbuffer[(i + j * nn) * 2 + 1] - subbuffer[(j + i * nn) * 2 + 1];
188 	  } else {
189 	    c[(i + loop + (j + loop) * ldc) * 2 + 1] = ZERO;
190 	  }
191 	}
192       }
193 #else
194       for (j = 0; j < nn; j ++) {
195 	for (i = j; i < nn; i ++) {
196 	  c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
197 	    subbuffer[(i + j * nn) * 2 + 0] + subbuffer[(j + i * nn) * 2 + 0];
198 	  if (i != j) {
199 	    c[(i + loop + (j + loop) * ldc) * 2 + 1] +=
200 	      subbuffer[(i + j * nn) * 2 + 1] - subbuffer[(j + i * nn) * 2 + 1];
201 	  } else {
202 	    c[(i + loop + (j + loop) * ldc) * 2 + 1]  = ZERO;
203 	  }
204 	}
205       }
206 #endif
207     }
208 
209 #ifdef LOWER
210     GEMM_KERNEL(m - mm - nn, nn, k,
211 		  alpha_r,
212 #ifdef COMPLEX
213 		  alpha_i,
214 #endif
215 		  a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
216 		  c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
217 #endif
218   }
219 
220     return 0;
221 }
222