1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
4 /* */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
8 /* */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
12 /* */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
17 /* */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
32 /* */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include "common.h"
41
42 #ifndef UNIT
43 #define INV(a) (ONE / (a))
44 #else
45 #define INV(a) (ONE)
46 #endif
47
CNAME(BLASLONG m,BLASLONG n,FLOAT * a,BLASLONG lda,BLASLONG offset,FLOAT * b)48 int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *b){
49
50 BLASLONG i, ii, j, jj;
51
52 FLOAT data01, data02, data03, data04, data05, data06, data07, data08;
53 FLOAT data09, data10, data11, data12, data13, data14, data15, data16;
54 FLOAT *a1, *a2, *a3, *a4;
55
56 jj = offset;
57
58 j = (n >> 2);
59 while (j > 0){
60
61 a1 = a + 0 * lda;
62 a2 = a + 1 * lda;
63 a3 = a + 2 * lda;
64 a4 = a + 3 * lda;
65
66 i = (m >> 2);
67 ii = 0;
68 while (i > 0) {
69
70 if (ii == jj) {
71 #ifndef UNIT
72 data01 = *(a1 + 0);
73 #endif
74 data02 = *(a1 + 1);
75 data03 = *(a1 + 2);
76 data04 = *(a1 + 3);
77
78 #ifndef UNIT
79 data06 = *(a2 + 1);
80 #endif
81 data07 = *(a2 + 2);
82 data08 = *(a2 + 3);
83
84 #ifndef UNIT
85 data11 = *(a3 + 2);
86 #endif
87 data12 = *(a3 + 3);
88
89 #ifndef UNIT
90 data16 = *(a4 + 3);
91 #endif
92
93 *(b + 0) = INV(data01);
94
95 *(b + 4) = data02;
96 *(b + 5) = INV(data06);
97
98 *(b + 8) = data03;
99 *(b + 9) = data07;
100 *(b + 10) = INV(data11);
101
102 *(b + 12) = data04;
103 *(b + 13) = data08;
104 *(b + 14) = data12;
105 *(b + 15) = INV(data16);
106 }
107
108 if (ii > jj) {
109
110 data01 = *(a1 + 0);
111 data02 = *(a1 + 1);
112 data03 = *(a1 + 2);
113 data04 = *(a1 + 3);
114
115 data05 = *(a2 + 0);
116 data06 = *(a2 + 1);
117 data07 = *(a2 + 2);
118 data08 = *(a2 + 3);
119
120 data09 = *(a3 + 0);
121 data10 = *(a3 + 1);
122 data11 = *(a3 + 2);
123 data12 = *(a3 + 3);
124
125 data13 = *(a4 + 0);
126 data14 = *(a4 + 1);
127 data15 = *(a4 + 2);
128 data16 = *(a4 + 3);
129
130 *(b + 0) = data01;
131 *(b + 1) = data05;
132 *(b + 2) = data09;
133 *(b + 3) = data13;
134 *(b + 4) = data02;
135 *(b + 5) = data06;
136 *(b + 6) = data10;
137 *(b + 7) = data14;
138
139 *(b + 8) = data03;
140 *(b + 9) = data07;
141 *(b + 10) = data11;
142 *(b + 11) = data15;
143 *(b + 12) = data04;
144 *(b + 13) = data08;
145 *(b + 14) = data12;
146 *(b + 15) = data16;
147 }
148
149 a1 += 4;
150 a2 += 4;
151 a3 += 4;
152 a4 += 4;
153 b += 16;
154
155 i --;
156 ii += 4;
157 }
158
159 if ((m & 2) != 0) {
160
161 if (ii== jj) {
162 #ifndef UNIT
163 data01 = *(a1 + 0);
164 #endif
165 data02 = *(a1 + 1);
166
167 #ifndef UNIT
168 data06 = *(a2 + 1);
169 #endif
170
171 *(b + 0) = INV(data01);
172
173 *(b + 4) = data02;
174 *(b + 5) = INV(data06);
175 }
176
177 if (ii > jj) {
178 data01 = *(a1 + 0);
179 data02 = *(a1 + 1);
180 data03 = *(a2 + 0);
181 data04 = *(a2 + 1);
182 data05 = *(a3 + 0);
183 data06 = *(a3 + 1);
184 data07 = *(a4 + 0);
185 data08 = *(a4 + 1);
186
187 *(b + 0) = data01;
188 *(b + 1) = data03;
189 *(b + 2) = data05;
190 *(b + 3) = data07;
191 *(b + 4) = data02;
192 *(b + 5) = data04;
193 *(b + 6) = data06;
194 *(b + 7) = data08;
195 }
196
197 a1 += 2;
198 a2 += 2;
199 a3 += 2;
200 a4 += 2;
201 b += 8;
202
203 ii += 2;
204 }
205
206 if ((m & 1) != 0) {
207
208 if (ii== jj) {
209 #ifndef UNIT
210 data01 = *(a1 + 0);
211 #endif
212 *(b + 0) = INV(data01);
213 }
214
215 if (ii > jj) {
216 data01 = *(a1 + 0);
217 data02 = *(a2 + 0);
218 data03 = *(a3 + 0);
219 data04 = *(a4 + 0);
220
221 *(b + 0) = data01;
222 *(b + 1) = data02;
223 *(b + 2) = data03;
224 *(b + 3) = data04;
225 }
226 b += 4;
227 }
228
229 a += 4 * lda;
230 jj += 4;
231 j --;
232 }
233
234 if (n & 2) {
235 a1 = a + 0 * lda;
236 a2 = a + 1 * lda;
237
238 i = (m >> 1);
239 ii = 0;
240 while (i > 0) {
241
242 if (ii == jj) {
243
244 #ifndef UNIT
245 data01 = *(a1 + 0);
246 #endif
247 data02 = *(a1 + 1);
248
249 #ifndef UNIT
250 data04 = *(a2 + 1);
251 #endif
252
253 *(b + 0) = INV(data01);
254 *(b + 2) = data02;
255 *(b + 3) = INV(data04);
256 }
257
258 if (ii > jj) {
259 data01 = *(a1 + 0);
260 data02 = *(a1 + 1);
261 data03 = *(a2 + 0);
262 data04 = *(a2 + 1);
263
264 *(b + 0) = data01;
265 *(b + 1) = data03;
266 *(b + 2) = data02;
267 *(b + 3) = data04;
268 }
269
270 a1 += 2;
271 a2 += 2;
272 b += 4;
273
274 i --;
275 ii += 2;
276 }
277
278 if ((m & 1) != 0) {
279
280 if (ii== jj) {
281 #ifndef UNIT
282 data01 = *(a1 + 0);
283 #endif
284 *(b + 0) = INV(data01);
285 }
286
287 if (ii > jj) {
288 data01 = *(a1 + 0);
289 data02 = *(a2 + 0);
290 *(b + 0) = data01;
291 *(b + 1) = data02;
292 }
293 b += 2;
294 }
295 a += 2 * lda;
296 jj += 2;
297 }
298
299 if (n & 1) {
300 a1 = a + 0 * lda;
301
302 i = m;
303 ii = 0;
304 while (i > 0) {
305
306 if (ii == jj) {
307 #ifndef UNIT
308 data01 = *(a1 + 0);
309 #endif
310 *(b + 0) = INV(data01);
311 }
312
313 if (ii > jj) {
314 data01 = *(a1 + 0);
315 *(b + 0) = data01;
316 }
317
318 a1+= 1;
319 b += 1;
320 i --;
321 ii += 1;
322 }
323 }
324
325 return 0;
326 }
327