1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include "common.h"
41 
42 #ifndef UNIT
43 #define INV(a) (ONE / (a))
44 #else
45 #define INV(a) (ONE)
46 #endif
47 
CNAME(BLASLONG m,BLASLONG n,FLOAT * a,BLASLONG lda,BLASLONG offset,FLOAT * b)48 int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *b){
49 
50   BLASLONG i, ii, j, jj;
51 
52   FLOAT data01, data02, data03, data04, data05, data06, data07, data08;
53   FLOAT data09, data10, data11, data12, data13, data14, data15, data16;
54   FLOAT *a1, *a2, *a3, *a4;
55 
56   jj = offset;
57 
58   j = (n >> 2);
59   while (j > 0){
60 
61     a1 = a + 0 * lda;
62     a2 = a + 1 * lda;
63     a3 = a + 2 * lda;
64     a4 = a + 3 * lda;
65 
66     i = (m >> 2);
67     ii = 0;
68     while (i > 0) {
69 
70       if (ii == jj) {
71 #ifndef UNIT
72 	data01 = *(a1 + 0);
73 #endif
74 	data02 = *(a1 + 1);
75 	data03 = *(a1 + 2);
76 	data04 = *(a1 + 3);
77 
78 #ifndef UNIT
79 	data06 = *(a2 + 1);
80 #endif
81 	data07 = *(a2 + 2);
82 	data08 = *(a2 + 3);
83 
84 #ifndef UNIT
85 	data11 = *(a3 + 2);
86 #endif
87 	data12 = *(a3 + 3);
88 
89 #ifndef UNIT
90 	data16 = *(a4 + 3);
91 #endif
92 
93 	*(b +  0) = INV(data01);
94 
95 	*(b +  4) = data02;
96 	*(b +  5) = INV(data06);
97 
98 	*(b +  8) = data03;
99 	*(b +  9) = data07;
100 	*(b + 10) = INV(data11);
101 
102 	*(b + 12) = data04;
103 	*(b + 13) = data08;
104 	*(b + 14) = data12;
105 	*(b + 15) = INV(data16);
106       }
107 
108       if (ii > jj) {
109 
110 	data01 = *(a1 + 0);
111 	data02 = *(a1 + 1);
112 	data03 = *(a1 + 2);
113 	data04 = *(a1 + 3);
114 
115 	data05 = *(a2 + 0);
116 	data06 = *(a2 + 1);
117 	data07 = *(a2 + 2);
118 	data08 = *(a2 + 3);
119 
120 	data09 = *(a3 + 0);
121 	data10 = *(a3 + 1);
122 	data11 = *(a3 + 2);
123 	data12 = *(a3 + 3);
124 
125 	data13 = *(a4 + 0);
126 	data14 = *(a4 + 1);
127 	data15 = *(a4 + 2);
128 	data16 = *(a4 + 3);
129 
130 	*(b +  0) = data01;
131 	*(b +  1) = data05;
132 	*(b +  2) = data09;
133 	*(b +  3) = data13;
134 	*(b +  4) = data02;
135 	*(b +  5) = data06;
136 	*(b +  6) = data10;
137 	*(b +  7) = data14;
138 
139 	*(b +  8) = data03;
140 	*(b +  9) = data07;
141 	*(b + 10) = data11;
142 	*(b + 11) = data15;
143 	*(b + 12) = data04;
144 	*(b + 13) = data08;
145 	*(b + 14) = data12;
146 	*(b + 15) = data16;
147       }
148 
149       a1 += 4;
150       a2 += 4;
151       a3 += 4;
152       a4 += 4;
153       b += 16;
154 
155       i  --;
156       ii += 4;
157     }
158 
159     if ((m & 2) != 0) {
160 
161       if (ii== jj) {
162 #ifndef UNIT
163 	data01 = *(a1 + 0);
164 #endif
165 	data02 = *(a1 + 1);
166 
167 #ifndef UNIT
168 	data06 = *(a2 + 1);
169 #endif
170 
171 	*(b +  0) = INV(data01);
172 
173 	*(b +  4) = data02;
174 	*(b +  5) = INV(data06);
175       }
176 
177       if (ii > jj) {
178 	data01 = *(a1 + 0);
179 	data02 = *(a1 + 1);
180 	data03 = *(a2 + 0);
181 	data04 = *(a2 + 1);
182 	data05 = *(a3 + 0);
183 	data06 = *(a3 + 1);
184 	data07 = *(a4 + 0);
185 	data08 = *(a4 + 1);
186 
187 	*(b +  0) = data01;
188 	*(b +  1) = data03;
189 	*(b +  2) = data05;
190 	*(b +  3) = data07;
191 	*(b +  4) = data02;
192 	*(b +  5) = data04;
193 	*(b +  6) = data06;
194 	*(b +  7) = data08;
195       }
196 
197       a1 += 2;
198       a2 += 2;
199       a3 += 2;
200       a4 += 2;
201       b += 8;
202 
203       ii += 2;
204     }
205 
206     if ((m & 1) != 0) {
207 
208       if (ii== jj) {
209 #ifndef UNIT
210 	data01 = *(a1 + 0);
211 #endif
212 	*(b +  0) = INV(data01);
213       }
214 
215       if (ii > jj)  {
216 	data01 = *(a1 + 0);
217 	data02 = *(a2 + 0);
218 	data03 = *(a3 + 0);
219 	data04 = *(a4 + 0);
220 
221 	*(b +  0) = data01;
222 	*(b +  1) = data02;
223 	*(b +  2) = data03;
224 	*(b +  3) = data04;
225       }
226       b += 4;
227     }
228 
229     a += 4 * lda;
230     jj += 4;
231     j  --;
232   }
233 
234   if (n & 2) {
235     a1 = a + 0 * lda;
236     a2 = a + 1 * lda;
237 
238     i = (m >> 1);
239     ii = 0;
240     while (i > 0) {
241 
242       if (ii == jj) {
243 
244 #ifndef UNIT
245 	data01 = *(a1 + 0);
246 #endif
247 	data02 = *(a1 + 1);
248 
249 #ifndef UNIT
250 	data04 = *(a2 + 1);
251 #endif
252 
253 	*(b +  0) = INV(data01);
254 	*(b +  2) = data02;
255 	*(b +  3) = INV(data04);
256       }
257 
258       if (ii > jj) {
259 	data01 = *(a1 + 0);
260 	data02 = *(a1 + 1);
261 	data03 = *(a2 + 0);
262 	data04 = *(a2 + 1);
263 
264 	*(b +  0) = data01;
265 	*(b +  1) = data03;
266 	*(b +  2) = data02;
267 	*(b +  3) = data04;
268       }
269 
270       a1 += 2;
271       a2 += 2;
272       b += 4;
273 
274       i  --;
275       ii += 2;
276     }
277 
278     if ((m & 1) != 0) {
279 
280       if (ii== jj) {
281 #ifndef UNIT
282 	data01 = *(a1 + 0);
283 #endif
284 	*(b +  0) = INV(data01);
285       }
286 
287       if (ii > jj)  {
288 	data01 = *(a1 + 0);
289 	data02 = *(a2 + 0);
290 	*(b +  0) = data01;
291 	*(b +  1) = data02;
292       }
293       b += 2;
294     }
295     a += 2 * lda;
296     jj += 2;
297   }
298 
299   if (n & 1) {
300     a1 = a + 0 * lda;
301 
302     i = m;
303     ii = 0;
304     while (i > 0) {
305 
306       if (ii == jj) {
307 #ifndef UNIT
308 	data01 = *(a1 + 0);
309 #endif
310 	*(b +  0) = INV(data01);
311       }
312 
313       if (ii > jj) {
314 	data01 = *(a1 + 0);
315 	*(b +  0) = data01;
316       }
317 
318       a1+= 1;
319       b += 1;
320       i  --;
321       ii += 1;
322     }
323   }
324 
325   return 0;
326 }
327