1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include "common.h"
41 
42 #define a2	(a1 + 2)
43 #define a4	(a3 + 2)
44 
CNAME(BLASLONG n,BLASLONG k1,BLASLONG k2,FLOAT * a,BLASLONG lda,blasint * ipiv,FLOAT * buffer)45 int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *ipiv, FLOAT *buffer){
46 
47   BLASLONG i, j, ip1, ip2;
48   blasint *piv;
49   FLOAT *a1, *a3;
50   FLOAT *b1, *b2, *b3, *b4;
51   FLOAT A1, A2, A3, A4;
52   FLOAT A5, A6, A7, A8;
53   FLOAT B1, B2, B3, B4;
54   FLOAT B5, B6, B7, B8;
55 
56   a -= 2;
57   lda *= 2;
58   k1 --;
59 
60  ipiv += k1;
61 
62   if (n  <= 0) return 0;
63 
64   j = (n >> 1);
65   if (j > 0) {
66     do {
67       piv = ipiv;
68 
69       a1 = a + (k1 + 1) * 2;
70       a3 = a1 + lda;
71 
72       ip1 = *(piv + 0) * 2;
73       ip2 = *(piv + 1) * 2;
74       piv += 2;
75 
76       b1 = a + ip1;
77       b2 = a + ip2;
78 
79       b3 = b1 + lda;
80       b4 = b2 + lda;
81 
82       i = ((k2 - k1) >> 1);
83 
84       if (i > 0) {
85 	do {
86 	  A1 = *(a1 + 0);
87 	  A2 = *(a1 + 1);
88 	  A3 = *(a2 + 0);
89 	  A4 = *(a2 + 1);
90 	  A5 = *(a3 + 0);
91 	  A6 = *(a3 + 1);
92 	  A7 = *(a4 + 0);
93 	  A8 = *(a4 + 1);
94 
95 	  B1 = *(b1 + 0);
96 	  B2 = *(b1 + 1);
97 	  B3 = *(b2 + 0);
98 	  B4 = *(b2 + 1);
99 	  B5 = *(b3 + 0);
100 	  B6 = *(b3 + 1);
101 	  B7 = *(b4 + 0);
102 	  B8 = *(b4 + 1);
103 
104 	  ip1 = *(piv + 0) * 2;
105 	  ip2 = *(piv + 1) * 2;
106 	  piv += 2;
107 
108 	  if (b1 == a1) {
109 	    if (b2 == a2) {
110 	      *(buffer + 0) = A1;
111 	      *(buffer + 1) = A2;
112 	      *(buffer + 2) = A5;
113 	      *(buffer + 3) = A6;
114 	      *(buffer + 4) = A3;
115 	      *(buffer + 5) = A4;
116 	      *(buffer + 6) = A7;
117 	      *(buffer + 7) = A8;
118 	    } else {
119 	      *(buffer + 0) = A1;
120 	      *(buffer + 1) = A2;
121 	      *(buffer + 2) = A5;
122 	      *(buffer + 3) = A6;
123 	      *(buffer + 4) = B3;
124 	      *(buffer + 5) = B4;
125 	      *(buffer + 6) = B7;
126 	      *(buffer + 7) = B8;
127 
128 	      *(b2 + 0) = A3;
129 	      *(b2 + 1) = A4;
130 	      *(b4 + 0) = A7;
131 	      *(b4 + 1) = A8;
132 	    }
133 	} else
134 	  if (b1 == a2) {
135 	    if (b2 == a2) {
136 	      *(buffer + 0) = A3;
137 	      *(buffer + 1) = A4;
138 	      *(buffer + 2) = A7;
139 	      *(buffer + 3) = A8;
140 	      *(buffer + 4) = A1;
141 	      *(buffer + 5) = A2;
142 	      *(buffer + 6) = A5;
143 	      *(buffer + 7) = A6;
144 	    } else {
145 	      *(buffer + 0) = A3;
146 	      *(buffer + 1) = A4;
147 	      *(buffer + 2) = A7;
148 	      *(buffer + 3) = A8;
149 	      *(buffer + 4) = B3;
150 	      *(buffer + 5) = B4;
151 	      *(buffer + 6) = B7;
152 	      *(buffer + 7) = B8;
153 
154 	      *(b2 + 0) = A1;
155 	      *(b2 + 1) = A2;
156 	      *(b4 + 0) = A5;
157 	      *(b4 + 1) = A6;
158 	    }
159 	  } else {
160 	    if (b2 == a2) {
161 	      *(buffer + 0) = B1;
162 	      *(buffer + 1) = B2;
163 	      *(buffer + 2) = B5;
164 	      *(buffer + 3) = B6;
165 	      *(buffer + 4) = A3;
166 	      *(buffer + 5) = A4;
167 	      *(buffer + 6) = A7;
168 	      *(buffer + 7) = A8;
169 
170 	      *(b1 + 0) = A1;
171 	      *(b1 + 1) = A2;
172 	      *(b3 + 0) = A5;
173 	      *(b3 + 1) = A6;
174 	    } else
175 	      if (b2 == b1) {
176 		*(buffer + 0) = B1;
177 		*(buffer + 1) = B2;
178 		*(buffer + 2) = B5;
179 		*(buffer + 3) = B6;
180 		*(buffer + 4) = A1;
181 		*(buffer + 5) = A2;
182 		*(buffer + 6) = A5;
183 		*(buffer + 7) = A6;
184 
185 		*(b1 + 0) = A3;
186 		*(b1 + 1) = A4;
187 		*(b3 + 0) = A7;
188 		*(b3 + 1) = A8;
189 	      } else {
190 		*(buffer + 0) = B1;
191 		*(buffer + 1) = B2;
192 		*(buffer + 2) = B5;
193 		*(buffer + 3) = B6;
194 		*(buffer + 4) = B3;
195 		*(buffer + 5) = B4;
196 		*(buffer + 6) = B7;
197 		*(buffer + 7) = B8;
198 		*(b1 + 0) = A1;
199 		*(b1 + 1) = A2;
200 		*(b2 + 0) = A3;
201 		*(b2 + 1) = A4;
202 		*(b3 + 0) = A5;
203 		*(b3 + 1) = A6;
204 		*(b4 + 0) = A7;
205 		*(b4 + 1) = A8;
206 	      }
207 	    }
208 
209 	    buffer += 8;
210 
211 	    b1 = a + ip1;
212 	    b2 = a + ip2;
213 
214 	    b3 = b1 + lda;
215 	    b4 = b2 + lda;
216 
217 	    a1 += 4;
218 	    a3 += 4;
219 
220 	    i --;
221 	} while (i > 0);
222       }
223 
224       i = ((k2 - k1) & 1);
225 
226       if (i > 0) {
227 	A1 = *(a1 + 0);
228 	A2 = *(a1 + 1);
229 	B1 = *(b1 + 0);
230 	B2 = *(b1 + 1);
231 	A3 = *(a3 + 0);
232 	A4 = *(a3 + 1);
233 	B3 = *(b3 + 0);
234 	B4 = *(b3 + 1);
235 
236 	if (a1 == b1) {
237 	  *(buffer + 0) = A1;
238 	  *(buffer + 1) = A2;
239 	  *(buffer + 2) = A3;
240 	  *(buffer + 3) = A4;
241 
242 	} else {
243 	  *(buffer + 0) = B1;
244 	  *(buffer + 1) = B2;
245 	  *(buffer + 2) = B3;
246 	  *(buffer + 3) = B4;
247 	  *(b1 + 0) = A1;
248 	  *(b1 + 1) = A2;
249 	  *(b3 + 0) = A3;
250 	  *(b3 + 1) = A4;
251 	}
252 	buffer += 4;
253       }
254 
255       a += 2 * lda;
256       j --;
257     } while (j > 0);
258   }
259 
260   if (n & 1) {
261     piv = ipiv;
262 
263     a1 = a + (k1 + 1) * 2;
264 
265     ip1 = *(piv + 0) * 2;
266     ip2 = *(piv + 1) * 2;
267     piv += 2;
268 
269     b1 = a + ip1;
270     b2 = a + ip2;
271 
272     i = ((k2 - k1) >> 1);
273 
274     if (i > 0) {
275       do {
276 	A1 = *(a1 + 0);
277 	A2 = *(a1 + 1);
278 	A3 = *(a2 + 0);
279 	A4 = *(a2 + 1);
280 	B1 = *(b1 + 0);
281 	B2 = *(b1 + 1);
282 	B3 = *(b2 + 0);
283 	B4 = *(b2 + 1);
284 
285 	ip1 = *(piv + 0) * 2;
286 	ip2 = *(piv + 1) * 2;
287 	piv += 2;
288 
289 	if (b1 == a1) {
290 	  if (b2 == a2) {
291 	    *(buffer + 0) = A1;
292 	    *(buffer + 1) = A2;
293 	    *(buffer + 2) = A3;
294 	    *(buffer + 3) = A4;
295 	  } else {
296 	    *(buffer + 0) = A1;
297 	    *(buffer + 1) = A2;
298 	    *(buffer + 2) = B3;
299 	    *(buffer + 3) = B4;
300 
301 	    *(b2 + 0) = A3;
302 	    *(b2 + 1) = A4;
303 	  }
304 	} else
305 	  if (b1 == a2) {
306 	    if (b2 == a2) {
307 	      *(buffer + 0) = A3;
308 	      *(buffer + 1) = A4;
309 	      *(buffer + 2) = A1;
310 	      *(buffer + 3) = A2;
311 	    } else {
312 	      *(buffer + 0) = A3;
313 	      *(buffer + 1) = A4;
314 	      *(buffer + 2) = B3;
315 	      *(buffer + 3) = B4;
316 	      *(b2 + 0) = A1;
317 	      *(b2 + 1) = A2;
318 	    }
319 	  } else {
320 	    if (b2 == a2) {
321 	      *(buffer + 0) = B1;
322 	      *(buffer + 1) = B2;
323 	      *(buffer + 2) = A3;
324 	      *(buffer + 3) = A4;
325 	      *(b1 + 0) = A1;
326 	      *(b1 + 1) = A2;
327 	    } else
328 	      if (b2 == b1) {
329 		*(buffer + 0) = B1;
330 		*(buffer + 1) = B2;
331 		*(buffer + 2) = A1;
332 		*(buffer + 3) = A2;
333 		*(b1 + 0) = A3;
334 		*(b1 + 1) = A4;
335 	      } else {
336 		*(buffer + 0) = B1;
337 		*(buffer + 1) = B2;
338 		*(buffer + 2) = B3;
339 		*(buffer + 3) = B4;
340 		*(b1 + 0) = A1;
341 		*(b1 + 1) = A2;
342 		*(b2 + 0) = A3;
343 		*(b2 + 1) = A4;
344 	      }
345 	  }
346 
347 	buffer += 4;
348 
349 	b1 = a + ip1;
350 	b2 = a + ip2;
351 
352 	a1 += 4;
353 
354 	i --;
355       } while (i > 0);
356     }
357 
358     i = ((k2 - k1) & 1);
359 
360     if (i > 0) {
361       A1 = *(a1 + 0);
362       A2 = *(a1 + 1);
363       B1 = *(b1 + 0);
364       B2 = *(b1 + 1);
365 
366       if (a1 == b1) {
367 	*(buffer + 0) = A1;
368 	*(buffer + 1) = A2;
369       } else {
370 	*(buffer + 0) = B1;
371 	*(buffer + 1) = B2;
372 	*(b1 + 0) = A1;
373 	*(b1 + 1) = A2;
374       }
375       // buffer += 2;
376     }
377   }
378 
379   return 0;
380 }
381 
382