1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name(s) of the copyright holder(s) nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #include "blis.h"
36 
37 #undef  GENTFUNC
38 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
39 \
40 void PASTEMAC3(ch,opname,arch,suf) \
41      ( \
42        conj_t           conjp, \
43        dim_t            n, \
44        void*   restrict kappa, \
45        void*   restrict p,             inc_t ldp, \
46        void*   restrict a, inc_t inca, inc_t lda, \
47        cntx_t* restrict cntx  \
48      ) \
49 { \
50 	ctype* restrict kappa_cast = kappa; \
51 	ctype* restrict pi1        = p; \
52 	ctype* restrict alpha1     = a; \
53 \
54 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
55 	{ \
56 		if ( bli_is_conj( conjp ) ) \
57 		{ \
58 			for ( ; n != 0; --n ) \
59 			{ \
60 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
61 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
62 \
63 				pi1    += ldp; \
64 				alpha1 += lda; \
65 			} \
66 		} \
67 		else \
68 		{ \
69 			for ( ; n != 0; --n ) \
70 			{ \
71 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
72 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
73 \
74 				pi1    += ldp; \
75 				alpha1 += lda; \
76 			} \
77 		} \
78 	} \
79 	else \
80 	{ \
81 		if ( bli_is_conj( conjp ) ) \
82 		{ \
83 			for ( ; n != 0; --n ) \
84 			{ \
85 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
86 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
87 \
88 				pi1    += ldp; \
89 				alpha1 += lda; \
90 			} \
91 		} \
92 		else \
93 		{ \
94 			for ( ; n != 0; --n ) \
95 			{ \
96 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
97 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
98 \
99 				pi1    += ldp; \
100 				alpha1 += lda; \
101 			} \
102 		} \
103 	} \
104 }
105 
106 INSERT_GENTFUNC_BASIC2( unpackm_2xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
107 
108 
109 
110 
111 #undef  GENTFUNC
112 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
113 \
114 void PASTEMAC3(ch,opname,arch,suf) \
115      ( \
116        conj_t           conjp, \
117        dim_t            n, \
118        void*   restrict kappa, \
119        void*   restrict p,             inc_t ldp, \
120        void*   restrict a, inc_t inca, inc_t lda, \
121        cntx_t* restrict cntx  \
122      ) \
123 { \
124 	ctype* restrict kappa_cast = kappa; \
125 	ctype* restrict pi1        = p; \
126 	ctype* restrict alpha1     = a; \
127 \
128 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
129 	{ \
130 		if ( bli_is_conj( conjp ) ) \
131 		{ \
132 			for ( ; n != 0; --n ) \
133 			{ \
134 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
135 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
136 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
137 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
138 \
139 				pi1    += ldp; \
140 				alpha1 += lda; \
141 			} \
142 		} \
143 		else \
144 		{ \
145 			for ( ; n != 0; --n ) \
146 			{ \
147 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
148 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
149 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
150 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
151 \
152 				pi1    += ldp; \
153 				alpha1 += lda; \
154 			} \
155 		} \
156 	} \
157 	else \
158 	{ \
159 		if ( bli_is_conj( conjp ) ) \
160 		{ \
161 			for ( ; n != 0; --n ) \
162 			{ \
163 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
164 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
165 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
166 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
167 \
168 				pi1    += ldp; \
169 				alpha1 += lda; \
170 			} \
171 		} \
172 		else \
173 		{ \
174 			for ( ; n != 0; --n ) \
175 			{ \
176 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
177 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
178 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
179 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
180 \
181 				pi1    += ldp; \
182 				alpha1 += lda; \
183 			} \
184 		} \
185 	} \
186 }
187 
188 INSERT_GENTFUNC_BASIC2( unpackm_4xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
189 
190 
191 
192 
193 #undef  GENTFUNC
194 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
195 \
196 void PASTEMAC3(ch,opname,arch,suf) \
197      ( \
198        conj_t           conjp, \
199        dim_t            n, \
200        void*   restrict kappa, \
201        void*   restrict p,             inc_t ldp, \
202        void*   restrict a, inc_t inca, inc_t lda, \
203        cntx_t* restrict cntx  \
204      ) \
205 { \
206 	ctype* restrict kappa_cast = kappa; \
207 	ctype* restrict pi1        = p; \
208 	ctype* restrict alpha1     = a; \
209 \
210 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
211 	{ \
212 		if ( bli_is_conj( conjp ) ) \
213 		{ \
214 			for ( ; n != 0; --n ) \
215 			{ \
216 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
217 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
218 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
219 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
220 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
221 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
222 \
223 				pi1    += ldp; \
224 				alpha1 += lda; \
225 			} \
226 		} \
227 		else \
228 		{ \
229 			for ( ; n != 0; --n ) \
230 			{ \
231 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
232 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
233 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
234 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
235 				PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
236 				PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
237 \
238 				pi1    += ldp; \
239 				alpha1 += lda; \
240 			} \
241 		} \
242 	} \
243 	else \
244 	{ \
245 		if ( bli_is_conj( conjp ) ) \
246 		{ \
247 			for ( ; n != 0; --n ) \
248 			{ \
249 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
250 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
251 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
252 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
253 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
254 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
255 \
256 				pi1    += ldp; \
257 				alpha1 += lda; \
258 			} \
259 		} \
260 		else \
261 		{ \
262 			for ( ; n != 0; --n ) \
263 			{ \
264 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
265 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
266 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
267 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
268 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
269 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
270 \
271 				pi1    += ldp; \
272 				alpha1 += lda; \
273 			} \
274 		} \
275 	} \
276 }
277 
278 INSERT_GENTFUNC_BASIC2( unpackm_6xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
279 
280 
281 
282 
283 #undef  GENTFUNC
284 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
285 \
286 void PASTEMAC3(ch,opname,arch,suf) \
287      ( \
288        conj_t           conjp, \
289        dim_t            n, \
290        void*   restrict kappa, \
291        void*   restrict p,             inc_t ldp, \
292        void*   restrict a, inc_t inca, inc_t lda, \
293        cntx_t* restrict cntx  \
294      ) \
295 { \
296 	ctype* restrict kappa_cast = kappa; \
297 	ctype* restrict pi1        = p; \
298 	ctype* restrict alpha1     = a; \
299 \
300 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
301 	{ \
302 		if ( bli_is_conj( conjp ) ) \
303 		{ \
304 			for ( ; n != 0; --n ) \
305 			{ \
306 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
307 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
308 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
309 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
310 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
311 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
312 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
313 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
314 \
315 				pi1    += ldp; \
316 				alpha1 += lda; \
317 			} \
318 		} \
319 		else \
320 		{ \
321 			for ( ; n != 0; --n ) \
322 			{ \
323 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
324 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
325 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
326 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
327 				PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
328 				PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
329 				PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
330 				PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
331 \
332 				pi1    += ldp; \
333 				alpha1 += lda; \
334 			} \
335 		} \
336 	} \
337 	else \
338 	{ \
339 		if ( bli_is_conj( conjp ) ) \
340 		{ \
341 			for ( ; n != 0; --n ) \
342 			{ \
343 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
344 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
345 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
346 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
347 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
348 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
349 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
350 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
351 \
352 				pi1    += ldp; \
353 				alpha1 += lda; \
354 			} \
355 		} \
356 		else \
357 		{ \
358 			for ( ; n != 0; --n ) \
359 			{ \
360 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
361 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
362 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
363 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
364 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
365 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
366 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
367 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
368 \
369 				pi1    += ldp; \
370 				alpha1 += lda; \
371 			} \
372 		} \
373 	} \
374 }
375 
376 INSERT_GENTFUNC_BASIC2( unpackm_8xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
377 
378 
379 
380 
381 #undef  GENTFUNC
382 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
383 \
384 void PASTEMAC3(ch,opname,arch,suf) \
385      ( \
386        conj_t           conjp, \
387        dim_t            n, \
388        void*   restrict kappa, \
389        void*   restrict p,             inc_t ldp, \
390        void*   restrict a, inc_t inca, inc_t lda, \
391        cntx_t* restrict cntx  \
392      ) \
393 { \
394 	ctype* restrict kappa_cast = kappa; \
395 	ctype* restrict pi1        = p; \
396 	ctype* restrict alpha1     = a; \
397 \
398 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
399 	{ \
400 		if ( bli_is_conj( conjp ) ) \
401 		{ \
402 			for ( ; n != 0; --n ) \
403 			{ \
404 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
405 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
406 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
407 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
408 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
409 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
410 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
411 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
412 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
413 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
414 \
415 				pi1    += ldp; \
416 				alpha1 += lda; \
417 			} \
418 		} \
419 		else \
420 		{ \
421 			for ( ; n != 0; --n ) \
422 			{ \
423 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
424 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
425 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
426 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
427 				PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
428 				PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
429 				PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
430 				PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
431 				PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
432 				PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
433 \
434 				pi1    += ldp; \
435 				alpha1 += lda; \
436 			} \
437 		} \
438 	} \
439 	else \
440 	{ \
441 		if ( bli_is_conj( conjp ) ) \
442 		{ \
443 			for ( ; n != 0; --n ) \
444 			{ \
445 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
446 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
447 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
448 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
449 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
450 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
451 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
452 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
453 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
454 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
455 \
456 				pi1    += ldp; \
457 				alpha1 += lda; \
458 			} \
459 		} \
460 		else \
461 		{ \
462 			for ( ; n != 0; --n ) \
463 			{ \
464 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
465 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
466 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
467 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
468 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
469 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
470 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
471 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
472 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
473 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
474 \
475 				pi1    += ldp; \
476 				alpha1 += lda; \
477 			} \
478 		} \
479 	} \
480 }
481 
482 INSERT_GENTFUNC_BASIC2( unpackm_10xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
483 
484 
485 
486 
487 #undef  GENTFUNC
488 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
489 \
490 void PASTEMAC3(ch,opname,arch,suf) \
491      ( \
492        conj_t           conjp, \
493        dim_t            n, \
494        void*   restrict kappa, \
495        void*   restrict p,             inc_t ldp, \
496        void*   restrict a, inc_t inca, inc_t lda, \
497        cntx_t* restrict cntx  \
498      ) \
499 { \
500 	ctype* restrict kappa_cast = kappa; \
501 	ctype* restrict pi1        = p; \
502 	ctype* restrict alpha1     = a; \
503 \
504 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
505 	{ \
506 		if ( bli_is_conj( conjp ) ) \
507 		{ \
508 			for ( ; n != 0; --n ) \
509 			{ \
510 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
511 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
512 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
513 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
514 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
515 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
516 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
517 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
518 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
519 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
520 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
521 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
522 \
523 				pi1    += ldp; \
524 				alpha1 += lda; \
525 			} \
526 		} \
527 		else \
528 		{ \
529 			for ( ; n != 0; --n ) \
530 			{ \
531 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
532 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
533 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
534 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
535 				PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
536 				PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
537 				PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
538 				PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
539 				PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
540 				PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
541 				PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
542 				PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
543 \
544 				pi1    += ldp; \
545 				alpha1 += lda; \
546 			} \
547 		} \
548 	} \
549 	else \
550 	{ \
551 		if ( bli_is_conj( conjp ) ) \
552 		{ \
553 			for ( ; n != 0; --n ) \
554 			{ \
555 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
556 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
557 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
558 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
559 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
560 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
561 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
562 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
563 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
564 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
565 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
566 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
567 \
568 				pi1    += ldp; \
569 				alpha1 += lda; \
570 			} \
571 		} \
572 		else \
573 		{ \
574 			for ( ; n != 0; --n ) \
575 			{ \
576 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
577 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
578 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
579 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
580 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
581 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
582 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
583 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
584 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
585 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
586 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
587 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
588 \
589 				pi1    += ldp; \
590 				alpha1 += lda; \
591 			} \
592 		} \
593 	} \
594 }
595 
596 INSERT_GENTFUNC_BASIC2( unpackm_12xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
597 
598 
599 
600 
601 #undef  GENTFUNC
602 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
603 \
604 void PASTEMAC3(ch,opname,arch,suf) \
605      ( \
606        conj_t           conjp, \
607        dim_t            n, \
608        void*   restrict kappa, \
609        void*   restrict p,             inc_t ldp, \
610        void*   restrict a, inc_t inca, inc_t lda, \
611        cntx_t* restrict cntx  \
612      ) \
613 { \
614 	ctype* restrict kappa_cast = kappa; \
615 	ctype* restrict pi1        = p; \
616 	ctype* restrict alpha1     = a; \
617 \
618 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
619 	{ \
620 		if ( bli_is_conj( conjp ) ) \
621 		{ \
622 			for ( ; n != 0; --n ) \
623 			{ \
624 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
625 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
626 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
627 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
628 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
629 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
630 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
631 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
632 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
633 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
634 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
635 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
636 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
637 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
638 \
639 				pi1    += ldp; \
640 				alpha1 += lda; \
641 			} \
642 		} \
643 		else \
644 		{ \
645 			for ( ; n != 0; --n ) \
646 			{ \
647 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
648 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
649 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
650 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
651 				PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
652 				PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
653 				PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
654 				PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
655 				PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
656 				PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
657 				PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
658 				PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
659 				PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
660 				PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
661 \
662 				pi1    += ldp; \
663 				alpha1 += lda; \
664 			} \
665 		} \
666 	} \
667 	else \
668 	{ \
669 		if ( bli_is_conj( conjp ) ) \
670 		{ \
671 			for ( ; n != 0; --n ) \
672 			{ \
673 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
674 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
675 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
676 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
677 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
678 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
679 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
680 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
681 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
682 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
683 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
684 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
685 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
686 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
687 \
688 				pi1    += ldp; \
689 				alpha1 += lda; \
690 			} \
691 		} \
692 		else \
693 		{ \
694 			for ( ; n != 0; --n ) \
695 			{ \
696 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
697 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
698 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
699 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
700 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
701 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
702 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
703 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
704 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
705 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
706 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
707 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
708 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
709 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
710 \
711 				pi1    += ldp; \
712 				alpha1 += lda; \
713 			} \
714 		} \
715 	} \
716 }
717 
718 INSERT_GENTFUNC_BASIC2( unpackm_14xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
719 
720 
721 
722 
723 #undef  GENTFUNC
724 #define GENTFUNC( ctype, ch, opname, arch, suf ) \
725 \
726 void PASTEMAC3(ch,opname,arch,suf) \
727      ( \
728        conj_t           conjp, \
729        dim_t            n, \
730        void*   restrict kappa, \
731        void*   restrict p,             inc_t ldp, \
732        void*   restrict a, inc_t inca, inc_t lda, \
733        cntx_t* restrict cntx  \
734      ) \
735 { \
736 	ctype* restrict kappa_cast = kappa; \
737 	ctype* restrict pi1        = p; \
738 	ctype* restrict alpha1     = a; \
739 \
740 	if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \
741 	{ \
742 		if ( bli_is_conj( conjp ) ) \
743 		{ \
744 			for ( ; n != 0; --n ) \
745 			{ \
746 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
747 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
748 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
749 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
750 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
751 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
752 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
753 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
754 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
755 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
756 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
757 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
758 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
759 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
760 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 14), *(alpha1 + 14*inca) ); \
761 				PASTEMAC2(ch,ch,copyjs)( *(pi1 + 15), *(alpha1 + 15*inca) ); \
762 \
763 				pi1    += ldp; \
764 				alpha1 += lda; \
765 			} \
766 		} \
767 		else \
768 		{ \
769 			for ( ; n != 0; --n ) \
770 			{ \
771 				PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
772 				PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
773 				PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
774 				PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
775 				PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
776 				PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
777 				PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
778 				PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
779 				PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
780 				PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
781 				PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
782 				PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
783 				PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
784 				PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
785 				PASTEMAC2(ch,ch,copys)( *(pi1 + 14), *(alpha1 + 14*inca) ); \
786 				PASTEMAC2(ch,ch,copys)( *(pi1 + 15), *(alpha1 + 15*inca) ); \
787 \
788 				pi1    += ldp; \
789 				alpha1 += lda; \
790 			} \
791 		} \
792 	} \
793 	else \
794 	{ \
795 		if ( bli_is_conj( conjp ) ) \
796 		{ \
797 			for ( ; n != 0; --n ) \
798 			{ \
799 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
800 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
801 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
802 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
803 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
804 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
805 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
806 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
807 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
808 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
809 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
810 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
811 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
812 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
813 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
814 				PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
815 \
816 				pi1    += ldp; \
817 				alpha1 += lda; \
818 			} \
819 		} \
820 		else \
821 		{ \
822 			for ( ; n != 0; --n ) \
823 			{ \
824 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
825 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
826 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
827 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
828 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
829 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
830 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
831 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
832 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
833 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
834 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
835 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
836 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
837 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
838 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
839 				PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
840 \
841 				pi1    += ldp; \
842 				alpha1 += lda; \
843 			} \
844 		} \
845 	} \
846 }
847 
848 INSERT_GENTFUNC_BASIC2( unpackm_16xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
849 
850