1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "blis1.h"
12 
bl1_sher2k(uplo1_t uplo,trans1_t trans,int m,int k,float * alpha,float * a,int a_rs,int a_cs,float * b,int b_rs,int b_cs,float * beta,float * c,int c_rs,int c_cs)13 void bl1_sher2k( uplo1_t uplo, trans1_t trans, int m, int k, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs, float* beta, float* c, int c_rs, int c_cs )
14 {
15 	bl1_ssyr2k( uplo,
16 	            trans,
17 	            m,
18 	            k,
19 	            alpha,
20 	            a, a_rs, a_cs,
21 	            b, b_rs, b_cs,
22 	            beta,
23 	            c, c_rs, c_cs );
24 }
25 
bl1_dher2k(uplo1_t uplo,trans1_t trans,int m,int k,double * alpha,double * a,int a_rs,int a_cs,double * b,int b_rs,int b_cs,double * beta,double * c,int c_rs,int c_cs)26 void bl1_dher2k( uplo1_t uplo, trans1_t trans, int m, int k, double* alpha, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs, double* beta, double* c, int c_rs, int c_cs )
27 {
28 	bl1_dsyr2k( uplo,
29 	            trans,
30 	            m,
31 	            k,
32 	            alpha,
33 	            a, a_rs, a_cs,
34 	            b, b_rs, b_cs,
35 	            beta,
36 	            c, c_rs, c_cs );
37 }
38 
bl1_cher2k(uplo1_t uplo,trans1_t trans,int m,int k,scomplex * alpha,scomplex * a,int a_rs,int a_cs,scomplex * b,int b_rs,int b_cs,float * beta,scomplex * c,int c_rs,int c_cs)39 void bl1_cher2k( uplo1_t uplo, trans1_t trans, int m, int k, scomplex* alpha, scomplex* a, int a_rs, int a_cs, scomplex* b, int b_rs, int b_cs, float* beta, scomplex* c, int c_rs, int c_cs )
40 {
41 	uplo1_t    uplo_save = uplo;
42 	int       m_save    = m;
43 	scomplex* a_save    = a;
44 	scomplex* b_save    = b;
45 	scomplex* c_save    = c;
46 	int       a_rs_save = a_rs;
47 	int       a_cs_save = a_cs;
48 	int       b_rs_save = b_rs;
49 	int       b_cs_save = b_cs;
50 	int       c_rs_save = c_rs;
51 	int       c_cs_save = c_cs;
52 	float     zero_r = bl1_s0();
53 	scomplex  one    = bl1_c1();
54 	scomplex  alpha_copy;
55 	scomplex* a_copy;
56 	scomplex* b_copy;
57 	scomplex* c_conj;
58 	int       lda, inca;
59 	int       ldb, incb;
60 	int       ldc, incc;
61 	int       lda_copy, inca_copy;
62 	int       ldb_copy, incb_copy;
63 	int       ldc_conj, incc_conj;
64 	int       her2k_needs_copya      = FALSE;
65 	int       her2k_needs_copyb      = FALSE;
66 	int       her2k_needs_conj       = FALSE;
67 	int       her2k_needs_alpha_conj = FALSE;
68 
69 	// Return early if possible.
70 	if ( bl1_zero_dim2( m, k ) ) return;
71 
72 	// If necessary, allocate, initialize, and use a temporary contiguous
73 	// copy of each matrix rather than the original matrices.
74 	bl1_ccreate_contigmt( trans,
75 	                      m,
76 	                      k,
77 	                      a_save, a_rs_save, a_cs_save,
78 	                      &a,     &a_rs,     &a_cs );
79 
80 	bl1_ccreate_contigmt( trans,
81 	                      m,
82 	                      k,
83 	                      b_save, b_rs_save, b_cs_save,
84 	                      &b,     &b_rs,     &b_cs );
85 
86 	bl1_ccreate_contigmr( uplo,
87 	                      m,
88 	                      m,
89 	                      c_save, c_rs_save, c_cs_save,
90 	                      &c,     &c_rs,     &c_cs );
91 
92 	// Initialize with values assuming column-major storage.
93 	lda  = a_cs;
94 	inca = a_rs;
95 	ldb  = b_cs;
96 	incb = b_rs;
97 	ldc  = c_cs;
98 	incc = c_rs;
99 
100 	// Adjust the parameters based on the storage of each matrix.
101 	if ( bl1_is_col_storage( c_rs, c_cs ) )
102 	{
103 		if ( bl1_is_col_storage( a_rs, a_cs ) )
104 		{
105 			if ( bl1_is_col_storage( b_rs, b_cs ) )
106 			{
107 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
108 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
109 			}
110 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
111 			{
112 				// requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
113 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
114 				her2k_needs_copyb = TRUE;
115 			}
116 		}
117 		else // if ( bl1_is_row_storage( a_rs, a_cs ) )
118 		{
119 			if ( bl1_is_col_storage( b_rs, b_cs ) )
120 			{
121 				// requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
122 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
123 				her2k_needs_copya = TRUE;
124 			}
125 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126 			{
127 				// requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
128 				// requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
129 				bl1_swap_ints( lda, inca );
130 				bl1_swap_ints( ldb, incb );
131 
132 				bl1_toggle_conjtrans( trans );
133 
134 				her2k_needs_conj       = TRUE;
135 				her2k_needs_alpha_conj = TRUE;
136 			}
137 		}
138 	}
139 	else // if ( bl1_is_row_storage( c_rs, c_cs ) )
140 	{
141 		if ( bl1_is_col_storage( a_rs, a_cs ) )
142 		{
143 			if ( bl1_is_col_storage( b_rs, b_cs ) )
144 			{
145 				// requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
146 				// requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
147 				bl1_swap_ints( ldc, incc );
148 
149 				bl1_toggle_uplo( uplo );
150 
151 				her2k_needs_conj = TRUE;
152 			}
153 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
154 			{
155 				// requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
156 				// requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
157 				her2k_needs_copyb = TRUE;
158 
159 				bl1_swap_ints( ldc, incc );
160 
161 				bl1_toggle_uplo( uplo );
162 
163 				her2k_needs_conj = TRUE;
164 			}
165 		}
166 		else // if ( bl1_is_row_storage( a_rs, a_cs ) )
167 		{
168 			if ( bl1_is_col_storage( b_rs, b_cs ) )
169 			{
170 				// requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
171 				// requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
172 				her2k_needs_copya = TRUE;
173 
174 				bl1_swap_ints( ldc, incc );
175 
176 				bl1_toggle_uplo( uplo );
177 
178 				her2k_needs_conj = TRUE;
179 			}
180 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
181 			{
182 				// requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
183 				// requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
184 				bl1_swap_ints( ldc, incc );
185 				bl1_swap_ints( lda, inca );
186 				bl1_swap_ints( ldb, incb );
187 
188 				bl1_toggle_uplo( uplo );
189 				bl1_toggle_conjtrans( trans );
190 
191 				her2k_needs_alpha_conj = TRUE;
192 			}
193 		}
194 	}
195 
196 	// Make a copy of alpha and conjugate if necessary.
197 	alpha_copy = *alpha;
198 	if ( her2k_needs_alpha_conj )
199 	{
200 		bl1_zconjs( &alpha_copy );
201 	}
202 
203 	a_copy    = a;
204 	lda_copy  = lda;
205 	inca_copy = inca;
206 
207 	// There are two cases where we need to copy A column-major storage.
208 	// We handle those two cases here.
209 	if ( her2k_needs_copya )
210 	{
211 		int m_a;
212 		int n_a;
213 
214 		// Determine the dimensions of A according to the value of trans. We
215 		// need this in order to set the leading dimension of the copy of A.
216 		bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
217 
218 		// We need a temporary matrix to hold a column-major copy of A.
219 		a_copy    = bl1_callocm( m, k );
220 		lda_copy  = m_a;
221 		inca_copy = 1;
222 
223 		// Copy the contents of A into A_copy.
224 		bl1_ccopymt( BLIS1_NO_TRANSPOSE,
225                      m_a,
226                      n_a,
227 		             a,      inca,      lda,
228 		             a_copy, inca_copy, lda_copy );
229 	}
230 
231 	b_copy    = b;
232 	ldb_copy  = ldb;
233 	incb_copy = incb;
234 
235 	// There are two cases where we need to copy B column-major storage.
236 	// We handle those two cases here.
237 	if ( her2k_needs_copyb )
238 	{
239 		int m_b;
240 		int n_b;
241 
242 		// Determine the dimensions of B according to the value of trans. We
243 		// need this in order to set the leading dimension of the copy of B.
244 		bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
245 
246 		// We need a temporary matrix to hold a column-major copy of B.
247 		b_copy    = bl1_callocm( m, k );
248 		ldb_copy  = m_b;
249 		incb_copy = 1;
250 
251 		// Copy the contents of B into B_copy.
252 		bl1_ccopymt( BLIS1_NO_TRANSPOSE,
253                      m_b,
254                      n_b,
255 		             b,      incb,      ldb,
256 		             b_copy, incb_copy, ldb_copy );
257 	}
258 
259 	// There are two cases where we need to perform the rank-2k product and
260 	// then axpy the result into C with a conjugation. We handle those two
261 	// cases here.
262 	if ( her2k_needs_conj )
263 	{
264 		// We need a temporary matrix for holding the rank-k product.
265 		c_conj    = bl1_callocm( m, m );
266 		ldc_conj  = m;
267 		incc_conj = 1;
268 
269 		// Compute the rank-2k product.
270 		bl1_cher2k_blas( uplo,
271 		                 trans,
272 		                 m,
273 		                 k,
274 		                 &alpha_copy,
275 		                 a_copy, lda_copy,
276 		                 b_copy, ldb_copy,
277 		                 &zero_r,
278 		                 c_conj, ldc_conj );
279 
280 		// Scale C by beta.
281 		bl1_csscalmr( uplo,
282 		              m,
283 		              m,
284 		              beta,
285 		              c, incc, ldc );
286 
287 		// And finally, accumulate the rank-2k product in C_conj into C
288 		// with a conjugation.
289 		bl1_caxpymrt( uplo,
290 		              BLIS1_CONJ_NO_TRANSPOSE,
291 		              m,
292 		              m,
293 		              &one,
294 		              c_conj, incc_conj, ldc_conj,
295 		              c,      incc,      ldc );
296 
297 		// Free the temporary matrix for C.
298 		bl1_cfree( c_conj );
299 	}
300 	else
301 	{
302 		bl1_cher2k_blas( uplo,
303 		                 trans,
304 		                 m,
305 		                 k,
306 		                 &alpha_copy,
307 		                 a_copy, lda_copy,
308 		                 b_copy, ldb_copy,
309 		                 beta,
310 		                 c, ldc );
311 	}
312 
313 	if ( her2k_needs_copya )
314 		bl1_cfree( a_copy );
315 
316 	if ( her2k_needs_copyb )
317 		bl1_cfree( b_copy );
318 
319 	// Free any temporary contiguous matrices, copying the result back to
320 	// the original matrix.
321 	bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
322 	                   &a,     &a_rs,     &a_cs );
323 
324 	bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
325 	                   &b,     &b_rs,     &b_cs );
326 
327 	bl1_cfree_saved_contigmr( uplo_save,
328 	                          m_save,
329 	                          m_save,
330 	                          c_save, c_rs_save, c_cs_save,
331 	                          &c,     &c_rs,     &c_cs );
332 }
333 
bl1_zher2k(uplo1_t uplo,trans1_t trans,int m,int k,dcomplex * alpha,dcomplex * a,int a_rs,int a_cs,dcomplex * b,int b_rs,int b_cs,double * beta,dcomplex * c,int c_rs,int c_cs)334 void bl1_zher2k( uplo1_t uplo, trans1_t trans, int m, int k, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs, double* beta, dcomplex* c, int c_rs, int c_cs )
335 {
336 	uplo1_t    uplo_save = uplo;
337 	int       m_save    = m;
338 	dcomplex* a_save    = a;
339 	dcomplex* b_save    = b;
340 	dcomplex* c_save    = c;
341 	int       a_rs_save = a_rs;
342 	int       a_cs_save = a_cs;
343 	int       b_rs_save = b_rs;
344 	int       b_cs_save = b_cs;
345 	int       c_rs_save = c_rs;
346 	int       c_cs_save = c_cs;
347 	double    zero_r = bl1_d0();
348 	dcomplex  one    = bl1_z1();
349 	dcomplex  alpha_copy;
350 	dcomplex* a_copy;
351 	dcomplex* b_copy;
352 	dcomplex* c_conj;
353 	int       lda, inca;
354 	int       ldb, incb;
355 	int       ldc, incc;
356 	int       lda_copy, inca_copy;
357 	int       ldb_copy, incb_copy;
358 	int       ldc_conj, incc_conj;
359 	int       her2k_needs_copya      = FALSE;
360 	int       her2k_needs_copyb      = FALSE;
361 	int       her2k_needs_conj       = FALSE;
362 	int       her2k_needs_alpha_conj = FALSE;
363 
364 	// Return early if possible.
365 	if ( bl1_zero_dim2( m, k ) ) return;
366 
367 	// If necessary, allocate, initialize, and use a temporary contiguous
368 	// copy of each matrix rather than the original matrices.
369 	bl1_zcreate_contigmt( trans,
370 	                      m,
371 	                      k,
372 	                      a_save, a_rs_save, a_cs_save,
373 	                      &a,     &a_rs,     &a_cs );
374 
375 	bl1_zcreate_contigmt( trans,
376 	                      m,
377 	                      k,
378 	                      b_save, b_rs_save, b_cs_save,
379 	                      &b,     &b_rs,     &b_cs );
380 
381 	bl1_zcreate_contigmr( uplo,
382 	                      m,
383 	                      m,
384 	                      c_save, c_rs_save, c_cs_save,
385 	                      &c,     &c_rs,     &c_cs );
386 
387 	// Initialize with values assuming column-major storage.
388 	lda  = a_cs;
389 	inca = a_rs;
390 	ldb  = b_cs;
391 	incb = b_rs;
392 	ldc  = c_cs;
393 	incc = c_rs;
394 
395 	// Adjust the parameters based on the storage of each matrix.
396 	if ( bl1_is_col_storage( c_rs, c_cs ) )
397 	{
398 		if ( bl1_is_col_storage( a_rs, a_cs ) )
399 		{
400 			if ( bl1_is_col_storage( b_rs, b_cs ) )
401 			{
402 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
403 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
404 			}
405 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
406 			{
407 				// requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
408 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
409 				her2k_needs_copyb = TRUE;
410 			}
411 		}
412 		else // if ( bl1_is_row_storage( a_rs, a_cs ) )
413 		{
414 			if ( bl1_is_col_storage( b_rs, b_cs ) )
415 			{
416 				// requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
417 				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
418 				her2k_needs_copya = TRUE;
419 			}
420 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
421 			{
422 				// requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
423 				// requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
424 				bl1_swap_ints( lda, inca );
425 				bl1_swap_ints( ldb, incb );
426 
427 				bl1_toggle_conjtrans( trans );
428 
429 				her2k_needs_conj       = TRUE;
430 				her2k_needs_alpha_conj = TRUE;
431 			}
432 		}
433 	}
434 	else // if ( bl1_is_row_storage( c_rs, c_cs ) )
435 	{
436 		if ( bl1_is_col_storage( a_rs, a_cs ) )
437 		{
438 			if ( bl1_is_col_storage( b_rs, b_cs ) )
439 			{
440 				// requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
441 				// requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
442 				bl1_swap_ints( ldc, incc );
443 
444 				bl1_toggle_uplo( uplo );
445 
446 				her2k_needs_conj = TRUE;
447 			}
448 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
449 			{
450 				// requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
451 				// requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
452 				her2k_needs_copyb = TRUE;
453 
454 				bl1_swap_ints( ldc, incc );
455 
456 				bl1_toggle_uplo( uplo );
457 
458 				her2k_needs_conj = TRUE;
459 			}
460 		}
461 		else // if ( bl1_is_row_storage( a_rs, a_cs ) )
462 		{
463 			if ( bl1_is_col_storage( b_rs, b_cs ) )
464 			{
465 				// requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
466 				// requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
467 				her2k_needs_copya = TRUE;
468 
469 				bl1_swap_ints( ldc, incc );
470 
471 				bl1_toggle_uplo( uplo );
472 
473 				her2k_needs_conj = TRUE;
474 			}
475 			else // if ( bl1_is_row_storage( b_rs, b_cs ) )
476 			{
477 				// requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
478 				// requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
479 				bl1_swap_ints( ldc, incc );
480 				bl1_swap_ints( lda, inca );
481 				bl1_swap_ints( ldb, incb );
482 
483 				bl1_toggle_uplo( uplo );
484 				bl1_toggle_conjtrans( trans );
485 
486 				her2k_needs_alpha_conj = TRUE;
487 			}
488 		}
489 	}
490 
491 	// Make a copy of alpha and conjugate if necessary.
492 	alpha_copy = *alpha;
493 	if ( her2k_needs_alpha_conj )
494 	{
495 		bl1_zconjs( &alpha_copy );
496 	}
497 
498 	a_copy    = a;
499 	lda_copy  = lda;
500 	inca_copy = inca;
501 
502 	// There are two cases where we need to copy A column-major storage.
503 	// We handle those two cases here.
504 	if ( her2k_needs_copya )
505 	{
506 		int m_a;
507 		int n_a;
508 
509 		// Determine the dimensions of A according to the value of trans. We
510 		// need this in order to set the leading dimension of the copy of A.
511 		bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
512 
513 		// We need a temporary matrix to hold a column-major copy of A.
514 		a_copy    = bl1_zallocm( m, k );
515 		lda_copy  = m_a;
516 		inca_copy = 1;
517 
518 		// Copy the contents of A into A_copy.
519 		bl1_zcopymt( BLIS1_NO_TRANSPOSE,
520                      m_a,
521                      n_a,
522 		             a,      inca,      lda,
523 		             a_copy, inca_copy, lda_copy );
524 	}
525 
526 	b_copy    = b;
527 	ldb_copy  = ldb;
528 	incb_copy = incb;
529 
530 	// There are two cases where we need to copy B column-major storage.
531 	// We handle those two cases here.
532 	if ( her2k_needs_copyb )
533 	{
534 		int m_b;
535 		int n_b;
536 
537 		// Determine the dimensions of B according to the value of trans. We
538 		// need this in order to set the leading dimension of the copy of B.
539 		bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
540 
541 		// We need a temporary matrix to hold a column-major copy of B.
542 		b_copy    = bl1_zallocm( m, k );
543 		ldb_copy  = m_b;
544 		incb_copy = 1;
545 
546 		// Copy the contents of B into B_copy.
547 		bl1_zcopymt( BLIS1_NO_TRANSPOSE,
548                      m_b,
549                      n_b,
550 		             b,      incb,      ldb,
551 		             b_copy, incb_copy, ldb_copy );
552 	}
553 
554 	// There are two cases where we need to perform the rank-2k product and
555 	// then axpy the result into C with a conjugation. We handle those two
556 	// cases here.
557 	if ( her2k_needs_conj )
558 	{
559 		// We need a temporary matrix for holding the rank-k product.
560 		c_conj    = bl1_zallocm( m, m );
561 		ldc_conj  = m;
562 		incc_conj = 1;
563 
564 		// Compute the rank-2k product.
565 		bl1_zher2k_blas( uplo,
566 		                 trans,
567 		                 m,
568 		                 k,
569 		                 &alpha_copy,
570 		                 a_copy, lda_copy,
571 		                 b_copy, ldb_copy,
572 		                 &zero_r,
573 		                 c_conj, ldc_conj );
574 
575 		// Scale C by beta.
576 		bl1_zdscalmr( uplo,
577 		              m,
578 		              m,
579 		              beta,
580 		              c, incc, ldc );
581 
582 		// And finally, accumulate the rank-2k product in C_conj into C
583 		// with a conjugation.
584 		bl1_zaxpymrt( uplo,
585 		              BLIS1_CONJ_NO_TRANSPOSE,
586 		              m,
587 		              m,
588 		              &one,
589 		              c_conj, incc_conj, ldc_conj,
590 		              c,      incc,      ldc );
591 
592 		// Free the temporary matrix for C.
593 		bl1_zfree( c_conj );
594 	}
595 	else
596 	{
597 		bl1_zher2k_blas( uplo,
598 		                 trans,
599 		                 m,
600 		                 k,
601 		                 &alpha_copy,
602 		                 a_copy, lda_copy,
603 		                 b_copy, ldb_copy,
604 		                 beta,
605 		                 c, ldc );
606 	}
607 
608 	if ( her2k_needs_copya )
609 		bl1_zfree( a_copy );
610 
611 	if ( her2k_needs_copyb )
612 		bl1_zfree( b_copy );
613 
614 	// Free any temporary contiguous matrices, copying the result back to
615 	// the original matrix.
616 	bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
617 	                   &a,     &a_rs,     &a_cs );
618 
619 	bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
620 	                   &b,     &b_rs,     &b_cs );
621 
622 	bl1_zfree_saved_contigmr( uplo_save,
623 	                          m_save,
624 	                          m_save,
625 	                          c_save, c_rs_save, c_cs_save,
626 	                          &c,     &c_rs,     &c_cs );
627 }
628 
629 // --- Classic routine wrappers ---
630 
bl1_cher2k_blas(uplo1_t uplo,trans1_t trans,int m,int k,scomplex * alpha,scomplex * a,int lda,scomplex * b,int ldb,float * beta,scomplex * c,int ldc)631 void bl1_cher2k_blas( uplo1_t uplo, trans1_t trans, int m, int k, scomplex* alpha, scomplex* a, int lda, scomplex* b, int ldb, float*  beta, scomplex* c, int ldc )
632 {
633 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
634 	enum CBLAS_ORDER     cblas_order = CblasColMajor;
635 	enum CBLAS_UPLO      cblas_uplo;
636 	enum CBLAS_TRANSPOSE cblas_trans;
637 
638 	bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
639 	bl1_param_map_to_netlib_trans( trans, &cblas_trans );
640 
641 	cblas_cher2k( cblas_order,
642 	              cblas_uplo,
643 	              cblas_trans,
644 	              m,
645 	              k,
646 	              alpha,
647 	              a, lda,
648 	              b, ldb,
649 	              *beta,
650 	              c, ldc );
651 #else
652 	char blas_uplo;
653 	char blas_trans;
654 
655 	bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
656 	bl1_param_map_to_netlib_trans( trans, &blas_trans );
657 
658 	F77_cher2k( &blas_uplo,
659 	            &blas_trans,
660 	            &m,
661 	            &k,
662 	            alpha,
663 	            a, &lda,
664 	            b, &ldb,
665 	            beta,
666 	            c, &ldc );
667 #endif
668 }
669 
bl1_zher2k_blas(uplo1_t uplo,trans1_t trans,int m,int k,dcomplex * alpha,dcomplex * a,int lda,dcomplex * b,int ldb,double * beta,dcomplex * c,int ldc)670 void bl1_zher2k_blas( uplo1_t uplo, trans1_t trans, int m, int k, dcomplex* alpha, dcomplex* a, int lda, dcomplex* b, int ldb, double* beta, dcomplex* c, int ldc )
671 {
672 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
673 	enum CBLAS_ORDER     cblas_order = CblasColMajor;
674 	enum CBLAS_UPLO      cblas_uplo;
675 	enum CBLAS_TRANSPOSE cblas_trans;
676 
677 	bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
678 	bl1_param_map_to_netlib_trans( trans, &cblas_trans );
679 
680 	cblas_zher2k( cblas_order,
681 	              cblas_uplo,
682 	              cblas_trans,
683 	              m,
684 	              k,
685 	              alpha,
686 	              a, lda,
687 	              b, ldb,
688 	              *beta,
689 	              c, ldc );
690 #else
691 	char blas_uplo;
692 	char blas_trans;
693 
694 	bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
695 	bl1_param_map_to_netlib_trans( trans, &blas_trans );
696 
697 	F77_zher2k( &blas_uplo,
698 	            &blas_trans,
699 	            &m,
700 	            &k,
701 	            alpha,
702 	            a, &lda,
703 	            b, &ldb,
704 	            beta,
705 	            c, &ldc );
706 #endif
707 }
708 
709