1 /*
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL3_HEMM_TCC
34 #define CXXBLAS_LEVEL3_HEMM_TCC 1
35
36 #include "xflens/cxxblas/cxxblas.h"
37
38 namespace cxxblas {
39
40 template <typename IndexType, typename ALPHA, typename MA, typename MB,
41 typename BETA, typename MC>
42 void
hemm_generic(StorageOrder order,Side sideA,StorageUpLo upLoA,IndexType m,IndexType n,const ALPHA & alpha,const MA * A,IndexType ldA,const MB * B,IndexType ldB,const BETA & beta,MC * C,IndexType ldC)43 hemm_generic(StorageOrder order,
44 Side sideA, StorageUpLo upLoA,
45 IndexType m, IndexType n,
46 const ALPHA &alpha,
47 const MA *A, IndexType ldA,
48 const MB *B, IndexType ldB,
49 const BETA &beta,
50 MC *C, IndexType ldC)
51 {
52 if (order==ColMajor) {
53 upLoA = (upLoA==Upper) ? Lower : Upper;
54 sideA = (sideA==Left) ? Right : Left;
55 hemm_generic(RowMajor, sideA, upLoA, n, m,
56 alpha, A, ldA, B, ldB,
57 beta,
58 C, ldC);
59 return;
60 }
61 gescal_init(order, m, n, beta, C, ldC);
62 if (sideA==Right) {
63 for (IndexType i=0; i<m; ++i) {
64 hemv(order, upLoA, Conj, n, alpha, A, ldA, B+i*ldB, IndexType(1),
65 BETA(1), C+i*ldC, IndexType(1));
66 }
67 }
68 if (sideA==Left) {
69 for (IndexType j=0; j<n; ++j) {
70 hemv(order, upLoA, NoTrans, m, alpha, A, ldA, B+j, ldB,
71 BETA(1), C+j, ldC);
72 }
73 }
74 }
75
76 template <typename IndexType, typename ALPHA, typename MA, typename MB,
77 typename BETA, typename MC>
78 void
hemm(StorageOrder order,Side side,StorageUpLo upLo,IndexType m,IndexType n,const ALPHA & alpha,const MA * A,IndexType ldA,const MB * B,IndexType ldB,const BETA & beta,MC * C,IndexType ldC)79 hemm(StorageOrder order,
80 Side side, StorageUpLo upLo,
81 IndexType m, IndexType n,
82 const ALPHA &alpha,
83 const MA *A, IndexType ldA,
84 const MB *B, IndexType ldB,
85 const BETA &beta,
86 MC *C, IndexType ldC)
87 {
88 CXXBLAS_DEBUG_OUT("hemm_generic");
89
90 hemm_generic(order, side, upLo, m, n, alpha, A, ldA, B, ldB, beta, C, ldC);
91 }
92
93 #ifdef HAVE_CBLAS
94
95 template <typename IndexType>
96 typename If<IndexType>::isBlasCompatibleInteger
hemm(StorageOrder order,Side side,StorageUpLo upLo,IndexType m,IndexType n,const ComplexFloat & alpha,const ComplexFloat * A,IndexType ldA,const ComplexFloat * B,IndexType ldB,const ComplexFloat & beta,ComplexFloat * C,IndexType ldC)97 hemm(StorageOrder order,
98 Side side, StorageUpLo upLo,
99 IndexType m, IndexType n,
100 const ComplexFloat &alpha,
101 const ComplexFloat *A, IndexType ldA,
102 const ComplexFloat *B, IndexType ldB,
103 const ComplexFloat &beta,
104 ComplexFloat *C, IndexType ldC)
105 {
106 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chemm");
107
108 cblas_chemm(CBLAS::getCblasType(order),
109 CBLAS::getCblasType(side), CBLAS::getCblasType(upLo),
110 m, n,
111 reinterpret_cast<const float *>(&alpha),
112 reinterpret_cast<const float *>(A), ldA,
113 reinterpret_cast<const float *>(B), ldB,
114 reinterpret_cast<const float *>(&beta),
115 reinterpret_cast<float *>(C), ldC);
116 }
117
118 template <typename IndexType>
119 typename If<IndexType>::isBlasCompatibleInteger
hemm(StorageOrder order,Side side,StorageUpLo upLo,IndexType m,IndexType n,const ComplexDouble & alpha,const ComplexDouble * A,IndexType ldA,const ComplexDouble * B,IndexType ldB,const ComplexDouble & beta,ComplexDouble * C,IndexType ldC)120 hemm(StorageOrder order,
121 Side side, StorageUpLo upLo,
122 IndexType m, IndexType n,
123 const ComplexDouble &alpha,
124 const ComplexDouble *A, IndexType ldA,
125 const ComplexDouble *B, IndexType ldB,
126 const ComplexDouble &beta,
127 ComplexDouble *C, IndexType ldC)
128 {
129 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhemm");
130
131 cblas_zhemm(CBLAS::getCblasType(order),
132 CBLAS::getCblasType(side), CBLAS::getCblasType(upLo),
133 m, n,
134 reinterpret_cast<const double *>(&alpha),
135 reinterpret_cast<const double *>(A), ldA,
136 reinterpret_cast<const double *>(B), ldB,
137 reinterpret_cast<const double *>(&beta),
138 reinterpret_cast<double *>(C), ldC);
139 }
140
141 #endif // HAVE_CBLAS
142
143 } // namespace cxxblas
144
145 #endif // CXXBLAS_LEVEL3_HEMM_TCC
146