1 /*
2  *   Copyright (c) 2009, Michael Lehn
3  *
4  *   All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1) Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2) Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in
14  *      the documentation and/or other materials provided with the
15  *      distribution.
16  *   3) Neither the name of the FLENS development group nor the names of
17  *      its contributors may be used to endorse or promote products derived
18  *      from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifndef CXXBLAS_LEVEL3_HEMM_TCC
34 #define CXXBLAS_LEVEL3_HEMM_TCC 1
35 
36 #include "xflens/cxxblas/cxxblas.h"
37 
38 namespace cxxblas {
39 
40 template <typename IndexType, typename ALPHA, typename MA, typename MB,
41           typename BETA, typename MC>
42 void
hemm_generic(StorageOrder order,Side sideA,StorageUpLo upLoA,IndexType m,IndexType n,const ALPHA & alpha,const MA * A,IndexType ldA,const MB * B,IndexType ldB,const BETA & beta,MC * C,IndexType ldC)43 hemm_generic(StorageOrder order,
44              Side sideA, StorageUpLo upLoA,
45              IndexType m, IndexType n,
46              const ALPHA &alpha,
47              const MA *A, IndexType ldA,
48              const MB *B, IndexType ldB,
49              const BETA &beta,
50              MC *C, IndexType ldC)
51 {
52     if (order==ColMajor) {
53         upLoA = (upLoA==Upper) ? Lower : Upper;
54         sideA = (sideA==Left) ? Right : Left;
55         hemm_generic(RowMajor, sideA, upLoA, n, m,
56                      alpha, A, ldA, B, ldB,
57                      beta,
58                      C, ldC);
59         return;
60     }
61     gescal_init(order, m, n, beta, C, ldC);
62     if (sideA==Right) {
63         for (IndexType i=0; i<m; ++i) {
64             hemv(order, upLoA, Conj, n, alpha, A, ldA, B+i*ldB, IndexType(1),
65                  BETA(1), C+i*ldC, IndexType(1));
66         }
67     }
68     if (sideA==Left) {
69         for (IndexType j=0; j<n; ++j) {
70             hemv(order, upLoA, NoTrans, m, alpha, A, ldA, B+j, ldB,
71                  BETA(1), C+j, ldC);
72         }
73     }
74 }
75 
76 template <typename IndexType, typename ALPHA, typename MA, typename MB,
77           typename BETA, typename MC>
78 void
hemm(StorageOrder order,Side side,StorageUpLo upLo,IndexType m,IndexType n,const ALPHA & alpha,const MA * A,IndexType ldA,const MB * B,IndexType ldB,const BETA & beta,MC * C,IndexType ldC)79 hemm(StorageOrder order,
80      Side side, StorageUpLo upLo,
81      IndexType m, IndexType n,
82      const ALPHA &alpha,
83      const MA *A, IndexType ldA,
84      const MB *B, IndexType ldB,
85      const BETA &beta,
86      MC *C, IndexType ldC)
87 {
88     CXXBLAS_DEBUG_OUT("hemm_generic");
89 
90     hemm_generic(order, side, upLo, m, n, alpha, A, ldA, B, ldB, beta, C, ldC);
91 }
92 
93 #ifdef HAVE_CBLAS
94 
95 template <typename IndexType>
96 typename If<IndexType>::isBlasCompatibleInteger
hemm(StorageOrder order,Side side,StorageUpLo upLo,IndexType m,IndexType n,const ComplexFloat & alpha,const ComplexFloat * A,IndexType ldA,const ComplexFloat * B,IndexType ldB,const ComplexFloat & beta,ComplexFloat * C,IndexType ldC)97 hemm(StorageOrder order,
98      Side side, StorageUpLo upLo,
99      IndexType m, IndexType n,
100      const ComplexFloat &alpha,
101      const ComplexFloat *A, IndexType ldA,
102      const ComplexFloat *B, IndexType ldB,
103      const ComplexFloat &beta,
104      ComplexFloat *C, IndexType ldC)
105 {
106     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chemm");
107 
108     cblas_chemm(CBLAS::getCblasType(order),
109                 CBLAS::getCblasType(side), CBLAS::getCblasType(upLo),
110                 m, n,
111                 reinterpret_cast<const float *>(&alpha),
112                 reinterpret_cast<const float *>(A), ldA,
113                 reinterpret_cast<const float *>(B), ldB,
114                 reinterpret_cast<const float *>(&beta),
115                 reinterpret_cast<float *>(C), ldC);
116 }
117 
118 template <typename IndexType>
119 typename If<IndexType>::isBlasCompatibleInteger
hemm(StorageOrder order,Side side,StorageUpLo upLo,IndexType m,IndexType n,const ComplexDouble & alpha,const ComplexDouble * A,IndexType ldA,const ComplexDouble * B,IndexType ldB,const ComplexDouble & beta,ComplexDouble * C,IndexType ldC)120 hemm(StorageOrder order,
121      Side side, StorageUpLo upLo,
122      IndexType m, IndexType n,
123      const ComplexDouble &alpha,
124      const ComplexDouble *A, IndexType ldA,
125      const ComplexDouble *B, IndexType ldB,
126      const ComplexDouble &beta,
127      ComplexDouble *C, IndexType ldC)
128 {
129     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhemm");
130 
131     cblas_zhemm(CBLAS::getCblasType(order),
132                 CBLAS::getCblasType(side), CBLAS::getCblasType(upLo),
133                 m, n,
134                 reinterpret_cast<const double *>(&alpha),
135                 reinterpret_cast<const double *>(A), ldA,
136                 reinterpret_cast<const double *>(B), ldB,
137                 reinterpret_cast<const double *>(&beta),
138                 reinterpret_cast<double *>(C), ldC);
139 }
140 
141 #endif // HAVE_CBLAS
142 
143 } // namespace cxxblas
144 
145 #endif // CXXBLAS_LEVEL3_HEMM_TCC
146