1 /* blas/source_gemm_c.h 2 * 3 * Copyright (C) 2001, 2007 Brian Gough 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 3 of the License, or (at 8 * your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 */ 19 20 { 21 INDEX i, j, k; 22 INDEX n1, n2; 23 INDEX ldf, ldg; 24 int conjF, conjG, TransF, TransG; 25 const BASE *F, *G; 26 27 const BASE alpha_real = CONST_REAL0(alpha); 28 const BASE alpha_imag = CONST_IMAG0(alpha); 29 30 const BASE beta_real = CONST_REAL0(beta); 31 const BASE beta_imag = CONST_IMAG0(beta); 32 33 if ((alpha_real == 0.0 && alpha_imag == 0.0) 34 && (beta_real == 1.0 && beta_imag == 0.0)) 35 return; 36 37 if (Order == CblasRowMajor) { 38 n1 = M; 39 n2 = N; 40 F = (const BASE *)A; 41 ldf = lda; 42 conjF = (TransA == CblasConjTrans) ? -1 : 1; 43 TransF = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans; 44 G = (const BASE *)B; 45 ldg = ldb; 46 conjG = (TransB == CblasConjTrans) ? -1 : 1; 47 TransG = (TransB == CblasNoTrans) ? CblasNoTrans : CblasTrans; 48 } else { 49 n1 = N; 50 n2 = M; 51 F = (const BASE *)B; 52 ldf = ldb; 53 conjF = (TransB == CblasConjTrans) ? -1 : 1; 54 TransF = (TransB == CblasNoTrans) ? CblasNoTrans : CblasTrans; 55 G = (const BASE *)A; 56 ldg = lda; 57 conjG = (TransA == CblasConjTrans) ? -1 : 1; 58 TransG = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans; 59 } 60 61 /* form y := beta*y */ 62 if (beta_real == 0.0 && beta_imag == 0.0) { 63 for (i = 0; i < n1; i++) { 64 for (j = 0; j < n2; j++) { 65 REAL(C, ldc * i + j) = 0.0; 66 IMAG(C, ldc * i + j) = 0.0; 67 } 68 } 69 } else if (!(beta_real == 1.0 && beta_imag == 0.0)) { 70 for (i = 0; i < n1; i++) { 71 for (j = 0; j < n2; j++) { 72 const BASE Cij_real = REAL(C, ldc * i + j); 73 const BASE Cij_imag = IMAG(C, ldc * i + j); 74 REAL(C, ldc * i + j) = beta_real * Cij_real - beta_imag * Cij_imag; 75 IMAG(C, ldc * i + j) = beta_real * Cij_imag + beta_imag * Cij_real; 76 } 77 } 78 } 79 80 if (alpha_real == 0.0 && alpha_imag == 0.0) 81 return; 82 83 if (TransF == CblasNoTrans && TransG == CblasNoTrans) { 84 85 /* form C := alpha*A*B + C */ 86 87 for (k = 0; k < K; k++) { 88 for (i = 0; i < n1; i++) { 89 const BASE Fik_real = CONST_REAL(F, ldf * i + k); 90 const BASE Fik_imag = conjF * CONST_IMAG(F, ldf * i + k); 91 const BASE temp_real = alpha_real * Fik_real - alpha_imag * Fik_imag; 92 const BASE temp_imag = alpha_real * Fik_imag + alpha_imag * Fik_real; 93 if (!(temp_real == 0.0 && temp_imag == 0.0)) { 94 for (j = 0; j < n2; j++) { 95 const BASE Gkj_real = CONST_REAL(G, ldg * k + j); 96 const BASE Gkj_imag = conjG * CONST_IMAG(G, ldg * k + j); 97 REAL(C, ldc * i + j) += temp_real * Gkj_real - temp_imag * Gkj_imag; 98 IMAG(C, ldc * i + j) += temp_real * Gkj_imag + temp_imag * Gkj_real; 99 } 100 } 101 } 102 } 103 104 } else if (TransF == CblasNoTrans && TransG == CblasTrans) { 105 106 /* form C := alpha*A*B' + C */ 107 108 for (i = 0; i < n1; i++) { 109 for (j = 0; j < n2; j++) { 110 BASE temp_real = 0.0; 111 BASE temp_imag = 0.0; 112 for (k = 0; k < K; k++) { 113 const BASE Fik_real = CONST_REAL(F, ldf * i + k); 114 const BASE Fik_imag = conjF * CONST_IMAG(F, ldf * i + k); 115 const BASE Gjk_real = CONST_REAL(G, ldg * j + k); 116 const BASE Gjk_imag = conjG * CONST_IMAG(G, ldg * j + k); 117 temp_real += Fik_real * Gjk_real - Fik_imag * Gjk_imag; 118 temp_imag += Fik_real * Gjk_imag + Fik_imag * Gjk_real; 119 } 120 REAL(C, ldc * i + j) += alpha_real * temp_real - alpha_imag * temp_imag; 121 IMAG(C, ldc * i + j) += alpha_real * temp_imag + alpha_imag * temp_real; 122 } 123 } 124 125 } else if (TransF == CblasTrans && TransG == CblasNoTrans) { 126 127 for (k = 0; k < K; k++) { 128 for (i = 0; i < n1; i++) { 129 const BASE Fki_real = CONST_REAL(F, ldf * k + i); 130 const BASE Fki_imag = conjF * CONST_IMAG(F, ldf * k + i); 131 const BASE temp_real = alpha_real * Fki_real - alpha_imag * Fki_imag; 132 const BASE temp_imag = alpha_real * Fki_imag + alpha_imag * Fki_real; 133 if (!(temp_real == 0.0 && temp_imag == 0.0)) { 134 for (j = 0; j < n2; j++) { 135 const BASE Gkj_real = CONST_REAL(G, ldg * k + j); 136 const BASE Gkj_imag = conjG * CONST_IMAG(G, ldg * k + j); 137 REAL(C, ldc * i + j) += temp_real * Gkj_real - temp_imag * Gkj_imag; 138 IMAG(C, ldc * i + j) += temp_real * Gkj_imag + temp_imag * Gkj_real; 139 } 140 } 141 } 142 } 143 144 } else if (TransF == CblasTrans && TransG == CblasTrans) { 145 146 for (i = 0; i < n1; i++) { 147 for (j = 0; j < n2; j++) { 148 BASE temp_real = 0.0; 149 BASE temp_imag = 0.0; 150 for (k = 0; k < K; k++) { 151 const BASE Fki_real = CONST_REAL(F, ldf * k + i); 152 const BASE Fki_imag = conjF * CONST_IMAG(F, ldf * k + i); 153 const BASE Gjk_real = CONST_REAL(G, ldg * j + k); 154 const BASE Gjk_imag = conjG * CONST_IMAG(G, ldg * j + k); 155 156 temp_real += Fki_real * Gjk_real - Fki_imag * Gjk_imag; 157 temp_imag += Fki_real * Gjk_imag + Fki_imag * Gjk_real; 158 } 159 REAL(C, ldc * i + j) += alpha_real * temp_real - alpha_imag * temp_imag; 160 IMAG(C, ldc * i + j) += alpha_real * temp_imag + alpha_imag * temp_real; 161 } 162 } 163 164 } else { 165 BLAS_ERROR("unrecognized operation"); 166 } 167 } 168