1 /* blas/source_trsm_c.h 2 * 3 * Copyright (C) 2001, 2007 Brian Gough 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 3 of the License, or (at 8 * your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 */ 19 20 { 21 INDEX i, j, k; 22 INDEX n1, n2; 23 const int nonunit = (Diag == CblasNonUnit); 24 const int conj = (TransA == CblasConjTrans) ? -1 : 1; 25 int side, uplo, trans; 26 27 const BASE alpha_real = CONST_REAL0(alpha); 28 const BASE alpha_imag = CONST_IMAG0(alpha); 29 30 if (Order == CblasRowMajor) { 31 n1 = M; 32 n2 = N; 33 side = Side; 34 uplo = Uplo; 35 trans = TransA; 36 trans = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans; 37 } else { 38 n1 = N; 39 n2 = M; 40 side = (Side == CblasLeft) ? CblasRight : CblasLeft; /* exchanged */ 41 uplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper; /* exchanged */ 42 trans = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans; /* same */ 43 } 44 45 if (side == CblasLeft && uplo == CblasUpper && trans == CblasNoTrans) { 46 47 /* form B := alpha * inv(TriU(A)) *B */ 48 49 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 50 for (i = 0; i < n1; i++) { 51 for (j = 0; j < n2; j++) { 52 const BASE Bij_real = REAL(B, ldb * i + j); 53 const BASE Bij_imag = IMAG(B, ldb * i + j); 54 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 55 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 56 } 57 } 58 } 59 60 for (i = n1; i > 0 && i--;) { 61 if (nonunit) { 62 const BASE Aii_real = CONST_REAL(A, lda * i + i); 63 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i); 64 const BASE s = xhypot(Aii_real, Aii_imag); 65 const BASE a_real = Aii_real / s; 66 const BASE a_imag = Aii_imag / s; 67 68 for (j = 0; j < n2; j++) { 69 const BASE Bij_real = REAL(B, ldb * i + j); 70 const BASE Bij_imag = IMAG(B, ldb * i + j); 71 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 72 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 73 } 74 } 75 76 for (k = 0; k < i; k++) { 77 const BASE Aki_real = CONST_REAL(A, k * lda + i); 78 const BASE Aki_imag = conj * CONST_IMAG(A, k * lda + i); 79 for (j = 0; j < n2; j++) { 80 const BASE Bij_real = REAL(B, ldb * i + j); 81 const BASE Bij_imag = IMAG(B, ldb * i + j); 82 REAL(B, ldb * k + j) -= Aki_real * Bij_real - Aki_imag * Bij_imag; 83 IMAG(B, ldb * k + j) -= Aki_real * Bij_imag + Aki_imag * Bij_real; 84 } 85 } 86 } 87 88 } else if (side == CblasLeft && uplo == CblasUpper && trans == CblasTrans) { 89 90 /* form B := alpha * inv(TriU(A))' *B */ 91 92 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 93 for (i = 0; i < n1; i++) { 94 for (j = 0; j < n2; j++) { 95 const BASE Bij_real = REAL(B, ldb * i + j); 96 const BASE Bij_imag = IMAG(B, ldb * i + j); 97 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 98 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 99 } 100 } 101 } 102 103 for (i = 0; i < n1; i++) { 104 105 if (nonunit) { 106 const BASE Aii_real = CONST_REAL(A, lda * i + i); 107 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i); 108 const BASE s = xhypot(Aii_real, Aii_imag); 109 const BASE a_real = Aii_real / s; 110 const BASE a_imag = Aii_imag / s; 111 112 for (j = 0; j < n2; j++) { 113 const BASE Bij_real = REAL(B, ldb * i + j); 114 const BASE Bij_imag = IMAG(B, ldb * i + j); 115 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 116 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 117 } 118 } 119 120 for (k = i + 1; k < n1; k++) { 121 const BASE Aik_real = CONST_REAL(A, i * lda + k); 122 const BASE Aik_imag = conj * CONST_IMAG(A, i * lda + k); 123 for (j = 0; j < n2; j++) { 124 const BASE Bij_real = REAL(B, ldb * i + j); 125 const BASE Bij_imag = IMAG(B, ldb * i + j); 126 REAL(B, ldb * k + j) -= Aik_real * Bij_real - Aik_imag * Bij_imag; 127 IMAG(B, ldb * k + j) -= Aik_real * Bij_imag + Aik_imag * Bij_real; 128 } 129 } 130 } 131 132 } else if (side == CblasLeft && uplo == CblasLower && trans == CblasNoTrans) { 133 134 /* form B := alpha * inv(TriL(A))*B */ 135 136 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 137 for (i = 0; i < n1; i++) { 138 for (j = 0; j < n2; j++) { 139 const BASE Bij_real = REAL(B, ldb * i + j); 140 const BASE Bij_imag = IMAG(B, ldb * i + j); 141 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 142 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 143 } 144 } 145 } 146 147 for (i = 0; i < n1; i++) { 148 149 if (nonunit) { 150 const BASE Aii_real = CONST_REAL(A, lda * i + i); 151 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i); 152 const BASE s = xhypot(Aii_real, Aii_imag); 153 const BASE a_real = Aii_real / s; 154 const BASE a_imag = Aii_imag / s; 155 156 for (j = 0; j < n2; j++) { 157 const BASE Bij_real = REAL(B, ldb * i + j); 158 const BASE Bij_imag = IMAG(B, ldb * i + j); 159 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 160 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 161 } 162 } 163 164 for (k = i + 1; k < n1; k++) { 165 const BASE Aki_real = CONST_REAL(A, k * lda + i); 166 const BASE Aki_imag = conj * CONST_IMAG(A, k * lda + i); 167 for (j = 0; j < n2; j++) { 168 const BASE Bij_real = REAL(B, ldb * i + j); 169 const BASE Bij_imag = IMAG(B, ldb * i + j); 170 REAL(B, ldb * k + j) -= Aki_real * Bij_real - Aki_imag * Bij_imag; 171 IMAG(B, ldb * k + j) -= Aki_real * Bij_imag + Aki_imag * Bij_real; 172 } 173 } 174 } 175 176 177 } else if (side == CblasLeft && uplo == CblasLower && trans == CblasTrans) { 178 179 /* form B := alpha * TriL(A)' *B */ 180 181 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 182 for (i = 0; i < n1; i++) { 183 for (j = 0; j < n2; j++) { 184 const BASE Bij_real = REAL(B, ldb * i + j); 185 const BASE Bij_imag = IMAG(B, ldb * i + j); 186 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 187 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 188 } 189 } 190 } 191 192 for (i = n1; i > 0 && i--;) { 193 if (nonunit) { 194 const BASE Aii_real = CONST_REAL(A, lda * i + i); 195 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i); 196 const BASE s = xhypot(Aii_real, Aii_imag); 197 const BASE a_real = Aii_real / s; 198 const BASE a_imag = Aii_imag / s; 199 200 for (j = 0; j < n2; j++) { 201 const BASE Bij_real = REAL(B, ldb * i + j); 202 const BASE Bij_imag = IMAG(B, ldb * i + j); 203 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 204 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 205 } 206 } 207 208 for (k = 0; k < i; k++) { 209 const BASE Aik_real = CONST_REAL(A, i * lda + k); 210 const BASE Aik_imag = conj * CONST_IMAG(A, i * lda + k); 211 for (j = 0; j < n2; j++) { 212 const BASE Bij_real = REAL(B, ldb * i + j); 213 const BASE Bij_imag = IMAG(B, ldb * i + j); 214 REAL(B, ldb * k + j) -= Aik_real * Bij_real - Aik_imag * Bij_imag; 215 IMAG(B, ldb * k + j) -= Aik_real * Bij_imag + Aik_imag * Bij_real; 216 } 217 } 218 } 219 220 } else if (side == CblasRight && uplo == CblasUpper && trans == CblasNoTrans) { 221 222 /* form B := alpha * B * inv(TriU(A)) */ 223 224 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 225 for (i = 0; i < n1; i++) { 226 for (j = 0; j < n2; j++) { 227 const BASE Bij_real = REAL(B, ldb * i + j); 228 const BASE Bij_imag = IMAG(B, ldb * i + j); 229 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 230 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 231 } 232 } 233 } 234 235 for (i = 0; i < n1; i++) { 236 for (j = 0; j < n2; j++) { 237 if (nonunit) { 238 const BASE Ajj_real = CONST_REAL(A, lda * j + j); 239 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j); 240 const BASE s = xhypot(Ajj_real, Ajj_imag); 241 const BASE a_real = Ajj_real / s; 242 const BASE a_imag = Ajj_imag / s; 243 const BASE Bij_real = REAL(B, ldb * i + j); 244 const BASE Bij_imag = IMAG(B, ldb * i + j); 245 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 246 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 247 } 248 249 { 250 const BASE Bij_real = REAL(B, ldb * i + j); 251 const BASE Bij_imag = IMAG(B, ldb * i + j); 252 for (k = j + 1; k < n2; k++) { 253 const BASE Ajk_real = CONST_REAL(A, j * lda + k); 254 const BASE Ajk_imag = conj * CONST_IMAG(A, j * lda + k); 255 REAL(B, ldb * i + k) -= Ajk_real * Bij_real - Ajk_imag * Bij_imag; 256 IMAG(B, ldb * i + k) -= Ajk_real * Bij_imag + Ajk_imag * Bij_real; 257 } 258 } 259 } 260 } 261 262 } else if (side == CblasRight && uplo == CblasUpper && trans == CblasTrans) { 263 264 /* form B := alpha * B * inv(TriU(A))' */ 265 266 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 267 for (i = 0; i < n1; i++) { 268 for (j = 0; j < n2; j++) { 269 const BASE Bij_real = REAL(B, ldb * i + j); 270 const BASE Bij_imag = IMAG(B, ldb * i + j); 271 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 272 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 273 } 274 } 275 } 276 277 for (i = 0; i < n1; i++) { 278 for (j = n2; j > 0 && j--;) { 279 280 if (nonunit) { 281 const BASE Ajj_real = CONST_REAL(A, lda * j + j); 282 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j); 283 const BASE s = xhypot(Ajj_real, Ajj_imag); 284 const BASE a_real = Ajj_real / s; 285 const BASE a_imag = Ajj_imag / s; 286 const BASE Bij_real = REAL(B, ldb * i + j); 287 const BASE Bij_imag = IMAG(B, ldb * i + j); 288 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 289 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 290 } 291 292 { 293 const BASE Bij_real = REAL(B, ldb * i + j); 294 const BASE Bij_imag = IMAG(B, ldb * i + j); 295 for (k = 0; k < j; k++) { 296 const BASE Akj_real = CONST_REAL(A, k * lda + j); 297 const BASE Akj_imag = conj * CONST_IMAG(A, k * lda + j); 298 REAL(B, ldb * i + k) -= Akj_real * Bij_real - Akj_imag * Bij_imag; 299 IMAG(B, ldb * i + k) -= Akj_real * Bij_imag + Akj_imag * Bij_real; 300 } 301 } 302 } 303 } 304 305 306 } else if (side == CblasRight && uplo == CblasLower && trans == CblasNoTrans) { 307 308 /* form B := alpha * B * inv(TriL(A)) */ 309 310 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 311 for (i = 0; i < n1; i++) { 312 for (j = 0; j < n2; j++) { 313 const BASE Bij_real = REAL(B, ldb * i + j); 314 const BASE Bij_imag = IMAG(B, ldb * i + j); 315 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 316 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 317 } 318 } 319 } 320 321 for (i = 0; i < n1; i++) { 322 for (j = n2; j > 0 && j--;) { 323 324 if (nonunit) { 325 const BASE Ajj_real = CONST_REAL(A, lda * j + j); 326 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j); 327 const BASE s = xhypot(Ajj_real, Ajj_imag); 328 const BASE a_real = Ajj_real / s; 329 const BASE a_imag = Ajj_imag / s; 330 const BASE Bij_real = REAL(B, ldb * i + j); 331 const BASE Bij_imag = IMAG(B, ldb * i + j); 332 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 333 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 334 } 335 336 { 337 const BASE Bij_real = REAL(B, ldb * i + j); 338 const BASE Bij_imag = IMAG(B, ldb * i + j); 339 for (k = 0; k < j; k++) { 340 const BASE Ajk_real = CONST_REAL(A, j * lda + k); 341 const BASE Ajk_imag = conj * CONST_IMAG(A, j * lda + k); 342 REAL(B, ldb * i + k) -= Ajk_real * Bij_real - Ajk_imag * Bij_imag; 343 IMAG(B, ldb * i + k) -= Ajk_real * Bij_imag + Ajk_imag * Bij_real; 344 } 345 } 346 } 347 } 348 349 } else if (side == CblasRight && uplo == CblasLower && trans == CblasTrans) { 350 351 /* form B := alpha * B * inv(TriL(A))' */ 352 353 354 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) { 355 for (i = 0; i < n1; i++) { 356 for (j = 0; j < n2; j++) { 357 const BASE Bij_real = REAL(B, ldb * i + j); 358 const BASE Bij_imag = IMAG(B, ldb * i + j); 359 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag; 360 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real; 361 } 362 } 363 } 364 365 for (i = 0; i < n1; i++) { 366 for (j = 0; j < n2; j++) { 367 if (nonunit) { 368 const BASE Ajj_real = CONST_REAL(A, lda * j + j); 369 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j); 370 const BASE s = xhypot(Ajj_real, Ajj_imag); 371 const BASE a_real = Ajj_real / s; 372 const BASE a_imag = Ajj_imag / s; 373 const BASE Bij_real = REAL(B, ldb * i + j); 374 const BASE Bij_imag = IMAG(B, ldb * i + j); 375 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s; 376 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s; 377 } 378 379 { 380 const BASE Bij_real = REAL(B, ldb * i + j); 381 const BASE Bij_imag = IMAG(B, ldb * i + j); 382 383 for (k = j + 1; k < n2; k++) { 384 const BASE Akj_real = CONST_REAL(A, k * lda + j); 385 const BASE Akj_imag = conj * CONST_IMAG(A, k * lda + j); 386 REAL(B, ldb * i + k) -= Akj_real * Bij_real - Akj_imag * Bij_imag; 387 IMAG(B, ldb * i + k) -= Akj_real * Bij_imag + Akj_imag * Bij_real; 388 } 389 } 390 } 391 } 392 393 394 } else { 395 BLAS_ERROR("unrecognized operation"); 396 } 397 } 398