1 /************************************************************************* 2 Copyright (c) 2005-2007, Sergey Bochkanov (ALGLIB project). 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions are 6 met: 7 8 - Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 - Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer listed 13 in this license in the documentation and/or other materials 14 provided with the distribution. 15 16 - Neither the name of the copyright holders nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 *************************************************************************/ 32 33 #ifndef _blas_h 34 #define _blas_h 35 36 #include "ap.h" 37 #include "amp.h" 38 namespace blas 39 { 40 template<unsigned int Precision> 41 amp::ampf<Precision> vectornorm2(const ap::template_1d_array< amp::ampf<Precision> >& x, 42 int i1, 43 int i2); 44 template<unsigned int Precision> 45 int vectoridxabsmax(const ap::template_1d_array< amp::ampf<Precision> >& x, 46 int i1, 47 int i2); 48 template<unsigned int Precision> 49 int columnidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x, 50 int i1, 51 int i2, 52 int j); 53 template<unsigned int Precision> 54 int rowidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x, 55 int j1, 56 int j2, 57 int i); 58 template<unsigned int Precision> 59 amp::ampf<Precision> upperhessenberg1norm(const ap::template_2d_array< amp::ampf<Precision> >& a, 60 int i1, 61 int i2, 62 int j1, 63 int j2, 64 ap::template_1d_array< amp::ampf<Precision> >& work); 65 template<unsigned int Precision> 66 void copymatrix(const ap::template_2d_array< amp::ampf<Precision> >& a, 67 int is1, 68 int is2, 69 int js1, 70 int js2, 71 ap::template_2d_array< amp::ampf<Precision> >& b, 72 int id1, 73 int id2, 74 int jd1, 75 int jd2); 76 template<unsigned int Precision> 77 void inplacetranspose(ap::template_2d_array< amp::ampf<Precision> >& a, 78 int i1, 79 int i2, 80 int j1, 81 int j2, 82 ap::template_1d_array< amp::ampf<Precision> >& work); 83 template<unsigned int Precision> 84 void copyandtranspose(const ap::template_2d_array< amp::ampf<Precision> >& a, 85 int is1, 86 int is2, 87 int js1, 88 int js2, 89 ap::template_2d_array< amp::ampf<Precision> >& b, 90 int id1, 91 int id2, 92 int jd1, 93 int jd2); 94 template<unsigned int Precision> 95 void matrixvectormultiply(const ap::template_2d_array< amp::ampf<Precision> >& a, 96 int i1, 97 int i2, 98 int j1, 99 int j2, 100 bool trans, 101 const ap::template_1d_array< amp::ampf<Precision> >& x, 102 int ix1, 103 int ix2, 104 amp::ampf<Precision> alpha, 105 ap::template_1d_array< amp::ampf<Precision> >& y, 106 int iy1, 107 int iy2, 108 amp::ampf<Precision> beta); 109 template<unsigned int Precision> 110 amp::ampf<Precision> pythag2(amp::ampf<Precision> x, 111 amp::ampf<Precision> y); 112 template<unsigned int Precision> 113 void matrixmatrixmultiply(const ap::template_2d_array< amp::ampf<Precision> >& a, 114 int ai1, 115 int ai2, 116 int aj1, 117 int aj2, 118 bool transa, 119 const ap::template_2d_array< amp::ampf<Precision> >& b, 120 int bi1, 121 int bi2, 122 int bj1, 123 int bj2, 124 bool transb, 125 amp::ampf<Precision> alpha, 126 ap::template_2d_array< amp::ampf<Precision> >& c, 127 int ci1, 128 int ci2, 129 int cj1, 130 int cj2, 131 amp::ampf<Precision> beta, 132 ap::template_1d_array< amp::ampf<Precision> >& work); 133 134 135 template<unsigned int Precision> vectornorm2(const ap::template_1d_array<amp::ampf<Precision>> & x,int i1,int i2)136 amp::ampf<Precision> vectornorm2(const ap::template_1d_array< amp::ampf<Precision> >& x, 137 int i1, 138 int i2) 139 { 140 amp::ampf<Precision> result; 141 int n; 142 int ix; 143 amp::ampf<Precision> absxi; 144 amp::ampf<Precision> scl; 145 amp::ampf<Precision> ssq; 146 147 148 n = i2-i1+1; 149 if( n<1 ) 150 { 151 result = 0; 152 return result; 153 } 154 if( n==1 ) 155 { 156 result = amp::abs<Precision>(x(i1)); 157 return result; 158 } 159 scl = 0; 160 ssq = 1; 161 for(ix=i1; ix<=i2; ix++) 162 { 163 if( x(ix)!=0 ) 164 { 165 absxi = amp::abs<Precision>(x(ix)); 166 if( scl<absxi ) 167 { 168 ssq = 1+ssq*amp::sqr<Precision>(scl/absxi); 169 scl = absxi; 170 } 171 else 172 { 173 ssq = ssq+amp::sqr<Precision>(absxi/scl); 174 } 175 } 176 } 177 result = scl*amp::sqrt<Precision>(ssq); 178 return result; 179 } 180 181 182 template<unsigned int Precision> vectoridxabsmax(const ap::template_1d_array<amp::ampf<Precision>> & x,int i1,int i2)183 int vectoridxabsmax(const ap::template_1d_array< amp::ampf<Precision> >& x, 184 int i1, 185 int i2) 186 { 187 int result; 188 int i; 189 amp::ampf<Precision> a; 190 191 192 result = i1; 193 a = amp::abs<Precision>(x(result)); 194 for(i=i1+1; i<=i2; i++) 195 { 196 if( amp::abs<Precision>(x(i))>amp::abs<Precision>(x(result)) ) 197 { 198 result = i; 199 } 200 } 201 return result; 202 } 203 204 205 template<unsigned int Precision> columnidxabsmax(const ap::template_2d_array<amp::ampf<Precision>> & x,int i1,int i2,int j)206 int columnidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x, 207 int i1, 208 int i2, 209 int j) 210 { 211 int result; 212 int i; 213 amp::ampf<Precision> a; 214 215 216 result = i1; 217 a = amp::abs<Precision>(x(result,j)); 218 for(i=i1+1; i<=i2; i++) 219 { 220 if( amp::abs<Precision>(x(i,j))>amp::abs<Precision>(x(result,j)) ) 221 { 222 result = i; 223 } 224 } 225 return result; 226 } 227 228 229 template<unsigned int Precision> rowidxabsmax(const ap::template_2d_array<amp::ampf<Precision>> & x,int j1,int j2,int i)230 int rowidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x, 231 int j1, 232 int j2, 233 int i) 234 { 235 int result; 236 int j; 237 amp::ampf<Precision> a; 238 239 240 result = j1; 241 a = amp::abs<Precision>(x(i,result)); 242 for(j=j1+1; j<=j2; j++) 243 { 244 if( amp::abs<Precision>(x(i,j))>amp::abs<Precision>(x(i,result)) ) 245 { 246 result = j; 247 } 248 } 249 return result; 250 } 251 252 253 template<unsigned int Precision> upperhessenberg1norm(const ap::template_2d_array<amp::ampf<Precision>> & a,int i1,int i2,int j1,int j2,ap::template_1d_array<amp::ampf<Precision>> & work)254 amp::ampf<Precision> upperhessenberg1norm(const ap::template_2d_array< amp::ampf<Precision> >& a, 255 int i1, 256 int i2, 257 int j1, 258 int j2, 259 ap::template_1d_array< amp::ampf<Precision> >& work) 260 { 261 amp::ampf<Precision> result; 262 int i; 263 int j; 264 265 266 ap::ap_error::make_assertion(i2-i1==j2-j1); 267 for(j=j1; j<=j2; j++) 268 { 269 work(j) = 0; 270 } 271 for(i=i1; i<=i2; i++) 272 { 273 for(j=ap::maxint(j1, j1+i-i1-1); j<=j2; j++) 274 { 275 work(j) = work(j)+amp::abs<Precision>(a(i,j)); 276 } 277 } 278 result = 0; 279 for(j=j1; j<=j2; j++) 280 { 281 result = amp::maximum<Precision>(result, work(j)); 282 } 283 return result; 284 } 285 286 287 template<unsigned int Precision> copymatrix(const ap::template_2d_array<amp::ampf<Precision>> & a,int is1,int is2,int js1,int js2,ap::template_2d_array<amp::ampf<Precision>> & b,int id1,int id2,int jd1,int jd2)288 void copymatrix(const ap::template_2d_array< amp::ampf<Precision> >& a, 289 int is1, 290 int is2, 291 int js1, 292 int js2, 293 ap::template_2d_array< amp::ampf<Precision> >& b, 294 int id1, 295 int id2, 296 int jd1, 297 int jd2) 298 { 299 int isrc; 300 int idst; 301 302 303 if( is1>is2 || js1>js2 ) 304 { 305 return; 306 } 307 ap::ap_error::make_assertion(is2-is1==id2-id1); 308 ap::ap_error::make_assertion(js2-js1==jd2-jd1); 309 for(isrc=is1; isrc<=is2; isrc++) 310 { 311 idst = isrc-is1+id1; 312 ap::vmove(b.getrow(idst, jd1, jd2), a.getrow(isrc, js1, js2)); 313 } 314 } 315 316 317 template<unsigned int Precision> inplacetranspose(ap::template_2d_array<amp::ampf<Precision>> & a,int i1,int i2,int j1,int j2,ap::template_1d_array<amp::ampf<Precision>> & work)318 void inplacetranspose(ap::template_2d_array< amp::ampf<Precision> >& a, 319 int i1, 320 int i2, 321 int j1, 322 int j2, 323 ap::template_1d_array< amp::ampf<Precision> >& work) 324 { 325 int i; 326 int j; 327 int ips; 328 int jps; 329 int l; 330 331 332 if( i1>i2 || j1>j2 ) 333 { 334 return; 335 } 336 ap::ap_error::make_assertion(i1-i2==j1-j2); 337 for(i=i1; i<=i2-1; i++) 338 { 339 j = j1+i-i1; 340 ips = i+1; 341 jps = j1+ips-i1; 342 l = i2-i; 343 ap::vmove(work.getvector(1, l), a.getcolumn(j, ips, i2)); 344 ap::vmove(a.getcolumn(j, ips, i2), a.getrow(i, jps, j2)); 345 ap::vmove(a.getrow(i, jps, j2), work.getvector(1, l)); 346 } 347 } 348 349 350 template<unsigned int Precision> copyandtranspose(const ap::template_2d_array<amp::ampf<Precision>> & a,int is1,int is2,int js1,int js2,ap::template_2d_array<amp::ampf<Precision>> & b,int id1,int id2,int jd1,int jd2)351 void copyandtranspose(const ap::template_2d_array< amp::ampf<Precision> >& a, 352 int is1, 353 int is2, 354 int js1, 355 int js2, 356 ap::template_2d_array< amp::ampf<Precision> >& b, 357 int id1, 358 int id2, 359 int jd1, 360 int jd2) 361 { 362 int isrc; 363 int jdst; 364 365 366 if( is1>is2 || js1>js2 ) 367 { 368 return; 369 } 370 ap::ap_error::make_assertion(is2-is1==jd2-jd1); 371 ap::ap_error::make_assertion(js2-js1==id2-id1); 372 for(isrc=is1; isrc<=is2; isrc++) 373 { 374 jdst = isrc-is1+jd1; 375 ap::vmove(b.getcolumn(jdst, id1, id2), a.getrow(isrc, js1, js2)); 376 } 377 } 378 379 380 template<unsigned int Precision> matrixvectormultiply(const ap::template_2d_array<amp::ampf<Precision>> & a,int i1,int i2,int j1,int j2,bool trans,const ap::template_1d_array<amp::ampf<Precision>> & x,int ix1,int ix2,amp::ampf<Precision> alpha,ap::template_1d_array<amp::ampf<Precision>> & y,int iy1,int iy2,amp::ampf<Precision> beta)381 void matrixvectormultiply(const ap::template_2d_array< amp::ampf<Precision> >& a, 382 int i1, 383 int i2, 384 int j1, 385 int j2, 386 bool trans, 387 const ap::template_1d_array< amp::ampf<Precision> >& x, 388 int ix1, 389 int ix2, 390 amp::ampf<Precision> alpha, 391 ap::template_1d_array< amp::ampf<Precision> >& y, 392 int iy1, 393 int iy2, 394 amp::ampf<Precision> beta) 395 { 396 int i; 397 amp::ampf<Precision> v; 398 399 400 if( !trans ) 401 { 402 403 // 404 // y := alpha*A*x + beta*y; 405 // 406 if( i1>i2 || j1>j2 ) 407 { 408 return; 409 } 410 ap::ap_error::make_assertion(j2-j1==ix2-ix1); 411 ap::ap_error::make_assertion(i2-i1==iy2-iy1); 412 413 // 414 // beta*y 415 // 416 if( beta==0 ) 417 { 418 for(i=iy1; i<=iy2; i++) 419 { 420 y(i) = 0; 421 } 422 } 423 else 424 { 425 ap::vmul(y.getvector(iy1, iy2), beta); 426 } 427 428 // 429 // alpha*A*x 430 // 431 for(i=i1; i<=i2; i++) 432 { 433 v = ap::vdotproduct(a.getrow(i, j1, j2), x.getvector(ix1, ix2)); 434 y(iy1+i-i1) = y(iy1+i-i1)+alpha*v; 435 } 436 } 437 else 438 { 439 440 // 441 // y := alpha*A'*x + beta*y; 442 // 443 if( i1>i2 || j1>j2 ) 444 { 445 return; 446 } 447 ap::ap_error::make_assertion(i2-i1==ix2-ix1); 448 ap::ap_error::make_assertion(j2-j1==iy2-iy1); 449 450 // 451 // beta*y 452 // 453 if( beta==0 ) 454 { 455 for(i=iy1; i<=iy2; i++) 456 { 457 y(i) = 0; 458 } 459 } 460 else 461 { 462 ap::vmul(y.getvector(iy1, iy2), beta); 463 } 464 465 // 466 // alpha*A'*x 467 // 468 for(i=i1; i<=i2; i++) 469 { 470 v = alpha*x(ix1+i-i1); 471 ap::vadd(y.getvector(iy1, iy2), a.getrow(i, j1, j2), v); 472 } 473 } 474 } 475 476 477 template<unsigned int Precision> pythag2(amp::ampf<Precision> x,amp::ampf<Precision> y)478 amp::ampf<Precision> pythag2(amp::ampf<Precision> x, 479 amp::ampf<Precision> y) 480 { 481 amp::ampf<Precision> result; 482 amp::ampf<Precision> w; 483 amp::ampf<Precision> xabs; 484 amp::ampf<Precision> yabs; 485 amp::ampf<Precision> z; 486 487 488 xabs = amp::abs<Precision>(x); 489 yabs = amp::abs<Precision>(y); 490 w = amp::maximum<Precision>(xabs, yabs); 491 z = amp::minimum<Precision>(xabs, yabs); 492 if( z==0 ) 493 { 494 result = w; 495 } 496 else 497 { 498 result = w*amp::sqrt<Precision>(1+amp::sqr<Precision>(z/w)); 499 } 500 return result; 501 } 502 503 504 template<unsigned int Precision> matrixmatrixmultiply(const ap::template_2d_array<amp::ampf<Precision>> & a,int ai1,int ai2,int aj1,int aj2,bool transa,const ap::template_2d_array<amp::ampf<Precision>> & b,int bi1,int bi2,int bj1,int bj2,bool transb,amp::ampf<Precision> alpha,ap::template_2d_array<amp::ampf<Precision>> & c,int ci1,int ci2,int cj1,int cj2,amp::ampf<Precision> beta,ap::template_1d_array<amp::ampf<Precision>> & work)505 void matrixmatrixmultiply(const ap::template_2d_array< amp::ampf<Precision> >& a, 506 int ai1, 507 int ai2, 508 int aj1, 509 int aj2, 510 bool transa, 511 const ap::template_2d_array< amp::ampf<Precision> >& b, 512 int bi1, 513 int bi2, 514 int bj1, 515 int bj2, 516 bool transb, 517 amp::ampf<Precision> alpha, 518 ap::template_2d_array< amp::ampf<Precision> >& c, 519 int ci1, 520 int ci2, 521 int cj1, 522 int cj2, 523 amp::ampf<Precision> beta, 524 ap::template_1d_array< amp::ampf<Precision> >& work) 525 { 526 int arows; 527 int acols; 528 int brows; 529 int bcols; 530 int crows; 531 int ccols; 532 int i; 533 int j; 534 int k; 535 int l; 536 int r; 537 amp::ampf<Precision> v; 538 539 540 541 // 542 // Setup 543 // 544 if( !transa ) 545 { 546 arows = ai2-ai1+1; 547 acols = aj2-aj1+1; 548 } 549 else 550 { 551 arows = aj2-aj1+1; 552 acols = ai2-ai1+1; 553 } 554 if( !transb ) 555 { 556 brows = bi2-bi1+1; 557 bcols = bj2-bj1+1; 558 } 559 else 560 { 561 brows = bj2-bj1+1; 562 bcols = bi2-bi1+1; 563 } 564 ap::ap_error::make_assertion(acols==brows); 565 if( arows<=0 || acols<=0 || brows<=0 || bcols<=0 ) 566 { 567 return; 568 } 569 crows = arows; 570 ccols = bcols; 571 572 // 573 // Test WORK 574 // 575 i = ap::maxint(arows, acols); 576 i = ap::maxint(brows, i); 577 i = ap::maxint(i, bcols); 578 work(1) = 0; 579 work(i) = 0; 580 581 // 582 // Prepare C 583 // 584 if( beta==0 ) 585 { 586 for(i=ci1; i<=ci2; i++) 587 { 588 for(j=cj1; j<=cj2; j++) 589 { 590 c(i,j) = 0; 591 } 592 } 593 } 594 else 595 { 596 for(i=ci1; i<=ci2; i++) 597 { 598 ap::vmul(c.getrow(i, cj1, cj2), beta); 599 } 600 } 601 602 // 603 // A*B 604 // 605 if( !transa && !transb ) 606 { 607 for(l=ai1; l<=ai2; l++) 608 { 609 for(r=bi1; r<=bi2; r++) 610 { 611 v = alpha*a(l,aj1+r-bi1); 612 k = ci1+l-ai1; 613 ap::vadd(c.getrow(k, cj1, cj2), b.getrow(r, bj1, bj2), v); 614 } 615 } 616 return; 617 } 618 619 // 620 // A*B' 621 // 622 if( !transa && transb ) 623 { 624 if( arows*acols<brows*bcols ) 625 { 626 for(r=bi1; r<=bi2; r++) 627 { 628 for(l=ai1; l<=ai2; l++) 629 { 630 v = ap::vdotproduct(a.getrow(l, aj1, aj2), b.getrow(r, bj1, bj2)); 631 c(ci1+l-ai1,cj1+r-bi1) = c(ci1+l-ai1,cj1+r-bi1)+alpha*v; 632 } 633 } 634 return; 635 } 636 else 637 { 638 for(l=ai1; l<=ai2; l++) 639 { 640 for(r=bi1; r<=bi2; r++) 641 { 642 v = ap::vdotproduct(a.getrow(l, aj1, aj2), b.getrow(r, bj1, bj2)); 643 c(ci1+l-ai1,cj1+r-bi1) = c(ci1+l-ai1,cj1+r-bi1)+alpha*v; 644 } 645 } 646 return; 647 } 648 } 649 650 // 651 // A'*B 652 // 653 if( transa && !transb ) 654 { 655 for(l=aj1; l<=aj2; l++) 656 { 657 for(r=bi1; r<=bi2; r++) 658 { 659 v = alpha*a(ai1+r-bi1,l); 660 k = ci1+l-aj1; 661 ap::vadd(c.getrow(k, cj1, cj2), b.getrow(r, bj1, bj2), v); 662 } 663 } 664 return; 665 } 666 667 // 668 // A'*B' 669 // 670 if( transa && transb ) 671 { 672 if( arows*acols<brows*bcols ) 673 { 674 for(r=bi1; r<=bi2; r++) 675 { 676 for(i=1; i<=crows; i++) 677 { 678 work(i) = amp::ampf<Precision>("0.0"); 679 } 680 for(l=ai1; l<=ai2; l++) 681 { 682 v = alpha*b(r,bj1+l-ai1); 683 k = cj1+r-bi1; 684 ap::vadd(work.getvector(1, crows), a.getrow(l, aj1, aj2), v); 685 } 686 ap::vadd(c.getcolumn(k, ci1, ci2), work.getvector(1, crows)); 687 } 688 return; 689 } 690 else 691 { 692 for(l=aj1; l<=aj2; l++) 693 { 694 k = ai2-ai1+1; 695 ap::vmove(work.getvector(1, k), a.getcolumn(l, ai1, ai2)); 696 for(r=bi1; r<=bi2; r++) 697 { 698 v = ap::vdotproduct(work.getvector(1, k), b.getrow(r, bj1, bj2)); 699 c(ci1+l-aj1,cj1+r-bi1) = c(ci1+l-aj1,cj1+r-bi1)+alpha*v; 700 } 701 } 702 return; 703 } 704 } 705 } 706 } // namespace 707 708 #endif 709