1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * The contents of this file are subject to the Mozilla Public License Version 5 * 1.1 (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * http://www.mozilla.org/MPL/ 8 * 9 * Software distributed under the License is distributed on an "AS IS" basis, 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 * for the specific language governing rights and limitations under the 12 * License. 13 * 14 * The Original Code is JTransforms. 15 * 16 * The Initial Developer of the Original Code is 17 * Piotr Wendykier, Emory University. 18 * Portions created by the Initial Developer are Copyright (C) 2007-2009 19 * the Initial Developer. All Rights Reserved. 20 * 21 * Alternatively, the contents of this file may be used under the terms of 22 * either the GNU General Public License Version 2 or later (the "GPL"), or 23 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 24 * in which case the provisions of the GPL or the LGPL are applicable instead 25 * of those above. If you wish to allow use of your version of this file only 26 * under the terms of either the GPL or the LGPL, and not to allow others to 27 * use your version of this file under the terms of the MPL, indicate your 28 * decision by deleting the provisions above and replace them with the notice 29 * and other provisions required by the GPL or the LGPL. If you do not delete 30 * the provisions above, a recipient may use your version of this file under 31 * the terms of any one of the MPL, the GPL or the LGPL. 32 * 33 * ***** END LICENSE BLOCK ***** */ 34 35 package edu.emory.mathcs.jtransforms.dst; 36 37 import java.util.concurrent.Future; 38 39 import edu.emory.mathcs.utils.ConcurrencyUtils; 40 41 /** 42 * Computes 3D Discrete Sine Transform (DST) of double precision data. The sizes 43 * of all three dimensions can be arbitrary numbers. This is a parallel 44 * implementation optimized for SMP systems.<br> 45 * <br> 46 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura 47 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 48 * 49 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 50 * 51 */ 52 public class DoubleDST_3D { 53 54 private int slices; 55 56 private int rows; 57 58 private int columns; 59 60 private int sliceStride; 61 62 private int rowStride; 63 64 private double[] t; 65 66 private DoubleDST_1D dstSlices, dstRows, dstColumns; 67 68 private int oldNthreads; 69 70 private int nt; 71 72 private boolean isPowerOfTwo = false; 73 74 private boolean useThreads = false; 75 76 /** 77 * Creates new instance of DoubleDST_3D. 78 * 79 * @param slices 80 * number of slices 81 * @param rows 82 * number of rows 83 * @param columns 84 * number of columns 85 */ DoubleDST_3D(int slices, int rows, int columns)86 public DoubleDST_3D(int slices, int rows, int columns) { 87 if (slices <= 1 || rows <= 1 || columns <= 1) { 88 throw new IllegalArgumentException("slices, rows and columns must be greater than 1"); 89 } 90 this.slices = slices; 91 this.rows = rows; 92 this.columns = columns; 93 this.sliceStride = rows * columns; 94 this.rowStride = columns; 95 if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) { 96 this.useThreads = true; 97 } 98 if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 99 isPowerOfTwo = true; 100 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 101 nt = slices; 102 if (nt < rows) { 103 nt = rows; 104 } 105 nt *= 4; 106 if (oldNthreads > 1) { 107 nt *= oldNthreads; 108 } 109 if (columns == 2) { 110 nt >>= 1; 111 } 112 t = new double[nt]; 113 } 114 dstSlices = new DoubleDST_1D(slices); 115 if (slices == rows) { 116 dstRows = dstSlices; 117 } else { 118 dstRows = new DoubleDST_1D(rows); 119 } 120 if (slices == columns) { 121 dstColumns = dstSlices; 122 } else if (rows == columns) { 123 dstColumns = dstRows; 124 } else { 125 dstColumns = new DoubleDST_1D(columns); 126 } 127 } 128 129 /** 130 * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code> 131 * . The data is stored in 1D array addressed in slice-major, then 132 * row-major, then column-major, in order of significance, i.e. the element 133 * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride 134 * + j*rowStride + k], where sliceStride = rows * columns and rowStride = 135 * columns. 136 * 137 * @param a 138 * data to transform 139 * @param scale 140 * if true then scaling is performed 141 */ forward(final double[] a, final boolean scale)142 public void forward(final double[] a, final boolean scale) { 143 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 144 if (isPowerOfTwo) { 145 if (nthreads != oldNthreads) { 146 nt = slices; 147 if (nt < rows) { 148 nt = rows; 149 } 150 nt *= 4; 151 if (nthreads > 1) { 152 nt *= nthreads; 153 } 154 if (columns == 2) { 155 nt >>= 1; 156 } 157 t = new double[nt]; 158 oldNthreads = nthreads; 159 } 160 if ((nthreads > 1) && useThreads) { 161 ddxt3da_subth(-1, a, scale); 162 ddxt3db_subth(-1, a, scale); 163 } else { 164 ddxt3da_sub(-1, a, scale); 165 ddxt3db_sub(-1, a, scale); 166 } 167 } else { 168 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 169 Future<?>[] futures = new Future[nthreads]; 170 int p = slices / nthreads; 171 for (int l = 0; l < nthreads; l++) { 172 final int firstSlice = l * p; 173 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 174 futures[l] = ConcurrencyUtils.submit(new Runnable() { 175 public void run() { 176 for (int s = firstSlice; s < lastSlice; s++) { 177 int idx1 = s * sliceStride; 178 for (int r = 0; r < rows; r++) { 179 dstColumns.forward(a, idx1 + r * rowStride, scale); 180 } 181 } 182 } 183 }); 184 } 185 ConcurrencyUtils.waitForCompletion(futures); 186 187 for (int l = 0; l < nthreads; l++) { 188 final int firstSlice = l * p; 189 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 190 futures[l] = ConcurrencyUtils.submit(new Runnable() { 191 public void run() { 192 double[] temp = new double[rows]; 193 for (int s = firstSlice; s < lastSlice; s++) { 194 int idx1 = s * sliceStride; 195 for (int c = 0; c < columns; c++) { 196 for (int r = 0; r < rows; r++) { 197 int idx3 = idx1 + r * rowStride + c; 198 temp[r] = a[idx3]; 199 } 200 dstRows.forward(temp, scale); 201 for (int r = 0; r < rows; r++) { 202 int idx3 = idx1 + r * rowStride + c; 203 a[idx3] = temp[r]; 204 } 205 } 206 } 207 } 208 }); 209 } 210 ConcurrencyUtils.waitForCompletion(futures); 211 212 p = rows / nthreads; 213 for (int l = 0; l < nthreads; l++) { 214 final int firstRow = l * p; 215 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 216 futures[l] = ConcurrencyUtils.submit(new Runnable() { 217 public void run() { 218 double[] temp = new double[slices]; 219 for (int r = firstRow; r < lastRow; r++) { 220 int idx1 = r * rowStride; 221 for (int c = 0; c < columns; c++) { 222 for (int s = 0; s < slices; s++) { 223 int idx3 = s * sliceStride + idx1 + c; 224 temp[s] = a[idx3]; 225 } 226 dstSlices.forward(temp, scale); 227 for (int s = 0; s < slices; s++) { 228 int idx3 = s * sliceStride + idx1 + c; 229 a[idx3] = temp[s]; 230 } 231 } 232 } 233 } 234 }); 235 } 236 ConcurrencyUtils.waitForCompletion(futures); 237 238 } else { 239 for (int s = 0; s < slices; s++) { 240 int idx1 = s * sliceStride; 241 for (int r = 0; r < rows; r++) { 242 dstColumns.forward(a, idx1 + r * rowStride, scale); 243 } 244 } 245 double[] temp = new double[rows]; 246 for (int s = 0; s < slices; s++) { 247 int idx1 = s * sliceStride; 248 for (int c = 0; c < columns; c++) { 249 for (int r = 0; r < rows; r++) { 250 int idx3 = idx1 + r * rowStride + c; 251 temp[r] = a[idx3]; 252 } 253 dstRows.forward(temp, scale); 254 for (int r = 0; r < rows; r++) { 255 int idx3 = idx1 + r * rowStride + c; 256 a[idx3] = temp[r]; 257 } 258 } 259 } 260 temp = new double[slices]; 261 for (int r = 0; r < rows; r++) { 262 int idx1 = r * rowStride; 263 for (int c = 0; c < columns; c++) { 264 for (int s = 0; s < slices; s++) { 265 int idx3 = s * sliceStride + idx1 + c; 266 temp[s] = a[idx3]; 267 } 268 dstSlices.forward(temp, scale); 269 for (int s = 0; s < slices; s++) { 270 int idx3 = s * sliceStride + idx1 + c; 271 a[idx3] = temp[s]; 272 } 273 } 274 } 275 } 276 } 277 } 278 279 /** 280 * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code> 281 * . The data is stored in 3D array. 282 * 283 * @param a 284 * data to transform 285 * @param scale 286 * if true then scaling is performed 287 */ forward(final double[][][] a, final boolean scale)288 public void forward(final double[][][] a, final boolean scale) { 289 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 290 if (isPowerOfTwo) { 291 if (nthreads != oldNthreads) { 292 nt = slices; 293 if (nt < rows) { 294 nt = rows; 295 } 296 nt *= 4; 297 if (nthreads > 1) { 298 nt *= nthreads; 299 } 300 if (columns == 2) { 301 nt >>= 1; 302 } 303 t = new double[nt]; 304 oldNthreads = nthreads; 305 } 306 if ((nthreads > 1) && useThreads) { 307 ddxt3da_subth(-1, a, scale); 308 ddxt3db_subth(-1, a, scale); 309 } else { 310 ddxt3da_sub(-1, a, scale); 311 ddxt3db_sub(-1, a, scale); 312 } 313 } else { 314 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 315 Future<?>[] futures = new Future[nthreads]; 316 int p = slices / nthreads; 317 for (int l = 0; l < nthreads; l++) { 318 final int firstSlice = l * p; 319 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 320 futures[l] = ConcurrencyUtils.submit(new Runnable() { 321 public void run() { 322 for (int s = firstSlice; s < lastSlice; s++) { 323 for (int r = 0; r < rows; r++) { 324 dstColumns.forward(a[s][r], scale); 325 } 326 } 327 } 328 }); 329 } 330 ConcurrencyUtils.waitForCompletion(futures); 331 332 for (int l = 0; l < nthreads; l++) { 333 final int firstSlice = l * p; 334 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 335 futures[l] = ConcurrencyUtils.submit(new Runnable() { 336 public void run() { 337 double[] temp = new double[rows]; 338 for (int s = firstSlice; s < lastSlice; s++) { 339 for (int c = 0; c < columns; c++) { 340 for (int r = 0; r < rows; r++) { 341 temp[r] = a[s][r][c]; 342 } 343 dstRows.forward(temp, scale); 344 for (int r = 0; r < rows; r++) { 345 a[s][r][c] = temp[r]; 346 } 347 } 348 } 349 } 350 }); 351 } 352 ConcurrencyUtils.waitForCompletion(futures); 353 354 p = rows / nthreads; 355 for (int l = 0; l < nthreads; l++) { 356 final int firstRow = l * p; 357 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 358 futures[l] = ConcurrencyUtils.submit(new Runnable() { 359 public void run() { 360 double[] temp = new double[slices]; 361 for (int r = firstRow; r < lastRow; r++) { 362 for (int c = 0; c < columns; c++) { 363 for (int s = 0; s < slices; s++) { 364 temp[s] = a[s][r][c]; 365 } 366 dstSlices.forward(temp, scale); 367 for (int s = 0; s < slices; s++) { 368 a[s][r][c] = temp[s]; 369 } 370 } 371 } 372 } 373 }); 374 } 375 ConcurrencyUtils.waitForCompletion(futures); 376 377 } else { 378 for (int s = 0; s < slices; s++) { 379 for (int r = 0; r < rows; r++) { 380 dstColumns.forward(a[s][r], scale); 381 } 382 } 383 double[] temp = new double[rows]; 384 for (int s = 0; s < slices; s++) { 385 for (int c = 0; c < columns; c++) { 386 for (int r = 0; r < rows; r++) { 387 temp[r] = a[s][r][c]; 388 } 389 dstRows.forward(temp, scale); 390 for (int r = 0; r < rows; r++) { 391 a[s][r][c] = temp[r]; 392 } 393 } 394 } 395 temp = new double[slices]; 396 for (int r = 0; r < rows; r++) { 397 for (int c = 0; c < columns; c++) { 398 for (int s = 0; s < slices; s++) { 399 temp[s] = a[s][r][c]; 400 } 401 dstSlices.forward(temp, scale); 402 for (int s = 0; s < slices; s++) { 403 a[s][r][c] = temp[s]; 404 } 405 } 406 } 407 } 408 } 409 } 410 411 /** 412 * Computes the 3D inverse DST (DST-III) leaving the result in 413 * <code>a</code>. The data is stored in 1D array addressed in slice-major, 414 * then row-major, then column-major, in order of significance, i.e. the 415 * element (i,j,k) of 3D array x[slices][rows][columns] is stored in 416 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns 417 * and rowStride = columns. 418 * 419 * @param a 420 * data to transform 421 * @param scale 422 * if true then scaling is performed 423 */ inverse(final double[] a, final boolean scale)424 public void inverse(final double[] a, final boolean scale) { 425 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 426 if (isPowerOfTwo) { 427 if (nthreads != oldNthreads) { 428 nt = slices; 429 if (nt < rows) { 430 nt = rows; 431 } 432 nt *= 4; 433 if (nthreads > 1) { 434 nt *= nthreads; 435 } 436 if (columns == 2) { 437 nt >>= 1; 438 } 439 t = new double[nt]; 440 oldNthreads = nthreads; 441 } 442 if ((nthreads > 1) && useThreads) { 443 ddxt3da_subth(1, a, scale); 444 ddxt3db_subth(1, a, scale); 445 } else { 446 ddxt3da_sub(1, a, scale); 447 ddxt3db_sub(1, a, scale); 448 } 449 } else { 450 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 451 Future<?>[] futures = new Future[nthreads]; 452 int p = slices / nthreads; 453 for (int l = 0; l < nthreads; l++) { 454 final int firstSlice = l * p; 455 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 456 futures[l] = ConcurrencyUtils.submit(new Runnable() { 457 public void run() { 458 for (int s = firstSlice; s < lastSlice; s++) { 459 int idx1 = s * sliceStride; 460 for (int r = 0; r < rows; r++) { 461 dstColumns.inverse(a, idx1 + r * rowStride, scale); 462 } 463 } 464 } 465 }); 466 } 467 ConcurrencyUtils.waitForCompletion(futures); 468 469 for (int l = 0; l < nthreads; l++) { 470 final int firstSlice = l * p; 471 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 472 futures[l] = ConcurrencyUtils.submit(new Runnable() { 473 public void run() { 474 double[] temp = new double[rows]; 475 for (int s = firstSlice; s < lastSlice; s++) { 476 int idx1 = s * sliceStride; 477 for (int c = 0; c < columns; c++) { 478 for (int r = 0; r < rows; r++) { 479 int idx3 = idx1 + r * rowStride + c; 480 temp[r] = a[idx3]; 481 } 482 dstRows.inverse(temp, scale); 483 for (int r = 0; r < rows; r++) { 484 int idx3 = idx1 + r * rowStride + c; 485 a[idx3] = temp[r]; 486 } 487 } 488 } 489 } 490 }); 491 } 492 ConcurrencyUtils.waitForCompletion(futures); 493 494 p = rows / nthreads; 495 for (int l = 0; l < nthreads; l++) { 496 final int firstRow = l * p; 497 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 498 futures[l] = ConcurrencyUtils.submit(new Runnable() { 499 public void run() { 500 double[] temp = new double[slices]; 501 for (int r = firstRow; r < lastRow; r++) { 502 int idx1 = r * rowStride; 503 for (int c = 0; c < columns; c++) { 504 for (int s = 0; s < slices; s++) { 505 int idx3 = s * sliceStride + idx1 + c; 506 temp[s] = a[idx3]; 507 } 508 dstSlices.inverse(temp, scale); 509 for (int s = 0; s < slices; s++) { 510 int idx3 = s * sliceStride + idx1 + c; 511 a[idx3] = temp[s]; 512 } 513 } 514 } 515 } 516 }); 517 } 518 ConcurrencyUtils.waitForCompletion(futures); 519 520 } else { 521 for (int s = 0; s < slices; s++) { 522 int idx1 = s * sliceStride; 523 for (int r = 0; r < rows; r++) { 524 dstColumns.inverse(a, idx1 + r * rowStride, scale); 525 } 526 } 527 double[] temp = new double[rows]; 528 for (int s = 0; s < slices; s++) { 529 int idx1 = s * sliceStride; 530 for (int c = 0; c < columns; c++) { 531 for (int r = 0; r < rows; r++) { 532 int idx3 = idx1 + r * rowStride + c; 533 temp[r] = a[idx3]; 534 } 535 dstRows.inverse(temp, scale); 536 for (int r = 0; r < rows; r++) { 537 int idx3 = idx1 + r * rowStride + c; 538 a[idx3] = temp[r]; 539 } 540 } 541 } 542 temp = new double[slices]; 543 for (int r = 0; r < rows; r++) { 544 int idx1 = r * rowStride; 545 for (int c = 0; c < columns; c++) { 546 for (int s = 0; s < slices; s++) { 547 int idx3 = s * sliceStride + idx1 + c; 548 temp[s] = a[idx3]; 549 } 550 dstSlices.inverse(temp, scale); 551 for (int s = 0; s < slices; s++) { 552 int idx3 = s * sliceStride + idx1 + c; 553 a[idx3] = temp[s]; 554 } 555 } 556 } 557 } 558 } 559 560 } 561 562 /** 563 * Computes the 3D inverse DST (DST-III) leaving the result in 564 * <code>a</code>. The data is stored in 3D array. 565 * 566 * @param a 567 * data to transform 568 * @param scale 569 * if true then scaling is performed 570 */ inverse(final double[][][] a, final boolean scale)571 public void inverse(final double[][][] a, final boolean scale) { 572 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 573 if (isPowerOfTwo) { 574 if (nthreads != oldNthreads) { 575 nt = slices; 576 if (nt < rows) { 577 nt = rows; 578 } 579 nt *= 4; 580 if (nthreads > 1) { 581 nt *= nthreads; 582 } 583 if (columns == 2) { 584 nt >>= 1; 585 } 586 t = new double[nt]; 587 oldNthreads = nthreads; 588 } 589 if ((nthreads > 1) && useThreads) { 590 ddxt3da_subth(1, a, scale); 591 ddxt3db_subth(1, a, scale); 592 } else { 593 ddxt3da_sub(1, a, scale); 594 ddxt3db_sub(1, a, scale); 595 } 596 } else { 597 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 598 Future<?>[] futures = new Future[nthreads]; 599 int p = slices / nthreads; 600 for (int l = 0; l < nthreads; l++) { 601 final int firstSlice = l * p; 602 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 603 futures[l] = ConcurrencyUtils.submit(new Runnable() { 604 public void run() { 605 for (int s = firstSlice; s < lastSlice; s++) { 606 for (int r = 0; r < rows; r++) { 607 dstColumns.inverse(a[s][r], scale); 608 } 609 } 610 } 611 }); 612 } 613 ConcurrencyUtils.waitForCompletion(futures); 614 615 for (int l = 0; l < nthreads; l++) { 616 final int firstSlice = l * p; 617 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 618 futures[l] = ConcurrencyUtils.submit(new Runnable() { 619 public void run() { 620 double[] temp = new double[rows]; 621 for (int s = firstSlice; s < lastSlice; s++) { 622 for (int c = 0; c < columns; c++) { 623 for (int r = 0; r < rows; r++) { 624 temp[r] = a[s][r][c]; 625 } 626 dstRows.inverse(temp, scale); 627 for (int r = 0; r < rows; r++) { 628 a[s][r][c] = temp[r]; 629 } 630 } 631 } 632 } 633 }); 634 } 635 ConcurrencyUtils.waitForCompletion(futures); 636 637 p = rows / nthreads; 638 for (int l = 0; l < nthreads; l++) { 639 final int firstRow = l * p; 640 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 641 futures[l] = ConcurrencyUtils.submit(new Runnable() { 642 public void run() { 643 double[] temp = new double[slices]; 644 for (int r = firstRow; r < lastRow; r++) { 645 for (int c = 0; c < columns; c++) { 646 for (int s = 0; s < slices; s++) { 647 temp[s] = a[s][r][c]; 648 } 649 dstSlices.inverse(temp, scale); 650 for (int s = 0; s < slices; s++) { 651 a[s][r][c] = temp[s]; 652 } 653 } 654 } 655 } 656 }); 657 } 658 ConcurrencyUtils.waitForCompletion(futures); 659 660 } else { 661 for (int s = 0; s < slices; s++) { 662 for (int r = 0; r < rows; r++) { 663 dstColumns.inverse(a[s][r], scale); 664 } 665 } 666 double[] temp = new double[rows]; 667 for (int s = 0; s < slices; s++) { 668 for (int c = 0; c < columns; c++) { 669 for (int r = 0; r < rows; r++) { 670 temp[r] = a[s][r][c]; 671 } 672 dstRows.inverse(temp, scale); 673 for (int r = 0; r < rows; r++) { 674 a[s][r][c] = temp[r]; 675 } 676 } 677 } 678 temp = new double[slices]; 679 for (int r = 0; r < rows; r++) { 680 for (int c = 0; c < columns; c++) { 681 for (int s = 0; s < slices; s++) { 682 temp[s] = a[s][r][c]; 683 } 684 dstSlices.inverse(temp, scale); 685 for (int s = 0; s < slices; s++) { 686 a[s][r][c] = temp[s]; 687 } 688 } 689 } 690 } 691 } 692 } 693 ddxt3da_sub(int isgn, double[] a, boolean scale)694 private void ddxt3da_sub(int isgn, double[] a, boolean scale) { 695 int idx0, idx1, idx2; 696 697 if (isgn == -1) { 698 for (int s = 0; s < slices; s++) { 699 idx0 = s * sliceStride; 700 for (int r = 0; r < rows; r++) { 701 dstColumns.forward(a, idx0 + r * rowStride, scale); 702 } 703 if (columns > 2) { 704 for (int c = 0; c < columns; c += 4) { 705 for (int r = 0; r < rows; r++) { 706 idx1 = idx0 + r * rowStride + c; 707 idx2 = rows + r; 708 t[r] = a[idx1]; 709 t[idx2] = a[idx1 + 1]; 710 t[idx2 + rows] = a[idx1 + 2]; 711 t[idx2 + 2 * rows] = a[idx1 + 3]; 712 } 713 dstRows.forward(t, 0, scale); 714 dstRows.forward(t, rows, scale); 715 dstRows.forward(t, 2 * rows, scale); 716 dstRows.forward(t, 3 * rows, scale); 717 for (int r = 0; r < rows; r++) { 718 idx1 = idx0 + r * rowStride + c; 719 idx2 = rows + r; 720 a[idx1] = t[r]; 721 a[idx1 + 1] = t[idx2]; 722 a[idx1 + 2] = t[idx2 + rows]; 723 a[idx1 + 3] = t[idx2 + 2 * rows]; 724 } 725 } 726 } else if (columns == 2) { 727 for (int r = 0; r < rows; r++) { 728 idx1 = idx0 + r * rowStride; 729 t[r] = a[idx1]; 730 t[rows + r] = a[idx1 + 1]; 731 } 732 dstRows.forward(t, 0, scale); 733 dstRows.forward(t, rows, scale); 734 for (int r = 0; r < rows; r++) { 735 idx1 = idx0 + r * rowStride; 736 a[idx1] = t[r]; 737 a[idx1 + 1] = t[rows + r]; 738 } 739 } 740 } 741 } else { 742 for (int s = 0; s < slices; s++) { 743 idx0 = s * sliceStride; 744 for (int r = 0; r < rows; r++) { 745 dstColumns.inverse(a, idx0 + r * rowStride, scale); 746 } 747 if (columns > 2) { 748 for (int c = 0; c < columns; c += 4) { 749 for (int r = 0; r < rows; r++) { 750 idx1 = idx0 + r * rowStride + c; 751 idx2 = rows + r; 752 t[r] = a[idx1]; 753 t[idx2] = a[idx1 + 1]; 754 t[idx2 + rows] = a[idx1 + 2]; 755 t[idx2 + 2 * rows] = a[idx1 + 3]; 756 } 757 dstRows.inverse(t, 0, scale); 758 dstRows.inverse(t, rows, scale); 759 dstRows.inverse(t, 2 * rows, scale); 760 dstRows.inverse(t, 3 * rows, scale); 761 for (int r = 0; r < rows; r++) { 762 idx1 = idx0 + r * rowStride + c; 763 idx2 = rows + r; 764 a[idx1] = t[r]; 765 a[idx1 + 1] = t[idx2]; 766 a[idx1 + 2] = t[idx2 + rows]; 767 a[idx1 + 3] = t[idx2 + 2 * rows]; 768 } 769 } 770 } else if (columns == 2) { 771 for (int r = 0; r < rows; r++) { 772 idx1 = idx0 + r * rowStride; 773 t[r] = a[idx1]; 774 t[rows + r] = a[idx1 + 1]; 775 } 776 dstRows.inverse(t, 0, scale); 777 dstRows.inverse(t, rows, scale); 778 for (int r = 0; r < rows; r++) { 779 idx1 = idx0 + r * rowStride; 780 a[idx1] = t[r]; 781 a[idx1 + 1] = t[rows + r]; 782 } 783 } 784 } 785 } 786 } 787 ddxt3da_sub(int isgn, double[][][] a, boolean scale)788 private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) { 789 int idx2; 790 791 if (isgn == -1) { 792 for (int s = 0; s < slices; s++) { 793 for (int r = 0; r < rows; r++) { 794 dstColumns.forward(a[s][r], scale); 795 } 796 if (columns > 2) { 797 for (int c = 0; c < columns; c += 4) { 798 for (int r = 0; r < rows; r++) { 799 idx2 = rows + r; 800 t[r] = a[s][r][c]; 801 t[idx2] = a[s][r][c + 1]; 802 t[idx2 + rows] = a[s][r][c + 2]; 803 t[idx2 + 2 * rows] = a[s][r][c + 3]; 804 } 805 dstRows.forward(t, 0, scale); 806 dstRows.forward(t, rows, scale); 807 dstRows.forward(t, 2 * rows, scale); 808 dstRows.forward(t, 3 * rows, scale); 809 for (int r = 0; r < rows; r++) { 810 idx2 = rows + r; 811 a[s][r][c] = t[r]; 812 a[s][r][c + 1] = t[idx2]; 813 a[s][r][c + 2] = t[idx2 + rows]; 814 a[s][r][c + 3] = t[idx2 + 2 * rows]; 815 } 816 } 817 } else if (columns == 2) { 818 for (int r = 0; r < rows; r++) { 819 t[r] = a[s][r][0]; 820 t[rows + r] = a[s][r][1]; 821 } 822 dstRows.forward(t, 0, scale); 823 dstRows.forward(t, rows, scale); 824 for (int r = 0; r < rows; r++) { 825 a[s][r][0] = t[r]; 826 a[s][r][1] = t[rows + r]; 827 } 828 } 829 } 830 } else { 831 for (int s = 0; s < slices; s++) { 832 for (int r = 0; r < rows; r++) { 833 dstColumns.inverse(a[s][r], scale); 834 } 835 if (columns > 2) { 836 for (int c = 0; c < columns; c += 4) { 837 for (int r = 0; r < rows; r++) { 838 idx2 = rows + r; 839 t[r] = a[s][r][c]; 840 t[idx2] = a[s][r][c + 1]; 841 t[idx2 + rows] = a[s][r][c + 2]; 842 t[idx2 + 2 * rows] = a[s][r][c + 3]; 843 } 844 dstRows.inverse(t, 0, scale); 845 dstRows.inverse(t, rows, scale); 846 dstRows.inverse(t, 2 * rows, scale); 847 dstRows.inverse(t, 3 * rows, scale); 848 for (int r = 0; r < rows; r++) { 849 idx2 = rows + r; 850 a[s][r][c] = t[r]; 851 a[s][r][c + 1] = t[idx2]; 852 a[s][r][c + 2] = t[idx2 + rows]; 853 a[s][r][c + 3] = t[idx2 + 2 * rows]; 854 } 855 } 856 } else if (columns == 2) { 857 for (int r = 0; r < rows; r++) { 858 t[r] = a[s][r][0]; 859 t[rows + r] = a[s][r][1]; 860 } 861 dstRows.inverse(t, 0, scale); 862 dstRows.inverse(t, rows, scale); 863 for (int r = 0; r < rows; r++) { 864 a[s][r][0] = t[r]; 865 a[s][r][1] = t[rows + r]; 866 } 867 } 868 } 869 } 870 } 871 ddxt3db_sub(int isgn, double[] a, boolean scale)872 private void ddxt3db_sub(int isgn, double[] a, boolean scale) { 873 int idx0, idx1, idx2; 874 875 if (isgn == -1) { 876 if (columns > 2) { 877 for (int r = 0; r < rows; r++) { 878 idx0 = r * rowStride; 879 for (int c = 0; c < columns; c += 4) { 880 for (int s = 0; s < slices; s++) { 881 idx1 = s * sliceStride + idx0 + c; 882 idx2 = slices + s; 883 t[s] = a[idx1]; 884 t[idx2] = a[idx1 + 1]; 885 t[idx2 + slices] = a[idx1 + 2]; 886 t[idx2 + 2 * slices] = a[idx1 + 3]; 887 } 888 dstSlices.forward(t, 0, scale); 889 dstSlices.forward(t, slices, scale); 890 dstSlices.forward(t, 2 * slices, scale); 891 dstSlices.forward(t, 3 * slices, scale); 892 for (int s = 0; s < slices; s++) { 893 idx1 = s * sliceStride + idx0 + c; 894 idx2 = slices + s; 895 a[idx1] = t[s]; 896 a[idx1 + 1] = t[idx2]; 897 a[idx1 + 2] = t[idx2 + slices]; 898 a[idx1 + 3] = t[idx2 + 2 * slices]; 899 } 900 } 901 } 902 } else if (columns == 2) { 903 for (int r = 0; r < rows; r++) { 904 idx0 = r * rowStride; 905 for (int s = 0; s < slices; s++) { 906 idx1 = s * sliceStride + idx0; 907 t[s] = a[idx1]; 908 t[slices + s] = a[idx1 + 1]; 909 } 910 dstSlices.forward(t, 0, scale); 911 dstSlices.forward(t, slices, scale); 912 for (int s = 0; s < slices; s++) { 913 idx1 = s * sliceStride + idx0; 914 a[idx1] = t[s]; 915 a[idx1 + 1] = t[slices + s]; 916 } 917 } 918 } 919 } else { 920 if (columns > 2) { 921 for (int r = 0; r < rows; r++) { 922 idx0 = r * rowStride; 923 for (int c = 0; c < columns; c += 4) { 924 for (int s = 0; s < slices; s++) { 925 idx1 = s * sliceStride + idx0 + c; 926 idx2 = slices + s; 927 t[s] = a[idx1]; 928 t[idx2] = a[idx1 + 1]; 929 t[idx2 + slices] = a[idx1 + 2]; 930 t[idx2 + 2 * slices] = a[idx1 + 3]; 931 } 932 dstSlices.inverse(t, 0, scale); 933 dstSlices.inverse(t, slices, scale); 934 dstSlices.inverse(t, 2 * slices, scale); 935 dstSlices.inverse(t, 3 * slices, scale); 936 937 for (int s = 0; s < slices; s++) { 938 idx1 = s * sliceStride + idx0 + c; 939 idx2 = slices + s; 940 a[idx1] = t[s]; 941 a[idx1 + 1] = t[idx2]; 942 a[idx1 + 2] = t[idx2 + slices]; 943 a[idx1 + 3] = t[idx2 + 2 * slices]; 944 } 945 } 946 } 947 } else if (columns == 2) { 948 for (int r = 0; r < rows; r++) { 949 idx0 = r * rowStride; 950 for (int s = 0; s < slices; s++) { 951 idx1 = s * sliceStride + idx0; 952 t[s] = a[idx1]; 953 t[slices + s] = a[idx1 + 1]; 954 } 955 dstSlices.inverse(t, 0, scale); 956 dstSlices.inverse(t, slices, scale); 957 for (int s = 0; s < slices; s++) { 958 idx1 = s * sliceStride + idx0; 959 a[idx1] = t[s]; 960 a[idx1 + 1] = t[slices + s]; 961 } 962 } 963 } 964 } 965 } 966 ddxt3db_sub(int isgn, double[][][] a, boolean scale)967 private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) { 968 int idx2; 969 970 if (isgn == -1) { 971 if (columns > 2) { 972 for (int r = 0; r < rows; r++) { 973 for (int c = 0; c < columns; c += 4) { 974 for (int s = 0; s < slices; s++) { 975 idx2 = slices + s; 976 t[s] = a[s][r][c]; 977 t[idx2] = a[s][r][c + 1]; 978 t[idx2 + slices] = a[s][r][c + 2]; 979 t[idx2 + 2 * slices] = a[s][r][c + 3]; 980 } 981 dstSlices.forward(t, 0, scale); 982 dstSlices.forward(t, slices, scale); 983 dstSlices.forward(t, 2 * slices, scale); 984 dstSlices.forward(t, 3 * slices, scale); 985 for (int s = 0; s < slices; s++) { 986 idx2 = slices + s; 987 a[s][r][c] = t[s]; 988 a[s][r][c + 1] = t[idx2]; 989 a[s][r][c + 2] = t[idx2 + slices]; 990 a[s][r][c + 3] = t[idx2 + 2 * slices]; 991 } 992 } 993 } 994 } else if (columns == 2) { 995 for (int r = 0; r < rows; r++) { 996 for (int s = 0; s < slices; s++) { 997 t[s] = a[s][r][0]; 998 t[slices + s] = a[s][r][1]; 999 } 1000 dstSlices.forward(t, 0, scale); 1001 dstSlices.forward(t, slices, scale); 1002 for (int s = 0; s < slices; s++) { 1003 a[s][r][0] = t[s]; 1004 a[s][r][1] = t[slices + s]; 1005 } 1006 } 1007 } 1008 } else { 1009 if (columns > 2) { 1010 for (int r = 0; r < rows; r++) { 1011 for (int c = 0; c < columns; c += 4) { 1012 for (int s = 0; s < slices; s++) { 1013 idx2 = slices + s; 1014 t[s] = a[s][r][c]; 1015 t[idx2] = a[s][r][c + 1]; 1016 t[idx2 + slices] = a[s][r][c + 2]; 1017 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1018 } 1019 dstSlices.inverse(t, 0, scale); 1020 dstSlices.inverse(t, slices, scale); 1021 dstSlices.inverse(t, 2 * slices, scale); 1022 dstSlices.inverse(t, 3 * slices, scale); 1023 1024 for (int s = 0; s < slices; s++) { 1025 idx2 = slices + s; 1026 a[s][r][c] = t[s]; 1027 a[s][r][c + 1] = t[idx2]; 1028 a[s][r][c + 2] = t[idx2 + slices]; 1029 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1030 } 1031 } 1032 } 1033 } else if (columns == 2) { 1034 for (int r = 0; r < rows; r++) { 1035 for (int s = 0; s < slices; s++) { 1036 t[s] = a[s][r][0]; 1037 t[slices + s] = a[s][r][1]; 1038 } 1039 dstSlices.inverse(t, 0, scale); 1040 dstSlices.inverse(t, slices, scale); 1041 for (int s = 0; s < slices; s++) { 1042 a[s][r][0] = t[s]; 1043 a[s][r][1] = t[slices + s]; 1044 } 1045 } 1046 } 1047 } 1048 } 1049 ddxt3da_subth(final int isgn, final double[] a, final boolean scale)1050 private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) { 1051 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1052 1053 int nt = 4 * rows; 1054 if (columns == 2) { 1055 nt >>= 1; 1056 } 1057 Future<?>[] futures = new Future[nthreads]; 1058 1059 for (int i = 0; i < nthreads; i++) { 1060 final int n0 = i; 1061 final int startt = nt * i; 1062 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1063 1064 public void run() { 1065 int idx0, idx1, idx2; 1066 if (isgn == -1) { 1067 for (int s = n0; s < slices; s += nthreads) { 1068 idx0 = s * sliceStride; 1069 for (int r = 0; r < rows; r++) { 1070 dstColumns.forward(a, idx0 + r * rowStride, scale); 1071 } 1072 if (columns > 2) { 1073 for (int c = 0; c < columns; c += 4) { 1074 for (int r = 0; r < rows; r++) { 1075 idx1 = idx0 + r * rowStride + c; 1076 idx2 = startt + rows + r; 1077 t[startt + r] = a[idx1]; 1078 t[idx2] = a[idx1 + 1]; 1079 t[idx2 + rows] = a[idx1 + 2]; 1080 t[idx2 + 2 * rows] = a[idx1 + 3]; 1081 } 1082 dstRows.forward(t, startt, scale); 1083 dstRows.forward(t, startt + rows, scale); 1084 dstRows.forward(t, startt + 2 * rows, scale); 1085 dstRows.forward(t, startt + 3 * rows, scale); 1086 for (int r = 0; r < rows; r++) { 1087 idx1 = idx0 + r * rowStride + c; 1088 idx2 = startt + rows + r; 1089 a[idx1] = t[startt + r]; 1090 a[idx1 + 1] = t[idx2]; 1091 a[idx1 + 2] = t[idx2 + rows]; 1092 a[idx1 + 3] = t[idx2 + 2 * rows]; 1093 } 1094 } 1095 } else if (columns == 2) { 1096 for (int r = 0; r < rows; r++) { 1097 idx1 = idx0 + r * rowStride; 1098 t[startt + r] = a[idx1]; 1099 t[startt + rows + r] = a[idx1 + 1]; 1100 } 1101 dstRows.forward(t, startt, scale); 1102 dstRows.forward(t, startt + rows, scale); 1103 for (int r = 0; r < rows; r++) { 1104 idx1 = idx0 + r * rowStride; 1105 a[idx1] = t[startt + r]; 1106 a[idx1 + 1] = t[startt + rows + r]; 1107 } 1108 } 1109 } 1110 } else { 1111 for (int s = n0; s < slices; s += nthreads) { 1112 idx0 = s * sliceStride; 1113 for (int r = 0; r < rows; r++) { 1114 dstColumns.inverse(a, idx0 + r * rowStride, scale); 1115 } 1116 if (columns > 2) { 1117 for (int c = 0; c < columns; c += 4) { 1118 for (int r = 0; r < rows; r++) { 1119 idx1 = idx0 + r * rowStride + c; 1120 idx2 = startt + rows + r; 1121 t[startt + r] = a[idx1]; 1122 t[idx2] = a[idx1 + 1]; 1123 t[idx2 + rows] = a[idx1 + 2]; 1124 t[idx2 + 2 * rows] = a[idx1 + 3]; 1125 } 1126 dstRows.inverse(t, startt, scale); 1127 dstRows.inverse(t, startt + rows, scale); 1128 dstRows.inverse(t, startt + 2 * rows, scale); 1129 dstRows.inverse(t, startt + 3 * rows, scale); 1130 for (int r = 0; r < rows; r++) { 1131 idx1 = idx0 + r * rowStride + c; 1132 idx2 = startt + rows + r; 1133 a[idx1] = t[startt + r]; 1134 a[idx1 + 1] = t[idx2]; 1135 a[idx1 + 2] = t[idx2 + rows]; 1136 a[idx1 + 3] = t[idx2 + 2 * rows]; 1137 } 1138 } 1139 } else if (columns == 2) { 1140 for (int r = 0; r < rows; r++) { 1141 idx1 = idx0 + r * rowStride; 1142 t[startt + r] = a[idx1]; 1143 t[startt + rows + r] = a[idx1 + 1]; 1144 } 1145 dstRows.inverse(t, startt, scale); 1146 dstRows.inverse(t, startt + rows, scale); 1147 for (int r = 0; r < rows; r++) { 1148 idx1 = idx0 + r * rowStride; 1149 a[idx1] = t[startt + r]; 1150 a[idx1 + 1] = t[startt + rows + r]; 1151 } 1152 } 1153 } 1154 } 1155 } 1156 }); 1157 } 1158 ConcurrencyUtils.waitForCompletion(futures); 1159 } 1160 ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale)1161 private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) { 1162 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1163 int nt = 4 * rows; 1164 if (columns == 2) { 1165 nt >>= 1; 1166 } 1167 Future<?>[] futures = new Future[nthreads]; 1168 1169 for (int i = 0; i < nthreads; i++) { 1170 final int n0 = i; 1171 final int startt = nt * i; 1172 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1173 1174 public void run() { 1175 int idx2; 1176 if (isgn == -1) { 1177 for (int s = n0; s < slices; s += nthreads) { 1178 for (int r = 0; r < rows; r++) { 1179 dstColumns.forward(a[s][r], scale); 1180 } 1181 if (columns > 2) { 1182 for (int c = 0; c < columns; c += 4) { 1183 for (int r = 0; r < rows; r++) { 1184 idx2 = startt + rows + r; 1185 t[startt + r] = a[s][r][c]; 1186 t[idx2] = a[s][r][c + 1]; 1187 t[idx2 + rows] = a[s][r][c + 2]; 1188 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1189 } 1190 dstRows.forward(t, startt, scale); 1191 dstRows.forward(t, startt + rows, scale); 1192 dstRows.forward(t, startt + 2 * rows, scale); 1193 dstRows.forward(t, startt + 3 * rows, scale); 1194 for (int r = 0; r < rows; r++) { 1195 idx2 = startt + rows + r; 1196 a[s][r][c] = t[startt + r]; 1197 a[s][r][c + 1] = t[idx2]; 1198 a[s][r][c + 2] = t[idx2 + rows]; 1199 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1200 } 1201 } 1202 } else if (columns == 2) { 1203 for (int r = 0; r < rows; r++) { 1204 t[startt + r] = a[s][r][0]; 1205 t[startt + rows + r] = a[s][r][1]; 1206 } 1207 dstRows.forward(t, startt, scale); 1208 dstRows.forward(t, startt + rows, scale); 1209 for (int r = 0; r < rows; r++) { 1210 a[s][r][0] = t[startt + r]; 1211 a[s][r][1] = t[startt + rows + r]; 1212 } 1213 } 1214 } 1215 } else { 1216 for (int s = n0; s < slices; s += nthreads) { 1217 for (int r = 0; r < rows; r++) { 1218 dstColumns.inverse(a[s][r], scale); 1219 } 1220 if (columns > 2) { 1221 for (int c = 0; c < columns; c += 4) { 1222 for (int r = 0; r < rows; r++) { 1223 idx2 = startt + rows + r; 1224 t[startt + r] = a[s][r][c]; 1225 t[idx2] = a[s][r][c + 1]; 1226 t[idx2 + rows] = a[s][r][c + 2]; 1227 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1228 } 1229 dstRows.inverse(t, startt, scale); 1230 dstRows.inverse(t, startt + rows, scale); 1231 dstRows.inverse(t, startt + 2 * rows, scale); 1232 dstRows.inverse(t, startt + 3 * rows, scale); 1233 for (int r = 0; r < rows; r++) { 1234 idx2 = startt + rows + r; 1235 a[s][r][c] = t[startt + r]; 1236 a[s][r][c + 1] = t[idx2]; 1237 a[s][r][c + 2] = t[idx2 + rows]; 1238 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1239 } 1240 } 1241 } else if (columns == 2) { 1242 for (int r = 0; r < rows; r++) { 1243 t[startt + r] = a[s][r][0]; 1244 t[startt + rows + r] = a[s][r][1]; 1245 } 1246 dstRows.inverse(t, startt, scale); 1247 dstRows.inverse(t, startt + rows, scale); 1248 for (int r = 0; r < rows; r++) { 1249 a[s][r][0] = t[startt + r]; 1250 a[s][r][1] = t[startt + rows + r]; 1251 } 1252 } 1253 } 1254 } 1255 } 1256 }); 1257 } 1258 ConcurrencyUtils.waitForCompletion(futures); 1259 } 1260 ddxt3db_subth(final int isgn, final double[] a, final boolean scale)1261 private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) { 1262 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1263 int nt = 4 * slices; 1264 if (columns == 2) { 1265 nt >>= 1; 1266 } 1267 Future<?>[] futures = new Future[nthreads]; 1268 1269 for (int i = 0; i < nthreads; i++) { 1270 final int n0 = i; 1271 final int startt = nt * i; 1272 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1273 1274 public void run() { 1275 int idx0, idx1, idx2; 1276 if (isgn == -1) { 1277 if (columns > 2) { 1278 for (int r = n0; r < rows; r += nthreads) { 1279 idx0 = r * rowStride; 1280 for (int c = 0; c < columns; c += 4) { 1281 for (int s = 0; s < slices; s++) { 1282 idx1 = s * sliceStride + idx0 + c; 1283 idx2 = startt + slices + s; 1284 t[startt + s] = a[idx1]; 1285 t[idx2] = a[idx1 + 1]; 1286 t[idx2 + slices] = a[idx1 + 2]; 1287 t[idx2 + 2 * slices] = a[idx1 + 3]; 1288 } 1289 dstSlices.forward(t, startt, scale); 1290 dstSlices.forward(t, startt + slices, scale); 1291 dstSlices.forward(t, startt + 2 * slices, scale); 1292 dstSlices.forward(t, startt + 3 * slices, scale); 1293 for (int s = 0; s < slices; s++) { 1294 idx1 = s * sliceStride + idx0 + c; 1295 idx2 = startt + slices + s; 1296 a[idx1] = t[startt + s]; 1297 a[idx1 + 1] = t[idx2]; 1298 a[idx1 + 2] = t[idx2 + slices]; 1299 a[idx1 + 3] = t[idx2 + 2 * slices]; 1300 } 1301 } 1302 } 1303 } else if (columns == 2) { 1304 for (int r = n0; r < rows; r += nthreads) { 1305 idx0 = r * rowStride; 1306 for (int s = 0; s < slices; s++) { 1307 idx1 = s * sliceStride + idx0; 1308 t[startt + s] = a[idx1]; 1309 t[startt + slices + s] = a[idx1 + 1]; 1310 } 1311 dstSlices.forward(t, startt, scale); 1312 dstSlices.forward(t, startt + slices, scale); 1313 for (int s = 0; s < slices; s++) { 1314 idx1 = s * sliceStride + idx0; 1315 a[idx1] = t[startt + s]; 1316 a[idx1 + 1] = t[startt + slices + s]; 1317 } 1318 } 1319 } 1320 } else { 1321 if (columns > 2) { 1322 for (int r = n0; r < rows; r += nthreads) { 1323 idx0 = r * rowStride; 1324 for (int c = 0; c < columns; c += 4) { 1325 for (int s = 0; s < slices; s++) { 1326 idx1 = s * sliceStride + idx0 + c; 1327 idx2 = startt + slices + s; 1328 t[startt + s] = a[idx1]; 1329 t[idx2] = a[idx1 + 1]; 1330 t[idx2 + slices] = a[idx1 + 2]; 1331 t[idx2 + 2 * slices] = a[idx1 + 3]; 1332 } 1333 dstSlices.inverse(t, startt, scale); 1334 dstSlices.inverse(t, startt + slices, scale); 1335 dstSlices.inverse(t, startt + 2 * slices, scale); 1336 dstSlices.inverse(t, startt + 3 * slices, scale); 1337 for (int s = 0; s < slices; s++) { 1338 idx1 = s * sliceStride + idx0 + c; 1339 idx2 = startt + slices + s; 1340 a[idx1] = t[startt + s]; 1341 a[idx1 + 1] = t[idx2]; 1342 a[idx1 + 2] = t[idx2 + slices]; 1343 a[idx1 + 3] = t[idx2 + 2 * slices]; 1344 } 1345 } 1346 } 1347 } else if (columns == 2) { 1348 for (int r = n0; r < rows; r += nthreads) { 1349 idx0 = r * rowStride; 1350 for (int s = 0; s < slices; s++) { 1351 idx1 = s * sliceStride + idx0; 1352 t[startt + s] = a[idx1]; 1353 t[startt + slices + s] = a[idx1 + 1]; 1354 } 1355 dstSlices.inverse(t, startt, scale); 1356 dstSlices.inverse(t, startt + slices, scale); 1357 1358 for (int s = 0; s < slices; s++) { 1359 idx1 = s * sliceStride + idx0; 1360 a[idx1] = t[startt + s]; 1361 a[idx1 + 1] = t[startt + slices + s]; 1362 } 1363 } 1364 } 1365 } 1366 } 1367 }); 1368 } 1369 ConcurrencyUtils.waitForCompletion(futures); 1370 } 1371 ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale)1372 private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) { 1373 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1374 int nt = 4 * slices; 1375 if (columns == 2) { 1376 nt >>= 1; 1377 } 1378 Future<?>[] futures = new Future[nthreads]; 1379 1380 for (int i = 0; i < nthreads; i++) { 1381 final int n0 = i; 1382 final int startt = nt * i; 1383 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1384 1385 public void run() { 1386 int idx2; 1387 if (isgn == -1) { 1388 if (columns > 2) { 1389 for (int r = n0; r < rows; r += nthreads) { 1390 for (int c = 0; c < columns; c += 4) { 1391 for (int s = 0; s < slices; s++) { 1392 idx2 = startt + slices + s; 1393 t[startt + s] = a[s][r][c]; 1394 t[idx2] = a[s][r][c + 1]; 1395 t[idx2 + slices] = a[s][r][c + 2]; 1396 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1397 } 1398 dstSlices.forward(t, startt, scale); 1399 dstSlices.forward(t, startt + slices, scale); 1400 dstSlices.forward(t, startt + 2 * slices, scale); 1401 dstSlices.forward(t, startt + 3 * slices, scale); 1402 for (int s = 0; s < slices; s++) { 1403 idx2 = startt + slices + s; 1404 a[s][r][c] = t[startt + s]; 1405 a[s][r][c + 1] = t[idx2]; 1406 a[s][r][c + 2] = t[idx2 + slices]; 1407 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1408 } 1409 } 1410 } 1411 } else if (columns == 2) { 1412 for (int r = n0; r < rows; r += nthreads) { 1413 for (int s = 0; s < slices; s++) { 1414 t[startt + s] = a[s][r][0]; 1415 t[startt + slices + s] = a[s][r][1]; 1416 } 1417 dstSlices.forward(t, startt, scale); 1418 dstSlices.forward(t, startt + slices, scale); 1419 for (int s = 0; s < slices; s++) { 1420 a[s][r][0] = t[startt + s]; 1421 a[s][r][1] = t[startt + slices + s]; 1422 } 1423 } 1424 } 1425 } else { 1426 if (columns > 2) { 1427 for (int r = n0; r < rows; r += nthreads) { 1428 for (int c = 0; c < columns; c += 4) { 1429 for (int s = 0; s < slices; s++) { 1430 idx2 = startt + slices + s; 1431 t[startt + s] = a[s][r][c]; 1432 t[idx2] = a[s][r][c + 1]; 1433 t[idx2 + slices] = a[s][r][c + 2]; 1434 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1435 } 1436 dstSlices.inverse(t, startt, scale); 1437 dstSlices.inverse(t, startt + slices, scale); 1438 dstSlices.inverse(t, startt + 2 * slices, scale); 1439 dstSlices.inverse(t, startt + 3 * slices, scale); 1440 for (int s = 0; s < slices; s++) { 1441 idx2 = startt + slices + s; 1442 a[s][r][c] = t[startt + s]; 1443 a[s][r][c + 1] = t[idx2]; 1444 a[s][r][c + 2] = t[idx2 + slices]; 1445 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1446 } 1447 } 1448 } 1449 } else if (columns == 2) { 1450 for (int r = n0; r < rows; r += nthreads) { 1451 for (int s = 0; s < slices; s++) { 1452 t[startt + s] = a[s][r][0]; 1453 t[startt + slices + s] = a[s][r][1]; 1454 } 1455 dstSlices.inverse(t, startt, scale); 1456 dstSlices.inverse(t, startt + slices, scale); 1457 1458 for (int s = 0; s < slices; s++) { 1459 a[s][r][0] = t[startt + s]; 1460 a[s][r][1] = t[startt + slices + s]; 1461 } 1462 } 1463 } 1464 } 1465 } 1466 }); 1467 } 1468 ConcurrencyUtils.waitForCompletion(futures); 1469 } 1470 } 1471