1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * The contents of this file are subject to the Mozilla Public License Version 5 * 1.1 (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * http://www.mozilla.org/MPL/ 8 * 9 * Software distributed under the License is distributed on an "AS IS" basis, 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 * for the specific language governing rights and limitations under the 12 * License. 13 * 14 * The Original Code is JTransforms. 15 * 16 * The Initial Developer of the Original Code is 17 * Piotr Wendykier, Emory University. 18 * Portions created by the Initial Developer are Copyright (C) 2007-2009 19 * the Initial Developer. All Rights Reserved. 20 * 21 * Alternatively, the contents of this file may be used under the terms of 22 * either the GNU General Public License Version 2 or later (the "GPL"), or 23 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 24 * in which case the provisions of the GPL or the LGPL are applicable instead 25 * of those above. If you wish to allow use of your version of this file only 26 * under the terms of either the GPL or the LGPL, and not to allow others to 27 * use your version of this file under the terms of the MPL, indicate your 28 * decision by deleting the provisions above and replace them with the notice 29 * and other provisions required by the GPL or the LGPL. If you do not delete 30 * the provisions above, a recipient may use your version of this file under 31 * the terms of any one of the MPL, the GPL or the LGPL. 32 * 33 * ***** END LICENSE BLOCK ***** */ 34 35 package edu.emory.mathcs.jtransforms.dct; 36 37 import java.util.concurrent.Future; 38 39 import edu.emory.mathcs.utils.ConcurrencyUtils; 40 41 /** 42 * Computes 3D Discrete Cosine Transform (DCT) of double precision data. The 43 * sizes of all three dimensions can be arbitrary numbers. This is a parallel 44 * implementation of split-radix and mixed-radix algorithms optimized for SMP 45 * systems. <br> 46 * <br> 47 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura 48 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 49 * 50 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 51 * 52 */ 53 public class DoubleDCT_3D { 54 55 private int slices; 56 57 private int rows; 58 59 private int columns; 60 61 private int sliceStride; 62 63 private int rowStride; 64 65 private double[] t; 66 67 private DoubleDCT_1D dctSlices, dctRows, dctColumns; 68 69 private int oldNthreads; 70 71 private int nt; 72 73 private boolean isPowerOfTwo = false; 74 75 private boolean useThreads = false; 76 77 /** 78 * Creates new instance of DoubleDCT_3D. 79 * 80 * @param slices 81 * number of slices 82 * @param rows 83 * number of rows 84 * @param columns 85 * number of columns 86 */ DoubleDCT_3D(int slices, int rows, int columns)87 public DoubleDCT_3D(int slices, int rows, int columns) { 88 if (slices <= 1 || rows <= 1 || columns <= 1) { 89 throw new IllegalArgumentException("slices, rows and columns must be greater than 1"); 90 } 91 this.slices = slices; 92 this.rows = rows; 93 this.columns = columns; 94 this.sliceStride = rows * columns; 95 this.rowStride = columns; 96 if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) { 97 this.useThreads = true; 98 } 99 if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 100 isPowerOfTwo = true; 101 102 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 103 nt = slices; 104 if (nt < rows) { 105 nt = rows; 106 } 107 nt *= 4; 108 if (oldNthreads > 1) { 109 nt *= oldNthreads; 110 } 111 if (columns == 2) { 112 nt >>= 1; 113 } 114 t = new double[nt]; 115 } 116 dctSlices = new DoubleDCT_1D(slices); 117 if (slices == rows) { 118 dctRows = dctSlices; 119 } else { 120 dctRows = new DoubleDCT_1D(rows); 121 } 122 if (slices == columns) { 123 dctColumns = dctSlices; 124 } else if (rows == columns) { 125 dctColumns = dctRows; 126 } else { 127 dctColumns = new DoubleDCT_1D(columns); 128 } 129 } 130 131 /** 132 * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code> 133 * . The data is stored in 1D array addressed in slice-major, then 134 * row-major, then column-major, in order of significance, i.e. the element 135 * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride 136 * + j*rowStride + k], where sliceStride = rows * columns and rowStride = 137 * columns. 138 * 139 * @param a 140 * data to transform 141 * @param scale 142 * if true then scaling is performed 143 */ forward(final double[] a, final boolean scale)144 public void forward(final double[] a, final boolean scale) { 145 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 146 if (isPowerOfTwo) { 147 if (nthreads != oldNthreads) { 148 nt = slices; 149 if (nt < rows) { 150 nt = rows; 151 } 152 nt *= 4; 153 if (nthreads > 1) { 154 nt *= nthreads; 155 } 156 if (columns == 2) { 157 nt >>= 1; 158 } 159 t = new double[nt]; 160 oldNthreads = nthreads; 161 } 162 if ((nthreads > 1) && useThreads) { 163 ddxt3da_subth(-1, a, scale); 164 ddxt3db_subth(-1, a, scale); 165 } else { 166 ddxt3da_sub(-1, a, scale); 167 ddxt3db_sub(-1, a, scale); 168 } 169 } else { 170 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 171 Future<?>[] futures = new Future[nthreads]; 172 int p = slices / nthreads; 173 for (int l = 0; l < nthreads; l++) { 174 final int firstSlice = l * p; 175 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 176 futures[l] = ConcurrencyUtils.submit(new Runnable() { 177 public void run() { 178 for (int s = firstSlice; s < lastSlice; s++) { 179 int idx1 = s * sliceStride; 180 for (int r = 0; r < rows; r++) { 181 dctColumns.forward(a, idx1 + r * rowStride, scale); 182 } 183 } 184 } 185 }); 186 } 187 ConcurrencyUtils.waitForCompletion(futures); 188 189 for (int l = 0; l < nthreads; l++) { 190 final int firstSlice = l * p; 191 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 192 futures[l] = ConcurrencyUtils.submit(new Runnable() { 193 public void run() { 194 double[] temp = new double[rows]; 195 for (int s = firstSlice; s < lastSlice; s++) { 196 int idx1 = s * sliceStride; 197 for (int c = 0; c < columns; c++) { 198 for (int r = 0; r < rows; r++) { 199 int idx3 = idx1 + r * rowStride + c; 200 temp[r] = a[idx3]; 201 } 202 dctRows.forward(temp, scale); 203 for (int r = 0; r < rows; r++) { 204 int idx3 = idx1 + r * rowStride + c; 205 a[idx3] = temp[r]; 206 } 207 } 208 } 209 } 210 }); 211 } 212 ConcurrencyUtils.waitForCompletion(futures); 213 214 p = rows / nthreads; 215 for (int l = 0; l < nthreads; l++) { 216 final int firstRow = l * p; 217 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 218 futures[l] = ConcurrencyUtils.submit(new Runnable() { 219 public void run() { 220 double[] temp = new double[slices]; 221 for (int r = firstRow; r < lastRow; r++) { 222 int idx1 = r * rowStride; 223 for (int c = 0; c < columns; c++) { 224 for (int s = 0; s < slices; s++) { 225 int idx3 = s * sliceStride + idx1 + c; 226 temp[s] = a[idx3]; 227 } 228 dctSlices.forward(temp, scale); 229 for (int s = 0; s < slices; s++) { 230 int idx3 = s * sliceStride + idx1 + c; 231 a[idx3] = temp[s]; 232 } 233 } 234 } 235 } 236 }); 237 } 238 ConcurrencyUtils.waitForCompletion(futures); 239 240 } else { 241 for (int s = 0; s < slices; s++) { 242 int idx1 = s * sliceStride; 243 for (int r = 0; r < rows; r++) { 244 dctColumns.forward(a, idx1 + r * rowStride, scale); 245 } 246 } 247 double[] temp = new double[rows]; 248 for (int s = 0; s < slices; s++) { 249 int idx1 = s * sliceStride; 250 for (int c = 0; c < columns; c++) { 251 for (int r = 0; r < rows; r++) { 252 int idx3 = idx1 + r * rowStride + c; 253 temp[r] = a[idx3]; 254 } 255 dctRows.forward(temp, scale); 256 for (int r = 0; r < rows; r++) { 257 int idx3 = idx1 + r * rowStride + c; 258 a[idx3] = temp[r]; 259 } 260 } 261 } 262 temp = new double[slices]; 263 for (int r = 0; r < rows; r++) { 264 int idx1 = r * rowStride; 265 for (int c = 0; c < columns; c++) { 266 for (int s = 0; s < slices; s++) { 267 int idx3 = s * sliceStride + idx1 + c; 268 temp[s] = a[idx3]; 269 } 270 dctSlices.forward(temp, scale); 271 for (int s = 0; s < slices; s++) { 272 int idx3 = s * sliceStride + idx1 + c; 273 a[idx3] = temp[s]; 274 } 275 } 276 } 277 } 278 } 279 } 280 281 /** 282 * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code> 283 * . The data is stored in 3D array 284 * 285 * @param a 286 * data to transform 287 * @param scale 288 * if true then scaling is performed 289 */ forward(final double[][][] a, final boolean scale)290 public void forward(final double[][][] a, final boolean scale) { 291 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 292 if (isPowerOfTwo) { 293 if (nthreads != oldNthreads) { 294 nt = slices; 295 if (nt < rows) { 296 nt = rows; 297 } 298 nt *= 4; 299 if (nthreads > 1) { 300 nt *= nthreads; 301 } 302 if (columns == 2) { 303 nt >>= 1; 304 } 305 t = new double[nt]; 306 oldNthreads = nthreads; 307 } 308 if ((nthreads > 1) && useThreads) { 309 ddxt3da_subth(-1, a, scale); 310 ddxt3db_subth(-1, a, scale); 311 } else { 312 ddxt3da_sub(-1, a, scale); 313 ddxt3db_sub(-1, a, scale); 314 } 315 } else { 316 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 317 Future<?>[] futures = new Future[nthreads]; 318 int p = slices / nthreads; 319 for (int l = 0; l < nthreads; l++) { 320 final int firstSlice = l * p; 321 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 322 futures[l] = ConcurrencyUtils.submit(new Runnable() { 323 public void run() { 324 for (int s = firstSlice; s < lastSlice; s++) { 325 for (int r = 0; r < rows; r++) { 326 dctColumns.forward(a[s][r], scale); 327 } 328 } 329 } 330 }); 331 } 332 ConcurrencyUtils.waitForCompletion(futures); 333 334 for (int l = 0; l < nthreads; l++) { 335 final int firstSlice = l * p; 336 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 337 futures[l] = ConcurrencyUtils.submit(new Runnable() { 338 public void run() { 339 double[] temp = new double[rows]; 340 for (int s = firstSlice; s < lastSlice; s++) { 341 for (int c = 0; c < columns; c++) { 342 for (int r = 0; r < rows; r++) { 343 temp[r] = a[s][r][c]; 344 } 345 dctRows.forward(temp, scale); 346 for (int r = 0; r < rows; r++) { 347 a[s][r][c] = temp[r]; 348 } 349 } 350 } 351 } 352 }); 353 } 354 ConcurrencyUtils.waitForCompletion(futures); 355 356 p = rows / nthreads; 357 for (int l = 0; l < nthreads; l++) { 358 final int firstRow = l * p; 359 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 360 futures[l] = ConcurrencyUtils.submit(new Runnable() { 361 public void run() { 362 double[] temp = new double[slices]; 363 for (int r = firstRow; r < lastRow; r++) { 364 for (int c = 0; c < columns; c++) { 365 for (int s = 0; s < slices; s++) { 366 temp[s] = a[s][r][c]; 367 } 368 dctSlices.forward(temp, scale); 369 for (int s = 0; s < slices; s++) { 370 a[s][r][c] = temp[s]; 371 } 372 } 373 } 374 } 375 }); 376 } 377 ConcurrencyUtils.waitForCompletion(futures); 378 379 } else { 380 for (int s = 0; s < slices; s++) { 381 for (int r = 0; r < rows; r++) { 382 dctColumns.forward(a[s][r], scale); 383 } 384 } 385 double[] temp = new double[rows]; 386 for (int s = 0; s < slices; s++) { 387 for (int c = 0; c < columns; c++) { 388 for (int r = 0; r < rows; r++) { 389 temp[r] = a[s][r][c]; 390 } 391 dctRows.forward(temp, scale); 392 for (int r = 0; r < rows; r++) { 393 a[s][r][c] = temp[r]; 394 } 395 } 396 } 397 temp = new double[slices]; 398 for (int r = 0; r < rows; r++) { 399 for (int c = 0; c < columns; c++) { 400 for (int s = 0; s < slices; s++) { 401 temp[s] = a[s][r][c]; 402 } 403 dctSlices.forward(temp, scale); 404 for (int s = 0; s < slices; s++) { 405 a[s][r][c] = temp[s]; 406 } 407 } 408 } 409 } 410 } 411 } 412 413 /** 414 * Computes the 3D inverse DCT (DCT-III) leaving the result in 415 * <code>a</code>. The data is stored in 1D array addressed in slice-major, 416 * then row-major, then column-major, in order of significance, i.e. the 417 * element (i,j,k) of 3D array x[slices][rows][columns] is stored in 418 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns 419 * and rowStride = columns. 420 * 421 * @param a 422 * data to transform 423 * @param scale 424 * if true then scaling is performed 425 */ inverse(final double[] a, final boolean scale)426 public void inverse(final double[] a, final boolean scale) { 427 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 428 if (isPowerOfTwo) { 429 if (nthreads != oldNthreads) { 430 nt = slices; 431 if (nt < rows) { 432 nt = rows; 433 } 434 nt *= 4; 435 if (nthreads > 1) { 436 nt *= nthreads; 437 } 438 if (columns == 2) { 439 nt >>= 1; 440 } 441 t = new double[nt]; 442 oldNthreads = nthreads; 443 } 444 if ((nthreads > 1) && useThreads) { 445 ddxt3da_subth(1, a, scale); 446 ddxt3db_subth(1, a, scale); 447 } else { 448 ddxt3da_sub(1, a, scale); 449 ddxt3db_sub(1, a, scale); 450 } 451 } else { 452 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 453 Future<?>[] futures = new Future[nthreads]; 454 int p = slices / nthreads; 455 for (int l = 0; l < nthreads; l++) { 456 final int firstSlice = l * p; 457 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 458 futures[l] = ConcurrencyUtils.submit(new Runnable() { 459 public void run() { 460 for (int s = firstSlice; s < lastSlice; s++) { 461 int idx1 = s * sliceStride; 462 for (int r = 0; r < rows; r++) { 463 dctColumns.inverse(a, idx1 + r * rowStride, scale); 464 } 465 } 466 } 467 }); 468 } 469 ConcurrencyUtils.waitForCompletion(futures); 470 471 for (int l = 0; l < nthreads; l++) { 472 final int firstSlice = l * p; 473 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 474 futures[l] = ConcurrencyUtils.submit(new Runnable() { 475 public void run() { 476 double[] temp = new double[rows]; 477 for (int s = firstSlice; s < lastSlice; s++) { 478 int idx1 = s * sliceStride; 479 for (int c = 0; c < columns; c++) { 480 for (int r = 0; r < rows; r++) { 481 int idx3 = idx1 + r * rowStride + c; 482 temp[r] = a[idx3]; 483 } 484 dctRows.inverse(temp, scale); 485 for (int r = 0; r < rows; r++) { 486 int idx3 = idx1 + r * rowStride + c; 487 a[idx3] = temp[r]; 488 } 489 } 490 } 491 } 492 }); 493 } 494 ConcurrencyUtils.waitForCompletion(futures); 495 496 p = rows / nthreads; 497 for (int l = 0; l < nthreads; l++) { 498 final int firstRow = l * p; 499 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 500 futures[l] = ConcurrencyUtils.submit(new Runnable() { 501 public void run() { 502 double[] temp = new double[slices]; 503 for (int r = firstRow; r < lastRow; r++) { 504 int idx1 = r * rowStride; 505 for (int c = 0; c < columns; c++) { 506 for (int s = 0; s < slices; s++) { 507 int idx3 = s * sliceStride + idx1 + c; 508 temp[s] = a[idx3]; 509 } 510 dctSlices.inverse(temp, scale); 511 for (int s = 0; s < slices; s++) { 512 int idx3 = s * sliceStride + idx1 + c; 513 a[idx3] = temp[s]; 514 } 515 } 516 } 517 } 518 }); 519 } 520 ConcurrencyUtils.waitForCompletion(futures); 521 522 } else { 523 for (int s = 0; s < slices; s++) { 524 int idx1 = s * sliceStride; 525 for (int r = 0; r < rows; r++) { 526 dctColumns.inverse(a, idx1 + r * rowStride, scale); 527 } 528 } 529 double[] temp = new double[rows]; 530 for (int s = 0; s < slices; s++) { 531 int idx1 = s * sliceStride; 532 for (int c = 0; c < columns; c++) { 533 for (int r = 0; r < rows; r++) { 534 int idx3 = idx1 + r * rowStride + c; 535 temp[r] = a[idx3]; 536 } 537 dctRows.inverse(temp, scale); 538 for (int r = 0; r < rows; r++) { 539 int idx3 = idx1 + r * rowStride + c; 540 a[idx3] = temp[r]; 541 } 542 } 543 } 544 temp = new double[slices]; 545 for (int r = 0; r < rows; r++) { 546 int idx1 = r * rowStride; 547 for (int c = 0; c < columns; c++) { 548 for (int s = 0; s < slices; s++) { 549 int idx3 = s * sliceStride + idx1 + c; 550 temp[s] = a[idx3]; 551 } 552 dctSlices.inverse(temp, scale); 553 for (int s = 0; s < slices; s++) { 554 int idx3 = s * sliceStride + idx1 + c; 555 a[idx3] = temp[s]; 556 } 557 } 558 } 559 } 560 } 561 } 562 563 /** 564 * Computes the 3D inverse DCT (DCT-III) leaving the result in 565 * <code>a</code>. The data is stored in 3D array. 566 * 567 * @param a 568 * data to transform 569 * @param scale 570 * if true then scaling is performed 571 */ inverse(final double[][][] a, final boolean scale)572 public void inverse(final double[][][] a, final boolean scale) { 573 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 574 if (isPowerOfTwo) { 575 if (nthreads != oldNthreads) { 576 nt = slices; 577 if (nt < rows) { 578 nt = rows; 579 } 580 nt *= 4; 581 if (nthreads > 1) { 582 nt *= nthreads; 583 } 584 if (columns == 2) { 585 nt >>= 1; 586 } 587 t = new double[nt]; 588 oldNthreads = nthreads; 589 } 590 if ((nthreads > 1) && useThreads) { 591 ddxt3da_subth(1, a, scale); 592 ddxt3db_subth(1, a, scale); 593 } else { 594 ddxt3da_sub(1, a, scale); 595 ddxt3db_sub(1, a, scale); 596 } 597 } else { 598 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 599 Future<?>[] futures = new Future[nthreads]; 600 int p = slices / nthreads; 601 for (int l = 0; l < nthreads; l++) { 602 final int firstSlice = l * p; 603 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 604 futures[l] = ConcurrencyUtils.submit(new Runnable() { 605 public void run() { 606 for (int s = firstSlice; s < lastSlice; s++) { 607 for (int r = 0; r < rows; r++) { 608 dctColumns.inverse(a[s][r], scale); 609 } 610 } 611 } 612 }); 613 } 614 ConcurrencyUtils.waitForCompletion(futures); 615 616 for (int l = 0; l < nthreads; l++) { 617 final int firstSlice = l * p; 618 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 619 futures[l] = ConcurrencyUtils.submit(new Runnable() { 620 public void run() { 621 double[] temp = new double[rows]; 622 for (int s = firstSlice; s < lastSlice; s++) { 623 for (int c = 0; c < columns; c++) { 624 for (int r = 0; r < rows; r++) { 625 temp[r] = a[s][r][c]; 626 } 627 dctRows.inverse(temp, scale); 628 for (int r = 0; r < rows; r++) { 629 a[s][r][c] = temp[r]; 630 } 631 } 632 } 633 } 634 }); 635 } 636 ConcurrencyUtils.waitForCompletion(futures); 637 638 p = rows / nthreads; 639 for (int l = 0; l < nthreads; l++) { 640 final int firstRow = l * p; 641 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 642 futures[l] = ConcurrencyUtils.submit(new Runnable() { 643 public void run() { 644 double[] temp = new double[slices]; 645 for (int r = firstRow; r < lastRow; r++) { 646 for (int c = 0; c < columns; c++) { 647 for (int s = 0; s < slices; s++) { 648 temp[s] = a[s][r][c]; 649 } 650 dctSlices.inverse(temp, scale); 651 for (int s = 0; s < slices; s++) { 652 a[s][r][c] = temp[s]; 653 } 654 } 655 } 656 } 657 }); 658 } 659 ConcurrencyUtils.waitForCompletion(futures); 660 661 } else { 662 for (int s = 0; s < slices; s++) { 663 for (int r = 0; r < rows; r++) { 664 dctColumns.inverse(a[s][r], scale); 665 } 666 } 667 double[] temp = new double[rows]; 668 for (int s = 0; s < slices; s++) { 669 for (int c = 0; c < columns; c++) { 670 for (int r = 0; r < rows; r++) { 671 temp[r] = a[s][r][c]; 672 } 673 dctRows.inverse(temp, scale); 674 for (int r = 0; r < rows; r++) { 675 a[s][r][c] = temp[r]; 676 } 677 } 678 } 679 temp = new double[slices]; 680 for (int r = 0; r < rows; r++) { 681 for (int c = 0; c < columns; c++) { 682 for (int s = 0; s < slices; s++) { 683 temp[s] = a[s][r][c]; 684 } 685 dctSlices.inverse(temp, scale); 686 for (int s = 0; s < slices; s++) { 687 a[s][r][c] = temp[s]; 688 } 689 } 690 } 691 } 692 } 693 } 694 ddxt3da_sub(int isgn, double[] a, boolean scale)695 private void ddxt3da_sub(int isgn, double[] a, boolean scale) { 696 int idx0, idx1, idx2; 697 698 if (isgn == -1) { 699 for (int s = 0; s < slices; s++) { 700 idx0 = s * sliceStride; 701 for (int r = 0; r < rows; r++) { 702 dctColumns.forward(a, idx0 + r * rowStride, scale); 703 } 704 if (columns > 2) { 705 for (int c = 0; c < columns; c += 4) { 706 for (int r = 0; r < rows; r++) { 707 idx1 = idx0 + r * rowStride + c; 708 idx2 = rows + r; 709 t[r] = a[idx1]; 710 t[idx2] = a[idx1 + 1]; 711 t[idx2 + rows] = a[idx1 + 2]; 712 t[idx2 + 2 * rows] = a[idx1 + 3]; 713 } 714 dctRows.forward(t, 0, scale); 715 dctRows.forward(t, rows, scale); 716 dctRows.forward(t, 2 * rows, scale); 717 dctRows.forward(t, 3 * rows, scale); 718 for (int r = 0; r < rows; r++) { 719 idx1 = idx0 + r * rowStride + c; 720 idx2 = rows + r; 721 a[idx1] = t[r]; 722 a[idx1 + 1] = t[idx2]; 723 a[idx1 + 2] = t[idx2 + rows]; 724 a[idx1 + 3] = t[idx2 + 2 * rows]; 725 } 726 } 727 } else if (columns == 2) { 728 for (int r = 0; r < rows; r++) { 729 idx1 = idx0 + r * rowStride; 730 t[r] = a[idx1]; 731 t[rows + r] = a[idx1 + 1]; 732 } 733 dctRows.forward(t, 0, scale); 734 dctRows.forward(t, rows, scale); 735 for (int r = 0; r < rows; r++) { 736 idx1 = idx0 + r * rowStride; 737 a[idx1] = t[r]; 738 a[idx1 + 1] = t[rows + r]; 739 } 740 } 741 } 742 } else { 743 for (int s = 0; s < slices; s++) { 744 idx0 = s * sliceStride; 745 for (int r = 0; r < rows; r++) { 746 dctColumns.inverse(a, idx0 + r * rowStride, scale); 747 } 748 if (columns > 2) { 749 for (int c = 0; c < columns; c += 4) { 750 for (int r = 0; r < rows; r++) { 751 idx1 = idx0 + r * rowStride + c; 752 idx2 = rows + r; 753 t[r] = a[idx1]; 754 t[idx2] = a[idx1 + 1]; 755 t[idx2 + rows] = a[idx1 + 2]; 756 t[idx2 + 2 * rows] = a[idx1 + 3]; 757 } 758 dctRows.inverse(t, 0, scale); 759 dctRows.inverse(t, rows, scale); 760 dctRows.inverse(t, 2 * rows, scale); 761 dctRows.inverse(t, 3 * rows, scale); 762 for (int r = 0; r < rows; r++) { 763 idx1 = idx0 + r * rowStride + c; 764 idx2 = rows + r; 765 a[idx1] = t[r]; 766 a[idx1 + 1] = t[idx2]; 767 a[idx1 + 2] = t[idx2 + rows]; 768 a[idx1 + 3] = t[idx2 + 2 * rows]; 769 } 770 } 771 } else if (columns == 2) { 772 for (int r = 0; r < rows; r++) { 773 idx1 = idx0 + r * rowStride; 774 t[r] = a[idx1]; 775 t[rows + r] = a[idx1 + 1]; 776 } 777 dctRows.inverse(t, 0, scale); 778 dctRows.inverse(t, rows, scale); 779 for (int r = 0; r < rows; r++) { 780 idx1 = idx0 + r * rowStride; 781 a[idx1] = t[r]; 782 a[idx1 + 1] = t[rows + r]; 783 } 784 } 785 } 786 } 787 } 788 ddxt3da_sub(int isgn, double[][][] a, boolean scale)789 private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) { 790 int idx2; 791 792 if (isgn == -1) { 793 for (int s = 0; s < slices; s++) { 794 for (int r = 0; r < rows; r++) { 795 dctColumns.forward(a[s][r], scale); 796 } 797 if (columns > 2) { 798 for (int c = 0; c < columns; c += 4) { 799 for (int r = 0; r < rows; r++) { 800 idx2 = rows + r; 801 t[r] = a[s][r][c]; 802 t[idx2] = a[s][r][c + 1]; 803 t[idx2 + rows] = a[s][r][c + 2]; 804 t[idx2 + 2 * rows] = a[s][r][c + 3]; 805 } 806 dctRows.forward(t, 0, scale); 807 dctRows.forward(t, rows, scale); 808 dctRows.forward(t, 2 * rows, scale); 809 dctRows.forward(t, 3 * rows, scale); 810 for (int r = 0; r < rows; r++) { 811 idx2 = rows + r; 812 a[s][r][c] = t[r]; 813 a[s][r][c + 1] = t[idx2]; 814 a[s][r][c + 2] = t[idx2 + rows]; 815 a[s][r][c + 3] = t[idx2 + 2 * rows]; 816 } 817 } 818 } else if (columns == 2) { 819 for (int r = 0; r < rows; r++) { 820 t[r] = a[s][r][0]; 821 t[rows + r] = a[s][r][1]; 822 } 823 dctRows.forward(t, 0, scale); 824 dctRows.forward(t, rows, scale); 825 for (int r = 0; r < rows; r++) { 826 a[s][r][0] = t[r]; 827 a[s][r][1] = t[rows + r]; 828 } 829 } 830 } 831 } else { 832 for (int s = 0; s < slices; s++) { 833 for (int r = 0; r < rows; r++) { 834 dctColumns.inverse(a[s][r], scale); 835 } 836 if (columns > 2) { 837 for (int c = 0; c < columns; c += 4) { 838 for (int r = 0; r < rows; r++) { 839 idx2 = rows + r; 840 t[r] = a[s][r][c]; 841 t[idx2] = a[s][r][c + 1]; 842 t[idx2 + rows] = a[s][r][c + 2]; 843 t[idx2 + 2 * rows] = a[s][r][c + 3]; 844 } 845 dctRows.inverse(t, 0, scale); 846 dctRows.inverse(t, rows, scale); 847 dctRows.inverse(t, 2 * rows, scale); 848 dctRows.inverse(t, 3 * rows, scale); 849 for (int r = 0; r < rows; r++) { 850 idx2 = rows + r; 851 a[s][r][c] = t[r]; 852 a[s][r][c + 1] = t[idx2]; 853 a[s][r][c + 2] = t[idx2 + rows]; 854 a[s][r][c + 3] = t[idx2 + 2 * rows]; 855 } 856 } 857 } else if (columns == 2) { 858 for (int r = 0; r < rows; r++) { 859 t[r] = a[s][r][0]; 860 t[rows + r] = a[s][r][1]; 861 } 862 dctRows.inverse(t, 0, scale); 863 dctRows.inverse(t, rows, scale); 864 for (int r = 0; r < rows; r++) { 865 a[s][r][0] = t[r]; 866 a[s][r][1] = t[rows + r]; 867 } 868 } 869 } 870 } 871 } 872 ddxt3db_sub(int isgn, double[] a, boolean scale)873 private void ddxt3db_sub(int isgn, double[] a, boolean scale) { 874 int idx0, idx1, idx2; 875 876 if (isgn == -1) { 877 if (columns > 2) { 878 for (int r = 0; r < rows; r++) { 879 idx0 = r * rowStride; 880 for (int c = 0; c < columns; c += 4) { 881 for (int s = 0; s < slices; s++) { 882 idx1 = s * sliceStride + idx0 + c; 883 idx2 = slices + s; 884 t[s] = a[idx1]; 885 t[idx2] = a[idx1 + 1]; 886 t[idx2 + slices] = a[idx1 + 2]; 887 t[idx2 + 2 * slices] = a[idx1 + 3]; 888 } 889 dctSlices.forward(t, 0, scale); 890 dctSlices.forward(t, slices, scale); 891 dctSlices.forward(t, 2 * slices, scale); 892 dctSlices.forward(t, 3 * slices, scale); 893 for (int s = 0; s < slices; s++) { 894 idx1 = s * sliceStride + idx0 + c; 895 idx2 = slices + s; 896 a[idx1] = t[s]; 897 a[idx1 + 1] = t[idx2]; 898 a[idx1 + 2] = t[idx2 + slices]; 899 a[idx1 + 3] = t[idx2 + 2 * slices]; 900 } 901 } 902 } 903 } else if (columns == 2) { 904 for (int r = 0; r < rows; r++) { 905 idx0 = r * rowStride; 906 for (int s = 0; s < slices; s++) { 907 idx1 = s * sliceStride + idx0; 908 t[s] = a[idx1]; 909 t[slices + s] = a[idx1 + 1]; 910 } 911 dctSlices.forward(t, 0, scale); 912 dctSlices.forward(t, slices, scale); 913 for (int s = 0; s < slices; s++) { 914 idx1 = s * sliceStride + idx0; 915 a[idx1] = t[s]; 916 a[idx1 + 1] = t[slices + s]; 917 } 918 } 919 } 920 } else { 921 if (columns > 2) { 922 for (int r = 0; r < rows; r++) { 923 idx0 = r * rowStride; 924 for (int c = 0; c < columns; c += 4) { 925 for (int s = 0; s < slices; s++) { 926 idx1 = s * sliceStride + idx0 + c; 927 idx2 = slices + s; 928 t[s] = a[idx1]; 929 t[idx2] = a[idx1 + 1]; 930 t[idx2 + slices] = a[idx1 + 2]; 931 t[idx2 + 2 * slices] = a[idx1 + 3]; 932 } 933 dctSlices.inverse(t, 0, scale); 934 dctSlices.inverse(t, slices, scale); 935 dctSlices.inverse(t, 2 * slices, scale); 936 dctSlices.inverse(t, 3 * slices, scale); 937 938 for (int s = 0; s < slices; s++) { 939 idx1 = s * sliceStride + idx0 + c; 940 idx2 = slices + s; 941 a[idx1] = t[s]; 942 a[idx1 + 1] = t[idx2]; 943 a[idx1 + 2] = t[idx2 + slices]; 944 a[idx1 + 3] = t[idx2 + 2 * slices]; 945 } 946 } 947 } 948 } else if (columns == 2) { 949 for (int r = 0; r < rows; r++) { 950 idx0 = r * rowStride; 951 for (int s = 0; s < slices; s++) { 952 idx1 = s * sliceStride + idx0; 953 t[s] = a[idx1]; 954 t[slices + s] = a[idx1 + 1]; 955 } 956 dctSlices.inverse(t, 0, scale); 957 dctSlices.inverse(t, slices, scale); 958 for (int s = 0; s < slices; s++) { 959 idx1 = s * sliceStride + idx0; 960 a[idx1] = t[s]; 961 a[idx1 + 1] = t[slices + s]; 962 } 963 } 964 } 965 } 966 } 967 ddxt3db_sub(int isgn, double[][][] a, boolean scale)968 private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) { 969 int idx2; 970 971 if (isgn == -1) { 972 if (columns > 2) { 973 for (int r = 0; r < rows; r++) { 974 for (int c = 0; c < columns; c += 4) { 975 for (int s = 0; s < slices; s++) { 976 idx2 = slices + s; 977 t[s] = a[s][r][c]; 978 t[idx2] = a[s][r][c + 1]; 979 t[idx2 + slices] = a[s][r][c + 2]; 980 t[idx2 + 2 * slices] = a[s][r][c + 3]; 981 } 982 dctSlices.forward(t, 0, scale); 983 dctSlices.forward(t, slices, scale); 984 dctSlices.forward(t, 2 * slices, scale); 985 dctSlices.forward(t, 3 * slices, scale); 986 for (int s = 0; s < slices; s++) { 987 idx2 = slices + s; 988 a[s][r][c] = t[s]; 989 a[s][r][c + 1] = t[idx2]; 990 a[s][r][c + 2] = t[idx2 + slices]; 991 a[s][r][c + 3] = t[idx2 + 2 * slices]; 992 } 993 } 994 } 995 } else if (columns == 2) { 996 for (int r = 0; r < rows; r++) { 997 for (int s = 0; s < slices; s++) { 998 t[s] = a[s][r][0]; 999 t[slices + s] = a[s][r][1]; 1000 } 1001 dctSlices.forward(t, 0, scale); 1002 dctSlices.forward(t, slices, scale); 1003 for (int s = 0; s < slices; s++) { 1004 a[s][r][0] = t[s]; 1005 a[s][r][1] = t[slices + s]; 1006 } 1007 } 1008 } 1009 } else { 1010 if (columns > 2) { 1011 for (int r = 0; r < rows; r++) { 1012 for (int c = 0; c < columns; c += 4) { 1013 for (int s = 0; s < slices; s++) { 1014 idx2 = slices + s; 1015 t[s] = a[s][r][c]; 1016 t[idx2] = a[s][r][c + 1]; 1017 t[idx2 + slices] = a[s][r][c + 2]; 1018 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1019 } 1020 dctSlices.inverse(t, 0, scale); 1021 dctSlices.inverse(t, slices, scale); 1022 dctSlices.inverse(t, 2 * slices, scale); 1023 dctSlices.inverse(t, 3 * slices, scale); 1024 1025 for (int s = 0; s < slices; s++) { 1026 idx2 = slices + s; 1027 a[s][r][c] = t[s]; 1028 a[s][r][c + 1] = t[idx2]; 1029 a[s][r][c + 2] = t[idx2 + slices]; 1030 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1031 } 1032 } 1033 } 1034 } else if (columns == 2) { 1035 for (int r = 0; r < rows; r++) { 1036 for (int s = 0; s < slices; s++) { 1037 t[s] = a[s][r][0]; 1038 t[slices + s] = a[s][r][1]; 1039 } 1040 dctSlices.inverse(t, 0, scale); 1041 dctSlices.inverse(t, slices, scale); 1042 for (int s = 0; s < slices; s++) { 1043 a[s][r][0] = t[s]; 1044 a[s][r][1] = t[slices + s]; 1045 } 1046 } 1047 } 1048 } 1049 } 1050 ddxt3da_subth(final int isgn, final double[] a, final boolean scale)1051 private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) { 1052 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1053 int nt = 4 * rows; 1054 if (columns == 2) { 1055 nt >>= 1; 1056 } 1057 Future<?>[] futures = new Future[nthreads]; 1058 1059 for (int i = 0; i < nthreads; i++) { 1060 final int n0 = i; 1061 final int startt = nt * i; 1062 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1063 1064 public void run() { 1065 int idx0, idx1, idx2; 1066 if (isgn == -1) { 1067 for (int s = n0; s < slices; s += nthreads) { 1068 idx0 = s * sliceStride; 1069 for (int r = 0; r < rows; r++) { 1070 dctColumns.forward(a, idx0 + r * rowStride, scale); 1071 } 1072 if (columns > 2) { 1073 for (int c = 0; c < columns; c += 4) { 1074 for (int r = 0; r < rows; r++) { 1075 idx1 = idx0 + r * rowStride + c; 1076 idx2 = startt + rows + r; 1077 t[startt + r] = a[idx1]; 1078 t[idx2] = a[idx1 + 1]; 1079 t[idx2 + rows] = a[idx1 + 2]; 1080 t[idx2 + 2 * rows] = a[idx1 + 3]; 1081 } 1082 dctRows.forward(t, startt, scale); 1083 dctRows.forward(t, startt + rows, scale); 1084 dctRows.forward(t, startt + 2 * rows, scale); 1085 dctRows.forward(t, startt + 3 * rows, scale); 1086 for (int r = 0; r < rows; r++) { 1087 idx1 = idx0 + r * rowStride + c; 1088 idx2 = startt + rows + r; 1089 a[idx1] = t[startt + r]; 1090 a[idx1 + 1] = t[idx2]; 1091 a[idx1 + 2] = t[idx2 + rows]; 1092 a[idx1 + 3] = t[idx2 + 2 * rows]; 1093 } 1094 } 1095 } else if (columns == 2) { 1096 for (int r = 0; r < rows; r++) { 1097 idx1 = idx0 + r * rowStride; 1098 t[startt + r] = a[idx1]; 1099 t[startt + rows + r] = a[idx1 + 1]; 1100 } 1101 dctRows.forward(t, startt, scale); 1102 dctRows.forward(t, startt + rows, scale); 1103 for (int r = 0; r < rows; r++) { 1104 idx1 = idx0 + r * rowStride; 1105 a[idx1] = t[startt + r]; 1106 a[idx1 + 1] = t[startt + rows + r]; 1107 } 1108 } 1109 } 1110 } else { 1111 for (int s = n0; s < slices; s += nthreads) { 1112 idx0 = s * sliceStride; 1113 for (int r = 0; r < rows; r++) { 1114 dctColumns.inverse(a, idx0 + r * rowStride, scale); 1115 } 1116 if (columns > 2) { 1117 for (int c = 0; c < columns; c += 4) { 1118 for (int j = 0; j < rows; j++) { 1119 idx1 = idx0 + j * rowStride + c; 1120 idx2 = startt + rows + j; 1121 t[startt + j] = a[idx1]; 1122 t[idx2] = a[idx1 + 1]; 1123 t[idx2 + rows] = a[idx1 + 2]; 1124 t[idx2 + 2 * rows] = a[idx1 + 3]; 1125 } 1126 dctRows.inverse(t, startt, scale); 1127 dctRows.inverse(t, startt + rows, scale); 1128 dctRows.inverse(t, startt + 2 * rows, scale); 1129 dctRows.inverse(t, startt + 3 * rows, scale); 1130 for (int j = 0; j < rows; j++) { 1131 idx1 = idx0 + j * rowStride + c; 1132 idx2 = startt + rows + j; 1133 a[idx1] = t[startt + j]; 1134 a[idx1 + 1] = t[idx2]; 1135 a[idx1 + 2] = t[idx2 + rows]; 1136 a[idx1 + 3] = t[idx2 + 2 * rows]; 1137 } 1138 } 1139 } else if (columns == 2) { 1140 for (int r = 0; r < rows; r++) { 1141 idx1 = idx0 + r * rowStride; 1142 t[startt + r] = a[idx1]; 1143 t[startt + rows + r] = a[idx1 + 1]; 1144 } 1145 dctRows.inverse(t, startt, scale); 1146 dctRows.inverse(t, startt + rows, scale); 1147 for (int r = 0; r < rows; r++) { 1148 idx1 = idx0 + r * rowStride; 1149 a[idx1] = t[startt + r]; 1150 a[idx1 + 1] = t[startt + rows + r]; 1151 } 1152 } 1153 } 1154 } 1155 } 1156 }); 1157 } 1158 ConcurrencyUtils.waitForCompletion(futures); 1159 } 1160 ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale)1161 private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) { 1162 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1163 int nt = 4 * rows; 1164 if (columns == 2) { 1165 nt >>= 1; 1166 } 1167 Future<?>[] futures = new Future[nthreads]; 1168 1169 for (int i = 0; i < nthreads; i++) { 1170 final int n0 = i; 1171 final int startt = nt * i; 1172 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1173 1174 public void run() { 1175 int idx2; 1176 if (isgn == -1) { 1177 for (int s = n0; s < slices; s += nthreads) { 1178 for (int r = 0; r < rows; r++) { 1179 dctColumns.forward(a[s][r], scale); 1180 } 1181 if (columns > 2) { 1182 for (int c = 0; c < columns; c += 4) { 1183 for (int r = 0; r < rows; r++) { 1184 idx2 = startt + rows + r; 1185 t[startt + r] = a[s][r][c]; 1186 t[idx2] = a[s][r][c + 1]; 1187 t[idx2 + rows] = a[s][r][c + 2]; 1188 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1189 } 1190 dctRows.forward(t, startt, scale); 1191 dctRows.forward(t, startt + rows, scale); 1192 dctRows.forward(t, startt + 2 * rows, scale); 1193 dctRows.forward(t, startt + 3 * rows, scale); 1194 for (int r = 0; r < rows; r++) { 1195 idx2 = startt + rows + r; 1196 a[s][r][c] = t[startt + r]; 1197 a[s][r][c + 1] = t[idx2]; 1198 a[s][r][c + 2] = t[idx2 + rows]; 1199 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1200 } 1201 } 1202 } else if (columns == 2) { 1203 for (int r = 0; r < rows; r++) { 1204 t[startt + r] = a[s][r][0]; 1205 t[startt + rows + r] = a[s][r][1]; 1206 } 1207 dctRows.forward(t, startt, scale); 1208 dctRows.forward(t, startt + rows, scale); 1209 for (int r = 0; r < rows; r++) { 1210 a[s][r][0] = t[startt + r]; 1211 a[s][r][1] = t[startt + rows + r]; 1212 } 1213 } 1214 } 1215 } else { 1216 for (int s = n0; s < slices; s += nthreads) { 1217 for (int r = 0; r < rows; r++) { 1218 dctColumns.inverse(a[s][r], scale); 1219 } 1220 if (columns > 2) { 1221 for (int c = 0; c < columns; c += 4) { 1222 for (int r = 0; r < rows; r++) { 1223 idx2 = startt + rows + r; 1224 t[startt + r] = a[s][r][c]; 1225 t[idx2] = a[s][r][c + 1]; 1226 t[idx2 + rows] = a[s][r][c + 2]; 1227 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1228 } 1229 dctRows.inverse(t, startt, scale); 1230 dctRows.inverse(t, startt + rows, scale); 1231 dctRows.inverse(t, startt + 2 * rows, scale); 1232 dctRows.inverse(t, startt + 3 * rows, scale); 1233 for (int r = 0; r < rows; r++) { 1234 idx2 = startt + rows + r; 1235 a[s][r][c] = t[startt + r]; 1236 a[s][r][c + 1] = t[idx2]; 1237 a[s][r][c + 2] = t[idx2 + rows]; 1238 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1239 } 1240 } 1241 } else if (columns == 2) { 1242 for (int r = 0; r < rows; r++) { 1243 t[startt + r] = a[s][r][0]; 1244 t[startt + rows + r] = a[s][r][1]; 1245 } 1246 dctRows.inverse(t, startt, scale); 1247 dctRows.inverse(t, startt + rows, scale); 1248 for (int r = 0; r < rows; r++) { 1249 a[s][r][0] = t[startt + r]; 1250 a[s][r][1] = t[startt + rows + r]; 1251 } 1252 } 1253 } 1254 } 1255 } 1256 }); 1257 } 1258 ConcurrencyUtils.waitForCompletion(futures); 1259 } 1260 ddxt3db_subth(final int isgn, final double[] a, final boolean scale)1261 private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) { 1262 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1263 int nt = 4 * slices; 1264 if (columns == 2) { 1265 nt >>= 1; 1266 } 1267 Future<?>[] futures = new Future[nthreads]; 1268 for (int i = 0; i < nthreads; i++) { 1269 final int n0 = i; 1270 final int startt = nt * i; 1271 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1272 1273 public void run() { 1274 int idx0, idx1, idx2; 1275 if (isgn == -1) { 1276 if (columns > 2) { 1277 for (int r = n0; r < rows; r += nthreads) { 1278 idx0 = r * rowStride; 1279 for (int c = 0; c < columns; c += 4) { 1280 for (int s = 0; s < slices; s++) { 1281 idx1 = s * sliceStride + idx0 + c; 1282 idx2 = startt + slices + s; 1283 t[startt + s] = a[idx1]; 1284 t[idx2] = a[idx1 + 1]; 1285 t[idx2 + slices] = a[idx1 + 2]; 1286 t[idx2 + 2 * slices] = a[idx1 + 3]; 1287 } 1288 dctSlices.forward(t, startt, scale); 1289 dctSlices.forward(t, startt + slices, scale); 1290 dctSlices.forward(t, startt + 2 * slices, scale); 1291 dctSlices.forward(t, startt + 3 * slices, scale); 1292 for (int s = 0; s < slices; s++) { 1293 idx1 = s * sliceStride + idx0 + c; 1294 idx2 = startt + slices + s; 1295 a[idx1] = t[startt + s]; 1296 a[idx1 + 1] = t[idx2]; 1297 a[idx1 + 2] = t[idx2 + slices]; 1298 a[idx1 + 3] = t[idx2 + 2 * slices]; 1299 } 1300 } 1301 } 1302 } else if (columns == 2) { 1303 for (int r = n0; r < rows; r += nthreads) { 1304 idx0 = r * rowStride; 1305 for (int s = 0; s < slices; s++) { 1306 idx1 = s * sliceStride + idx0; 1307 t[startt + s] = a[idx1]; 1308 t[startt + slices + s] = a[idx1 + 1]; 1309 } 1310 dctSlices.forward(t, startt, scale); 1311 dctSlices.forward(t, startt + slices, scale); 1312 for (int s = 0; s < slices; s++) { 1313 idx1 = s * sliceStride + idx0; 1314 a[idx1] = t[startt + s]; 1315 a[idx1 + 1] = t[startt + slices + s]; 1316 } 1317 } 1318 } 1319 } else { 1320 if (columns > 2) { 1321 for (int r = n0; r < rows; r += nthreads) { 1322 idx0 = r * rowStride; 1323 for (int c = 0; c < columns; c += 4) { 1324 for (int s = 0; s < slices; s++) { 1325 idx1 = s * sliceStride + idx0 + c; 1326 idx2 = startt + slices + s; 1327 t[startt + s] = a[idx1]; 1328 t[idx2] = a[idx1 + 1]; 1329 t[idx2 + slices] = a[idx1 + 2]; 1330 t[idx2 + 2 * slices] = a[idx1 + 3]; 1331 } 1332 dctSlices.inverse(t, startt, scale); 1333 dctSlices.inverse(t, startt + slices, scale); 1334 dctSlices.inverse(t, startt + 2 * slices, scale); 1335 dctSlices.inverse(t, startt + 3 * slices, scale); 1336 for (int s = 0; s < slices; s++) { 1337 idx1 = s * sliceStride + idx0 + c; 1338 idx2 = startt + slices + s; 1339 a[idx1] = t[startt + s]; 1340 a[idx1 + 1] = t[idx2]; 1341 a[idx1 + 2] = t[idx2 + slices]; 1342 a[idx1 + 3] = t[idx2 + 2 * slices]; 1343 } 1344 } 1345 } 1346 } else if (columns == 2) { 1347 for (int r = n0; r < rows; r += nthreads) { 1348 idx0 = r * rowStride; 1349 for (int s = 0; s < slices; s++) { 1350 idx1 = s * sliceStride + idx0; 1351 t[startt + s] = a[idx1]; 1352 t[startt + slices + s] = a[idx1 + 1]; 1353 } 1354 dctSlices.inverse(t, startt, scale); 1355 dctSlices.inverse(t, startt + slices, scale); 1356 1357 for (int s = 0; s < slices; s++) { 1358 idx1 = s * sliceStride + idx0; 1359 a[idx1] = t[startt + s]; 1360 a[idx1 + 1] = t[startt + slices + s]; 1361 } 1362 } 1363 } 1364 } 1365 } 1366 }); 1367 } 1368 ConcurrencyUtils.waitForCompletion(futures); 1369 } 1370 ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale)1371 private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) { 1372 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1373 int nt = 4 * slices; 1374 if (columns == 2) { 1375 nt >>= 1; 1376 } 1377 Future<?>[] futures = new Future[nthreads]; 1378 1379 for (int i = 0; i < nthreads; i++) { 1380 final int n0 = i; 1381 final int startt = nt * i; 1382 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1383 1384 public void run() { 1385 int idx2; 1386 if (isgn == -1) { 1387 if (columns > 2) { 1388 for (int r = n0; r < rows; r += nthreads) { 1389 for (int c = 0; c < columns; c += 4) { 1390 for (int s = 0; s < slices; s++) { 1391 idx2 = startt + slices + s; 1392 t[startt + s] = a[s][r][c]; 1393 t[idx2] = a[s][r][c + 1]; 1394 t[idx2 + slices] = a[s][r][c + 2]; 1395 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1396 } 1397 dctSlices.forward(t, startt, scale); 1398 dctSlices.forward(t, startt + slices, scale); 1399 dctSlices.forward(t, startt + 2 * slices, scale); 1400 dctSlices.forward(t, startt + 3 * slices, scale); 1401 for (int s = 0; s < slices; s++) { 1402 idx2 = startt + slices + s; 1403 a[s][r][c] = t[startt + s]; 1404 a[s][r][c + 1] = t[idx2]; 1405 a[s][r][c + 2] = t[idx2 + slices]; 1406 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1407 } 1408 } 1409 } 1410 } else if (columns == 2) { 1411 for (int r = n0; r < rows; r += nthreads) { 1412 for (int s = 0; s < slices; s++) { 1413 t[startt + s] = a[s][r][0]; 1414 t[startt + slices + s] = a[s][r][1]; 1415 } 1416 dctSlices.forward(t, startt, scale); 1417 dctSlices.forward(t, startt + slices, scale); 1418 for (int s = 0; s < slices; s++) { 1419 a[s][r][0] = t[startt + s]; 1420 a[s][r][1] = t[startt + slices + s]; 1421 } 1422 } 1423 } 1424 } else { 1425 if (columns > 2) { 1426 for (int r = n0; r < rows; r += nthreads) { 1427 for (int c = 0; c < columns; c += 4) { 1428 for (int s = 0; s < slices; s++) { 1429 idx2 = startt + slices + s; 1430 t[startt + s] = a[s][r][c]; 1431 t[idx2] = a[s][r][c + 1]; 1432 t[idx2 + slices] = a[s][r][c + 2]; 1433 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1434 } 1435 dctSlices.inverse(t, startt, scale); 1436 dctSlices.inverse(t, startt + slices, scale); 1437 dctSlices.inverse(t, startt + 2 * slices, scale); 1438 dctSlices.inverse(t, startt + 3 * slices, scale); 1439 for (int s = 0; s < slices; s++) { 1440 idx2 = startt + slices + s; 1441 a[s][r][c] = t[startt + s]; 1442 a[s][r][c + 1] = t[idx2]; 1443 a[s][r][c + 2] = t[idx2 + slices]; 1444 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1445 } 1446 } 1447 } 1448 } else if (columns == 2) { 1449 for (int r = n0; r < rows; r += nthreads) { 1450 for (int s = 0; s < slices; s++) { 1451 t[startt + s] = a[s][r][0]; 1452 t[startt + slices + s] = a[s][r][1]; 1453 } 1454 dctSlices.inverse(t, startt, scale); 1455 dctSlices.inverse(t, startt + slices, scale); 1456 1457 for (int s = 0; s < slices; s++) { 1458 a[s][r][0] = t[startt + s]; 1459 a[s][r][1] = t[startt + slices + s]; 1460 } 1461 } 1462 } 1463 } 1464 } 1465 }); 1466 } 1467 ConcurrencyUtils.waitForCompletion(futures); 1468 } 1469 } 1470