1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Mozilla Public License Version
5  * 1.1 (the "License"); you may not use this file except in compliance with
6  * the License. You may obtain a copy of the License at
7  * http://www.mozilla.org/MPL/
8  *
9  * Software distributed under the License is distributed on an "AS IS" basis,
10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11  * for the specific language governing rights and limitations under the
12  * License.
13  *
14  * The Original Code is JTransforms.
15  *
16  * The Initial Developer of the Original Code is
17  * Piotr Wendykier, Emory University.
18  * Portions created by the Initial Developer are Copyright (C) 2007-2009
19  * the Initial Developer. All Rights Reserved.
20  *
21  * Alternatively, the contents of this file may be used under the terms of
22  * either the GNU General Public License Version 2 or later (the "GPL"), or
23  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
24  * in which case the provisions of the GPL or the LGPL are applicable instead
25  * of those above. If you wish to allow use of your version of this file only
26  * under the terms of either the GPL or the LGPL, and not to allow others to
27  * use your version of this file under the terms of the MPL, indicate your
28  * decision by deleting the provisions above and replace them with the notice
29  * and other provisions required by the GPL or the LGPL. If you do not delete
30  * the provisions above, a recipient may use your version of this file under
31  * the terms of any one of the MPL, the GPL or the LGPL.
32  *
33  * ***** END LICENSE BLOCK ***** */
34 
35 package edu.emory.mathcs.jtransforms.dct;
36 
37 import java.util.concurrent.Future;
38 
39 import edu.emory.mathcs.utils.ConcurrencyUtils;
40 
41 /**
42  * Computes 3D Discrete Cosine Transform (DCT) of double precision data. The
43  * sizes of all three dimensions can be arbitrary numbers. This is a parallel
44  * implementation of split-radix and mixed-radix algorithms optimized for SMP
45  * systems. <br>
46  * <br>
47  * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
48  * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
49  *
50  * @author Piotr Wendykier (piotr.wendykier@gmail.com)
51  *
52  */
53 public class DoubleDCT_3D {
54 
55     private int slices;
56 
57     private int rows;
58 
59     private int columns;
60 
61     private int sliceStride;
62 
63     private int rowStride;
64 
65     private double[] t;
66 
67     private DoubleDCT_1D dctSlices, dctRows, dctColumns;
68 
69     private int oldNthreads;
70 
71     private int nt;
72 
73     private boolean isPowerOfTwo = false;
74 
75     private boolean useThreads = false;
76 
77     /**
78      * Creates new instance of DoubleDCT_3D.
79      *
80      * @param slices
81      *            number of slices
82      * @param rows
83      *            number of rows
84      * @param columns
85      *            number of columns
86      */
DoubleDCT_3D(int slices, int rows, int columns)87     public DoubleDCT_3D(int slices, int rows, int columns) {
88         if (slices <= 1 || rows <= 1 || columns <= 1) {
89             throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
90         }
91         this.slices = slices;
92         this.rows = rows;
93         this.columns = columns;
94         this.sliceStride = rows * columns;
95         this.rowStride = columns;
96         if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
97             this.useThreads = true;
98         }
99         if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
100             isPowerOfTwo = true;
101 
102             oldNthreads = ConcurrencyUtils.getNumberOfThreads();
103             nt = slices;
104             if (nt < rows) {
105                 nt = rows;
106             }
107             nt *= 4;
108             if (oldNthreads > 1) {
109                 nt *= oldNthreads;
110             }
111             if (columns == 2) {
112                 nt >>= 1;
113             }
114             t = new double[nt];
115         }
116         dctSlices = new DoubleDCT_1D(slices);
117         if (slices == rows) {
118             dctRows = dctSlices;
119         } else {
120             dctRows = new DoubleDCT_1D(rows);
121         }
122         if (slices == columns) {
123             dctColumns = dctSlices;
124         } else if (rows == columns) {
125             dctColumns = dctRows;
126         } else {
127             dctColumns = new DoubleDCT_1D(columns);
128         }
129     }
130 
131     /**
132      * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code>
133      * . The data is stored in 1D array addressed in slice-major, then
134      * row-major, then column-major, in order of significance, i.e. the element
135      * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride
136      * + j*rowStride + k], where sliceStride = rows * columns and rowStride =
137      * columns.
138      *
139      * @param a
140      *            data to transform
141      * @param scale
142      *            if true then scaling is performed
143      */
forward(final double[] a, final boolean scale)144     public void forward(final double[] a, final boolean scale) {
145         int nthreads = ConcurrencyUtils.getNumberOfThreads();
146         if (isPowerOfTwo) {
147             if (nthreads != oldNthreads) {
148                 nt = slices;
149                 if (nt < rows) {
150                     nt = rows;
151                 }
152                 nt *= 4;
153                 if (nthreads > 1) {
154                     nt *= nthreads;
155                 }
156                 if (columns == 2) {
157                     nt >>= 1;
158                 }
159                 t = new double[nt];
160                 oldNthreads = nthreads;
161             }
162             if ((nthreads > 1) && useThreads) {
163                 ddxt3da_subth(-1, a, scale);
164                 ddxt3db_subth(-1, a, scale);
165             } else {
166                 ddxt3da_sub(-1, a, scale);
167                 ddxt3db_sub(-1, a, scale);
168             }
169         } else {
170             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
171                 Future<?>[] futures = new Future[nthreads];
172                 int p = slices / nthreads;
173                 for (int l = 0; l < nthreads; l++) {
174                     final int firstSlice = l * p;
175                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
176                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
177                         public void run() {
178                             for (int s = firstSlice; s < lastSlice; s++) {
179                                 int idx1 = s * sliceStride;
180                                 for (int r = 0; r < rows; r++) {
181                                     dctColumns.forward(a, idx1 + r * rowStride, scale);
182                                 }
183                             }
184                         }
185                     });
186                 }
187                 ConcurrencyUtils.waitForCompletion(futures);
188 
189                 for (int l = 0; l < nthreads; l++) {
190                     final int firstSlice = l * p;
191                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
192                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
193                         public void run() {
194                             double[] temp = new double[rows];
195                             for (int s = firstSlice; s < lastSlice; s++) {
196                                 int idx1 = s * sliceStride;
197                                 for (int c = 0; c < columns; c++) {
198                                     for (int r = 0; r < rows; r++) {
199                                         int idx3 = idx1 + r * rowStride + c;
200                                         temp[r] = a[idx3];
201                                     }
202                                     dctRows.forward(temp, scale);
203                                     for (int r = 0; r < rows; r++) {
204                                         int idx3 = idx1 + r * rowStride + c;
205                                         a[idx3] = temp[r];
206                                     }
207                                 }
208                             }
209                         }
210                     });
211                 }
212                 ConcurrencyUtils.waitForCompletion(futures);
213 
214                 p = rows / nthreads;
215                 for (int l = 0; l < nthreads; l++) {
216                     final int firstRow = l * p;
217                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
218                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
219                         public void run() {
220                             double[] temp = new double[slices];
221                             for (int r = firstRow; r < lastRow; r++) {
222                                 int idx1 = r * rowStride;
223                                 for (int c = 0; c < columns; c++) {
224                                     for (int s = 0; s < slices; s++) {
225                                         int idx3 = s * sliceStride + idx1 + c;
226                                         temp[s] = a[idx3];
227                                     }
228                                     dctSlices.forward(temp, scale);
229                                     for (int s = 0; s < slices; s++) {
230                                         int idx3 = s * sliceStride + idx1 + c;
231                                         a[idx3] = temp[s];
232                                     }
233                                 }
234                             }
235                         }
236                     });
237                 }
238                 ConcurrencyUtils.waitForCompletion(futures);
239 
240             } else {
241                 for (int s = 0; s < slices; s++) {
242                     int idx1 = s * sliceStride;
243                     for (int r = 0; r < rows; r++) {
244                         dctColumns.forward(a, idx1 + r * rowStride, scale);
245                     }
246                 }
247                 double[] temp = new double[rows];
248                 for (int s = 0; s < slices; s++) {
249                     int idx1 = s * sliceStride;
250                     for (int c = 0; c < columns; c++) {
251                         for (int r = 0; r < rows; r++) {
252                             int idx3 = idx1 + r * rowStride + c;
253                             temp[r] = a[idx3];
254                         }
255                         dctRows.forward(temp, scale);
256                         for (int r = 0; r < rows; r++) {
257                             int idx3 = idx1 + r * rowStride + c;
258                             a[idx3] = temp[r];
259                         }
260                     }
261                 }
262                 temp = new double[slices];
263                 for (int r = 0; r < rows; r++) {
264                     int idx1 = r * rowStride;
265                     for (int c = 0; c < columns; c++) {
266                         for (int s = 0; s < slices; s++) {
267                             int idx3 = s * sliceStride + idx1 + c;
268                             temp[s] = a[idx3];
269                         }
270                         dctSlices.forward(temp, scale);
271                         for (int s = 0; s < slices; s++) {
272                             int idx3 = s * sliceStride + idx1 + c;
273                             a[idx3] = temp[s];
274                         }
275                     }
276                 }
277             }
278         }
279     }
280 
281     /**
282      * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code>
283      * . The data is stored in 3D array
284      *
285      * @param a
286      *            data to transform
287      * @param scale
288      *            if true then scaling is performed
289      */
forward(final double[][][] a, final boolean scale)290     public void forward(final double[][][] a, final boolean scale) {
291         int nthreads = ConcurrencyUtils.getNumberOfThreads();
292         if (isPowerOfTwo) {
293             if (nthreads != oldNthreads) {
294                 nt = slices;
295                 if (nt < rows) {
296                     nt = rows;
297                 }
298                 nt *= 4;
299                 if (nthreads > 1) {
300                     nt *= nthreads;
301                 }
302                 if (columns == 2) {
303                     nt >>= 1;
304                 }
305                 t = new double[nt];
306                 oldNthreads = nthreads;
307             }
308             if ((nthreads > 1) && useThreads) {
309                 ddxt3da_subth(-1, a, scale);
310                 ddxt3db_subth(-1, a, scale);
311             } else {
312                 ddxt3da_sub(-1, a, scale);
313                 ddxt3db_sub(-1, a, scale);
314             }
315         } else {
316             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
317                 Future<?>[] futures = new Future[nthreads];
318                 int p = slices / nthreads;
319                 for (int l = 0; l < nthreads; l++) {
320                     final int firstSlice = l * p;
321                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
322                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
323                         public void run() {
324                             for (int s = firstSlice; s < lastSlice; s++) {
325                                 for (int r = 0; r < rows; r++) {
326                                     dctColumns.forward(a[s][r], scale);
327                                 }
328                             }
329                         }
330                     });
331                 }
332                 ConcurrencyUtils.waitForCompletion(futures);
333 
334                 for (int l = 0; l < nthreads; l++) {
335                     final int firstSlice = l * p;
336                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
337                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
338                         public void run() {
339                             double[] temp = new double[rows];
340                             for (int s = firstSlice; s < lastSlice; s++) {
341                                 for (int c = 0; c < columns; c++) {
342                                     for (int r = 0; r < rows; r++) {
343                                         temp[r] = a[s][r][c];
344                                     }
345                                     dctRows.forward(temp, scale);
346                                     for (int r = 0; r < rows; r++) {
347                                         a[s][r][c] = temp[r];
348                                     }
349                                 }
350                             }
351                         }
352                     });
353                 }
354                 ConcurrencyUtils.waitForCompletion(futures);
355 
356                 p = rows / nthreads;
357                 for (int l = 0; l < nthreads; l++) {
358                     final int firstRow = l * p;
359                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
360                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
361                         public void run() {
362                             double[] temp = new double[slices];
363                             for (int r = firstRow; r < lastRow; r++) {
364                                 for (int c = 0; c < columns; c++) {
365                                     for (int s = 0; s < slices; s++) {
366                                         temp[s] = a[s][r][c];
367                                     }
368                                     dctSlices.forward(temp, scale);
369                                     for (int s = 0; s < slices; s++) {
370                                         a[s][r][c] = temp[s];
371                                     }
372                                 }
373                             }
374                         }
375                     });
376                 }
377                 ConcurrencyUtils.waitForCompletion(futures);
378 
379             } else {
380                 for (int s = 0; s < slices; s++) {
381                     for (int r = 0; r < rows; r++) {
382                         dctColumns.forward(a[s][r], scale);
383                     }
384                 }
385                 double[] temp = new double[rows];
386                 for (int s = 0; s < slices; s++) {
387                     for (int c = 0; c < columns; c++) {
388                         for (int r = 0; r < rows; r++) {
389                             temp[r] = a[s][r][c];
390                         }
391                         dctRows.forward(temp, scale);
392                         for (int r = 0; r < rows; r++) {
393                             a[s][r][c] = temp[r];
394                         }
395                     }
396                 }
397                 temp = new double[slices];
398                 for (int r = 0; r < rows; r++) {
399                     for (int c = 0; c < columns; c++) {
400                         for (int s = 0; s < slices; s++) {
401                             temp[s] = a[s][r][c];
402                         }
403                         dctSlices.forward(temp, scale);
404                         for (int s = 0; s < slices; s++) {
405                             a[s][r][c] = temp[s];
406                         }
407                     }
408                 }
409             }
410         }
411     }
412 
413     /**
414      * Computes the 3D inverse DCT (DCT-III) leaving the result in
415      * <code>a</code>. The data is stored in 1D array addressed in slice-major,
416      * then row-major, then column-major, in order of significance, i.e. the
417      * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
418      * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
419      * and rowStride = columns.
420      *
421      * @param a
422      *            data to transform
423      * @param scale
424      *            if true then scaling is performed
425      */
inverse(final double[] a, final boolean scale)426     public void inverse(final double[] a, final boolean scale) {
427         int nthreads = ConcurrencyUtils.getNumberOfThreads();
428         if (isPowerOfTwo) {
429             if (nthreads != oldNthreads) {
430                 nt = slices;
431                 if (nt < rows) {
432                     nt = rows;
433                 }
434                 nt *= 4;
435                 if (nthreads > 1) {
436                     nt *= nthreads;
437                 }
438                 if (columns == 2) {
439                     nt >>= 1;
440                 }
441                 t = new double[nt];
442                 oldNthreads = nthreads;
443             }
444             if ((nthreads > 1) && useThreads) {
445                 ddxt3da_subth(1, a, scale);
446                 ddxt3db_subth(1, a, scale);
447             } else {
448                 ddxt3da_sub(1, a, scale);
449                 ddxt3db_sub(1, a, scale);
450             }
451         } else {
452             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
453                 Future<?>[] futures = new Future[nthreads];
454                 int p = slices / nthreads;
455                 for (int l = 0; l < nthreads; l++) {
456                     final int firstSlice = l * p;
457                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
458                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
459                         public void run() {
460                             for (int s = firstSlice; s < lastSlice; s++) {
461                                 int idx1 = s * sliceStride;
462                                 for (int r = 0; r < rows; r++) {
463                                     dctColumns.inverse(a, idx1 + r * rowStride, scale);
464                                 }
465                             }
466                         }
467                     });
468                 }
469                 ConcurrencyUtils.waitForCompletion(futures);
470 
471                 for (int l = 0; l < nthreads; l++) {
472                     final int firstSlice = l * p;
473                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
474                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
475                         public void run() {
476                             double[] temp = new double[rows];
477                             for (int s = firstSlice; s < lastSlice; s++) {
478                                 int idx1 = s * sliceStride;
479                                 for (int c = 0; c < columns; c++) {
480                                     for (int r = 0; r < rows; r++) {
481                                         int idx3 = idx1 + r * rowStride + c;
482                                         temp[r] = a[idx3];
483                                     }
484                                     dctRows.inverse(temp, scale);
485                                     for (int r = 0; r < rows; r++) {
486                                         int idx3 = idx1 + r * rowStride + c;
487                                         a[idx3] = temp[r];
488                                     }
489                                 }
490                             }
491                         }
492                     });
493                 }
494                 ConcurrencyUtils.waitForCompletion(futures);
495 
496                 p = rows / nthreads;
497                 for (int l = 0; l < nthreads; l++) {
498                     final int firstRow = l * p;
499                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
500                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
501                         public void run() {
502                             double[] temp = new double[slices];
503                             for (int r = firstRow; r < lastRow; r++) {
504                                 int idx1 = r * rowStride;
505                                 for (int c = 0; c < columns; c++) {
506                                     for (int s = 0; s < slices; s++) {
507                                         int idx3 = s * sliceStride + idx1 + c;
508                                         temp[s] = a[idx3];
509                                     }
510                                     dctSlices.inverse(temp, scale);
511                                     for (int s = 0; s < slices; s++) {
512                                         int idx3 = s * sliceStride + idx1 + c;
513                                         a[idx3] = temp[s];
514                                     }
515                                 }
516                             }
517                         }
518                     });
519                 }
520                 ConcurrencyUtils.waitForCompletion(futures);
521 
522             } else {
523                 for (int s = 0; s < slices; s++) {
524                     int idx1 = s * sliceStride;
525                     for (int r = 0; r < rows; r++) {
526                         dctColumns.inverse(a, idx1 + r * rowStride, scale);
527                     }
528                 }
529                 double[] temp = new double[rows];
530                 for (int s = 0; s < slices; s++) {
531                     int idx1 = s * sliceStride;
532                     for (int c = 0; c < columns; c++) {
533                         for (int r = 0; r < rows; r++) {
534                             int idx3 = idx1 + r * rowStride + c;
535                             temp[r] = a[idx3];
536                         }
537                         dctRows.inverse(temp, scale);
538                         for (int r = 0; r < rows; r++) {
539                             int idx3 = idx1 + r * rowStride + c;
540                             a[idx3] = temp[r];
541                         }
542                     }
543                 }
544                 temp = new double[slices];
545                 for (int r = 0; r < rows; r++) {
546                     int idx1 = r * rowStride;
547                     for (int c = 0; c < columns; c++) {
548                         for (int s = 0; s < slices; s++) {
549                             int idx3 = s * sliceStride + idx1 + c;
550                             temp[s] = a[idx3];
551                         }
552                         dctSlices.inverse(temp, scale);
553                         for (int s = 0; s < slices; s++) {
554                             int idx3 = s * sliceStride + idx1 + c;
555                             a[idx3] = temp[s];
556                         }
557                     }
558                 }
559             }
560         }
561     }
562 
563     /**
564      * Computes the 3D inverse DCT (DCT-III) leaving the result in
565      * <code>a</code>. The data is stored in 3D array.
566      *
567      * @param a
568      *            data to transform
569      * @param scale
570      *            if true then scaling is performed
571      */
inverse(final double[][][] a, final boolean scale)572     public void inverse(final double[][][] a, final boolean scale) {
573         int nthreads = ConcurrencyUtils.getNumberOfThreads();
574         if (isPowerOfTwo) {
575             if (nthreads != oldNthreads) {
576                 nt = slices;
577                 if (nt < rows) {
578                     nt = rows;
579                 }
580                 nt *= 4;
581                 if (nthreads > 1) {
582                     nt *= nthreads;
583                 }
584                 if (columns == 2) {
585                     nt >>= 1;
586                 }
587                 t = new double[nt];
588                 oldNthreads = nthreads;
589             }
590             if ((nthreads > 1) && useThreads) {
591                 ddxt3da_subth(1, a, scale);
592                 ddxt3db_subth(1, a, scale);
593             } else {
594                 ddxt3da_sub(1, a, scale);
595                 ddxt3db_sub(1, a, scale);
596             }
597         } else {
598             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
599                 Future<?>[] futures = new Future[nthreads];
600                 int p = slices / nthreads;
601                 for (int l = 0; l < nthreads; l++) {
602                     final int firstSlice = l * p;
603                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
604                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
605                         public void run() {
606                             for (int s = firstSlice; s < lastSlice; s++) {
607                                 for (int r = 0; r < rows; r++) {
608                                     dctColumns.inverse(a[s][r], scale);
609                                 }
610                             }
611                         }
612                     });
613                 }
614                 ConcurrencyUtils.waitForCompletion(futures);
615 
616                 for (int l = 0; l < nthreads; l++) {
617                     final int firstSlice = l * p;
618                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
619                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
620                         public void run() {
621                             double[] temp = new double[rows];
622                             for (int s = firstSlice; s < lastSlice; s++) {
623                                 for (int c = 0; c < columns; c++) {
624                                     for (int r = 0; r < rows; r++) {
625                                         temp[r] = a[s][r][c];
626                                     }
627                                     dctRows.inverse(temp, scale);
628                                     for (int r = 0; r < rows; r++) {
629                                         a[s][r][c] = temp[r];
630                                     }
631                                 }
632                             }
633                         }
634                     });
635                 }
636                 ConcurrencyUtils.waitForCompletion(futures);
637 
638                 p = rows / nthreads;
639                 for (int l = 0; l < nthreads; l++) {
640                     final int firstRow = l * p;
641                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
642                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
643                         public void run() {
644                             double[] temp = new double[slices];
645                             for (int r = firstRow; r < lastRow; r++) {
646                                 for (int c = 0; c < columns; c++) {
647                                     for (int s = 0; s < slices; s++) {
648                                         temp[s] = a[s][r][c];
649                                     }
650                                     dctSlices.inverse(temp, scale);
651                                     for (int s = 0; s < slices; s++) {
652                                         a[s][r][c] = temp[s];
653                                     }
654                                 }
655                             }
656                         }
657                     });
658                 }
659                 ConcurrencyUtils.waitForCompletion(futures);
660 
661             } else {
662                 for (int s = 0; s < slices; s++) {
663                     for (int r = 0; r < rows; r++) {
664                         dctColumns.inverse(a[s][r], scale);
665                     }
666                 }
667                 double[] temp = new double[rows];
668                 for (int s = 0; s < slices; s++) {
669                     for (int c = 0; c < columns; c++) {
670                         for (int r = 0; r < rows; r++) {
671                             temp[r] = a[s][r][c];
672                         }
673                         dctRows.inverse(temp, scale);
674                         for (int r = 0; r < rows; r++) {
675                             a[s][r][c] = temp[r];
676                         }
677                     }
678                 }
679                 temp = new double[slices];
680                 for (int r = 0; r < rows; r++) {
681                     for (int c = 0; c < columns; c++) {
682                         for (int s = 0; s < slices; s++) {
683                             temp[s] = a[s][r][c];
684                         }
685                         dctSlices.inverse(temp, scale);
686                         for (int s = 0; s < slices; s++) {
687                             a[s][r][c] = temp[s];
688                         }
689                     }
690                 }
691             }
692         }
693     }
694 
ddxt3da_sub(int isgn, double[] a, boolean scale)695     private void ddxt3da_sub(int isgn, double[] a, boolean scale) {
696         int idx0, idx1, idx2;
697 
698         if (isgn == -1) {
699             for (int s = 0; s < slices; s++) {
700                 idx0 = s * sliceStride;
701                 for (int r = 0; r < rows; r++) {
702                     dctColumns.forward(a, idx0 + r * rowStride, scale);
703                 }
704                 if (columns > 2) {
705                     for (int c = 0; c < columns; c += 4) {
706                         for (int r = 0; r < rows; r++) {
707                             idx1 = idx0 + r * rowStride + c;
708                             idx2 = rows + r;
709                             t[r] = a[idx1];
710                             t[idx2] = a[idx1 + 1];
711                             t[idx2 + rows] = a[idx1 + 2];
712                             t[idx2 + 2 * rows] = a[idx1 + 3];
713                         }
714                         dctRows.forward(t, 0, scale);
715                         dctRows.forward(t, rows, scale);
716                         dctRows.forward(t, 2 * rows, scale);
717                         dctRows.forward(t, 3 * rows, scale);
718                         for (int r = 0; r < rows; r++) {
719                             idx1 = idx0 + r * rowStride + c;
720                             idx2 = rows + r;
721                             a[idx1] = t[r];
722                             a[idx1 + 1] = t[idx2];
723                             a[idx1 + 2] = t[idx2 + rows];
724                             a[idx1 + 3] = t[idx2 + 2 * rows];
725                         }
726                     }
727                 } else if (columns == 2) {
728                     for (int r = 0; r < rows; r++) {
729                         idx1 = idx0 + r * rowStride;
730                         t[r] = a[idx1];
731                         t[rows + r] = a[idx1 + 1];
732                     }
733                     dctRows.forward(t, 0, scale);
734                     dctRows.forward(t, rows, scale);
735                     for (int r = 0; r < rows; r++) {
736                         idx1 = idx0 + r * rowStride;
737                         a[idx1] = t[r];
738                         a[idx1 + 1] = t[rows + r];
739                     }
740                 }
741             }
742         } else {
743             for (int s = 0; s < slices; s++) {
744                 idx0 = s * sliceStride;
745                 for (int r = 0; r < rows; r++) {
746                     dctColumns.inverse(a, idx0 + r * rowStride, scale);
747                 }
748                 if (columns > 2) {
749                     for (int c = 0; c < columns; c += 4) {
750                         for (int r = 0; r < rows; r++) {
751                             idx1 = idx0 + r * rowStride + c;
752                             idx2 = rows + r;
753                             t[r] = a[idx1];
754                             t[idx2] = a[idx1 + 1];
755                             t[idx2 + rows] = a[idx1 + 2];
756                             t[idx2 + 2 * rows] = a[idx1 + 3];
757                         }
758                         dctRows.inverse(t, 0, scale);
759                         dctRows.inverse(t, rows, scale);
760                         dctRows.inverse(t, 2 * rows, scale);
761                         dctRows.inverse(t, 3 * rows, scale);
762                         for (int r = 0; r < rows; r++) {
763                             idx1 = idx0 + r * rowStride + c;
764                             idx2 = rows + r;
765                             a[idx1] = t[r];
766                             a[idx1 + 1] = t[idx2];
767                             a[idx1 + 2] = t[idx2 + rows];
768                             a[idx1 + 3] = t[idx2 + 2 * rows];
769                         }
770                     }
771                 } else if (columns == 2) {
772                     for (int r = 0; r < rows; r++) {
773                         idx1 = idx0 + r * rowStride;
774                         t[r] = a[idx1];
775                         t[rows + r] = a[idx1 + 1];
776                     }
777                     dctRows.inverse(t, 0, scale);
778                     dctRows.inverse(t, rows, scale);
779                     for (int r = 0; r < rows; r++) {
780                         idx1 = idx0 + r * rowStride;
781                         a[idx1] = t[r];
782                         a[idx1 + 1] = t[rows + r];
783                     }
784                 }
785             }
786         }
787     }
788 
ddxt3da_sub(int isgn, double[][][] a, boolean scale)789     private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) {
790         int idx2;
791 
792         if (isgn == -1) {
793             for (int s = 0; s < slices; s++) {
794                 for (int r = 0; r < rows; r++) {
795                     dctColumns.forward(a[s][r], scale);
796                 }
797                 if (columns > 2) {
798                     for (int c = 0; c < columns; c += 4) {
799                         for (int r = 0; r < rows; r++) {
800                             idx2 = rows + r;
801                             t[r] = a[s][r][c];
802                             t[idx2] = a[s][r][c + 1];
803                             t[idx2 + rows] = a[s][r][c + 2];
804                             t[idx2 + 2 * rows] = a[s][r][c + 3];
805                         }
806                         dctRows.forward(t, 0, scale);
807                         dctRows.forward(t, rows, scale);
808                         dctRows.forward(t, 2 * rows, scale);
809                         dctRows.forward(t, 3 * rows, scale);
810                         for (int r = 0; r < rows; r++) {
811                             idx2 = rows + r;
812                             a[s][r][c] = t[r];
813                             a[s][r][c + 1] = t[idx2];
814                             a[s][r][c + 2] = t[idx2 + rows];
815                             a[s][r][c + 3] = t[idx2 + 2 * rows];
816                         }
817                     }
818                 } else if (columns == 2) {
819                     for (int r = 0; r < rows; r++) {
820                         t[r] = a[s][r][0];
821                         t[rows + r] = a[s][r][1];
822                     }
823                     dctRows.forward(t, 0, scale);
824                     dctRows.forward(t, rows, scale);
825                     for (int r = 0; r < rows; r++) {
826                         a[s][r][0] = t[r];
827                         a[s][r][1] = t[rows + r];
828                     }
829                 }
830             }
831         } else {
832             for (int s = 0; s < slices; s++) {
833                 for (int r = 0; r < rows; r++) {
834                     dctColumns.inverse(a[s][r], scale);
835                 }
836                 if (columns > 2) {
837                     for (int c = 0; c < columns; c += 4) {
838                         for (int r = 0; r < rows; r++) {
839                             idx2 = rows + r;
840                             t[r] = a[s][r][c];
841                             t[idx2] = a[s][r][c + 1];
842                             t[idx2 + rows] = a[s][r][c + 2];
843                             t[idx2 + 2 * rows] = a[s][r][c + 3];
844                         }
845                         dctRows.inverse(t, 0, scale);
846                         dctRows.inverse(t, rows, scale);
847                         dctRows.inverse(t, 2 * rows, scale);
848                         dctRows.inverse(t, 3 * rows, scale);
849                         for (int r = 0; r < rows; r++) {
850                             idx2 = rows + r;
851                             a[s][r][c] = t[r];
852                             a[s][r][c + 1] = t[idx2];
853                             a[s][r][c + 2] = t[idx2 + rows];
854                             a[s][r][c + 3] = t[idx2 + 2 * rows];
855                         }
856                     }
857                 } else if (columns == 2) {
858                     for (int r = 0; r < rows; r++) {
859                         t[r] = a[s][r][0];
860                         t[rows + r] = a[s][r][1];
861                     }
862                     dctRows.inverse(t, 0, scale);
863                     dctRows.inverse(t, rows, scale);
864                     for (int r = 0; r < rows; r++) {
865                         a[s][r][0] = t[r];
866                         a[s][r][1] = t[rows + r];
867                     }
868                 }
869             }
870         }
871     }
872 
ddxt3db_sub(int isgn, double[] a, boolean scale)873     private void ddxt3db_sub(int isgn, double[] a, boolean scale) {
874         int idx0, idx1, idx2;
875 
876         if (isgn == -1) {
877             if (columns > 2) {
878                 for (int r = 0; r < rows; r++) {
879                     idx0 = r * rowStride;
880                     for (int c = 0; c < columns; c += 4) {
881                         for (int s = 0; s < slices; s++) {
882                             idx1 = s * sliceStride + idx0 + c;
883                             idx2 = slices + s;
884                             t[s] = a[idx1];
885                             t[idx2] = a[idx1 + 1];
886                             t[idx2 + slices] = a[idx1 + 2];
887                             t[idx2 + 2 * slices] = a[idx1 + 3];
888                         }
889                         dctSlices.forward(t, 0, scale);
890                         dctSlices.forward(t, slices, scale);
891                         dctSlices.forward(t, 2 * slices, scale);
892                         dctSlices.forward(t, 3 * slices, scale);
893                         for (int s = 0; s < slices; s++) {
894                             idx1 = s * sliceStride + idx0 + c;
895                             idx2 = slices + s;
896                             a[idx1] = t[s];
897                             a[idx1 + 1] = t[idx2];
898                             a[idx1 + 2] = t[idx2 + slices];
899                             a[idx1 + 3] = t[idx2 + 2 * slices];
900                         }
901                     }
902                 }
903             } else if (columns == 2) {
904                 for (int r = 0; r < rows; r++) {
905                     idx0 = r * rowStride;
906                     for (int s = 0; s < slices; s++) {
907                         idx1 = s * sliceStride + idx0;
908                         t[s] = a[idx1];
909                         t[slices + s] = a[idx1 + 1];
910                     }
911                     dctSlices.forward(t, 0, scale);
912                     dctSlices.forward(t, slices, scale);
913                     for (int s = 0; s < slices; s++) {
914                         idx1 = s * sliceStride + idx0;
915                         a[idx1] = t[s];
916                         a[idx1 + 1] = t[slices + s];
917                     }
918                 }
919             }
920         } else {
921             if (columns > 2) {
922                 for (int r = 0; r < rows; r++) {
923                     idx0 = r * rowStride;
924                     for (int c = 0; c < columns; c += 4) {
925                         for (int s = 0; s < slices; s++) {
926                             idx1 = s * sliceStride + idx0 + c;
927                             idx2 = slices + s;
928                             t[s] = a[idx1];
929                             t[idx2] = a[idx1 + 1];
930                             t[idx2 + slices] = a[idx1 + 2];
931                             t[idx2 + 2 * slices] = a[idx1 + 3];
932                         }
933                         dctSlices.inverse(t, 0, scale);
934                         dctSlices.inverse(t, slices, scale);
935                         dctSlices.inverse(t, 2 * slices, scale);
936                         dctSlices.inverse(t, 3 * slices, scale);
937 
938                         for (int s = 0; s < slices; s++) {
939                             idx1 = s * sliceStride + idx0 + c;
940                             idx2 = slices + s;
941                             a[idx1] = t[s];
942                             a[idx1 + 1] = t[idx2];
943                             a[idx1 + 2] = t[idx2 + slices];
944                             a[idx1 + 3] = t[idx2 + 2 * slices];
945                         }
946                     }
947                 }
948             } else if (columns == 2) {
949                 for (int r = 0; r < rows; r++) {
950                     idx0 = r * rowStride;
951                     for (int s = 0; s < slices; s++) {
952                         idx1 = s * sliceStride + idx0;
953                         t[s] = a[idx1];
954                         t[slices + s] = a[idx1 + 1];
955                     }
956                     dctSlices.inverse(t, 0, scale);
957                     dctSlices.inverse(t, slices, scale);
958                     for (int s = 0; s < slices; s++) {
959                         idx1 = s * sliceStride + idx0;
960                         a[idx1] = t[s];
961                         a[idx1 + 1] = t[slices + s];
962                     }
963                 }
964             }
965         }
966     }
967 
ddxt3db_sub(int isgn, double[][][] a, boolean scale)968     private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) {
969         int idx2;
970 
971         if (isgn == -1) {
972             if (columns > 2) {
973                 for (int r = 0; r < rows; r++) {
974                     for (int c = 0; c < columns; c += 4) {
975                         for (int s = 0; s < slices; s++) {
976                             idx2 = slices + s;
977                             t[s] = a[s][r][c];
978                             t[idx2] = a[s][r][c + 1];
979                             t[idx2 + slices] = a[s][r][c + 2];
980                             t[idx2 + 2 * slices] = a[s][r][c + 3];
981                         }
982                         dctSlices.forward(t, 0, scale);
983                         dctSlices.forward(t, slices, scale);
984                         dctSlices.forward(t, 2 * slices, scale);
985                         dctSlices.forward(t, 3 * slices, scale);
986                         for (int s = 0; s < slices; s++) {
987                             idx2 = slices + s;
988                             a[s][r][c] = t[s];
989                             a[s][r][c + 1] = t[idx2];
990                             a[s][r][c + 2] = t[idx2 + slices];
991                             a[s][r][c + 3] = t[idx2 + 2 * slices];
992                         }
993                     }
994                 }
995             } else if (columns == 2) {
996                 for (int r = 0; r < rows; r++) {
997                     for (int s = 0; s < slices; s++) {
998                         t[s] = a[s][r][0];
999                         t[slices + s] = a[s][r][1];
1000                     }
1001                     dctSlices.forward(t, 0, scale);
1002                     dctSlices.forward(t, slices, scale);
1003                     for (int s = 0; s < slices; s++) {
1004                         a[s][r][0] = t[s];
1005                         a[s][r][1] = t[slices + s];
1006                     }
1007                 }
1008             }
1009         } else {
1010             if (columns > 2) {
1011                 for (int r = 0; r < rows; r++) {
1012                     for (int c = 0; c < columns; c += 4) {
1013                         for (int s = 0; s < slices; s++) {
1014                             idx2 = slices + s;
1015                             t[s] = a[s][r][c];
1016                             t[idx2] = a[s][r][c + 1];
1017                             t[idx2 + slices] = a[s][r][c + 2];
1018                             t[idx2 + 2 * slices] = a[s][r][c + 3];
1019                         }
1020                         dctSlices.inverse(t, 0, scale);
1021                         dctSlices.inverse(t, slices, scale);
1022                         dctSlices.inverse(t, 2 * slices, scale);
1023                         dctSlices.inverse(t, 3 * slices, scale);
1024 
1025                         for (int s = 0; s < slices; s++) {
1026                             idx2 = slices + s;
1027                             a[s][r][c] = t[s];
1028                             a[s][r][c + 1] = t[idx2];
1029                             a[s][r][c + 2] = t[idx2 + slices];
1030                             a[s][r][c + 3] = t[idx2 + 2 * slices];
1031                         }
1032                     }
1033                 }
1034             } else if (columns == 2) {
1035                 for (int r = 0; r < rows; r++) {
1036                     for (int s = 0; s < slices; s++) {
1037                         t[s] = a[s][r][0];
1038                         t[slices + s] = a[s][r][1];
1039                     }
1040                     dctSlices.inverse(t, 0, scale);
1041                     dctSlices.inverse(t, slices, scale);
1042                     for (int s = 0; s < slices; s++) {
1043                         a[s][r][0] = t[s];
1044                         a[s][r][1] = t[slices + s];
1045                     }
1046                 }
1047             }
1048         }
1049     }
1050 
ddxt3da_subth(final int isgn, final double[] a, final boolean scale)1051     private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) {
1052         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1053         int nt = 4 * rows;
1054         if (columns == 2) {
1055             nt >>= 1;
1056         }
1057         Future<?>[] futures = new Future[nthreads];
1058 
1059         for (int i = 0; i < nthreads; i++) {
1060             final int n0 = i;
1061             final int startt = nt * i;
1062             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1063 
1064                 public void run() {
1065                     int idx0, idx1, idx2;
1066                     if (isgn == -1) {
1067                         for (int s = n0; s < slices; s += nthreads) {
1068                             idx0 = s * sliceStride;
1069                             for (int r = 0; r < rows; r++) {
1070                                 dctColumns.forward(a, idx0 + r * rowStride, scale);
1071                             }
1072                             if (columns > 2) {
1073                                 for (int c = 0; c < columns; c += 4) {
1074                                     for (int r = 0; r < rows; r++) {
1075                                         idx1 = idx0 + r * rowStride + c;
1076                                         idx2 = startt + rows + r;
1077                                         t[startt + r] = a[idx1];
1078                                         t[idx2] = a[idx1 + 1];
1079                                         t[idx2 + rows] = a[idx1 + 2];
1080                                         t[idx2 + 2 * rows] = a[idx1 + 3];
1081                                     }
1082                                     dctRows.forward(t, startt, scale);
1083                                     dctRows.forward(t, startt + rows, scale);
1084                                     dctRows.forward(t, startt + 2 * rows, scale);
1085                                     dctRows.forward(t, startt + 3 * rows, scale);
1086                                     for (int r = 0; r < rows; r++) {
1087                                         idx1 = idx0 + r * rowStride + c;
1088                                         idx2 = startt + rows + r;
1089                                         a[idx1] = t[startt + r];
1090                                         a[idx1 + 1] = t[idx2];
1091                                         a[idx1 + 2] = t[idx2 + rows];
1092                                         a[idx1 + 3] = t[idx2 + 2 * rows];
1093                                     }
1094                                 }
1095                             } else if (columns == 2) {
1096                                 for (int r = 0; r < rows; r++) {
1097                                     idx1 = idx0 + r * rowStride;
1098                                     t[startt + r] = a[idx1];
1099                                     t[startt + rows + r] = a[idx1 + 1];
1100                                 }
1101                                 dctRows.forward(t, startt, scale);
1102                                 dctRows.forward(t, startt + rows, scale);
1103                                 for (int r = 0; r < rows; r++) {
1104                                     idx1 = idx0 + r * rowStride;
1105                                     a[idx1] = t[startt + r];
1106                                     a[idx1 + 1] = t[startt + rows + r];
1107                                 }
1108                             }
1109                         }
1110                     } else {
1111                         for (int s = n0; s < slices; s += nthreads) {
1112                             idx0 = s * sliceStride;
1113                             for (int r = 0; r < rows; r++) {
1114                                 dctColumns.inverse(a, idx0 + r * rowStride, scale);
1115                             }
1116                             if (columns > 2) {
1117                                 for (int c = 0; c < columns; c += 4) {
1118                                     for (int j = 0; j < rows; j++) {
1119                                         idx1 = idx0 + j * rowStride + c;
1120                                         idx2 = startt + rows + j;
1121                                         t[startt + j] = a[idx1];
1122                                         t[idx2] = a[idx1 + 1];
1123                                         t[idx2 + rows] = a[idx1 + 2];
1124                                         t[idx2 + 2 * rows] = a[idx1 + 3];
1125                                     }
1126                                     dctRows.inverse(t, startt, scale);
1127                                     dctRows.inverse(t, startt + rows, scale);
1128                                     dctRows.inverse(t, startt + 2 * rows, scale);
1129                                     dctRows.inverse(t, startt + 3 * rows, scale);
1130                                     for (int j = 0; j < rows; j++) {
1131                                         idx1 = idx0 + j * rowStride + c;
1132                                         idx2 = startt + rows + j;
1133                                         a[idx1] = t[startt + j];
1134                                         a[idx1 + 1] = t[idx2];
1135                                         a[idx1 + 2] = t[idx2 + rows];
1136                                         a[idx1 + 3] = t[idx2 + 2 * rows];
1137                                     }
1138                                 }
1139                             } else if (columns == 2) {
1140                                 for (int r = 0; r < rows; r++) {
1141                                     idx1 = idx0 + r * rowStride;
1142                                     t[startt + r] = a[idx1];
1143                                     t[startt + rows + r] = a[idx1 + 1];
1144                                 }
1145                                 dctRows.inverse(t, startt, scale);
1146                                 dctRows.inverse(t, startt + rows, scale);
1147                                 for (int r = 0; r < rows; r++) {
1148                                     idx1 = idx0 + r * rowStride;
1149                                     a[idx1] = t[startt + r];
1150                                     a[idx1 + 1] = t[startt + rows + r];
1151                                 }
1152                             }
1153                         }
1154                     }
1155                 }
1156             });
1157         }
1158         ConcurrencyUtils.waitForCompletion(futures);
1159     }
1160 
ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale)1161     private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) {
1162         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1163         int nt = 4 * rows;
1164         if (columns == 2) {
1165             nt >>= 1;
1166         }
1167         Future<?>[] futures = new Future[nthreads];
1168 
1169         for (int i = 0; i < nthreads; i++) {
1170             final int n0 = i;
1171             final int startt = nt * i;
1172             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1173 
1174                 public void run() {
1175                     int idx2;
1176                     if (isgn == -1) {
1177                         for (int s = n0; s < slices; s += nthreads) {
1178                             for (int r = 0; r < rows; r++) {
1179                                 dctColumns.forward(a[s][r], scale);
1180                             }
1181                             if (columns > 2) {
1182                                 for (int c = 0; c < columns; c += 4) {
1183                                     for (int r = 0; r < rows; r++) {
1184                                         idx2 = startt + rows + r;
1185                                         t[startt + r] = a[s][r][c];
1186                                         t[idx2] = a[s][r][c + 1];
1187                                         t[idx2 + rows] = a[s][r][c + 2];
1188                                         t[idx2 + 2 * rows] = a[s][r][c + 3];
1189                                     }
1190                                     dctRows.forward(t, startt, scale);
1191                                     dctRows.forward(t, startt + rows, scale);
1192                                     dctRows.forward(t, startt + 2 * rows, scale);
1193                                     dctRows.forward(t, startt + 3 * rows, scale);
1194                                     for (int r = 0; r < rows; r++) {
1195                                         idx2 = startt + rows + r;
1196                                         a[s][r][c] = t[startt + r];
1197                                         a[s][r][c + 1] = t[idx2];
1198                                         a[s][r][c + 2] = t[idx2 + rows];
1199                                         a[s][r][c + 3] = t[idx2 + 2 * rows];
1200                                     }
1201                                 }
1202                             } else if (columns == 2) {
1203                                 for (int r = 0; r < rows; r++) {
1204                                     t[startt + r] = a[s][r][0];
1205                                     t[startt + rows + r] = a[s][r][1];
1206                                 }
1207                                 dctRows.forward(t, startt, scale);
1208                                 dctRows.forward(t, startt + rows, scale);
1209                                 for (int r = 0; r < rows; r++) {
1210                                     a[s][r][0] = t[startt + r];
1211                                     a[s][r][1] = t[startt + rows + r];
1212                                 }
1213                             }
1214                         }
1215                     } else {
1216                         for (int s = n0; s < slices; s += nthreads) {
1217                             for (int r = 0; r < rows; r++) {
1218                                 dctColumns.inverse(a[s][r], scale);
1219                             }
1220                             if (columns > 2) {
1221                                 for (int c = 0; c < columns; c += 4) {
1222                                     for (int r = 0; r < rows; r++) {
1223                                         idx2 = startt + rows + r;
1224                                         t[startt + r] = a[s][r][c];
1225                                         t[idx2] = a[s][r][c + 1];
1226                                         t[idx2 + rows] = a[s][r][c + 2];
1227                                         t[idx2 + 2 * rows] = a[s][r][c + 3];
1228                                     }
1229                                     dctRows.inverse(t, startt, scale);
1230                                     dctRows.inverse(t, startt + rows, scale);
1231                                     dctRows.inverse(t, startt + 2 * rows, scale);
1232                                     dctRows.inverse(t, startt + 3 * rows, scale);
1233                                     for (int r = 0; r < rows; r++) {
1234                                         idx2 = startt + rows + r;
1235                                         a[s][r][c] = t[startt + r];
1236                                         a[s][r][c + 1] = t[idx2];
1237                                         a[s][r][c + 2] = t[idx2 + rows];
1238                                         a[s][r][c + 3] = t[idx2 + 2 * rows];
1239                                     }
1240                                 }
1241                             } else if (columns == 2) {
1242                                 for (int r = 0; r < rows; r++) {
1243                                     t[startt + r] = a[s][r][0];
1244                                     t[startt + rows + r] = a[s][r][1];
1245                                 }
1246                                 dctRows.inverse(t, startt, scale);
1247                                 dctRows.inverse(t, startt + rows, scale);
1248                                 for (int r = 0; r < rows; r++) {
1249                                     a[s][r][0] = t[startt + r];
1250                                     a[s][r][1] = t[startt + rows + r];
1251                                 }
1252                             }
1253                         }
1254                     }
1255                 }
1256             });
1257         }
1258         ConcurrencyUtils.waitForCompletion(futures);
1259     }
1260 
ddxt3db_subth(final int isgn, final double[] a, final boolean scale)1261     private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) {
1262         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1263         int nt = 4 * slices;
1264         if (columns == 2) {
1265             nt >>= 1;
1266         }
1267         Future<?>[] futures = new Future[nthreads];
1268         for (int i = 0; i < nthreads; i++) {
1269             final int n0 = i;
1270             final int startt = nt * i;
1271             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1272 
1273                 public void run() {
1274                     int idx0, idx1, idx2;
1275                     if (isgn == -1) {
1276                         if (columns > 2) {
1277                             for (int r = n0; r < rows; r += nthreads) {
1278                                 idx0 = r * rowStride;
1279                                 for (int c = 0; c < columns; c += 4) {
1280                                     for (int s = 0; s < slices; s++) {
1281                                         idx1 = s * sliceStride + idx0 + c;
1282                                         idx2 = startt + slices + s;
1283                                         t[startt + s] = a[idx1];
1284                                         t[idx2] = a[idx1 + 1];
1285                                         t[idx2 + slices] = a[idx1 + 2];
1286                                         t[idx2 + 2 * slices] = a[idx1 + 3];
1287                                     }
1288                                     dctSlices.forward(t, startt, scale);
1289                                     dctSlices.forward(t, startt + slices, scale);
1290                                     dctSlices.forward(t, startt + 2 * slices, scale);
1291                                     dctSlices.forward(t, startt + 3 * slices, scale);
1292                                     for (int s = 0; s < slices; s++) {
1293                                         idx1 = s * sliceStride + idx0 + c;
1294                                         idx2 = startt + slices + s;
1295                                         a[idx1] = t[startt + s];
1296                                         a[idx1 + 1] = t[idx2];
1297                                         a[idx1 + 2] = t[idx2 + slices];
1298                                         a[idx1 + 3] = t[idx2 + 2 * slices];
1299                                     }
1300                                 }
1301                             }
1302                         } else if (columns == 2) {
1303                             for (int r = n0; r < rows; r += nthreads) {
1304                                 idx0 = r * rowStride;
1305                                 for (int s = 0; s < slices; s++) {
1306                                     idx1 = s * sliceStride + idx0;
1307                                     t[startt + s] = a[idx1];
1308                                     t[startt + slices + s] = a[idx1 + 1];
1309                                 }
1310                                 dctSlices.forward(t, startt, scale);
1311                                 dctSlices.forward(t, startt + slices, scale);
1312                                 for (int s = 0; s < slices; s++) {
1313                                     idx1 = s * sliceStride + idx0;
1314                                     a[idx1] = t[startt + s];
1315                                     a[idx1 + 1] = t[startt + slices + s];
1316                                 }
1317                             }
1318                         }
1319                     } else {
1320                         if (columns > 2) {
1321                             for (int r = n0; r < rows; r += nthreads) {
1322                                 idx0 = r * rowStride;
1323                                 for (int c = 0; c < columns; c += 4) {
1324                                     for (int s = 0; s < slices; s++) {
1325                                         idx1 = s * sliceStride + idx0 + c;
1326                                         idx2 = startt + slices + s;
1327                                         t[startt + s] = a[idx1];
1328                                         t[idx2] = a[idx1 + 1];
1329                                         t[idx2 + slices] = a[idx1 + 2];
1330                                         t[idx2 + 2 * slices] = a[idx1 + 3];
1331                                     }
1332                                     dctSlices.inverse(t, startt, scale);
1333                                     dctSlices.inverse(t, startt + slices, scale);
1334                                     dctSlices.inverse(t, startt + 2 * slices, scale);
1335                                     dctSlices.inverse(t, startt + 3 * slices, scale);
1336                                     for (int s = 0; s < slices; s++) {
1337                                         idx1 = s * sliceStride + idx0 + c;
1338                                         idx2 = startt + slices + s;
1339                                         a[idx1] = t[startt + s];
1340                                         a[idx1 + 1] = t[idx2];
1341                                         a[idx1 + 2] = t[idx2 + slices];
1342                                         a[idx1 + 3] = t[idx2 + 2 * slices];
1343                                     }
1344                                 }
1345                             }
1346                         } else if (columns == 2) {
1347                             for (int r = n0; r < rows; r += nthreads) {
1348                                 idx0 = r * rowStride;
1349                                 for (int s = 0; s < slices; s++) {
1350                                     idx1 = s * sliceStride + idx0;
1351                                     t[startt + s] = a[idx1];
1352                                     t[startt + slices + s] = a[idx1 + 1];
1353                                 }
1354                                 dctSlices.inverse(t, startt, scale);
1355                                 dctSlices.inverse(t, startt + slices, scale);
1356 
1357                                 for (int s = 0; s < slices; s++) {
1358                                     idx1 = s * sliceStride + idx0;
1359                                     a[idx1] = t[startt + s];
1360                                     a[idx1 + 1] = t[startt + slices + s];
1361                                 }
1362                             }
1363                         }
1364                     }
1365                 }
1366             });
1367         }
1368         ConcurrencyUtils.waitForCompletion(futures);
1369     }
1370 
ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale)1371     private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) {
1372         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1373         int nt = 4 * slices;
1374         if (columns == 2) {
1375             nt >>= 1;
1376         }
1377         Future<?>[] futures = new Future[nthreads];
1378 
1379         for (int i = 0; i < nthreads; i++) {
1380             final int n0 = i;
1381             final int startt = nt * i;
1382             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1383 
1384                 public void run() {
1385                     int idx2;
1386                     if (isgn == -1) {
1387                         if (columns > 2) {
1388                             for (int r = n0; r < rows; r += nthreads) {
1389                                 for (int c = 0; c < columns; c += 4) {
1390                                     for (int s = 0; s < slices; s++) {
1391                                         idx2 = startt + slices + s;
1392                                         t[startt + s] = a[s][r][c];
1393                                         t[idx2] = a[s][r][c + 1];
1394                                         t[idx2 + slices] = a[s][r][c + 2];
1395                                         t[idx2 + 2 * slices] = a[s][r][c + 3];
1396                                     }
1397                                     dctSlices.forward(t, startt, scale);
1398                                     dctSlices.forward(t, startt + slices, scale);
1399                                     dctSlices.forward(t, startt + 2 * slices, scale);
1400                                     dctSlices.forward(t, startt + 3 * slices, scale);
1401                                     for (int s = 0; s < slices; s++) {
1402                                         idx2 = startt + slices + s;
1403                                         a[s][r][c] = t[startt + s];
1404                                         a[s][r][c + 1] = t[idx2];
1405                                         a[s][r][c + 2] = t[idx2 + slices];
1406                                         a[s][r][c + 3] = t[idx2 + 2 * slices];
1407                                     }
1408                                 }
1409                             }
1410                         } else if (columns == 2) {
1411                             for (int r = n0; r < rows; r += nthreads) {
1412                                 for (int s = 0; s < slices; s++) {
1413                                     t[startt + s] = a[s][r][0];
1414                                     t[startt + slices + s] = a[s][r][1];
1415                                 }
1416                                 dctSlices.forward(t, startt, scale);
1417                                 dctSlices.forward(t, startt + slices, scale);
1418                                 for (int s = 0; s < slices; s++) {
1419                                     a[s][r][0] = t[startt + s];
1420                                     a[s][r][1] = t[startt + slices + s];
1421                                 }
1422                             }
1423                         }
1424                     } else {
1425                         if (columns > 2) {
1426                             for (int r = n0; r < rows; r += nthreads) {
1427                                 for (int c = 0; c < columns; c += 4) {
1428                                     for (int s = 0; s < slices; s++) {
1429                                         idx2 = startt + slices + s;
1430                                         t[startt + s] = a[s][r][c];
1431                                         t[idx2] = a[s][r][c + 1];
1432                                         t[idx2 + slices] = a[s][r][c + 2];
1433                                         t[idx2 + 2 * slices] = a[s][r][c + 3];
1434                                     }
1435                                     dctSlices.inverse(t, startt, scale);
1436                                     dctSlices.inverse(t, startt + slices, scale);
1437                                     dctSlices.inverse(t, startt + 2 * slices, scale);
1438                                     dctSlices.inverse(t, startt + 3 * slices, scale);
1439                                     for (int s = 0; s < slices; s++) {
1440                                         idx2 = startt + slices + s;
1441                                         a[s][r][c] = t[startt + s];
1442                                         a[s][r][c + 1] = t[idx2];
1443                                         a[s][r][c + 2] = t[idx2 + slices];
1444                                         a[s][r][c + 3] = t[idx2 + 2 * slices];
1445                                     }
1446                                 }
1447                             }
1448                         } else if (columns == 2) {
1449                             for (int r = n0; r < rows; r += nthreads) {
1450                                 for (int s = 0; s < slices; s++) {
1451                                     t[startt + s] = a[s][r][0];
1452                                     t[startt + slices + s] = a[s][r][1];
1453                                 }
1454                                 dctSlices.inverse(t, startt, scale);
1455                                 dctSlices.inverse(t, startt + slices, scale);
1456 
1457                                 for (int s = 0; s < slices; s++) {
1458                                     a[s][r][0] = t[startt + s];
1459                                     a[s][r][1] = t[startt + slices + s];
1460                                 }
1461                             }
1462                         }
1463                     }
1464                 }
1465             });
1466         }
1467         ConcurrencyUtils.waitForCompletion(futures);
1468     }
1469 }
1470