1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Mozilla Public License Version
5  * 1.1 (the "License"); you may not use this file except in compliance with
6  * the License. You may obtain a copy of the License at
7  * http://www.mozilla.org/MPL/
8  *
9  * Software distributed under the License is distributed on an "AS IS" basis,
10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11  * for the specific language governing rights and limitations under the
12  * License.
13  *
14  * The Original Code is JTransforms.
15  *
16  * The Initial Developer of the Original Code is
17  * Piotr Wendykier, Emory University.
18  * Portions created by the Initial Developer are Copyright (C) 2007-2009
19  * the Initial Developer. All Rights Reserved.
20  *
21  * Alternatively, the contents of this file may be used under the terms of
22  * either the GNU General Public License Version 2 or later (the "GPL"), or
23  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
24  * in which case the provisions of the GPL or the LGPL are applicable instead
25  * of those above. If you wish to allow use of your version of this file only
26  * under the terms of either the GPL or the LGPL, and not to allow others to
27  * use your version of this file under the terms of the MPL, indicate your
28  * decision by deleting the provisions above and replace them with the notice
29  * and other provisions required by the GPL or the LGPL. If you do not delete
30  * the provisions above, a recipient may use your version of this file under
31  * the terms of any one of the MPL, the GPL or the LGPL.
32  *
33  * ***** END LICENSE BLOCK ***** */
34 
35 package edu.emory.mathcs.jtransforms.dst;
36 
37 import java.util.concurrent.Future;
38 
39 import edu.emory.mathcs.utils.ConcurrencyUtils;
40 
41 /**
42  * Computes 3D Discrete Sine Transform (DST) of double precision data. The sizes
43  * of all three dimensions can be arbitrary numbers. This is a parallel
44  * implementation optimized for SMP systems.<br>
45  * <br>
46  * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
47  * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
48  *
49  * @author Piotr Wendykier (piotr.wendykier@gmail.com)
50  *
51  */
52 public class DoubleDST_3D {
53 
54     private int slices;
55 
56     private int rows;
57 
58     private int columns;
59 
60     private int sliceStride;
61 
62     private int rowStride;
63 
64     private double[] t;
65 
66     private DoubleDST_1D dstSlices, dstRows, dstColumns;
67 
68     private int oldNthreads;
69 
70     private int nt;
71 
72     private boolean isPowerOfTwo = false;
73 
74     private boolean useThreads = false;
75 
76     /**
77      * Creates new instance of DoubleDST_3D.
78      *
79      * @param slices
80      *            number of slices
81      * @param rows
82      *            number of rows
83      * @param columns
84      *            number of columns
85      */
DoubleDST_3D(int slices, int rows, int columns)86     public DoubleDST_3D(int slices, int rows, int columns) {
87         if (slices <= 1 || rows <= 1 || columns <= 1) {
88             throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
89         }
90         this.slices = slices;
91         this.rows = rows;
92         this.columns = columns;
93         this.sliceStride = rows * columns;
94         this.rowStride = columns;
95         if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
96             this.useThreads = true;
97         }
98         if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
99             isPowerOfTwo = true;
100             oldNthreads = ConcurrencyUtils.getNumberOfThreads();
101             nt = slices;
102             if (nt < rows) {
103                 nt = rows;
104             }
105             nt *= 4;
106             if (oldNthreads > 1) {
107                 nt *= oldNthreads;
108             }
109             if (columns == 2) {
110                 nt >>= 1;
111             }
112             t = new double[nt];
113         }
114         dstSlices = new DoubleDST_1D(slices);
115         if (slices == rows) {
116             dstRows = dstSlices;
117         } else {
118             dstRows = new DoubleDST_1D(rows);
119         }
120         if (slices == columns) {
121             dstColumns = dstSlices;
122         } else if (rows == columns) {
123             dstColumns = dstRows;
124         } else {
125             dstColumns = new DoubleDST_1D(columns);
126         }
127     }
128 
129     /**
130      * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code>
131      * . The data is stored in 1D array addressed in slice-major, then
132      * row-major, then column-major, in order of significance, i.e. the element
133      * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride
134      * + j*rowStride + k], where sliceStride = rows * columns and rowStride =
135      * columns.
136      *
137      * @param a
138      *            data to transform
139      * @param scale
140      *            if true then scaling is performed
141      */
forward(final double[] a, final boolean scale)142     public void forward(final double[] a, final boolean scale) {
143         int nthreads = ConcurrencyUtils.getNumberOfThreads();
144         if (isPowerOfTwo) {
145             if (nthreads != oldNthreads) {
146                 nt = slices;
147                 if (nt < rows) {
148                     nt = rows;
149                 }
150                 nt *= 4;
151                 if (nthreads > 1) {
152                     nt *= nthreads;
153                 }
154                 if (columns == 2) {
155                     nt >>= 1;
156                 }
157                 t = new double[nt];
158                 oldNthreads = nthreads;
159             }
160             if ((nthreads > 1) && useThreads) {
161                 ddxt3da_subth(-1, a, scale);
162                 ddxt3db_subth(-1, a, scale);
163             } else {
164                 ddxt3da_sub(-1, a, scale);
165                 ddxt3db_sub(-1, a, scale);
166             }
167         } else {
168             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
169                 Future<?>[] futures = new Future[nthreads];
170                 int p = slices / nthreads;
171                 for (int l = 0; l < nthreads; l++) {
172                     final int firstSlice = l * p;
173                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
174                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
175                         public void run() {
176                             for (int s = firstSlice; s < lastSlice; s++) {
177                                 int idx1 = s * sliceStride;
178                                 for (int r = 0; r < rows; r++) {
179                                     dstColumns.forward(a, idx1 + r * rowStride, scale);
180                                 }
181                             }
182                         }
183                     });
184                 }
185                 ConcurrencyUtils.waitForCompletion(futures);
186 
187                 for (int l = 0; l < nthreads; l++) {
188                     final int firstSlice = l * p;
189                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
190                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
191                         public void run() {
192                             double[] temp = new double[rows];
193                             for (int s = firstSlice; s < lastSlice; s++) {
194                                 int idx1 = s * sliceStride;
195                                 for (int c = 0; c < columns; c++) {
196                                     for (int r = 0; r < rows; r++) {
197                                         int idx3 = idx1 + r * rowStride + c;
198                                         temp[r] = a[idx3];
199                                     }
200                                     dstRows.forward(temp, scale);
201                                     for (int r = 0; r < rows; r++) {
202                                         int idx3 = idx1 + r * rowStride + c;
203                                         a[idx3] = temp[r];
204                                     }
205                                 }
206                             }
207                         }
208                     });
209                 }
210                 ConcurrencyUtils.waitForCompletion(futures);
211 
212                 p = rows / nthreads;
213                 for (int l = 0; l < nthreads; l++) {
214                     final int firstRow = l * p;
215                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
216                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
217                         public void run() {
218                             double[] temp = new double[slices];
219                             for (int r = firstRow; r < lastRow; r++) {
220                                 int idx1 = r * rowStride;
221                                 for (int c = 0; c < columns; c++) {
222                                     for (int s = 0; s < slices; s++) {
223                                         int idx3 = s * sliceStride + idx1 + c;
224                                         temp[s] = a[idx3];
225                                     }
226                                     dstSlices.forward(temp, scale);
227                                     for (int s = 0; s < slices; s++) {
228                                         int idx3 = s * sliceStride + idx1 + c;
229                                         a[idx3] = temp[s];
230                                     }
231                                 }
232                             }
233                         }
234                     });
235                 }
236                 ConcurrencyUtils.waitForCompletion(futures);
237 
238             } else {
239                 for (int s = 0; s < slices; s++) {
240                     int idx1 = s * sliceStride;
241                     for (int r = 0; r < rows; r++) {
242                         dstColumns.forward(a, idx1 + r * rowStride, scale);
243                     }
244                 }
245                 double[] temp = new double[rows];
246                 for (int s = 0; s < slices; s++) {
247                     int idx1 = s * sliceStride;
248                     for (int c = 0; c < columns; c++) {
249                         for (int r = 0; r < rows; r++) {
250                             int idx3 = idx1 + r * rowStride + c;
251                             temp[r] = a[idx3];
252                         }
253                         dstRows.forward(temp, scale);
254                         for (int r = 0; r < rows; r++) {
255                             int idx3 = idx1 + r * rowStride + c;
256                             a[idx3] = temp[r];
257                         }
258                     }
259                 }
260                 temp = new double[slices];
261                 for (int r = 0; r < rows; r++) {
262                     int idx1 = r * rowStride;
263                     for (int c = 0; c < columns; c++) {
264                         for (int s = 0; s < slices; s++) {
265                             int idx3 = s * sliceStride + idx1 + c;
266                             temp[s] = a[idx3];
267                         }
268                         dstSlices.forward(temp, scale);
269                         for (int s = 0; s < slices; s++) {
270                             int idx3 = s * sliceStride + idx1 + c;
271                             a[idx3] = temp[s];
272                         }
273                     }
274                 }
275             }
276         }
277     }
278 
279     /**
280      * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code>
281      * . The data is stored in 3D array.
282      *
283      * @param a
284      *            data to transform
285      * @param scale
286      *            if true then scaling is performed
287      */
forward(final double[][][] a, final boolean scale)288     public void forward(final double[][][] a, final boolean scale) {
289         int nthreads = ConcurrencyUtils.getNumberOfThreads();
290         if (isPowerOfTwo) {
291             if (nthreads != oldNthreads) {
292                 nt = slices;
293                 if (nt < rows) {
294                     nt = rows;
295                 }
296                 nt *= 4;
297                 if (nthreads > 1) {
298                     nt *= nthreads;
299                 }
300                 if (columns == 2) {
301                     nt >>= 1;
302                 }
303                 t = new double[nt];
304                 oldNthreads = nthreads;
305             }
306             if ((nthreads > 1) && useThreads) {
307                 ddxt3da_subth(-1, a, scale);
308                 ddxt3db_subth(-1, a, scale);
309             } else {
310                 ddxt3da_sub(-1, a, scale);
311                 ddxt3db_sub(-1, a, scale);
312             }
313         } else {
314             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
315                 Future<?>[] futures = new Future[nthreads];
316                 int p = slices / nthreads;
317                 for (int l = 0; l < nthreads; l++) {
318                     final int firstSlice = l * p;
319                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
320                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
321                         public void run() {
322                             for (int s = firstSlice; s < lastSlice; s++) {
323                                 for (int r = 0; r < rows; r++) {
324                                     dstColumns.forward(a[s][r], scale);
325                                 }
326                             }
327                         }
328                     });
329                 }
330                 ConcurrencyUtils.waitForCompletion(futures);
331 
332                 for (int l = 0; l < nthreads; l++) {
333                     final int firstSlice = l * p;
334                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
335                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
336                         public void run() {
337                             double[] temp = new double[rows];
338                             for (int s = firstSlice; s < lastSlice; s++) {
339                                 for (int c = 0; c < columns; c++) {
340                                     for (int r = 0; r < rows; r++) {
341                                         temp[r] = a[s][r][c];
342                                     }
343                                     dstRows.forward(temp, scale);
344                                     for (int r = 0; r < rows; r++) {
345                                         a[s][r][c] = temp[r];
346                                     }
347                                 }
348                             }
349                         }
350                     });
351                 }
352                 ConcurrencyUtils.waitForCompletion(futures);
353 
354                 p = rows / nthreads;
355                 for (int l = 0; l < nthreads; l++) {
356                     final int firstRow = l * p;
357                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
358                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
359                         public void run() {
360                             double[] temp = new double[slices];
361                             for (int r = firstRow; r < lastRow; r++) {
362                                 for (int c = 0; c < columns; c++) {
363                                     for (int s = 0; s < slices; s++) {
364                                         temp[s] = a[s][r][c];
365                                     }
366                                     dstSlices.forward(temp, scale);
367                                     for (int s = 0; s < slices; s++) {
368                                         a[s][r][c] = temp[s];
369                                     }
370                                 }
371                             }
372                         }
373                     });
374                 }
375                 ConcurrencyUtils.waitForCompletion(futures);
376 
377             } else {
378                 for (int s = 0; s < slices; s++) {
379                     for (int r = 0; r < rows; r++) {
380                         dstColumns.forward(a[s][r], scale);
381                     }
382                 }
383                 double[] temp = new double[rows];
384                 for (int s = 0; s < slices; s++) {
385                     for (int c = 0; c < columns; c++) {
386                         for (int r = 0; r < rows; r++) {
387                             temp[r] = a[s][r][c];
388                         }
389                         dstRows.forward(temp, scale);
390                         for (int r = 0; r < rows; r++) {
391                             a[s][r][c] = temp[r];
392                         }
393                     }
394                 }
395                 temp = new double[slices];
396                 for (int r = 0; r < rows; r++) {
397                     for (int c = 0; c < columns; c++) {
398                         for (int s = 0; s < slices; s++) {
399                             temp[s] = a[s][r][c];
400                         }
401                         dstSlices.forward(temp, scale);
402                         for (int s = 0; s < slices; s++) {
403                             a[s][r][c] = temp[s];
404                         }
405                     }
406                 }
407             }
408         }
409     }
410 
411     /**
412      * Computes the 3D inverse DST (DST-III) leaving the result in
413      * <code>a</code>. The data is stored in 1D array addressed in slice-major,
414      * then row-major, then column-major, in order of significance, i.e. the
415      * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
416      * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
417      * and rowStride = columns.
418      *
419      * @param a
420      *            data to transform
421      * @param scale
422      *            if true then scaling is performed
423      */
inverse(final double[] a, final boolean scale)424     public void inverse(final double[] a, final boolean scale) {
425         int nthreads = ConcurrencyUtils.getNumberOfThreads();
426         if (isPowerOfTwo) {
427             if (nthreads != oldNthreads) {
428                 nt = slices;
429                 if (nt < rows) {
430                     nt = rows;
431                 }
432                 nt *= 4;
433                 if (nthreads > 1) {
434                     nt *= nthreads;
435                 }
436                 if (columns == 2) {
437                     nt >>= 1;
438                 }
439                 t = new double[nt];
440                 oldNthreads = nthreads;
441             }
442             if ((nthreads > 1) && useThreads) {
443                 ddxt3da_subth(1, a, scale);
444                 ddxt3db_subth(1, a, scale);
445             } else {
446                 ddxt3da_sub(1, a, scale);
447                 ddxt3db_sub(1, a, scale);
448             }
449         } else {
450             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
451                 Future<?>[] futures = new Future[nthreads];
452                 int p = slices / nthreads;
453                 for (int l = 0; l < nthreads; l++) {
454                     final int firstSlice = l * p;
455                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
456                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
457                         public void run() {
458                             for (int s = firstSlice; s < lastSlice; s++) {
459                                 int idx1 = s * sliceStride;
460                                 for (int r = 0; r < rows; r++) {
461                                     dstColumns.inverse(a, idx1 + r * rowStride, scale);
462                                 }
463                             }
464                         }
465                     });
466                 }
467                 ConcurrencyUtils.waitForCompletion(futures);
468 
469                 for (int l = 0; l < nthreads; l++) {
470                     final int firstSlice = l * p;
471                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
472                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
473                         public void run() {
474                             double[] temp = new double[rows];
475                             for (int s = firstSlice; s < lastSlice; s++) {
476                                 int idx1 = s * sliceStride;
477                                 for (int c = 0; c < columns; c++) {
478                                     for (int r = 0; r < rows; r++) {
479                                         int idx3 = idx1 + r * rowStride + c;
480                                         temp[r] = a[idx3];
481                                     }
482                                     dstRows.inverse(temp, scale);
483                                     for (int r = 0; r < rows; r++) {
484                                         int idx3 = idx1 + r * rowStride + c;
485                                         a[idx3] = temp[r];
486                                     }
487                                 }
488                             }
489                         }
490                     });
491                 }
492                 ConcurrencyUtils.waitForCompletion(futures);
493 
494                 p = rows / nthreads;
495                 for (int l = 0; l < nthreads; l++) {
496                     final int firstRow = l * p;
497                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
498                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
499                         public void run() {
500                             double[] temp = new double[slices];
501                             for (int r = firstRow; r < lastRow; r++) {
502                                 int idx1 = r * rowStride;
503                                 for (int c = 0; c < columns; c++) {
504                                     for (int s = 0; s < slices; s++) {
505                                         int idx3 = s * sliceStride + idx1 + c;
506                                         temp[s] = a[idx3];
507                                     }
508                                     dstSlices.inverse(temp, scale);
509                                     for (int s = 0; s < slices; s++) {
510                                         int idx3 = s * sliceStride + idx1 + c;
511                                         a[idx3] = temp[s];
512                                     }
513                                 }
514                             }
515                         }
516                     });
517                 }
518                 ConcurrencyUtils.waitForCompletion(futures);
519 
520             } else {
521                 for (int s = 0; s < slices; s++) {
522                     int idx1 = s * sliceStride;
523                     for (int r = 0; r < rows; r++) {
524                         dstColumns.inverse(a, idx1 + r * rowStride, scale);
525                     }
526                 }
527                 double[] temp = new double[rows];
528                 for (int s = 0; s < slices; s++) {
529                     int idx1 = s * sliceStride;
530                     for (int c = 0; c < columns; c++) {
531                         for (int r = 0; r < rows; r++) {
532                             int idx3 = idx1 + r * rowStride + c;
533                             temp[r] = a[idx3];
534                         }
535                         dstRows.inverse(temp, scale);
536                         for (int r = 0; r < rows; r++) {
537                             int idx3 = idx1 + r * rowStride + c;
538                             a[idx3] = temp[r];
539                         }
540                     }
541                 }
542                 temp = new double[slices];
543                 for (int r = 0; r < rows; r++) {
544                     int idx1 = r * rowStride;
545                     for (int c = 0; c < columns; c++) {
546                         for (int s = 0; s < slices; s++) {
547                             int idx3 = s * sliceStride + idx1 + c;
548                             temp[s] = a[idx3];
549                         }
550                         dstSlices.inverse(temp, scale);
551                         for (int s = 0; s < slices; s++) {
552                             int idx3 = s * sliceStride + idx1 + c;
553                             a[idx3] = temp[s];
554                         }
555                     }
556                 }
557             }
558         }
559 
560     }
561 
562     /**
563      * Computes the 3D inverse DST (DST-III) leaving the result in
564      * <code>a</code>. The data is stored in 3D array.
565      *
566      * @param a
567      *            data to transform
568      * @param scale
569      *            if true then scaling is performed
570      */
inverse(final double[][][] a, final boolean scale)571     public void inverse(final double[][][] a, final boolean scale) {
572         int nthreads = ConcurrencyUtils.getNumberOfThreads();
573         if (isPowerOfTwo) {
574             if (nthreads != oldNthreads) {
575                 nt = slices;
576                 if (nt < rows) {
577                     nt = rows;
578                 }
579                 nt *= 4;
580                 if (nthreads > 1) {
581                     nt *= nthreads;
582                 }
583                 if (columns == 2) {
584                     nt >>= 1;
585                 }
586                 t = new double[nt];
587                 oldNthreads = nthreads;
588             }
589             if ((nthreads > 1) && useThreads) {
590                 ddxt3da_subth(1, a, scale);
591                 ddxt3db_subth(1, a, scale);
592             } else {
593                 ddxt3da_sub(1, a, scale);
594                 ddxt3db_sub(1, a, scale);
595             }
596         } else {
597             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
598                 Future<?>[] futures = new Future[nthreads];
599                 int p = slices / nthreads;
600                 for (int l = 0; l < nthreads; l++) {
601                     final int firstSlice = l * p;
602                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
603                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
604                         public void run() {
605                             for (int s = firstSlice; s < lastSlice; s++) {
606                                 for (int r = 0; r < rows; r++) {
607                                     dstColumns.inverse(a[s][r], scale);
608                                 }
609                             }
610                         }
611                     });
612                 }
613                 ConcurrencyUtils.waitForCompletion(futures);
614 
615                 for (int l = 0; l < nthreads; l++) {
616                     final int firstSlice = l * p;
617                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
618                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
619                         public void run() {
620                             double[] temp = new double[rows];
621                             for (int s = firstSlice; s < lastSlice; s++) {
622                                 for (int c = 0; c < columns; c++) {
623                                     for (int r = 0; r < rows; r++) {
624                                         temp[r] = a[s][r][c];
625                                     }
626                                     dstRows.inverse(temp, scale);
627                                     for (int r = 0; r < rows; r++) {
628                                         a[s][r][c] = temp[r];
629                                     }
630                                 }
631                             }
632                         }
633                     });
634                 }
635                 ConcurrencyUtils.waitForCompletion(futures);
636 
637                 p = rows / nthreads;
638                 for (int l = 0; l < nthreads; l++) {
639                     final int firstRow = l * p;
640                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
641                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
642                         public void run() {
643                             double[] temp = new double[slices];
644                             for (int r = firstRow; r < lastRow; r++) {
645                                 for (int c = 0; c < columns; c++) {
646                                     for (int s = 0; s < slices; s++) {
647                                         temp[s] = a[s][r][c];
648                                     }
649                                     dstSlices.inverse(temp, scale);
650                                     for (int s = 0; s < slices; s++) {
651                                         a[s][r][c] = temp[s];
652                                     }
653                                 }
654                             }
655                         }
656                     });
657                 }
658                 ConcurrencyUtils.waitForCompletion(futures);
659 
660             } else {
661                 for (int s = 0; s < slices; s++) {
662                     for (int r = 0; r < rows; r++) {
663                         dstColumns.inverse(a[s][r], scale);
664                     }
665                 }
666                 double[] temp = new double[rows];
667                 for (int s = 0; s < slices; s++) {
668                     for (int c = 0; c < columns; c++) {
669                         for (int r = 0; r < rows; r++) {
670                             temp[r] = a[s][r][c];
671                         }
672                         dstRows.inverse(temp, scale);
673                         for (int r = 0; r < rows; r++) {
674                             a[s][r][c] = temp[r];
675                         }
676                     }
677                 }
678                 temp = new double[slices];
679                 for (int r = 0; r < rows; r++) {
680                     for (int c = 0; c < columns; c++) {
681                         for (int s = 0; s < slices; s++) {
682                             temp[s] = a[s][r][c];
683                         }
684                         dstSlices.inverse(temp, scale);
685                         for (int s = 0; s < slices; s++) {
686                             a[s][r][c] = temp[s];
687                         }
688                     }
689                 }
690             }
691         }
692     }
693 
ddxt3da_sub(int isgn, double[] a, boolean scale)694     private void ddxt3da_sub(int isgn, double[] a, boolean scale) {
695         int idx0, idx1, idx2;
696 
697         if (isgn == -1) {
698             for (int s = 0; s < slices; s++) {
699                 idx0 = s * sliceStride;
700                 for (int r = 0; r < rows; r++) {
701                     dstColumns.forward(a, idx0 + r * rowStride, scale);
702                 }
703                 if (columns > 2) {
704                     for (int c = 0; c < columns; c += 4) {
705                         for (int r = 0; r < rows; r++) {
706                             idx1 = idx0 + r * rowStride + c;
707                             idx2 = rows + r;
708                             t[r] = a[idx1];
709                             t[idx2] = a[idx1 + 1];
710                             t[idx2 + rows] = a[idx1 + 2];
711                             t[idx2 + 2 * rows] = a[idx1 + 3];
712                         }
713                         dstRows.forward(t, 0, scale);
714                         dstRows.forward(t, rows, scale);
715                         dstRows.forward(t, 2 * rows, scale);
716                         dstRows.forward(t, 3 * rows, scale);
717                         for (int r = 0; r < rows; r++) {
718                             idx1 = idx0 + r * rowStride + c;
719                             idx2 = rows + r;
720                             a[idx1] = t[r];
721                             a[idx1 + 1] = t[idx2];
722                             a[idx1 + 2] = t[idx2 + rows];
723                             a[idx1 + 3] = t[idx2 + 2 * rows];
724                         }
725                     }
726                 } else if (columns == 2) {
727                     for (int r = 0; r < rows; r++) {
728                         idx1 = idx0 + r * rowStride;
729                         t[r] = a[idx1];
730                         t[rows + r] = a[idx1 + 1];
731                     }
732                     dstRows.forward(t, 0, scale);
733                     dstRows.forward(t, rows, scale);
734                     for (int r = 0; r < rows; r++) {
735                         idx1 = idx0 + r * rowStride;
736                         a[idx1] = t[r];
737                         a[idx1 + 1] = t[rows + r];
738                     }
739                 }
740             }
741         } else {
742             for (int s = 0; s < slices; s++) {
743                 idx0 = s * sliceStride;
744                 for (int r = 0; r < rows; r++) {
745                     dstColumns.inverse(a, idx0 + r * rowStride, scale);
746                 }
747                 if (columns > 2) {
748                     for (int c = 0; c < columns; c += 4) {
749                         for (int r = 0; r < rows; r++) {
750                             idx1 = idx0 + r * rowStride + c;
751                             idx2 = rows + r;
752                             t[r] = a[idx1];
753                             t[idx2] = a[idx1 + 1];
754                             t[idx2 + rows] = a[idx1 + 2];
755                             t[idx2 + 2 * rows] = a[idx1 + 3];
756                         }
757                         dstRows.inverse(t, 0, scale);
758                         dstRows.inverse(t, rows, scale);
759                         dstRows.inverse(t, 2 * rows, scale);
760                         dstRows.inverse(t, 3 * rows, scale);
761                         for (int r = 0; r < rows; r++) {
762                             idx1 = idx0 + r * rowStride + c;
763                             idx2 = rows + r;
764                             a[idx1] = t[r];
765                             a[idx1 + 1] = t[idx2];
766                             a[idx1 + 2] = t[idx2 + rows];
767                             a[idx1 + 3] = t[idx2 + 2 * rows];
768                         }
769                     }
770                 } else if (columns == 2) {
771                     for (int r = 0; r < rows; r++) {
772                         idx1 = idx0 + r * rowStride;
773                         t[r] = a[idx1];
774                         t[rows + r] = a[idx1 + 1];
775                     }
776                     dstRows.inverse(t, 0, scale);
777                     dstRows.inverse(t, rows, scale);
778                     for (int r = 0; r < rows; r++) {
779                         idx1 = idx0 + r * rowStride;
780                         a[idx1] = t[r];
781                         a[idx1 + 1] = t[rows + r];
782                     }
783                 }
784             }
785         }
786     }
787 
ddxt3da_sub(int isgn, double[][][] a, boolean scale)788     private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) {
789         int idx2;
790 
791         if (isgn == -1) {
792             for (int s = 0; s < slices; s++) {
793                 for (int r = 0; r < rows; r++) {
794                     dstColumns.forward(a[s][r], scale);
795                 }
796                 if (columns > 2) {
797                     for (int c = 0; c < columns; c += 4) {
798                         for (int r = 0; r < rows; r++) {
799                             idx2 = rows + r;
800                             t[r] = a[s][r][c];
801                             t[idx2] = a[s][r][c + 1];
802                             t[idx2 + rows] = a[s][r][c + 2];
803                             t[idx2 + 2 * rows] = a[s][r][c + 3];
804                         }
805                         dstRows.forward(t, 0, scale);
806                         dstRows.forward(t, rows, scale);
807                         dstRows.forward(t, 2 * rows, scale);
808                         dstRows.forward(t, 3 * rows, scale);
809                         for (int r = 0; r < rows; r++) {
810                             idx2 = rows + r;
811                             a[s][r][c] = t[r];
812                             a[s][r][c + 1] = t[idx2];
813                             a[s][r][c + 2] = t[idx2 + rows];
814                             a[s][r][c + 3] = t[idx2 + 2 * rows];
815                         }
816                     }
817                 } else if (columns == 2) {
818                     for (int r = 0; r < rows; r++) {
819                         t[r] = a[s][r][0];
820                         t[rows + r] = a[s][r][1];
821                     }
822                     dstRows.forward(t, 0, scale);
823                     dstRows.forward(t, rows, scale);
824                     for (int r = 0; r < rows; r++) {
825                         a[s][r][0] = t[r];
826                         a[s][r][1] = t[rows + r];
827                     }
828                 }
829             }
830         } else {
831             for (int s = 0; s < slices; s++) {
832                 for (int r = 0; r < rows; r++) {
833                     dstColumns.inverse(a[s][r], scale);
834                 }
835                 if (columns > 2) {
836                     for (int c = 0; c < columns; c += 4) {
837                         for (int r = 0; r < rows; r++) {
838                             idx2 = rows + r;
839                             t[r] = a[s][r][c];
840                             t[idx2] = a[s][r][c + 1];
841                             t[idx2 + rows] = a[s][r][c + 2];
842                             t[idx2 + 2 * rows] = a[s][r][c + 3];
843                         }
844                         dstRows.inverse(t, 0, scale);
845                         dstRows.inverse(t, rows, scale);
846                         dstRows.inverse(t, 2 * rows, scale);
847                         dstRows.inverse(t, 3 * rows, scale);
848                         for (int r = 0; r < rows; r++) {
849                             idx2 = rows + r;
850                             a[s][r][c] = t[r];
851                             a[s][r][c + 1] = t[idx2];
852                             a[s][r][c + 2] = t[idx2 + rows];
853                             a[s][r][c + 3] = t[idx2 + 2 * rows];
854                         }
855                     }
856                 } else if (columns == 2) {
857                     for (int r = 0; r < rows; r++) {
858                         t[r] = a[s][r][0];
859                         t[rows + r] = a[s][r][1];
860                     }
861                     dstRows.inverse(t, 0, scale);
862                     dstRows.inverse(t, rows, scale);
863                     for (int r = 0; r < rows; r++) {
864                         a[s][r][0] = t[r];
865                         a[s][r][1] = t[rows + r];
866                     }
867                 }
868             }
869         }
870     }
871 
ddxt3db_sub(int isgn, double[] a, boolean scale)872     private void ddxt3db_sub(int isgn, double[] a, boolean scale) {
873         int idx0, idx1, idx2;
874 
875         if (isgn == -1) {
876             if (columns > 2) {
877                 for (int r = 0; r < rows; r++) {
878                     idx0 = r * rowStride;
879                     for (int c = 0; c < columns; c += 4) {
880                         for (int s = 0; s < slices; s++) {
881                             idx1 = s * sliceStride + idx0 + c;
882                             idx2 = slices + s;
883                             t[s] = a[idx1];
884                             t[idx2] = a[idx1 + 1];
885                             t[idx2 + slices] = a[idx1 + 2];
886                             t[idx2 + 2 * slices] = a[idx1 + 3];
887                         }
888                         dstSlices.forward(t, 0, scale);
889                         dstSlices.forward(t, slices, scale);
890                         dstSlices.forward(t, 2 * slices, scale);
891                         dstSlices.forward(t, 3 * slices, scale);
892                         for (int s = 0; s < slices; s++) {
893                             idx1 = s * sliceStride + idx0 + c;
894                             idx2 = slices + s;
895                             a[idx1] = t[s];
896                             a[idx1 + 1] = t[idx2];
897                             a[idx1 + 2] = t[idx2 + slices];
898                             a[idx1 + 3] = t[idx2 + 2 * slices];
899                         }
900                     }
901                 }
902             } else if (columns == 2) {
903                 for (int r = 0; r < rows; r++) {
904                     idx0 = r * rowStride;
905                     for (int s = 0; s < slices; s++) {
906                         idx1 = s * sliceStride + idx0;
907                         t[s] = a[idx1];
908                         t[slices + s] = a[idx1 + 1];
909                     }
910                     dstSlices.forward(t, 0, scale);
911                     dstSlices.forward(t, slices, scale);
912                     for (int s = 0; s < slices; s++) {
913                         idx1 = s * sliceStride + idx0;
914                         a[idx1] = t[s];
915                         a[idx1 + 1] = t[slices + s];
916                     }
917                 }
918             }
919         } else {
920             if (columns > 2) {
921                 for (int r = 0; r < rows; r++) {
922                     idx0 = r * rowStride;
923                     for (int c = 0; c < columns; c += 4) {
924                         for (int s = 0; s < slices; s++) {
925                             idx1 = s * sliceStride + idx0 + c;
926                             idx2 = slices + s;
927                             t[s] = a[idx1];
928                             t[idx2] = a[idx1 + 1];
929                             t[idx2 + slices] = a[idx1 + 2];
930                             t[idx2 + 2 * slices] = a[idx1 + 3];
931                         }
932                         dstSlices.inverse(t, 0, scale);
933                         dstSlices.inverse(t, slices, scale);
934                         dstSlices.inverse(t, 2 * slices, scale);
935                         dstSlices.inverse(t, 3 * slices, scale);
936 
937                         for (int s = 0; s < slices; s++) {
938                             idx1 = s * sliceStride + idx0 + c;
939                             idx2 = slices + s;
940                             a[idx1] = t[s];
941                             a[idx1 + 1] = t[idx2];
942                             a[idx1 + 2] = t[idx2 + slices];
943                             a[idx1 + 3] = t[idx2 + 2 * slices];
944                         }
945                     }
946                 }
947             } else if (columns == 2) {
948                 for (int r = 0; r < rows; r++) {
949                     idx0 = r * rowStride;
950                     for (int s = 0; s < slices; s++) {
951                         idx1 = s * sliceStride + idx0;
952                         t[s] = a[idx1];
953                         t[slices + s] = a[idx1 + 1];
954                     }
955                     dstSlices.inverse(t, 0, scale);
956                     dstSlices.inverse(t, slices, scale);
957                     for (int s = 0; s < slices; s++) {
958                         idx1 = s * sliceStride + idx0;
959                         a[idx1] = t[s];
960                         a[idx1 + 1] = t[slices + s];
961                     }
962                 }
963             }
964         }
965     }
966 
ddxt3db_sub(int isgn, double[][][] a, boolean scale)967     private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) {
968         int idx2;
969 
970         if (isgn == -1) {
971             if (columns > 2) {
972                 for (int r = 0; r < rows; r++) {
973                     for (int c = 0; c < columns; c += 4) {
974                         for (int s = 0; s < slices; s++) {
975                             idx2 = slices + s;
976                             t[s] = a[s][r][c];
977                             t[idx2] = a[s][r][c + 1];
978                             t[idx2 + slices] = a[s][r][c + 2];
979                             t[idx2 + 2 * slices] = a[s][r][c + 3];
980                         }
981                         dstSlices.forward(t, 0, scale);
982                         dstSlices.forward(t, slices, scale);
983                         dstSlices.forward(t, 2 * slices, scale);
984                         dstSlices.forward(t, 3 * slices, scale);
985                         for (int s = 0; s < slices; s++) {
986                             idx2 = slices + s;
987                             a[s][r][c] = t[s];
988                             a[s][r][c + 1] = t[idx2];
989                             a[s][r][c + 2] = t[idx2 + slices];
990                             a[s][r][c + 3] = t[idx2 + 2 * slices];
991                         }
992                     }
993                 }
994             } else if (columns == 2) {
995                 for (int r = 0; r < rows; r++) {
996                     for (int s = 0; s < slices; s++) {
997                         t[s] = a[s][r][0];
998                         t[slices + s] = a[s][r][1];
999                     }
1000                     dstSlices.forward(t, 0, scale);
1001                     dstSlices.forward(t, slices, scale);
1002                     for (int s = 0; s < slices; s++) {
1003                         a[s][r][0] = t[s];
1004                         a[s][r][1] = t[slices + s];
1005                     }
1006                 }
1007             }
1008         } else {
1009             if (columns > 2) {
1010                 for (int r = 0; r < rows; r++) {
1011                     for (int c = 0; c < columns; c += 4) {
1012                         for (int s = 0; s < slices; s++) {
1013                             idx2 = slices + s;
1014                             t[s] = a[s][r][c];
1015                             t[idx2] = a[s][r][c + 1];
1016                             t[idx2 + slices] = a[s][r][c + 2];
1017                             t[idx2 + 2 * slices] = a[s][r][c + 3];
1018                         }
1019                         dstSlices.inverse(t, 0, scale);
1020                         dstSlices.inverse(t, slices, scale);
1021                         dstSlices.inverse(t, 2 * slices, scale);
1022                         dstSlices.inverse(t, 3 * slices, scale);
1023 
1024                         for (int s = 0; s < slices; s++) {
1025                             idx2 = slices + s;
1026                             a[s][r][c] = t[s];
1027                             a[s][r][c + 1] = t[idx2];
1028                             a[s][r][c + 2] = t[idx2 + slices];
1029                             a[s][r][c + 3] = t[idx2 + 2 * slices];
1030                         }
1031                     }
1032                 }
1033             } else if (columns == 2) {
1034                 for (int r = 0; r < rows; r++) {
1035                     for (int s = 0; s < slices; s++) {
1036                         t[s] = a[s][r][0];
1037                         t[slices + s] = a[s][r][1];
1038                     }
1039                     dstSlices.inverse(t, 0, scale);
1040                     dstSlices.inverse(t, slices, scale);
1041                     for (int s = 0; s < slices; s++) {
1042                         a[s][r][0] = t[s];
1043                         a[s][r][1] = t[slices + s];
1044                     }
1045                 }
1046             }
1047         }
1048     }
1049 
ddxt3da_subth(final int isgn, final double[] a, final boolean scale)1050     private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) {
1051         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1052 
1053         int nt = 4 * rows;
1054         if (columns == 2) {
1055             nt >>= 1;
1056         }
1057         Future<?>[] futures = new Future[nthreads];
1058 
1059         for (int i = 0; i < nthreads; i++) {
1060             final int n0 = i;
1061             final int startt = nt * i;
1062             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1063 
1064                 public void run() {
1065                     int idx0, idx1, idx2;
1066                     if (isgn == -1) {
1067                         for (int s = n0; s < slices; s += nthreads) {
1068                             idx0 = s * sliceStride;
1069                             for (int r = 0; r < rows; r++) {
1070                                 dstColumns.forward(a, idx0 + r * rowStride, scale);
1071                             }
1072                             if (columns > 2) {
1073                                 for (int c = 0; c < columns; c += 4) {
1074                                     for (int r = 0; r < rows; r++) {
1075                                         idx1 = idx0 + r * rowStride + c;
1076                                         idx2 = startt + rows + r;
1077                                         t[startt + r] = a[idx1];
1078                                         t[idx2] = a[idx1 + 1];
1079                                         t[idx2 + rows] = a[idx1 + 2];
1080                                         t[idx2 + 2 * rows] = a[idx1 + 3];
1081                                     }
1082                                     dstRows.forward(t, startt, scale);
1083                                     dstRows.forward(t, startt + rows, scale);
1084                                     dstRows.forward(t, startt + 2 * rows, scale);
1085                                     dstRows.forward(t, startt + 3 * rows, scale);
1086                                     for (int r = 0; r < rows; r++) {
1087                                         idx1 = idx0 + r * rowStride + c;
1088                                         idx2 = startt + rows + r;
1089                                         a[idx1] = t[startt + r];
1090                                         a[idx1 + 1] = t[idx2];
1091                                         a[idx1 + 2] = t[idx2 + rows];
1092                                         a[idx1 + 3] = t[idx2 + 2 * rows];
1093                                     }
1094                                 }
1095                             } else if (columns == 2) {
1096                                 for (int r = 0; r < rows; r++) {
1097                                     idx1 = idx0 + r * rowStride;
1098                                     t[startt + r] = a[idx1];
1099                                     t[startt + rows + r] = a[idx1 + 1];
1100                                 }
1101                                 dstRows.forward(t, startt, scale);
1102                                 dstRows.forward(t, startt + rows, scale);
1103                                 for (int r = 0; r < rows; r++) {
1104                                     idx1 = idx0 + r * rowStride;
1105                                     a[idx1] = t[startt + r];
1106                                     a[idx1 + 1] = t[startt + rows + r];
1107                                 }
1108                             }
1109                         }
1110                     } else {
1111                         for (int s = n0; s < slices; s += nthreads) {
1112                             idx0 = s * sliceStride;
1113                             for (int r = 0; r < rows; r++) {
1114                                 dstColumns.inverse(a, idx0 + r * rowStride, scale);
1115                             }
1116                             if (columns > 2) {
1117                                 for (int c = 0; c < columns; c += 4) {
1118                                     for (int r = 0; r < rows; r++) {
1119                                         idx1 = idx0 + r * rowStride + c;
1120                                         idx2 = startt + rows + r;
1121                                         t[startt + r] = a[idx1];
1122                                         t[idx2] = a[idx1 + 1];
1123                                         t[idx2 + rows] = a[idx1 + 2];
1124                                         t[idx2 + 2 * rows] = a[idx1 + 3];
1125                                     }
1126                                     dstRows.inverse(t, startt, scale);
1127                                     dstRows.inverse(t, startt + rows, scale);
1128                                     dstRows.inverse(t, startt + 2 * rows, scale);
1129                                     dstRows.inverse(t, startt + 3 * rows, scale);
1130                                     for (int r = 0; r < rows; r++) {
1131                                         idx1 = idx0 + r * rowStride + c;
1132                                         idx2 = startt + rows + r;
1133                                         a[idx1] = t[startt + r];
1134                                         a[idx1 + 1] = t[idx2];
1135                                         a[idx1 + 2] = t[idx2 + rows];
1136                                         a[idx1 + 3] = t[idx2 + 2 * rows];
1137                                     }
1138                                 }
1139                             } else if (columns == 2) {
1140                                 for (int r = 0; r < rows; r++) {
1141                                     idx1 = idx0 + r * rowStride;
1142                                     t[startt + r] = a[idx1];
1143                                     t[startt + rows + r] = a[idx1 + 1];
1144                                 }
1145                                 dstRows.inverse(t, startt, scale);
1146                                 dstRows.inverse(t, startt + rows, scale);
1147                                 for (int r = 0; r < rows; r++) {
1148                                     idx1 = idx0 + r * rowStride;
1149                                     a[idx1] = t[startt + r];
1150                                     a[idx1 + 1] = t[startt + rows + r];
1151                                 }
1152                             }
1153                         }
1154                     }
1155                 }
1156             });
1157         }
1158         ConcurrencyUtils.waitForCompletion(futures);
1159     }
1160 
ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale)1161     private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) {
1162         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1163         int nt = 4 * rows;
1164         if (columns == 2) {
1165             nt >>= 1;
1166         }
1167         Future<?>[] futures = new Future[nthreads];
1168 
1169         for (int i = 0; i < nthreads; i++) {
1170             final int n0 = i;
1171             final int startt = nt * i;
1172             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1173 
1174                 public void run() {
1175                     int idx2;
1176                     if (isgn == -1) {
1177                         for (int s = n0; s < slices; s += nthreads) {
1178                             for (int r = 0; r < rows; r++) {
1179                                 dstColumns.forward(a[s][r], scale);
1180                             }
1181                             if (columns > 2) {
1182                                 for (int c = 0; c < columns; c += 4) {
1183                                     for (int r = 0; r < rows; r++) {
1184                                         idx2 = startt + rows + r;
1185                                         t[startt + r] = a[s][r][c];
1186                                         t[idx2] = a[s][r][c + 1];
1187                                         t[idx2 + rows] = a[s][r][c + 2];
1188                                         t[idx2 + 2 * rows] = a[s][r][c + 3];
1189                                     }
1190                                     dstRows.forward(t, startt, scale);
1191                                     dstRows.forward(t, startt + rows, scale);
1192                                     dstRows.forward(t, startt + 2 * rows, scale);
1193                                     dstRows.forward(t, startt + 3 * rows, scale);
1194                                     for (int r = 0; r < rows; r++) {
1195                                         idx2 = startt + rows + r;
1196                                         a[s][r][c] = t[startt + r];
1197                                         a[s][r][c + 1] = t[idx2];
1198                                         a[s][r][c + 2] = t[idx2 + rows];
1199                                         a[s][r][c + 3] = t[idx2 + 2 * rows];
1200                                     }
1201                                 }
1202                             } else if (columns == 2) {
1203                                 for (int r = 0; r < rows; r++) {
1204                                     t[startt + r] = a[s][r][0];
1205                                     t[startt + rows + r] = a[s][r][1];
1206                                 }
1207                                 dstRows.forward(t, startt, scale);
1208                                 dstRows.forward(t, startt + rows, scale);
1209                                 for (int r = 0; r < rows; r++) {
1210                                     a[s][r][0] = t[startt + r];
1211                                     a[s][r][1] = t[startt + rows + r];
1212                                 }
1213                             }
1214                         }
1215                     } else {
1216                         for (int s = n0; s < slices; s += nthreads) {
1217                             for (int r = 0; r < rows; r++) {
1218                                 dstColumns.inverse(a[s][r], scale);
1219                             }
1220                             if (columns > 2) {
1221                                 for (int c = 0; c < columns; c += 4) {
1222                                     for (int r = 0; r < rows; r++) {
1223                                         idx2 = startt + rows + r;
1224                                         t[startt + r] = a[s][r][c];
1225                                         t[idx2] = a[s][r][c + 1];
1226                                         t[idx2 + rows] = a[s][r][c + 2];
1227                                         t[idx2 + 2 * rows] = a[s][r][c + 3];
1228                                     }
1229                                     dstRows.inverse(t, startt, scale);
1230                                     dstRows.inverse(t, startt + rows, scale);
1231                                     dstRows.inverse(t, startt + 2 * rows, scale);
1232                                     dstRows.inverse(t, startt + 3 * rows, scale);
1233                                     for (int r = 0; r < rows; r++) {
1234                                         idx2 = startt + rows + r;
1235                                         a[s][r][c] = t[startt + r];
1236                                         a[s][r][c + 1] = t[idx2];
1237                                         a[s][r][c + 2] = t[idx2 + rows];
1238                                         a[s][r][c + 3] = t[idx2 + 2 * rows];
1239                                     }
1240                                 }
1241                             } else if (columns == 2) {
1242                                 for (int r = 0; r < rows; r++) {
1243                                     t[startt + r] = a[s][r][0];
1244                                     t[startt + rows + r] = a[s][r][1];
1245                                 }
1246                                 dstRows.inverse(t, startt, scale);
1247                                 dstRows.inverse(t, startt + rows, scale);
1248                                 for (int r = 0; r < rows; r++) {
1249                                     a[s][r][0] = t[startt + r];
1250                                     a[s][r][1] = t[startt + rows + r];
1251                                 }
1252                             }
1253                         }
1254                     }
1255                 }
1256             });
1257         }
1258         ConcurrencyUtils.waitForCompletion(futures);
1259     }
1260 
ddxt3db_subth(final int isgn, final double[] a, final boolean scale)1261     private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) {
1262         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1263         int nt = 4 * slices;
1264         if (columns == 2) {
1265             nt >>= 1;
1266         }
1267         Future<?>[] futures = new Future[nthreads];
1268 
1269         for (int i = 0; i < nthreads; i++) {
1270             final int n0 = i;
1271             final int startt = nt * i;
1272             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1273 
1274                 public void run() {
1275                     int idx0, idx1, idx2;
1276                     if (isgn == -1) {
1277                         if (columns > 2) {
1278                             for (int r = n0; r < rows; r += nthreads) {
1279                                 idx0 = r * rowStride;
1280                                 for (int c = 0; c < columns; c += 4) {
1281                                     for (int s = 0; s < slices; s++) {
1282                                         idx1 = s * sliceStride + idx0 + c;
1283                                         idx2 = startt + slices + s;
1284                                         t[startt + s] = a[idx1];
1285                                         t[idx2] = a[idx1 + 1];
1286                                         t[idx2 + slices] = a[idx1 + 2];
1287                                         t[idx2 + 2 * slices] = a[idx1 + 3];
1288                                     }
1289                                     dstSlices.forward(t, startt, scale);
1290                                     dstSlices.forward(t, startt + slices, scale);
1291                                     dstSlices.forward(t, startt + 2 * slices, scale);
1292                                     dstSlices.forward(t, startt + 3 * slices, scale);
1293                                     for (int s = 0; s < slices; s++) {
1294                                         idx1 = s * sliceStride + idx0 + c;
1295                                         idx2 = startt + slices + s;
1296                                         a[idx1] = t[startt + s];
1297                                         a[idx1 + 1] = t[idx2];
1298                                         a[idx1 + 2] = t[idx2 + slices];
1299                                         a[idx1 + 3] = t[idx2 + 2 * slices];
1300                                     }
1301                                 }
1302                             }
1303                         } else if (columns == 2) {
1304                             for (int r = n0; r < rows; r += nthreads) {
1305                                 idx0 = r * rowStride;
1306                                 for (int s = 0; s < slices; s++) {
1307                                     idx1 = s * sliceStride + idx0;
1308                                     t[startt + s] = a[idx1];
1309                                     t[startt + slices + s] = a[idx1 + 1];
1310                                 }
1311                                 dstSlices.forward(t, startt, scale);
1312                                 dstSlices.forward(t, startt + slices, scale);
1313                                 for (int s = 0; s < slices; s++) {
1314                                     idx1 = s * sliceStride + idx0;
1315                                     a[idx1] = t[startt + s];
1316                                     a[idx1 + 1] = t[startt + slices + s];
1317                                 }
1318                             }
1319                         }
1320                     } else {
1321                         if (columns > 2) {
1322                             for (int r = n0; r < rows; r += nthreads) {
1323                                 idx0 = r * rowStride;
1324                                 for (int c = 0; c < columns; c += 4) {
1325                                     for (int s = 0; s < slices; s++) {
1326                                         idx1 = s * sliceStride + idx0 + c;
1327                                         idx2 = startt + slices + s;
1328                                         t[startt + s] = a[idx1];
1329                                         t[idx2] = a[idx1 + 1];
1330                                         t[idx2 + slices] = a[idx1 + 2];
1331                                         t[idx2 + 2 * slices] = a[idx1 + 3];
1332                                     }
1333                                     dstSlices.inverse(t, startt, scale);
1334                                     dstSlices.inverse(t, startt + slices, scale);
1335                                     dstSlices.inverse(t, startt + 2 * slices, scale);
1336                                     dstSlices.inverse(t, startt + 3 * slices, scale);
1337                                     for (int s = 0; s < slices; s++) {
1338                                         idx1 = s * sliceStride + idx0 + c;
1339                                         idx2 = startt + slices + s;
1340                                         a[idx1] = t[startt + s];
1341                                         a[idx1 + 1] = t[idx2];
1342                                         a[idx1 + 2] = t[idx2 + slices];
1343                                         a[idx1 + 3] = t[idx2 + 2 * slices];
1344                                     }
1345                                 }
1346                             }
1347                         } else if (columns == 2) {
1348                             for (int r = n0; r < rows; r += nthreads) {
1349                                 idx0 = r * rowStride;
1350                                 for (int s = 0; s < slices; s++) {
1351                                     idx1 = s * sliceStride + idx0;
1352                                     t[startt + s] = a[idx1];
1353                                     t[startt + slices + s] = a[idx1 + 1];
1354                                 }
1355                                 dstSlices.inverse(t, startt, scale);
1356                                 dstSlices.inverse(t, startt + slices, scale);
1357 
1358                                 for (int s = 0; s < slices; s++) {
1359                                     idx1 = s * sliceStride + idx0;
1360                                     a[idx1] = t[startt + s];
1361                                     a[idx1 + 1] = t[startt + slices + s];
1362                                 }
1363                             }
1364                         }
1365                     }
1366                 }
1367             });
1368         }
1369         ConcurrencyUtils.waitForCompletion(futures);
1370     }
1371 
ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale)1372     private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) {
1373         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1374         int nt = 4 * slices;
1375         if (columns == 2) {
1376             nt >>= 1;
1377         }
1378         Future<?>[] futures = new Future[nthreads];
1379 
1380         for (int i = 0; i < nthreads; i++) {
1381             final int n0 = i;
1382             final int startt = nt * i;
1383             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1384 
1385                 public void run() {
1386                     int idx2;
1387                     if (isgn == -1) {
1388                         if (columns > 2) {
1389                             for (int r = n0; r < rows; r += nthreads) {
1390                                 for (int c = 0; c < columns; c += 4) {
1391                                     for (int s = 0; s < slices; s++) {
1392                                         idx2 = startt + slices + s;
1393                                         t[startt + s] = a[s][r][c];
1394                                         t[idx2] = a[s][r][c + 1];
1395                                         t[idx2 + slices] = a[s][r][c + 2];
1396                                         t[idx2 + 2 * slices] = a[s][r][c + 3];
1397                                     }
1398                                     dstSlices.forward(t, startt, scale);
1399                                     dstSlices.forward(t, startt + slices, scale);
1400                                     dstSlices.forward(t, startt + 2 * slices, scale);
1401                                     dstSlices.forward(t, startt + 3 * slices, scale);
1402                                     for (int s = 0; s < slices; s++) {
1403                                         idx2 = startt + slices + s;
1404                                         a[s][r][c] = t[startt + s];
1405                                         a[s][r][c + 1] = t[idx2];
1406                                         a[s][r][c + 2] = t[idx2 + slices];
1407                                         a[s][r][c + 3] = t[idx2 + 2 * slices];
1408                                     }
1409                                 }
1410                             }
1411                         } else if (columns == 2) {
1412                             for (int r = n0; r < rows; r += nthreads) {
1413                                 for (int s = 0; s < slices; s++) {
1414                                     t[startt + s] = a[s][r][0];
1415                                     t[startt + slices + s] = a[s][r][1];
1416                                 }
1417                                 dstSlices.forward(t, startt, scale);
1418                                 dstSlices.forward(t, startt + slices, scale);
1419                                 for (int s = 0; s < slices; s++) {
1420                                     a[s][r][0] = t[startt + s];
1421                                     a[s][r][1] = t[startt + slices + s];
1422                                 }
1423                             }
1424                         }
1425                     } else {
1426                         if (columns > 2) {
1427                             for (int r = n0; r < rows; r += nthreads) {
1428                                 for (int c = 0; c < columns; c += 4) {
1429                                     for (int s = 0; s < slices; s++) {
1430                                         idx2 = startt + slices + s;
1431                                         t[startt + s] = a[s][r][c];
1432                                         t[idx2] = a[s][r][c + 1];
1433                                         t[idx2 + slices] = a[s][r][c + 2];
1434                                         t[idx2 + 2 * slices] = a[s][r][c + 3];
1435                                     }
1436                                     dstSlices.inverse(t, startt, scale);
1437                                     dstSlices.inverse(t, startt + slices, scale);
1438                                     dstSlices.inverse(t, startt + 2 * slices, scale);
1439                                     dstSlices.inverse(t, startt + 3 * slices, scale);
1440                                     for (int s = 0; s < slices; s++) {
1441                                         idx2 = startt + slices + s;
1442                                         a[s][r][c] = t[startt + s];
1443                                         a[s][r][c + 1] = t[idx2];
1444                                         a[s][r][c + 2] = t[idx2 + slices];
1445                                         a[s][r][c + 3] = t[idx2 + 2 * slices];
1446                                     }
1447                                 }
1448                             }
1449                         } else if (columns == 2) {
1450                             for (int r = n0; r < rows; r += nthreads) {
1451                                 for (int s = 0; s < slices; s++) {
1452                                     t[startt + s] = a[s][r][0];
1453                                     t[startt + slices + s] = a[s][r][1];
1454                                 }
1455                                 dstSlices.inverse(t, startt, scale);
1456                                 dstSlices.inverse(t, startt + slices, scale);
1457 
1458                                 for (int s = 0; s < slices; s++) {
1459                                     a[s][r][0] = t[startt + s];
1460                                     a[s][r][1] = t[startt + slices + s];
1461                                 }
1462                             }
1463                         }
1464                     }
1465                 }
1466             });
1467         }
1468         ConcurrencyUtils.waitForCompletion(futures);
1469     }
1470 }
1471