1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Mozilla Public License Version
5  * 1.1 (the "License"); you may not use this file except in compliance with
6  * the License. You may obtain a copy of the License at
7  * http://www.mozilla.org/MPL/
8  *
9  * Software distributed under the License is distributed on an "AS IS" basis,
10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11  * for the specific language governing rights and limitations under the
12  * License.
13  *
14  * The Original Code is JTransforms.
15  *
16  * The Initial Developer of the Original Code is
17  * Piotr Wendykier, Emory University.
18  * Portions created by the Initial Developer are Copyright (C) 2007-2009
19  * the Initial Developer. All Rights Reserved.
20  *
21  * Alternatively, the contents of this file may be used under the terms of
22  * either the GNU General Public License Version 2 or later (the "GPL"), or
23  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
24  * in which case the provisions of the GPL or the LGPL are applicable instead
25  * of those above. If you wish to allow use of your version of this file only
26  * under the terms of either the GPL or the LGPL, and not to allow others to
27  * use your version of this file under the terms of the MPL, indicate your
28  * decision by deleting the provisions above and replace them with the notice
29  * and other provisions required by the GPL or the LGPL. If you do not delete
30  * the provisions above, a recipient may use your version of this file under
31  * the terms of any one of the MPL, the GPL or the LGPL.
32  *
33  * ***** END LICENSE BLOCK ***** */
34 
35 package edu.emory.mathcs.jtransforms.dht;
36 
37 import java.util.concurrent.Future;
38 
39 import edu.emory.mathcs.utils.ConcurrencyUtils;
40 
41 /**
42  * Computes 3D Discrete Hartley Transform (DHT) of real, double precision data.
43  * The sizes of all three dimensions can be arbitrary numbers. This is a
44  * parallel implementation optimized for SMP systems.<br>
45  * <br>
46  * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
47  * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
48  *
49  * @author Piotr Wendykier (piotr.wendykier@gmail.com)
50  *
51  */
52 public class DoubleDHT_3D {
53 
54     private int slices;
55 
56     private int rows;
57 
58     private int columns;
59 
60     private int sliceStride;
61 
62     private int rowStride;
63 
64     private double[] t;
65 
66     private DoubleDHT_1D dhtSlices, dhtRows, dhtColumns;
67 
68     private int oldNthreads;
69 
70     private int nt;
71 
72     private boolean isPowerOfTwo = false;
73 
74     private boolean useThreads = false;
75 
76     /**
77      * Creates new instance of DoubleDHT_3D.
78      *
79      * @param slices
80      *            number of slices
81      * @param rows
82      *            number of rows
83      * @param columns
84      *            number of columns
85      */
DoubleDHT_3D(int slices, int rows, int columns)86     public DoubleDHT_3D(int slices, int rows, int columns) {
87         if (slices <= 1 || rows <= 1 || columns <= 1) {
88             throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
89         }
90         this.slices = slices;
91         this.rows = rows;
92         this.columns = columns;
93         this.sliceStride = rows * columns;
94         this.rowStride = columns;
95         if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
96             this.useThreads = true;
97         }
98         if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
99             isPowerOfTwo = true;
100             oldNthreads = ConcurrencyUtils.getNumberOfThreads();
101             nt = slices;
102             if (nt < rows) {
103                 nt = rows;
104             }
105             nt *= 4;
106             if (oldNthreads > 1) {
107                 nt *= oldNthreads;
108             }
109             if (columns == 2) {
110                 nt >>= 1;
111             }
112             t = new double[nt];
113         }
114         dhtSlices = new DoubleDHT_1D(slices);
115         if (slices == rows) {
116             dhtRows = dhtSlices;
117         } else {
118             dhtRows = new DoubleDHT_1D(rows);
119         }
120         if (slices == columns) {
121             dhtColumns = dhtSlices;
122         } else if (rows == columns) {
123             dhtColumns = dhtRows;
124         } else {
125             dhtColumns = new DoubleDHT_1D(columns);
126         }
127     }
128 
129     /**
130      * Computes the 3D real, forward DHT leaving the result in <code>a</code>.
131      * The data is stored in 1D array addressed in slice-major, then row-major,
132      * then column-major, in order of significance, i.e. the element (i,j,k) of
133      * 3D array x[slices][rows][columns] is stored in a[i*sliceStride +
134      * j*rowStride + k], where sliceStride = rows * columns and rowStride =
135      * columns.
136      *
137      * @param a
138      *            data to transform
139      */
forward(final double[] a)140     public void forward(final double[] a) {
141         int nthreads = ConcurrencyUtils.getNumberOfThreads();
142         if (isPowerOfTwo) {
143             if (nthreads != oldNthreads) {
144                 nt = slices;
145                 if (nt < rows) {
146                     nt = rows;
147                 }
148                 nt *= 4;
149                 if (nthreads > 1) {
150                     nt *= nthreads;
151                 }
152                 if (columns == 2) {
153                     nt >>= 1;
154                 }
155                 t = new double[nt];
156                 oldNthreads = nthreads;
157             }
158             if ((nthreads > 1) && useThreads) {
159                 ddxt3da_subth(-1, a, true);
160                 ddxt3db_subth(-1, a, true);
161             } else {
162                 ddxt3da_sub(-1, a, true);
163                 ddxt3db_sub(-1, a, true);
164             }
165             yTransform(a);
166         } else {
167             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
168                 Future<?>[] futures = new Future[nthreads];
169                 int p = slices / nthreads;
170                 for (int l = 0; l < nthreads; l++) {
171                     final int firstSlice = l * p;
172                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
173                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
174                         public void run() {
175                             for (int s = firstSlice; s < lastSlice; s++) {
176                                 int idx1 = s * sliceStride;
177                                 for (int r = 0; r < rows; r++) {
178                                     dhtColumns.forward(a, idx1 + r * rowStride);
179                                 }
180                             }
181                         }
182                     });
183                 }
184                 ConcurrencyUtils.waitForCompletion(futures);
185 
186                 for (int l = 0; l < nthreads; l++) {
187                     final int firstSlice = l * p;
188                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
189                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
190                         public void run() {
191                             double[] temp = new double[rows];
192                             for (int s = firstSlice; s < lastSlice; s++) {
193                                 int idx1 = s * sliceStride;
194                                 for (int c = 0; c < columns; c++) {
195                                     for (int r = 0; r < rows; r++) {
196                                         int idx3 = idx1 + r * rowStride + c;
197                                         temp[r] = a[idx3];
198                                     }
199                                     dhtRows.forward(temp);
200                                     for (int r = 0; r < rows; r++) {
201                                         int idx3 = idx1 + r * rowStride + c;
202                                         a[idx3] = temp[r];
203                                     }
204                                 }
205                             }
206                         }
207                     });
208                 }
209                 ConcurrencyUtils.waitForCompletion(futures);
210 
211                 p = rows / nthreads;
212                 for (int l = 0; l < nthreads; l++) {
213                     final int startRow = l * p;
214                     final int stopRow;
215                     if (l == nthreads - 1) {
216                         stopRow = rows;
217                     } else {
218                         stopRow = startRow + p;
219                     }
220                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
221                         public void run() {
222                             double[] temp = new double[slices];
223                             for (int r = startRow; r < stopRow; r++) {
224                                 int idx1 = r * rowStride;
225                                 for (int c = 0; c < columns; c++) {
226                                     for (int s = 0; s < slices; s++) {
227                                         int idx3 = s * sliceStride + idx1 + c;
228                                         temp[s] = a[idx3];
229                                     }
230                                     dhtSlices.forward(temp);
231                                     for (int s = 0; s < slices; s++) {
232                                         int idx3 = s * sliceStride + idx1 + c;
233                                         a[idx3] = temp[s];
234                                     }
235                                 }
236                             }
237                         }
238                     });
239                 }
240                 ConcurrencyUtils.waitForCompletion(futures);
241 
242             } else {
243                 for (int s = 0; s < slices; s++) {
244                     int idx1 = s * sliceStride;
245                     for (int r = 0; r < rows; r++) {
246                         dhtColumns.forward(a, idx1 + r * rowStride);
247                     }
248                 }
249                 double[] temp = new double[rows];
250                 for (int s = 0; s < slices; s++) {
251                     int idx1 = s * sliceStride;
252                     for (int c = 0; c < columns; c++) {
253                         for (int r = 0; r < rows; r++) {
254                             int idx3 = idx1 + r * rowStride + c;
255                             temp[r] = a[idx3];
256                         }
257                         dhtRows.forward(temp);
258                         for (int r = 0; r < rows; r++) {
259                             int idx3 = idx1 + r * rowStride + c;
260                             a[idx3] = temp[r];
261                         }
262                     }
263                 }
264                 temp = new double[slices];
265                 for (int r = 0; r < rows; r++) {
266                     int idx1 = r * rowStride;
267                     for (int c = 0; c < columns; c++) {
268                         for (int s = 0; s < slices; s++) {
269                             int idx3 = s * sliceStride + idx1 + c;
270                             temp[s] = a[idx3];
271                         }
272                         dhtSlices.forward(temp);
273                         for (int s = 0; s < slices; s++) {
274                             int idx3 = s * sliceStride + idx1 + c;
275                             a[idx3] = temp[s];
276                         }
277                     }
278                 }
279             }
280             yTransform(a);
281         }
282     }
283 
284     /**
285      * Computes the 3D real, forward DHT leaving the result in <code>a</code>.
286      * The data is stored in 3D array.
287      *
288      * @param a
289      *            data to transform
290      */
forward(final double[][][] a)291     public void forward(final double[][][] a) {
292         int nthreads = ConcurrencyUtils.getNumberOfThreads();
293         if (isPowerOfTwo) {
294             if (nthreads != oldNthreads) {
295                 nt = slices;
296                 if (nt < rows) {
297                     nt = rows;
298                 }
299                 nt *= 4;
300                 if (nthreads > 1) {
301                     nt *= nthreads;
302                 }
303                 if (columns == 2) {
304                     nt >>= 1;
305                 }
306                 t = new double[nt];
307                 oldNthreads = nthreads;
308             }
309             if ((nthreads > 1) && useThreads) {
310                 ddxt3da_subth(-1, a, true);
311                 ddxt3db_subth(-1, a, true);
312             } else {
313                 ddxt3da_sub(-1, a, true);
314                 ddxt3db_sub(-1, a, true);
315             }
316             yTransform(a);
317         } else {
318             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
319                 Future<?>[] futures = new Future[nthreads];
320                 int p = slices / nthreads;
321                 for (int l = 0; l < nthreads; l++) {
322                     final int firstSlice = l * p;
323                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
324                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
325                         public void run() {
326                             for (int s = firstSlice; s < lastSlice; s++) {
327                                 for (int r = 0; r < rows; r++) {
328                                     dhtColumns.forward(a[s][r]);
329                                 }
330                             }
331                         }
332                     });
333                 }
334                 ConcurrencyUtils.waitForCompletion(futures);
335 
336                 for (int l = 0; l < nthreads; l++) {
337                     final int firstSlice = l * p;
338                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
339                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
340                         public void run() {
341                             double[] temp = new double[rows];
342                             for (int s = firstSlice; s < lastSlice; s++) {
343                                 for (int c = 0; c < columns; c++) {
344                                     for (int r = 0; r < rows; r++) {
345                                         temp[r] = a[s][r][c];
346                                     }
347                                     dhtRows.forward(temp);
348                                     for (int r = 0; r < rows; r++) {
349                                         a[s][r][c] = temp[r];
350                                     }
351                                 }
352                             }
353                         }
354                     });
355                 }
356                 ConcurrencyUtils.waitForCompletion(futures);
357 
358                 p = rows / nthreads;
359                 for (int l = 0; l < nthreads; l++) {
360                     final int firstRow = l * p;
361                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
362                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
363                         public void run() {
364                             double[] temp = new double[slices];
365                             for (int r = firstRow; r < lastRow; r++) {
366                                 for (int c = 0; c < columns; c++) {
367                                     for (int s = 0; s < slices; s++) {
368                                         temp[s] = a[s][r][c];
369                                     }
370                                     dhtSlices.forward(temp);
371                                     for (int s = 0; s < slices; s++) {
372                                         a[s][r][c] = temp[s];
373                                     }
374                                 }
375                             }
376                         }
377                     });
378                 }
379                 ConcurrencyUtils.waitForCompletion(futures);
380 
381             } else {
382                 for (int s = 0; s < slices; s++) {
383                     for (int r = 0; r < rows; r++) {
384                         dhtColumns.forward(a[s][r]);
385                     }
386                 }
387                 double[] temp = new double[rows];
388                 for (int s = 0; s < slices; s++) {
389                     for (int c = 0; c < columns; c++) {
390                         for (int r = 0; r < rows; r++) {
391                             temp[r] = a[s][r][c];
392                         }
393                         dhtRows.forward(temp);
394                         for (int r = 0; r < rows; r++) {
395                             a[s][r][c] = temp[r];
396                         }
397                     }
398                 }
399                 temp = new double[slices];
400                 for (int r = 0; r < rows; r++) {
401                     for (int c = 0; c < columns; c++) {
402                         for (int s = 0; s < slices; s++) {
403                             temp[s] = a[s][r][c];
404                         }
405                         dhtSlices.forward(temp);
406                         for (int s = 0; s < slices; s++) {
407                             a[s][r][c] = temp[s];
408                         }
409                     }
410                 }
411             }
412             yTransform(a);
413         }
414     }
415 
416     /**
417      * Computes the 3D real, inverse DHT leaving the result in <code>a</code>.
418      * The data is stored in 1D array addressed in slice-major, then row-major,
419      * then column-major, in order of significance, i.e. the element (i,j,k) of
420      * 3D array x[slices][rows][columns] is stored in a[i*sliceStride +
421      * j*rowStride + k], where sliceStride = rows * columns and rowStride =
422      * columns.
423      *
424      * @param a
425      *            data to transform
426      * @param scale
427      *            if true then scaling is performed
428      */
inverse(final double[] a, final boolean scale)429     public void inverse(final double[] a, final boolean scale) {
430         int nthreads = ConcurrencyUtils.getNumberOfThreads();
431         if (isPowerOfTwo) {
432             if (nthreads != oldNthreads) {
433                 nt = slices;
434                 if (nt < rows) {
435                     nt = rows;
436                 }
437                 nt *= 4;
438                 if (nthreads > 1) {
439                     nt *= nthreads;
440                 }
441                 if (columns == 2) {
442                     nt >>= 1;
443                 }
444                 t = new double[nt];
445                 oldNthreads = nthreads;
446             }
447             if ((nthreads > 1) && useThreads) {
448                 ddxt3da_subth(1, a, scale);
449                 ddxt3db_subth(1, a, scale);
450             } else {
451                 ddxt3da_sub(1, a, scale);
452                 ddxt3db_sub(1, a, scale);
453             }
454             yTransform(a);
455         } else {
456             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
457                 Future<?>[] futures = new Future[nthreads];
458                 int p = slices / nthreads;
459                 for (int l = 0; l < nthreads; l++) {
460                     final int firstSlice = l * p;
461                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
462                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
463                         public void run() {
464                             for (int s = firstSlice; s < lastSlice; s++) {
465                                 int idx1 = s * sliceStride;
466                                 for (int r = 0; r < rows; r++) {
467                                     dhtColumns.inverse(a, idx1 + r * rowStride, scale);
468                                 }
469                             }
470                         }
471                     });
472                 }
473                 ConcurrencyUtils.waitForCompletion(futures);
474 
475                 for (int l = 0; l < nthreads; l++) {
476                     final int firstSlice = l * p;
477                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
478                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
479                         public void run() {
480                             double[] temp = new double[rows];
481                             for (int s = firstSlice; s < lastSlice; s++) {
482                                 int idx1 = s * sliceStride;
483                                 for (int c = 0; c < columns; c++) {
484                                     for (int r = 0; r < rows; r++) {
485                                         int idx3 = idx1 + r * rowStride + c;
486                                         temp[r] = a[idx3];
487                                     }
488                                     dhtRows.inverse(temp, scale);
489                                     for (int r = 0; r < rows; r++) {
490                                         int idx3 = idx1 + r * rowStride + c;
491                                         a[idx3] = temp[r];
492                                     }
493                                 }
494                             }
495                         }
496                     });
497                 }
498                 ConcurrencyUtils.waitForCompletion(futures);
499 
500                 p = rows / nthreads;
501                 for (int l = 0; l < nthreads; l++) {
502                     final int firstRow = l * p;
503                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
504                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
505                         public void run() {
506                             double[] temp = new double[slices];
507                             for (int r = firstRow; r < lastRow; r++) {
508                                 int idx1 = r * rowStride;
509                                 for (int c = 0; c < columns; c++) {
510                                     for (int s = 0; s < slices; s++) {
511                                         int idx3 = s * sliceStride + idx1 + c;
512                                         temp[s] = a[idx3];
513                                     }
514                                     dhtSlices.inverse(temp, scale);
515                                     for (int s = 0; s < slices; s++) {
516                                         int idx3 = s * sliceStride + idx1 + c;
517                                         a[idx3] = temp[s];
518                                     }
519                                 }
520                             }
521                         }
522                     });
523                 }
524                 ConcurrencyUtils.waitForCompletion(futures);
525 
526             } else {
527                 for (int s = 0; s < slices; s++) {
528                     int idx1 = s * sliceStride;
529                     for (int r = 0; r < rows; r++) {
530                         dhtColumns.inverse(a, idx1 + r * rowStride, scale);
531                     }
532                 }
533                 double[] temp = new double[rows];
534                 for (int s = 0; s < slices; s++) {
535                     int idx1 = s * sliceStride;
536                     for (int c = 0; c < columns; c++) {
537                         for (int r = 0; r < rows; r++) {
538                             int idx3 = idx1 + r * rowStride + c;
539                             temp[r] = a[idx3];
540                         }
541                         dhtRows.inverse(temp, scale);
542                         for (int r = 0; r < rows; r++) {
543                             int idx3 = idx1 + r * rowStride + c;
544                             a[idx3] = temp[r];
545                         }
546                     }
547                 }
548                 temp = new double[slices];
549                 for (int r = 0; r < rows; r++) {
550                     int idx1 = r * rowStride;
551                     for (int c = 0; c < columns; c++) {
552                         for (int s = 0; s < slices; s++) {
553                             int idx3 = s * sliceStride + idx1 + c;
554                             temp[s] = a[idx3];
555                         }
556                         dhtSlices.inverse(temp, scale);
557                         for (int s = 0; s < slices; s++) {
558                             int idx3 = s * sliceStride + idx1 + c;
559                             a[idx3] = temp[s];
560                         }
561                     }
562                 }
563             }
564             yTransform(a);
565         }
566     }
567 
568     /**
569      * Computes the 3D real, inverse DHT leaving the result in <code>a</code>.
570      * The data is stored in 3D array.
571      *
572      * @param a
573      *            data to transform
574      * @param scale
575      *            if true then scaling is performed
576      */
inverse(final double[][][] a, final boolean scale)577     public void inverse(final double[][][] a, final boolean scale) {
578         int nthreads = ConcurrencyUtils.getNumberOfThreads();
579         if (isPowerOfTwo) {
580             if (nthreads != oldNthreads) {
581                 nt = slices;
582                 if (nt < rows) {
583                     nt = rows;
584                 }
585                 nt *= 4;
586                 if (nthreads > 1) {
587                     nt *= nthreads;
588                 }
589                 if (columns == 2) {
590                     nt >>= 1;
591                 }
592                 t = new double[nt];
593                 oldNthreads = nthreads;
594             }
595             if ((nthreads > 1) && useThreads) {
596                 ddxt3da_subth(1, a, scale);
597                 ddxt3db_subth(1, a, scale);
598             } else {
599                 ddxt3da_sub(1, a, scale);
600                 ddxt3db_sub(1, a, scale);
601             }
602             yTransform(a);
603         } else {
604             if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
605                 Future<?>[] futures = new Future[nthreads];
606                 int p = slices / nthreads;
607                 for (int l = 0; l < nthreads; l++) {
608                     final int firstSlice = l * p;
609                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
610                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
611                         public void run() {
612                             for (int s = firstSlice; s < lastSlice; s++) {
613                                 for (int r = 0; r < rows; r++) {
614                                     dhtColumns.inverse(a[s][r], scale);
615                                 }
616                             }
617                         }
618                     });
619                 }
620                 ConcurrencyUtils.waitForCompletion(futures);
621 
622                 for (int l = 0; l < nthreads; l++) {
623                     final int firstSlice = l * p;
624                     final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
625                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
626                         public void run() {
627                             double[] temp = new double[rows];
628                             for (int s = firstSlice; s < lastSlice; s++) {
629                                 for (int c = 0; c < columns; c++) {
630                                     for (int r = 0; r < rows; r++) {
631                                         temp[r] = a[s][r][c];
632                                     }
633                                     dhtRows.inverse(temp, scale);
634                                     for (int r = 0; r < rows; r++) {
635                                         a[s][r][c] = temp[r];
636                                     }
637                                 }
638                             }
639                         }
640                     });
641                 }
642                 ConcurrencyUtils.waitForCompletion(futures);
643 
644                 p = rows / nthreads;
645                 for (int l = 0; l < nthreads; l++) {
646                     final int firstRow = l * p;
647                     final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
648                     futures[l] = ConcurrencyUtils.submit(new Runnable() {
649                         public void run() {
650                             double[] temp = new double[slices];
651                             for (int r = firstRow; r < lastRow; r++) {
652                                 for (int c = 0; c < columns; c++) {
653                                     for (int s = 0; s < slices; s++) {
654                                         temp[s] = a[s][r][c];
655                                     }
656                                     dhtSlices.inverse(temp, scale);
657                                     for (int s = 0; s < slices; s++) {
658                                         a[s][r][c] = temp[s];
659                                     }
660                                 }
661                             }
662                         }
663                     });
664                 }
665                 ConcurrencyUtils.waitForCompletion(futures);
666 
667             } else {
668                 for (int s = 0; s < slices; s++) {
669                     for (int r = 0; r < rows; r++) {
670                         dhtColumns.inverse(a[s][r], scale);
671                     }
672                 }
673                 double[] temp = new double[rows];
674                 for (int s = 0; s < slices; s++) {
675                     for (int c = 0; c < columns; c++) {
676                         for (int r = 0; r < rows; r++) {
677                             temp[r] = a[s][r][c];
678                         }
679                         dhtRows.inverse(temp, scale);
680                         for (int r = 0; r < rows; r++) {
681                             a[s][r][c] = temp[r];
682                         }
683                     }
684                 }
685                 temp = new double[slices];
686                 for (int r = 0; r < rows; r++) {
687                     for (int c = 0; c < columns; c++) {
688                         for (int s = 0; s < slices; s++) {
689                             temp[s] = a[s][r][c];
690                         }
691                         dhtSlices.inverse(temp, scale);
692                         for (int s = 0; s < slices; s++) {
693                             a[s][r][c] = temp[s];
694                         }
695                     }
696                 }
697             }
698             yTransform(a);
699         }
700     }
701 
ddxt3da_sub(int isgn, double[] a, boolean scale)702     private void ddxt3da_sub(int isgn, double[] a, boolean scale) {
703         int idx0, idx1, idx2;
704 
705         if (isgn == -1) {
706             for (int s = 0; s < slices; s++) {
707                 idx0 = s * sliceStride;
708                 for (int r = 0; r < rows; r++) {
709                     dhtColumns.forward(a, idx0 + r * rowStride);
710                 }
711                 if (columns > 2) {
712                     for (int c = 0; c < columns; c += 4) {
713                         for (int r = 0; r < rows; r++) {
714                             idx1 = idx0 + r * rowStride + c;
715                             idx2 = rows + r;
716                             t[r] = a[idx1];
717                             t[idx2] = a[idx1 + 1];
718                             t[idx2 + rows] = a[idx1 + 2];
719                             t[idx2 + 2 * rows] = a[idx1 + 3];
720                         }
721                         dhtRows.forward(t, 0);
722                         dhtRows.forward(t, rows);
723                         dhtRows.forward(t, 2 * rows);
724                         dhtRows.forward(t, 3 * rows);
725                         for (int r = 0; r < rows; r++) {
726                             idx1 = idx0 + r * rowStride + c;
727                             idx2 = rows + r;
728                             a[idx1] = t[r];
729                             a[idx1 + 1] = t[idx2];
730                             a[idx1 + 2] = t[idx2 + rows];
731                             a[idx1 + 3] = t[idx2 + 2 * rows];
732                         }
733                     }
734                 } else if (columns == 2) {
735                     for (int r = 0; r < rows; r++) {
736                         idx1 = idx0 + r * rowStride;
737                         t[r] = a[idx1];
738                         t[rows + r] = a[idx1 + 1];
739                     }
740                     dhtRows.forward(t, 0);
741                     dhtRows.forward(t, rows);
742                     for (int r = 0; r < rows; r++) {
743                         idx1 = idx0 + r * rowStride;
744                         a[idx1] = t[r];
745                         a[idx1 + 1] = t[rows + r];
746                     }
747                 }
748             }
749         } else {
750             for (int s = 0; s < slices; s++) {
751                 idx0 = s * sliceStride;
752                 for (int r = 0; r < rows; r++) {
753                     dhtColumns.inverse(a, idx0 + r * rowStride, scale);
754                 }
755                 if (columns > 2) {
756                     for (int c = 0; c < columns; c += 4) {
757                         for (int r = 0; r < rows; r++) {
758                             idx1 = idx0 + r * rowStride + c;
759                             idx2 = rows + r;
760                             t[r] = a[idx1];
761                             t[idx2] = a[idx1 + 1];
762                             t[idx2 + rows] = a[idx1 + 2];
763                             t[idx2 + 2 * rows] = a[idx1 + 3];
764                         }
765                         dhtRows.inverse(t, 0, scale);
766                         dhtRows.inverse(t, rows, scale);
767                         dhtRows.inverse(t, 2 * rows, scale);
768                         dhtRows.inverse(t, 3 * rows, scale);
769                         for (int r = 0; r < rows; r++) {
770                             idx1 = idx0 + r * rowStride + c;
771                             idx2 = rows + r;
772                             a[idx1] = t[r];
773                             a[idx1 + 1] = t[idx2];
774                             a[idx1 + 2] = t[idx2 + rows];
775                             a[idx1 + 3] = t[idx2 + 2 * rows];
776                         }
777                     }
778                 } else if (columns == 2) {
779                     for (int r = 0; r < rows; r++) {
780                         idx1 = idx0 + r * rowStride;
781                         t[r] = a[idx1];
782                         t[rows + r] = a[idx1 + 1];
783                     }
784                     dhtRows.inverse(t, 0, scale);
785                     dhtRows.inverse(t, rows, scale);
786                     for (int r = 0; r < rows; r++) {
787                         idx1 = idx0 + r * rowStride;
788                         a[idx1] = t[r];
789                         a[idx1 + 1] = t[rows + r];
790                     }
791                 }
792             }
793         }
794     }
795 
ddxt3da_sub(int isgn, double[][][] a, boolean scale)796     private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) {
797         int idx2;
798 
799         if (isgn == -1) {
800             for (int s = 0; s < slices; s++) {
801                 for (int r = 0; r < rows; r++) {
802                     dhtColumns.forward(a[s][r]);
803                 }
804                 if (columns > 2) {
805                     for (int c = 0; c < columns; c += 4) {
806                         for (int r = 0; r < rows; r++) {
807                             idx2 = rows + r;
808                             t[r] = a[s][r][c];
809                             t[idx2] = a[s][r][c + 1];
810                             t[idx2 + rows] = a[s][r][c + 2];
811                             t[idx2 + 2 * rows] = a[s][r][c + 3];
812                         }
813                         dhtRows.forward(t, 0);
814                         dhtRows.forward(t, rows);
815                         dhtRows.forward(t, 2 * rows);
816                         dhtRows.forward(t, 3 * rows);
817                         for (int r = 0; r < rows; r++) {
818                             idx2 = rows + r;
819                             a[s][r][c] = t[r];
820                             a[s][r][c + 1] = t[idx2];
821                             a[s][r][c + 2] = t[idx2 + rows];
822                             a[s][r][c + 3] = t[idx2 + 2 * rows];
823                         }
824                     }
825                 } else if (columns == 2) {
826                     for (int r = 0; r < rows; r++) {
827                         t[r] = a[s][r][0];
828                         t[rows + r] = a[s][r][1];
829                     }
830                     dhtRows.forward(t, 0);
831                     dhtRows.forward(t, rows);
832                     for (int r = 0; r < rows; r++) {
833                         a[s][r][0] = t[r];
834                         a[s][r][1] = t[rows + r];
835                     }
836                 }
837             }
838         } else {
839             for (int s = 0; s < slices; s++) {
840                 for (int r = 0; r < rows; r++) {
841                     dhtColumns.inverse(a[s][r], scale);
842                 }
843                 if (columns > 2) {
844                     for (int c = 0; c < columns; c += 4) {
845                         for (int r = 0; r < rows; r++) {
846                             idx2 = rows + r;
847                             t[r] = a[s][r][c];
848                             t[idx2] = a[s][r][c + 1];
849                             t[idx2 + rows] = a[s][r][c + 2];
850                             t[idx2 + 2 * rows] = a[s][r][c + 3];
851                         }
852                         dhtRows.inverse(t, 0, scale);
853                         dhtRows.inverse(t, rows, scale);
854                         dhtRows.inverse(t, 2 * rows, scale);
855                         dhtRows.inverse(t, 3 * rows, scale);
856                         for (int r = 0; r < rows; r++) {
857                             idx2 = rows + r;
858                             a[s][r][c] = t[r];
859                             a[s][r][c + 1] = t[idx2];
860                             a[s][r][c + 2] = t[idx2 + rows];
861                             a[s][r][c + 3] = t[idx2 + 2 * rows];
862                         }
863                     }
864                 } else if (columns == 2) {
865                     for (int r = 0; r < rows; r++) {
866                         t[r] = a[s][r][0];
867                         t[rows + r] = a[s][r][1];
868                     }
869                     dhtRows.inverse(t, 0, scale);
870                     dhtRows.inverse(t, rows, scale);
871                     for (int r = 0; r < rows; r++) {
872                         a[s][r][0] = t[r];
873                         a[s][r][1] = t[rows + r];
874                     }
875                 }
876             }
877         }
878     }
879 
ddxt3db_sub(int isgn, double[] a, boolean scale)880     private void ddxt3db_sub(int isgn, double[] a, boolean scale) {
881         int idx0, idx1, idx2;
882 
883         if (isgn == -1) {
884             if (columns > 2) {
885                 for (int r = 0; r < rows; r++) {
886                     idx0 = r * rowStride;
887                     for (int c = 0; c < columns; c += 4) {
888                         for (int s = 0; s < slices; s++) {
889                             idx1 = s * sliceStride + idx0 + c;
890                             idx2 = slices + s;
891                             t[s] = a[idx1];
892                             t[idx2] = a[idx1 + 1];
893                             t[idx2 + slices] = a[idx1 + 2];
894                             t[idx2 + 2 * slices] = a[idx1 + 3];
895                         }
896                         dhtSlices.forward(t, 0);
897                         dhtSlices.forward(t, slices);
898                         dhtSlices.forward(t, 2 * slices);
899                         dhtSlices.forward(t, 3 * slices);
900                         for (int s = 0; s < slices; s++) {
901                             idx1 = s * sliceStride + idx0 + c;
902                             idx2 = slices + s;
903                             a[idx1] = t[s];
904                             a[idx1 + 1] = t[idx2];
905                             a[idx1 + 2] = t[idx2 + slices];
906                             a[idx1 + 3] = t[idx2 + 2 * slices];
907                         }
908                     }
909                 }
910             } else if (columns == 2) {
911                 for (int r = 0; r < rows; r++) {
912                     idx0 = r * rowStride;
913                     for (int s = 0; s < slices; s++) {
914                         idx1 = s * sliceStride + idx0;
915                         t[s] = a[idx1];
916                         t[slices + s] = a[idx1 + 1];
917                     }
918                     dhtSlices.forward(t, 0);
919                     dhtSlices.forward(t, slices);
920                     for (int s = 0; s < slices; s++) {
921                         idx1 = s * sliceStride + idx0;
922                         a[idx1] = t[s];
923                         a[idx1 + 1] = t[slices + s];
924                     }
925                 }
926             }
927         } else {
928             if (columns > 2) {
929                 for (int r = 0; r < rows; r++) {
930                     idx0 = r * rowStride;
931                     for (int c = 0; c < columns; c += 4) {
932                         for (int s = 0; s < slices; s++) {
933                             idx1 = s * sliceStride + idx0 + c;
934                             idx2 = slices + s;
935                             t[s] = a[idx1];
936                             t[idx2] = a[idx1 + 1];
937                             t[idx2 + slices] = a[idx1 + 2];
938                             t[idx2 + 2 * slices] = a[idx1 + 3];
939                         }
940                         dhtSlices.inverse(t, 0, scale);
941                         dhtSlices.inverse(t, slices, scale);
942                         dhtSlices.inverse(t, 2 * slices, scale);
943                         dhtSlices.inverse(t, 3 * slices, scale);
944 
945                         for (int s = 0; s < slices; s++) {
946                             idx1 = s * sliceStride + idx0 + c;
947                             idx2 = slices + s;
948                             a[idx1] = t[s];
949                             a[idx1 + 1] = t[idx2];
950                             a[idx1 + 2] = t[idx2 + slices];
951                             a[idx1 + 3] = t[idx2 + 2 * slices];
952                         }
953                     }
954                 }
955             } else if (columns == 2) {
956                 for (int r = 0; r < rows; r++) {
957                     idx0 = r * rowStride;
958                     for (int s = 0; s < slices; s++) {
959                         idx1 = s * sliceStride + idx0;
960                         t[s] = a[idx1];
961                         t[slices + s] = a[idx1 + 1];
962                     }
963                     dhtSlices.inverse(t, 0, scale);
964                     dhtSlices.inverse(t, slices, scale);
965                     for (int s = 0; s < slices; s++) {
966                         idx1 = s * sliceStride + idx0;
967                         a[idx1] = t[s];
968                         a[idx1 + 1] = t[slices + s];
969                     }
970                 }
971             }
972         }
973     }
974 
ddxt3db_sub(int isgn, double[][][] a, boolean scale)975     private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) {
976         int idx2;
977 
978         if (isgn == -1) {
979             if (columns > 2) {
980                 for (int r = 0; r < rows; r++) {
981                     for (int c = 0; c < columns; c += 4) {
982                         for (int s = 0; s < slices; s++) {
983                             idx2 = slices + s;
984                             t[s] = a[s][r][c];
985                             t[idx2] = a[s][r][c + 1];
986                             t[idx2 + slices] = a[s][r][c + 2];
987                             t[idx2 + 2 * slices] = a[s][r][c + 3];
988                         }
989                         dhtSlices.forward(t, 0);
990                         dhtSlices.forward(t, slices);
991                         dhtSlices.forward(t, 2 * slices);
992                         dhtSlices.forward(t, 3 * slices);
993                         for (int s = 0; s < slices; s++) {
994                             idx2 = slices + s;
995                             a[s][r][c] = t[s];
996                             a[s][r][c + 1] = t[idx2];
997                             a[s][r][c + 2] = t[idx2 + slices];
998                             a[s][r][c + 3] = t[idx2 + 2 * slices];
999                         }
1000                     }
1001                 }
1002             } else if (columns == 2) {
1003                 for (int r = 0; r < rows; r++) {
1004                     for (int s = 0; s < slices; s++) {
1005                         t[s] = a[s][r][0];
1006                         t[slices + s] = a[s][r][1];
1007                     }
1008                     dhtSlices.forward(t, 0);
1009                     dhtSlices.forward(t, slices);
1010                     for (int s = 0; s < slices; s++) {
1011                         a[s][r][0] = t[s];
1012                         a[s][r][1] = t[slices + s];
1013                     }
1014                 }
1015             }
1016         } else {
1017             if (columns > 2) {
1018                 for (int r = 0; r < rows; r++) {
1019                     for (int c = 0; c < columns; c += 4) {
1020                         for (int s = 0; s < slices; s++) {
1021                             idx2 = slices + s;
1022                             t[s] = a[s][r][c];
1023                             t[idx2] = a[s][r][c + 1];
1024                             t[idx2 + slices] = a[s][r][c + 2];
1025                             t[idx2 + 2 * slices] = a[s][r][c + 3];
1026                         }
1027                         dhtSlices.inverse(t, 0, scale);
1028                         dhtSlices.inverse(t, slices, scale);
1029                         dhtSlices.inverse(t, 2 * slices, scale);
1030                         dhtSlices.inverse(t, 3 * slices, scale);
1031 
1032                         for (int s = 0; s < slices; s++) {
1033                             idx2 = slices + s;
1034                             a[s][r][c] = t[s];
1035                             a[s][r][c + 1] = t[idx2];
1036                             a[s][r][c + 2] = t[idx2 + slices];
1037                             a[s][r][c + 3] = t[idx2 + 2 * slices];
1038                         }
1039                     }
1040                 }
1041             } else if (columns == 2) {
1042                 for (int r = 0; r < rows; r++) {
1043                     for (int s = 0; s < slices; s++) {
1044                         t[s] = a[s][r][0];
1045                         t[slices + s] = a[s][r][1];
1046                     }
1047                     dhtSlices.inverse(t, 0, scale);
1048                     dhtSlices.inverse(t, slices, scale);
1049                     for (int s = 0; s < slices; s++) {
1050                         a[s][r][0] = t[s];
1051                         a[s][r][1] = t[slices + s];
1052                     }
1053                 }
1054             }
1055         }
1056     }
1057 
ddxt3da_subth(final int isgn, final double[] a, final boolean scale)1058     private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) {
1059         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1060         int nt = 4 * rows;
1061         if (columns == 2) {
1062             nt >>= 1;
1063         }
1064         Future<?>[] futures = new Future[nthreads];
1065 
1066         for (int i = 0; i < nthreads; i++) {
1067             final int n0 = i;
1068             final int startt = nt * i;
1069             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1070 
1071                 public void run() {
1072                     int idx0, idx1, idx2;
1073                     if (isgn == -1) {
1074                         for (int s = n0; s < slices; s += nthreads) {
1075                             idx0 = s * sliceStride;
1076                             for (int r = 0; r < rows; r++) {
1077                                 dhtColumns.forward(a, idx0 + r * rowStride);
1078                             }
1079                             if (columns > 2) {
1080                                 for (int c = 0; c < columns; c += 4) {
1081                                     for (int r = 0; r < rows; r++) {
1082                                         idx1 = idx0 + r * rowStride + c;
1083                                         idx2 = startt + rows + r;
1084                                         t[startt + r] = a[idx1];
1085                                         t[idx2] = a[idx1 + 1];
1086                                         t[idx2 + rows] = a[idx1 + 2];
1087                                         t[idx2 + 2 * rows] = a[idx1 + 3];
1088                                     }
1089                                     dhtRows.forward(t, startt);
1090                                     dhtRows.forward(t, startt + rows);
1091                                     dhtRows.forward(t, startt + 2 * rows);
1092                                     dhtRows.forward(t, startt + 3 * rows);
1093                                     for (int r = 0; r < rows; r++) {
1094                                         idx1 = idx0 + r * rowStride + c;
1095                                         idx2 = startt + rows + r;
1096                                         a[idx1] = t[startt + r];
1097                                         a[idx1 + 1] = t[idx2];
1098                                         a[idx1 + 2] = t[idx2 + rows];
1099                                         a[idx1 + 3] = t[idx2 + 2 * rows];
1100                                     }
1101                                 }
1102                             } else if (columns == 2) {
1103                                 for (int r = 0; r < rows; r++) {
1104                                     idx1 = idx0 + r * rowStride;
1105                                     t[startt + r] = a[idx1];
1106                                     t[startt + rows + r] = a[idx1 + 1];
1107                                 }
1108                                 dhtRows.forward(t, startt);
1109                                 dhtRows.forward(t, startt + rows);
1110                                 for (int r = 0; r < rows; r++) {
1111                                     idx1 = idx0 + r * rowStride;
1112                                     a[idx1] = t[startt + r];
1113                                     a[idx1 + 1] = t[startt + rows + r];
1114                                 }
1115                             }
1116                         }
1117                     } else {
1118                         for (int s = n0; s < slices; s += nthreads) {
1119                             idx0 = s * sliceStride;
1120                             for (int r = 0; r < rows; r++) {
1121                                 dhtColumns.inverse(a, idx0 + r * rowStride, scale);
1122                             }
1123                             if (columns > 2) {
1124                                 for (int c = 0; c < columns; c += 4) {
1125                                     for (int r = 0; r < rows; r++) {
1126                                         idx1 = idx0 + r * rowStride + c;
1127                                         idx2 = startt + rows + r;
1128                                         t[startt + r] = a[idx1];
1129                                         t[idx2] = a[idx1 + 1];
1130                                         t[idx2 + rows] = a[idx1 + 2];
1131                                         t[idx2 + 2 * rows] = a[idx1 + 3];
1132                                     }
1133                                     dhtRows.inverse(t, startt, scale);
1134                                     dhtRows.inverse(t, startt + rows, scale);
1135                                     dhtRows.inverse(t, startt + 2 * rows, scale);
1136                                     dhtRows.inverse(t, startt + 3 * rows, scale);
1137                                     for (int r = 0; r < rows; r++) {
1138                                         idx1 = idx0 + r * rowStride + c;
1139                                         idx2 = startt + rows + r;
1140                                         a[idx1] = t[startt + r];
1141                                         a[idx1 + 1] = t[idx2];
1142                                         a[idx1 + 2] = t[idx2 + rows];
1143                                         a[idx1 + 3] = t[idx2 + 2 * rows];
1144                                     }
1145                                 }
1146                             } else if (columns == 2) {
1147                                 for (int r = 0; r < rows; r++) {
1148                                     idx1 = idx0 + r * rowStride;
1149                                     t[startt + r] = a[idx1];
1150                                     t[startt + rows + r] = a[idx1 + 1];
1151                                 }
1152                                 dhtRows.inverse(t, startt, scale);
1153                                 dhtRows.inverse(t, startt + rows, scale);
1154                                 for (int r = 0; r < rows; r++) {
1155                                     idx1 = idx0 + r * rowStride;
1156                                     a[idx1] = t[startt + r];
1157                                     a[idx1 + 1] = t[startt + rows + r];
1158                                 }
1159                             }
1160                         }
1161                     }
1162                 }
1163             });
1164         }
1165         ConcurrencyUtils.waitForCompletion(futures);
1166     }
1167 
ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale)1168     private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) {
1169         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1170         int nt = 4 * rows;
1171         if (columns == 2) {
1172             nt >>= 1;
1173         }
1174         Future<?>[] futures = new Future[nthreads];
1175 
1176         for (int i = 0; i < nthreads; i++) {
1177             final int n0 = i;
1178             final int startt = nt * i;
1179             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1180 
1181                 public void run() {
1182                     int idx2;
1183                     if (isgn == -1) {
1184                         for (int s = n0; s < slices; s += nthreads) {
1185                             for (int r = 0; r < rows; r++) {
1186                                 dhtColumns.forward(a[s][r]);
1187                             }
1188                             if (columns > 2) {
1189                                 for (int c = 0; c < columns; c += 4) {
1190                                     for (int r = 0; r < rows; r++) {
1191                                         idx2 = startt + rows + r;
1192                                         t[startt + r] = a[s][r][c];
1193                                         t[idx2] = a[s][r][c + 1];
1194                                         t[idx2 + rows] = a[s][r][c + 2];
1195                                         t[idx2 + 2 * rows] = a[s][r][c + 3];
1196                                     }
1197                                     dhtRows.forward(t, startt);
1198                                     dhtRows.forward(t, startt + rows);
1199                                     dhtRows.forward(t, startt + 2 * rows);
1200                                     dhtRows.forward(t, startt + 3 * rows);
1201                                     for (int r = 0; r < rows; r++) {
1202                                         idx2 = startt + rows + r;
1203                                         a[s][r][c] = t[startt + r];
1204                                         a[s][r][c + 1] = t[idx2];
1205                                         a[s][r][c + 2] = t[idx2 + rows];
1206                                         a[s][r][c + 3] = t[idx2 + 2 * rows];
1207                                     }
1208                                 }
1209                             } else if (columns == 2) {
1210                                 for (int r = 0; r < rows; r++) {
1211                                     t[startt + r] = a[s][r][0];
1212                                     t[startt + rows + r] = a[s][r][1];
1213                                 }
1214                                 dhtRows.forward(t, startt);
1215                                 dhtRows.forward(t, startt + rows);
1216                                 for (int r = 0; r < rows; r++) {
1217                                     a[s][r][0] = t[startt + r];
1218                                     a[s][r][1] = t[startt + rows + r];
1219                                 }
1220                             }
1221                         }
1222                     } else {
1223                         for (int s = n0; s < slices; s += nthreads) {
1224                             for (int r = 0; r < rows; r++) {
1225                                 dhtColumns.inverse(a[s][r], scale);
1226                             }
1227                             if (columns > 2) {
1228                                 for (int c = 0; c < columns; c += 4) {
1229                                     for (int r = 0; r < rows; r++) {
1230                                         idx2 = startt + rows + r;
1231                                         t[startt + r] = a[s][r][c];
1232                                         t[idx2] = a[s][r][c + 1];
1233                                         t[idx2 + rows] = a[s][r][c + 2];
1234                                         t[idx2 + 2 * rows] = a[s][r][c + 3];
1235                                     }
1236                                     dhtRows.inverse(t, startt, scale);
1237                                     dhtRows.inverse(t, startt + rows, scale);
1238                                     dhtRows.inverse(t, startt + 2 * rows, scale);
1239                                     dhtRows.inverse(t, startt + 3 * rows, scale);
1240                                     for (int r = 0; r < rows; r++) {
1241                                         idx2 = startt + rows + r;
1242                                         a[s][r][c] = t[startt + r];
1243                                         a[s][r][c + 1] = t[idx2];
1244                                         a[s][r][c + 2] = t[idx2 + rows];
1245                                         a[s][r][c + 3] = t[idx2 + 2 * rows];
1246                                     }
1247                                 }
1248                             } else if (columns == 2) {
1249                                 for (int r = 0; r < rows; r++) {
1250                                     t[startt + r] = a[s][r][0];
1251                                     t[startt + rows + r] = a[s][r][1];
1252                                 }
1253                                 dhtRows.inverse(t, startt, scale);
1254                                 dhtRows.inverse(t, startt + rows, scale);
1255                                 for (int r = 0; r < rows; r++) {
1256                                     a[s][r][0] = t[startt + r];
1257                                     a[s][r][1] = t[startt + rows + r];
1258                                 }
1259                             }
1260                         }
1261                     }
1262                 }
1263             });
1264         }
1265         ConcurrencyUtils.waitForCompletion(futures);
1266     }
1267 
ddxt3db_subth(final int isgn, final double[] a, final boolean scale)1268     private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) {
1269         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1270         int nt = 4 * slices;
1271         if (columns == 2) {
1272             nt >>= 1;
1273         }
1274         Future<?>[] futures = new Future[nthreads];
1275 
1276         for (int i = 0; i < nthreads; i++) {
1277             final int n0 = i;
1278             final int startt = nt * i;
1279             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1280 
1281                 public void run() {
1282                     int idx0, idx1, idx2;
1283                     if (isgn == -1) {
1284                         if (columns > 2) {
1285                             for (int r = n0; r < rows; r += nthreads) {
1286                                 idx0 = r * rowStride;
1287                                 for (int c = 0; c < columns; c += 4) {
1288                                     for (int s = 0; s < slices; s++) {
1289                                         idx1 = s * sliceStride + idx0 + c;
1290                                         idx2 = startt + slices + s;
1291                                         t[startt + s] = a[idx1];
1292                                         t[idx2] = a[idx1 + 1];
1293                                         t[idx2 + slices] = a[idx1 + 2];
1294                                         t[idx2 + 2 * slices] = a[idx1 + 3];
1295                                     }
1296                                     dhtSlices.forward(t, startt);
1297                                     dhtSlices.forward(t, startt + slices);
1298                                     dhtSlices.forward(t, startt + 2 * slices);
1299                                     dhtSlices.forward(t, startt + 3 * slices);
1300                                     for (int s = 0; s < slices; s++) {
1301                                         idx1 = s * sliceStride + idx0 + c;
1302                                         idx2 = startt + slices + s;
1303                                         a[idx1] = t[startt + s];
1304                                         a[idx1 + 1] = t[idx2];
1305                                         a[idx1 + 2] = t[idx2 + slices];
1306                                         a[idx1 + 3] = t[idx2 + 2 * slices];
1307                                     }
1308                                 }
1309                             }
1310                         } else if (columns == 2) {
1311                             for (int r = n0; r < rows; r += nthreads) {
1312                                 idx0 = r * rowStride;
1313                                 for (int s = 0; s < slices; s++) {
1314                                     idx1 = s * sliceStride + idx0;
1315                                     t[startt + s] = a[idx1];
1316                                     t[startt + slices + s] = a[idx1 + 1];
1317                                 }
1318                                 dhtSlices.forward(t, startt);
1319                                 dhtSlices.forward(t, startt + slices);
1320                                 for (int s = 0; s < slices; s++) {
1321                                     idx1 = s * sliceStride + idx0;
1322                                     a[idx1] = t[startt + s];
1323                                     a[idx1 + 1] = t[startt + slices + s];
1324                                 }
1325                             }
1326                         }
1327                     } else {
1328                         if (columns > 2) {
1329                             for (int r = n0; r < rows; r += nthreads) {
1330                                 idx0 = r * rowStride;
1331                                 for (int c = 0; c < columns; c += 4) {
1332                                     for (int s = 0; s < slices; s++) {
1333                                         idx1 = s * sliceStride + idx0 + c;
1334                                         idx2 = startt + slices + s;
1335                                         t[startt + s] = a[idx1];
1336                                         t[idx2] = a[idx1 + 1];
1337                                         t[idx2 + slices] = a[idx1 + 2];
1338                                         t[idx2 + 2 * slices] = a[idx1 + 3];
1339                                     }
1340                                     dhtSlices.inverse(t, startt, scale);
1341                                     dhtSlices.inverse(t, startt + slices, scale);
1342                                     dhtSlices.inverse(t, startt + 2 * slices, scale);
1343                                     dhtSlices.inverse(t, startt + 3 * slices, scale);
1344                                     for (int s = 0; s < slices; s++) {
1345                                         idx1 = s * sliceStride + idx0 + c;
1346                                         idx2 = startt + slices + s;
1347                                         a[idx1] = t[startt + s];
1348                                         a[idx1 + 1] = t[idx2];
1349                                         a[idx1 + 2] = t[idx2 + slices];
1350                                         a[idx1 + 3] = t[idx2 + 2 * slices];
1351                                     }
1352                                 }
1353                             }
1354                         } else if (columns == 2) {
1355                             for (int r = n0; r < rows; r += nthreads) {
1356                                 idx0 = r * rowStride;
1357                                 for (int s = 0; s < slices; s++) {
1358                                     idx1 = s * sliceStride + idx0;
1359                                     t[startt + s] = a[idx1];
1360                                     t[startt + slices + s] = a[idx1 + 1];
1361                                 }
1362                                 dhtSlices.inverse(t, startt, scale);
1363                                 dhtSlices.inverse(t, startt + slices, scale);
1364                                 for (int s = 0; s < slices; s++) {
1365                                     idx1 = s * sliceStride + idx0;
1366                                     a[idx1] = t[startt + s];
1367                                     a[idx1 + 1] = t[startt + slices + s];
1368                                 }
1369                             }
1370                         }
1371                     }
1372                 }
1373             });
1374         }
1375         ConcurrencyUtils.waitForCompletion(futures);
1376     }
1377 
ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale)1378     private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) {
1379         final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1380         int nt = 4 * slices;
1381         if (columns == 2) {
1382             nt >>= 1;
1383         }
1384         Future<?>[] futures = new Future[nthreads];
1385 
1386         for (int i = 0; i < nthreads; i++) {
1387             final int n0 = i;
1388             final int startt = nt * i;
1389             futures[i] = ConcurrencyUtils.submit(new Runnable() {
1390 
1391                 public void run() {
1392                     int idx2;
1393                     if (isgn == -1) {
1394                         if (columns > 2) {
1395                             for (int r = n0; r < rows; r += nthreads) {
1396                                 for (int c = 0; c < columns; c += 4) {
1397                                     for (int s = 0; s < slices; s++) {
1398                                         idx2 = startt + slices + s;
1399                                         t[startt + s] = a[s][r][c];
1400                                         t[idx2] = a[s][r][c + 1];
1401                                         t[idx2 + slices] = a[s][r][c + 2];
1402                                         t[idx2 + 2 * slices] = a[s][r][c + 3];
1403                                     }
1404                                     dhtSlices.forward(t, startt);
1405                                     dhtSlices.forward(t, startt + slices);
1406                                     dhtSlices.forward(t, startt + 2 * slices);
1407                                     dhtSlices.forward(t, startt + 3 * slices);
1408                                     for (int s = 0; s < slices; s++) {
1409                                         idx2 = startt + slices + s;
1410                                         a[s][r][c] = t[startt + s];
1411                                         a[s][r][c + 1] = t[idx2];
1412                                         a[s][r][c + 2] = t[idx2 + slices];
1413                                         a[s][r][c + 3] = t[idx2 + 2 * slices];
1414                                     }
1415                                 }
1416                             }
1417                         } else if (columns == 2) {
1418                             for (int r = n0; r < rows; r += nthreads) {
1419                                 for (int s = 0; s < slices; s++) {
1420                                     t[startt + s] = a[s][r][0];
1421                                     t[startt + slices + s] = a[s][r][1];
1422                                 }
1423                                 dhtSlices.forward(t, startt);
1424                                 dhtSlices.forward(t, startt + slices);
1425                                 for (int s = 0; s < slices; s++) {
1426                                     a[s][r][0] = t[startt + s];
1427                                     a[s][r][1] = t[startt + slices + s];
1428                                 }
1429                             }
1430                         }
1431                     } else {
1432                         if (columns > 2) {
1433                             for (int r = n0; r < rows; r += nthreads) {
1434                                 for (int c = 0; c < columns; c += 4) {
1435                                     for (int s = 0; s < slices; s++) {
1436                                         idx2 = startt + slices + s;
1437                                         t[startt + s] = a[s][r][c];
1438                                         t[idx2] = a[s][r][c + 1];
1439                                         t[idx2 + slices] = a[s][r][c + 2];
1440                                         t[idx2 + 2 * slices] = a[s][r][c + 3];
1441                                     }
1442                                     dhtSlices.inverse(t, startt, scale);
1443                                     dhtSlices.inverse(t, startt + slices, scale);
1444                                     dhtSlices.inverse(t, startt + 2 * slices, scale);
1445                                     dhtSlices.inverse(t, startt + 3 * slices, scale);
1446                                     for (int s = 0; s < slices; s++) {
1447                                         idx2 = startt + slices + s;
1448                                         a[s][r][c] = t[startt + s];
1449                                         a[s][r][c + 1] = t[idx2];
1450                                         a[s][r][c + 2] = t[idx2 + slices];
1451                                         a[s][r][c + 3] = t[idx2 + 2 * slices];
1452                                     }
1453                                 }
1454                             }
1455                         } else if (columns == 2) {
1456                             for (int r = n0; r < rows; r += nthreads) {
1457                                 for (int s = 0; s < slices; s++) {
1458                                     t[startt + s] = a[s][r][0];
1459                                     t[startt + slices + s] = a[s][r][1];
1460                                 }
1461                                 dhtSlices.inverse(t, startt, scale);
1462                                 dhtSlices.inverse(t, startt + slices, scale);
1463 
1464                                 for (int s = 0; s < slices; s++) {
1465                                     a[s][r][0] = t[startt + s];
1466                                     a[s][r][1] = t[startt + slices + s];
1467                                 }
1468                             }
1469                         }
1470                     }
1471                 }
1472             });
1473         }
1474         ConcurrencyUtils.waitForCompletion(futures);
1475     }
1476 
yTransform(double[] a)1477     private void yTransform(double[] a) {
1478         double A, B, C, D, E, F, G, H;
1479         int cC, rC, sC;
1480         int idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10, idx11, idx12;
1481         for (int s = 0; s <= slices / 2; s++) {
1482             sC = (slices - s) % slices;
1483             idx9 = s * sliceStride;
1484             idx10 = sC * sliceStride;
1485             for (int r = 0; r <= rows / 2; r++) {
1486                 rC = (rows - r) % rows;
1487                 idx11 = r * rowStride;
1488                 idx12 = rC * rowStride;
1489                 for (int c = 0; c <= columns / 2; c++) {
1490                     cC = (columns - c) % columns;
1491                     idx1 = idx9 + idx12 + c;
1492                     idx2 = idx9 + idx11 + cC;
1493                     idx3 = idx10 + idx11 + c;
1494                     idx4 = idx10 + idx12 + cC;
1495                     idx5 = idx10 + idx12 + c;
1496                     idx6 = idx10 + idx11 + cC;
1497                     idx7 = idx9 + idx11 + c;
1498                     idx8 = idx9 + idx12 + cC;
1499                     A = a[idx1];
1500                     B = a[idx2];
1501                     C = a[idx3];
1502                     D = a[idx4];
1503                     E = a[idx5];
1504                     F = a[idx6];
1505                     G = a[idx7];
1506                     H = a[idx8];
1507                     a[idx7] = (A + B + C - D) / 2;
1508                     a[idx3] = (E + F + G - H) / 2;
1509                     a[idx1] = (G + H + E - F) / 2;
1510                     a[idx5] = (C + D + A - B) / 2;
1511                     a[idx2] = (H + G + F - E) / 2;
1512                     a[idx6] = (D + C + B - A) / 2;
1513                     a[idx8] = (B + A + D - C) / 2;
1514                     a[idx4] = (F + E + H - G) / 2;
1515                 }
1516             }
1517         }
1518     }
1519 
yTransform(double[][][] a)1520     private void yTransform(double[][][] a) {
1521         double A, B, C, D, E, F, G, H;
1522         int cC, rC, sC;
1523         for (int s = 0; s <= slices / 2; s++) {
1524             sC = (slices - s) % slices;
1525             for (int r = 0; r <= rows / 2; r++) {
1526                 rC = (rows - r) % rows;
1527                 for (int c = 0; c <= columns / 2; c++) {
1528                     cC = (columns - c) % columns;
1529                     A = a[s][rC][c];
1530                     B = a[s][r][cC];
1531                     C = a[sC][r][c];
1532                     D = a[sC][rC][cC];
1533                     E = a[sC][rC][c];
1534                     F = a[sC][r][cC];
1535                     G = a[s][r][c];
1536                     H = a[s][rC][cC];
1537                     a[s][r][c] = (A + B + C - D) / 2;
1538                     a[sC][r][c] = (E + F + G - H) / 2;
1539                     a[s][rC][c] = (G + H + E - F) / 2;
1540                     a[sC][rC][c] = (C + D + A - B) / 2;
1541                     a[s][r][cC] = (H + G + F - E) / 2;
1542                     a[sC][r][cC] = (D + C + B - A) / 2;
1543                     a[s][rC][cC] = (B + A + D - C) / 2;
1544                     a[sC][rC][cC] = (F + E + H - G) / 2;
1545                 }
1546             }
1547         }
1548     }
1549 
1550 }
1551