/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is JTransforms.
*
* The Initial Developer of the Original Code is
* Piotr Wendykier, Emory University.
* Portions created by the Initial Developer are Copyright (C) 2007-2009
* the Initial Developer. All Rights Reserved.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
package edu.emory.mathcs.jtransforms.dct;
import java.util.concurrent.Future;
import edu.emory.mathcs.utils.ConcurrencyUtils;
/**
* Computes 3D Discrete Cosine Transform (DCT) of single precision data. The
* sizes of all three dimensions can be arbitrary numbers. This is a parallel
* implementation of split-radix and mixed-radix algorithms optimized for SMP
* systems.
*
* Part of code is derived from General Purpose FFT Package written by Takuya Ooura
* (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
*
* @author Piotr Wendykier (piotr.wendykier@gmail.com)
*
*/
public class FloatDCT_3D {
// Size of the first (slowest-varying) dimension.
private int slices;
// Size of the second dimension.
private int rows;
// Size of the third (fastest-varying) dimension.
private int columns;
// Offset between consecutive slices in the flattened 1D layout (rows * columns).
private int sliceStride;
// Offset between consecutive rows in the flattened 1D layout (columns).
private int rowStride;
// Scratch buffer for the power-of-two code paths; only allocated when isPowerOfTwo is true.
private float[] t;
// 1D transforms applied along each dimension; one instance is shared when two dimension sizes are equal.
private FloatDCT_1D dctSlices, dctRows, dctColumns;
// Thread count that the scratch buffer t was last sized for.
private int oldNthreads;
// Length of the scratch buffer t.
private int nt;
// True when slices, rows and columns are all powers of two.
private boolean isPowerOfTwo = false;
// True when the total element count reaches ConcurrencyUtils.getThreadsBeginN_3D().
private boolean useThreads = false;
/**
 * Creates new instance of FloatDCT_3D.
 *
 * @param slices
 *            number of slices
 * @param rows
 *            number of rows
 * @param columns
 *            number of columns
 * @throws IllegalArgumentException
 *             if any of the three sizes is not greater than 1
 */
public FloatDCT_3D(int slices, int rows, int columns) {
    if (slices <= 1 || rows <= 1 || columns <= 1) {
        throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
    }
    this.slices = slices;
    this.rows = rows;
    this.columns = columns;
    this.sliceStride = rows * columns;
    this.rowStride = columns;
    // The element count is computed in long: the product of three ints can
    // wrap around for large volumes, which would silently disable threading.
    if ((long) slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
        this.useThreads = true;
    }
    if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
        isPowerOfTwo = true;
        oldNthreads = ConcurrencyUtils.getNumberOfThreads();
        // Scratch buffer: four lanes (two when columns == 2) of max(slices, rows)
        // elements, replicated once per thread.
        nt = slices;
        if (nt < rows) {
            nt = rows;
        }
        nt *= 4;
        if (oldNthreads > 1) {
            nt *= oldNthreads;
        }
        if (columns == 2) {
            nt >>= 1;
        }
        t = new float[nt];
    }
    // Reuse a 1D transform whenever two dimensions have the same length.
    dctSlices = new FloatDCT_1D(slices);
    if (slices == rows) {
        dctRows = dctSlices;
    } else {
        dctRows = new FloatDCT_1D(rows);
    }
    if (slices == columns) {
        dctColumns = dctSlices;
    } else if (rows == columns) {
        dctColumns = dctRows;
    } else {
        dctColumns = new FloatDCT_1D(columns);
    }
}
/**
* Computes the 3D forward DCT (DCT-II) leaving the result in a
* . The data is stored in 1D array addressed in slice-major, then
* row-major, then column-major, in order of significance, i.e. the element
* (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride
* + j*rowStride + k], where sliceStride = rows * columns and rowStride =
* columns.
*
* @param a
* data to transform
* @param scale
* if true then scaling is performed
*/
public void forward(final float[] a, final boolean scale) {
int nthreads = ConcurrencyUtils.getNumberOfThreads();
if (isPowerOfTwo) {
if (nthreads != oldNthreads) {
nt = slices;
if (nt < rows) {
nt = rows;
}
nt *= 4;
if (nthreads > 1) {
nt *= nthreads;
}
if (columns == 2) {
nt >>= 1;
}
t = new float[nt];
oldNthreads = nthreads;
}
if ((nthreads > 1) && useThreads) {
ddxt3da_subth(-1, a, scale);
ddxt3db_subth(-1, a, scale);
} else {
ddxt3da_sub(-1, a, scale);
ddxt3db_sub(-1, a, scale);
}
} else {
if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
Future>[] futures = new Future[nthreads];
int p = slices / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
for (int s = firstSlice; s < lastSlice; s++) {
int idx1 = s * sliceStride;
for (int r = 0; r < rows; r++) {
dctColumns.forward(a, idx1 + r * rowStride, scale);
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[rows];
for (int s = firstSlice; s < lastSlice; s++) {
int idx1 = s * sliceStride;
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
temp[r] = a[idx3];
}
dctRows.forward(temp, scale);
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
a[idx3] = temp[r];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
p = rows / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstRow = l * p;
final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[slices];
for (int r = firstRow; r < lastRow; r++) {
int idx1 = r * rowStride;
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
temp[s] = a[idx3];
}
dctSlices.forward(temp, scale);
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
a[idx3] = temp[s];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
} else {
for (int s = 0; s < slices; s++) {
int idx1 = s * sliceStride;
for (int r = 0; r < rows; r++) {
dctColumns.forward(a, idx1 + r * rowStride, scale);
}
}
float[] temp = new float[rows];
for (int s = 0; s < slices; s++) {
int idx1 = s * sliceStride;
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
temp[r] = a[idx3];
}
dctRows.forward(temp, scale);
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
a[idx3] = temp[r];
}
}
}
temp = new float[slices];
for (int r = 0; r < rows; r++) {
int idx1 = r * rowStride;
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
temp[s] = a[idx3];
}
dctSlices.forward(temp, scale);
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
a[idx3] = temp[s];
}
}
}
}
}
}
/**
* Computes the 3D forward DCT (DCT-II) leaving the result in a
* . The data is stored in 3D array
*
* @param a
* data to transform
* @param scale
* if true then scaling is performed
*/
public void forward(final float[][][] a, final boolean scale) {
int nthreads = ConcurrencyUtils.getNumberOfThreads();
if (isPowerOfTwo) {
if (nthreads != oldNthreads) {
nt = slices;
if (nt < rows) {
nt = rows;
}
nt *= 4;
if (nthreads > 1) {
nt *= nthreads;
}
if (columns == 2) {
nt >>= 1;
}
t = new float[nt];
oldNthreads = nthreads;
}
if ((nthreads > 1) && useThreads) {
ddxt3da_subth(-1, a, scale);
ddxt3db_subth(-1, a, scale);
} else {
ddxt3da_sub(-1, a, scale);
ddxt3db_sub(-1, a, scale);
}
} else {
if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
Future>[] futures = new Future[nthreads];
int p = slices / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
for (int s = firstSlice; s < lastSlice; s++) {
for (int r = 0; r < rows; r++) {
dctColumns.forward(a[s][r], scale);
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[rows];
for (int s = firstSlice; s < lastSlice; s++) {
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
temp[r] = a[s][r][c];
}
dctRows.forward(temp, scale);
for (int r = 0; r < rows; r++) {
a[s][r][c] = temp[r];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
p = rows / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstRow = l * p;
final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[slices];
for (int r = firstRow; r < lastRow; r++) {
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
temp[s] = a[s][r][c];
}
dctSlices.forward(temp, scale);
for (int s = 0; s < slices; s++) {
a[s][r][c] = temp[s];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
} else {
for (int s = 0; s < slices; s++) {
for (int r = 0; r < rows; r++) {
dctColumns.forward(a[s][r], scale);
}
}
float[] temp = new float[rows];
for (int s = 0; s < slices; s++) {
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
temp[r] = a[s][r][c];
}
dctRows.forward(temp, scale);
for (int r = 0; r < rows; r++) {
a[s][r][c] = temp[r];
}
}
}
temp = new float[slices];
for (int r = 0; r < rows; r++) {
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
temp[s] = a[s][r][c];
}
dctSlices.forward(temp, scale);
for (int s = 0; s < slices; s++) {
a[s][r][c] = temp[s];
}
}
}
}
}
}
/**
* Computes the 3D inverse DCT (DCT-III) leaving the result in
* a
. The data is stored in 1D array addressed in slice-major,
* then row-major, then column-major, in order of significance, i.e. the
* element (i,j,k) of 3D array x[slices][rows][columns] is stored in
* a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
* and rowStride = columns.
*
* @param a
* data to transform
* @param scale
* if true then scaling is performed
*/
public void inverse(final float[] a, final boolean scale) {
int nthreads = ConcurrencyUtils.getNumberOfThreads();
if (isPowerOfTwo) {
if (nthreads != oldNthreads) {
nt = slices;
if (nt < rows) {
nt = rows;
}
nt *= 4;
if (nthreads > 1) {
nt *= nthreads;
}
if (columns == 2) {
nt >>= 1;
}
t = new float[nt];
oldNthreads = nthreads;
}
if ((nthreads > 1) && useThreads) {
ddxt3da_subth(1, a, scale);
ddxt3db_subth(1, a, scale);
} else {
ddxt3da_sub(1, a, scale);
ddxt3db_sub(1, a, scale);
}
} else {
if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
Future>[] futures = new Future[nthreads];
int p = slices / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
for (int s = firstSlice; s < lastSlice; s++) {
int idx1 = s * sliceStride;
for (int r = 0; r < rows; r++) {
dctColumns.inverse(a, idx1 + r * rowStride, scale);
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[rows];
for (int s = firstSlice; s < lastSlice; s++) {
int idx1 = s * sliceStride;
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
temp[r] = a[idx3];
}
dctRows.inverse(temp, scale);
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
a[idx3] = temp[r];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
p = rows / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstRow = l * p;
final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[slices];
for (int r = firstRow; r < lastRow; r++) {
int idx1 = r * rowStride;
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
temp[s] = a[idx3];
}
dctSlices.inverse(temp, scale);
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
a[idx3] = temp[s];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
} else {
for (int s = 0; s < slices; s++) {
int idx1 = s * sliceStride;
for (int r = 0; r < rows; r++) {
dctColumns.inverse(a, idx1 + r * rowStride, scale);
}
}
float[] temp = new float[rows];
for (int s = 0; s < slices; s++) {
int idx1 = s * sliceStride;
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
temp[r] = a[idx3];
}
dctRows.inverse(temp, scale);
for (int r = 0; r < rows; r++) {
int idx3 = idx1 + r * rowStride + c;
a[idx3] = temp[r];
}
}
}
temp = new float[slices];
for (int r = 0; r < rows; r++) {
int idx1 = r * rowStride;
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
temp[s] = a[idx3];
}
dctSlices.inverse(temp, scale);
for (int s = 0; s < slices; s++) {
int idx3 = s * sliceStride + idx1 + c;
a[idx3] = temp[s];
}
}
}
}
}
}
/**
* Computes the 3D inverse DCT (DCT-III) leaving the result in
* a
. The data is stored in 3D array.
*
* @param a
* data to transform
* @param scale
* if true then scaling is performed
*/
public void inverse(final float[][][] a, final boolean scale) {
int nthreads = ConcurrencyUtils.getNumberOfThreads();
if (isPowerOfTwo) {
if (nthreads != oldNthreads) {
nt = slices;
if (nt < rows) {
nt = rows;
}
nt *= 4;
if (nthreads > 1) {
nt *= nthreads;
}
if (columns == 2) {
nt >>= 1;
}
t = new float[nt];
oldNthreads = nthreads;
}
if ((nthreads > 1) && useThreads) {
ddxt3da_subth(1, a, scale);
ddxt3db_subth(1, a, scale);
} else {
ddxt3da_sub(1, a, scale);
ddxt3db_sub(1, a, scale);
}
} else {
if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
Future>[] futures = new Future[nthreads];
int p = slices / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
for (int s = firstSlice; s < lastSlice; s++) {
for (int r = 0; r < rows; r++) {
dctColumns.inverse(a[s][r], scale);
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
for (int l = 0; l < nthreads; l++) {
final int firstSlice = l * p;
final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[rows];
for (int s = firstSlice; s < lastSlice; s++) {
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
temp[r] = a[s][r][c];
}
dctRows.inverse(temp, scale);
for (int r = 0; r < rows; r++) {
a[s][r][c] = temp[r];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
p = rows / nthreads;
for (int l = 0; l < nthreads; l++) {
final int firstRow = l * p;
final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
futures[l] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
float[] temp = new float[slices];
for (int r = firstRow; r < lastRow; r++) {
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
temp[s] = a[s][r][c];
}
dctSlices.inverse(temp, scale);
for (int s = 0; s < slices; s++) {
a[s][r][c] = temp[s];
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
} else {
for (int s = 0; s < slices; s++) {
for (int r = 0; r < rows; r++) {
dctColumns.inverse(a[s][r], scale);
}
}
float[] temp = new float[rows];
for (int s = 0; s < slices; s++) {
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
temp[r] = a[s][r][c];
}
dctRows.inverse(temp, scale);
for (int r = 0; r < rows; r++) {
a[s][r][c] = temp[r];
}
}
}
temp = new float[slices];
for (int r = 0; r < rows; r++) {
for (int c = 0; c < columns; c++) {
for (int s = 0; s < slices; s++) {
temp[s] = a[s][r][c];
}
dctSlices.inverse(temp, scale);
for (int s = 0; s < slices; s++) {
a[s][r][c] = temp[s];
}
}
}
}
}
}
/*
 * Serial power-of-two pass over the flattened array: for each slice, runs the
 * column transform on every row and then the row transform on the columns,
 * taken four at a time (two when columns == 2) through the scratch buffer t.
 * isgn == -1 selects the forward transforms, any other value the inverse ones.
 */
private void ddxt3da_sub(int isgn, float[] a, boolean scale) {
    final boolean fwd = (isgn == -1);
    for (int s = 0; s < slices; s++) {
        final int sliceBase = s * sliceStride;
        for (int r = 0; r < rows; r++) {
            final int rowStart = sliceBase + r * rowStride;
            if (fwd) {
                dctColumns.forward(a, rowStart, scale);
            } else {
                dctColumns.inverse(a, rowStart, scale);
            }
        }
        if (columns > 2) {
            for (int c = 0; c < columns; c += 4) {
                // Gather four adjacent columns into four consecutive lanes of t.
                for (int r = 0; r < rows; r++) {
                    final int src = sliceBase + r * rowStride + c;
                    t[r] = a[src];
                    t[rows + r] = a[src + 1];
                    t[2 * rows + r] = a[src + 2];
                    t[3 * rows + r] = a[src + 3];
                }
                for (int lane = 0; lane < 4; lane++) {
                    if (fwd) {
                        dctRows.forward(t, lane * rows, scale);
                    } else {
                        dctRows.inverse(t, lane * rows, scale);
                    }
                }
                // Scatter the transformed lanes back into the four columns.
                for (int r = 0; r < rows; r++) {
                    final int dst = sliceBase + r * rowStride + c;
                    a[dst] = t[r];
                    a[dst + 1] = t[rows + r];
                    a[dst + 2] = t[2 * rows + r];
                    a[dst + 3] = t[3 * rows + r];
                }
            }
        } else if (columns == 2) {
            for (int r = 0; r < rows; r++) {
                final int src = sliceBase + r * rowStride;
                t[r] = a[src];
                t[rows + r] = a[src + 1];
            }
            for (int lane = 0; lane < 2; lane++) {
                if (fwd) {
                    dctRows.forward(t, lane * rows, scale);
                } else {
                    dctRows.inverse(t, lane * rows, scale);
                }
            }
            for (int r = 0; r < rows; r++) {
                final int dst = sliceBase + r * rowStride;
                a[dst] = t[r];
                a[dst + 1] = t[rows + r];
            }
        }
    }
}
/*
 * Serial power-of-two pass over the 3D array: for each slice, runs the column
 * transform on every row array and then the row transform on the columns,
 * taken four at a time (two when columns == 2) through the scratch buffer t.
 * isgn == -1 selects the forward transforms, any other value the inverse ones.
 */
private void ddxt3da_sub(int isgn, float[][][] a, boolean scale) {
    final boolean fwd = (isgn == -1);
    for (int s = 0; s < slices; s++) {
        final float[][] slice = a[s];
        for (int r = 0; r < rows; r++) {
            if (fwd) {
                dctColumns.forward(slice[r], scale);
            } else {
                dctColumns.inverse(slice[r], scale);
            }
        }
        if (columns > 2) {
            for (int c = 0; c < columns; c += 4) {
                // Gather four adjacent columns into four consecutive lanes of t.
                for (int r = 0; r < rows; r++) {
                    final float[] row = slice[r];
                    t[r] = row[c];
                    t[rows + r] = row[c + 1];
                    t[2 * rows + r] = row[c + 2];
                    t[3 * rows + r] = row[c + 3];
                }
                for (int lane = 0; lane < 4; lane++) {
                    if (fwd) {
                        dctRows.forward(t, lane * rows, scale);
                    } else {
                        dctRows.inverse(t, lane * rows, scale);
                    }
                }
                // Scatter the transformed lanes back into the four columns.
                for (int r = 0; r < rows; r++) {
                    final float[] row = slice[r];
                    row[c] = t[r];
                    row[c + 1] = t[rows + r];
                    row[c + 2] = t[2 * rows + r];
                    row[c + 3] = t[3 * rows + r];
                }
            }
        } else if (columns == 2) {
            for (int r = 0; r < rows; r++) {
                t[r] = slice[r][0];
                t[rows + r] = slice[r][1];
            }
            for (int lane = 0; lane < 2; lane++) {
                if (fwd) {
                    dctRows.forward(t, lane * rows, scale);
                } else {
                    dctRows.inverse(t, lane * rows, scale);
                }
            }
            for (int r = 0; r < rows; r++) {
                slice[r][0] = t[r];
                slice[r][1] = t[rows + r];
            }
        }
    }
}
/*
 * Serial power-of-two pass over the flattened array along the slice dimension:
 * for each row, the lines running across slices are gathered four columns at a
 * time (two when columns == 2) into the scratch buffer t, transformed with
 * dctSlices and written back. isgn == -1 selects the forward transform, any
 * other value the inverse one.
 */
private void ddxt3db_sub(int isgn, float[] a, boolean scale) {
    final boolean fwd = (isgn == -1);
    if (columns > 2) {
        for (int r = 0; r < rows; r++) {
            final int rowBase = r * rowStride;
            for (int c = 0; c < columns; c += 4) {
                for (int s = 0; s < slices; s++) {
                    final int src = s * sliceStride + rowBase + c;
                    t[s] = a[src];
                    t[slices + s] = a[src + 1];
                    t[2 * slices + s] = a[src + 2];
                    t[3 * slices + s] = a[src + 3];
                }
                for (int lane = 0; lane < 4; lane++) {
                    if (fwd) {
                        dctSlices.forward(t, lane * slices, scale);
                    } else {
                        dctSlices.inverse(t, lane * slices, scale);
                    }
                }
                for (int s = 0; s < slices; s++) {
                    final int dst = s * sliceStride + rowBase + c;
                    a[dst] = t[s];
                    a[dst + 1] = t[slices + s];
                    a[dst + 2] = t[2 * slices + s];
                    a[dst + 3] = t[3 * slices + s];
                }
            }
        }
    } else if (columns == 2) {
        for (int r = 0; r < rows; r++) {
            final int rowBase = r * rowStride;
            for (int s = 0; s < slices; s++) {
                final int src = s * sliceStride + rowBase;
                t[s] = a[src];
                t[slices + s] = a[src + 1];
            }
            for (int lane = 0; lane < 2; lane++) {
                if (fwd) {
                    dctSlices.forward(t, lane * slices, scale);
                } else {
                    dctSlices.inverse(t, lane * slices, scale);
                }
            }
            for (int s = 0; s < slices; s++) {
                final int dst = s * sliceStride + rowBase;
                a[dst] = t[s];
                a[dst + 1] = t[slices + s];
            }
        }
    }
}
/*
 * Serial power-of-two pass over the 3D array along the slice dimension: for
 * each row, the lines running across slices are gathered four columns at a
 * time (two when columns == 2) into the scratch buffer t, transformed with
 * dctSlices and written back. isgn == -1 selects the forward transform, any
 * other value the inverse one.
 */
private void ddxt3db_sub(int isgn, float[][][] a, boolean scale) {
    final boolean fwd = (isgn == -1);
    if (columns > 2) {
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < columns; c += 4) {
                for (int s = 0; s < slices; s++) {
                    final float[] row = a[s][r];
                    t[s] = row[c];
                    t[slices + s] = row[c + 1];
                    t[2 * slices + s] = row[c + 2];
                    t[3 * slices + s] = row[c + 3];
                }
                for (int lane = 0; lane < 4; lane++) {
                    if (fwd) {
                        dctSlices.forward(t, lane * slices, scale);
                    } else {
                        dctSlices.inverse(t, lane * slices, scale);
                    }
                }
                for (int s = 0; s < slices; s++) {
                    final float[] row = a[s][r];
                    row[c] = t[s];
                    row[c + 1] = t[slices + s];
                    row[c + 2] = t[2 * slices + s];
                    row[c + 3] = t[3 * slices + s];
                }
            }
        }
    } else if (columns == 2) {
        for (int r = 0; r < rows; r++) {
            for (int s = 0; s < slices; s++) {
                t[s] = a[s][r][0];
                t[slices + s] = a[s][r][1];
            }
            for (int lane = 0; lane < 2; lane++) {
                if (fwd) {
                    dctSlices.forward(t, lane * slices, scale);
                } else {
                    dctSlices.inverse(t, lane * slices, scale);
                }
            }
            for (int s = 0; s < slices; s++) {
                a[s][r][0] = t[s];
                a[s][r][1] = t[slices + s];
            }
        }
    }
}
private void ddxt3da_subth(final int isgn, final float[] a, final boolean scale) {
final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
int nt = 4 * rows;
if (columns == 2) {
nt >>= 1;
}
Future>[] futures = new Future[nthreads];
for (int i = 0; i < nthreads; i++) {
final int n0 = i;
final int startt = nt * i;
futures[i] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
int idx0, idx1, idx2;
if (isgn == -1) {
for (int s = n0; s < slices; s += nthreads) {
idx0 = s * sliceStride;
for (int r = 0; r < rows; r++) {
dctColumns.forward(a, idx0 + r * rowStride, scale);
}
if (columns > 2) {
for (int c = 0; c < columns; c += 4) {
for (int r = 0; r < rows; r++) {
idx1 = idx0 + r * rowStride + c;
idx2 = startt + rows + r;
t[startt + r] = a[idx1];
t[idx2] = a[idx1 + 1];
t[idx2 + rows] = a[idx1 + 2];
t[idx2 + 2 * rows] = a[idx1 + 3];
}
dctRows.forward(t, startt, scale);
dctRows.forward(t, startt + rows, scale);
dctRows.forward(t, startt + 2 * rows, scale);
dctRows.forward(t, startt + 3 * rows, scale);
for (int r = 0; r < rows; r++) {
idx1 = idx0 + r * rowStride + c;
idx2 = startt + rows + r;
a[idx1] = t[startt + r];
a[idx1 + 1] = t[idx2];
a[idx1 + 2] = t[idx2 + rows];
a[idx1 + 3] = t[idx2 + 2 * rows];
}
}
} else if (columns == 2) {
for (int r = 0; r < rows; r++) {
idx1 = idx0 + r * rowStride;
t[startt + r] = a[idx1];
t[startt + rows + r] = a[idx1 + 1];
}
dctRows.forward(t, startt, scale);
dctRows.forward(t, startt + rows, scale);
for (int r = 0; r < rows; r++) {
idx1 = idx0 + r * rowStride;
a[idx1] = t[startt + r];
a[idx1 + 1] = t[startt + rows + r];
}
}
}
} else {
for (int s = n0; s < slices; s += nthreads) {
idx0 = s * sliceStride;
for (int r = 0; r < rows; r++) {
dctColumns.inverse(a, idx0 + r * rowStride, scale);
}
if (columns > 2) {
for (int c = 0; c < columns; c += 4) {
for (int j = 0; j < rows; j++) {
idx1 = idx0 + j * rowStride + c;
idx2 = startt + rows + j;
t[startt + j] = a[idx1];
t[idx2] = a[idx1 + 1];
t[idx2 + rows] = a[idx1 + 2];
t[idx2 + 2 * rows] = a[idx1 + 3];
}
dctRows.inverse(t, startt, scale);
dctRows.inverse(t, startt + rows, scale);
dctRows.inverse(t, startt + 2 * rows, scale);
dctRows.inverse(t, startt + 3 * rows, scale);
for (int j = 0; j < rows; j++) {
idx1 = idx0 + j * rowStride + c;
idx2 = startt + rows + j;
a[idx1] = t[startt + j];
a[idx1 + 1] = t[idx2];
a[idx1 + 2] = t[idx2 + rows];
a[idx1 + 3] = t[idx2 + 2 * rows];
}
}
} else if (columns == 2) {
for (int r = 0; r < rows; r++) {
idx1 = idx0 + r * rowStride;
t[startt + r] = a[idx1];
t[startt + rows + r] = a[idx1 + 1];
}
dctRows.inverse(t, startt, scale);
dctRows.inverse(t, startt + rows, scale);
for (int r = 0; r < rows; r++) {
idx1 = idx0 + r * rowStride;
a[idx1] = t[startt + r];
a[idx1 + 1] = t[startt + rows + r];
}
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
}
private void ddxt3da_subth(final int isgn, final float[][][] a, final boolean scale) {
final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
int nt = 4 * rows;
if (columns == 2) {
nt >>= 1;
}
Future>[] futures = new Future[nthreads];
for (int i = 0; i < nthreads; i++) {
final int n0 = i;
final int startt = nt * i;
futures[i] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
int idx2;
if (isgn == -1) {
for (int s = n0; s < slices; s += nthreads) {
for (int r = 0; r < rows; r++) {
dctColumns.forward(a[s][r], scale);
}
if (columns > 2) {
for (int c = 0; c < columns; c += 4) {
for (int r = 0; r < rows; r++) {
idx2 = startt + rows + r;
t[startt + r] = a[s][r][c];
t[idx2] = a[s][r][c + 1];
t[idx2 + rows] = a[s][r][c + 2];
t[idx2 + 2 * rows] = a[s][r][c + 3];
}
dctRows.forward(t, startt, scale);
dctRows.forward(t, startt + rows, scale);
dctRows.forward(t, startt + 2 * rows, scale);
dctRows.forward(t, startt + 3 * rows, scale);
for (int r = 0; r < rows; r++) {
idx2 = startt + rows + r;
a[s][r][c] = t[startt + r];
a[s][r][c + 1] = t[idx2];
a[s][r][c + 2] = t[idx2 + rows];
a[s][r][c + 3] = t[idx2 + 2 * rows];
}
}
} else if (columns == 2) {
for (int r = 0; r < rows; r++) {
t[startt + r] = a[s][r][0];
t[startt + rows + r] = a[s][r][1];
}
dctRows.forward(t, startt, scale);
dctRows.forward(t, startt + rows, scale);
for (int r = 0; r < rows; r++) {
a[s][r][0] = t[startt + r];
a[s][r][1] = t[startt + rows + r];
}
}
}
} else {
for (int s = n0; s < slices; s += nthreads) {
for (int r = 0; r < rows; r++) {
dctColumns.inverse(a[s][r], scale);
}
if (columns > 2) {
for (int c = 0; c < columns; c += 4) {
for (int r = 0; r < rows; r++) {
idx2 = startt + rows + r;
t[startt + r] = a[s][r][c];
t[idx2] = a[s][r][c + 1];
t[idx2 + rows] = a[s][r][c + 2];
t[idx2 + 2 * rows] = a[s][r][c + 3];
}
dctRows.inverse(t, startt, scale);
dctRows.inverse(t, startt + rows, scale);
dctRows.inverse(t, startt + 2 * rows, scale);
dctRows.inverse(t, startt + 3 * rows, scale);
for (int r = 0; r < rows; r++) {
idx2 = startt + rows + r;
a[s][r][c] = t[startt + r];
a[s][r][c + 1] = t[idx2];
a[s][r][c + 2] = t[idx2 + rows];
a[s][r][c + 3] = t[idx2 + 2 * rows];
}
}
} else if (columns == 2) {
for (int r = 0; r < rows; r++) {
t[startt + r] = a[s][r][0];
t[startt + rows + r] = a[s][r][1];
}
dctRows.inverse(t, startt, scale);
dctRows.inverse(t, startt + rows, scale);
for (int r = 0; r < rows; r++) {
a[s][r][0] = t[startt + r];
a[s][r][1] = t[startt + rows + r];
}
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
}
private void ddxt3db_subth(final int isgn, final float[] a, final boolean scale) {
final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
int nt = 4 * slices;
if (columns == 2) {
nt >>= 1;
}
Future>[] futures = new Future[nthreads];
for (int i = 0; i < nthreads; i++) {
final int n0 = i;
final int startt = nt * i;
futures[i] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
int idx0, idx1, idx2;
if (isgn == -1) {
if (columns > 2) {
for (int r = n0; r < rows; r += nthreads) {
idx0 = r * rowStride;
for (int c = 0; c < columns; c += 4) {
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0 + c;
idx2 = startt + slices + s;
t[startt + s] = a[idx1];
t[idx2] = a[idx1 + 1];
t[idx2 + slices] = a[idx1 + 2];
t[idx2 + 2 * slices] = a[idx1 + 3];
}
dctSlices.forward(t, startt, scale);
dctSlices.forward(t, startt + slices, scale);
dctSlices.forward(t, startt + 2 * slices, scale);
dctSlices.forward(t, startt + 3 * slices, scale);
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0 + c;
idx2 = startt + slices + s;
a[idx1] = t[startt + s];
a[idx1 + 1] = t[idx2];
a[idx1 + 2] = t[idx2 + slices];
a[idx1 + 3] = t[idx2 + 2 * slices];
}
}
}
} else if (columns == 2) {
for (int r = n0; r < rows; r += nthreads) {
idx0 = r * rowStride;
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0;
t[startt + s] = a[idx1];
t[startt + slices + s] = a[idx1 + 1];
}
dctSlices.forward(t, startt, scale);
dctSlices.forward(t, startt + slices, scale);
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0;
a[idx1] = t[startt + s];
a[idx1 + 1] = t[startt + slices + s];
}
}
}
} else {
if (columns > 2) {
for (int r = n0; r < rows; r += nthreads) {
idx0 = r * rowStride;
for (int c = 0; c < columns; c += 4) {
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0 + c;
idx2 = startt + slices + s;
t[startt + s] = a[idx1];
t[idx2] = a[idx1 + 1];
t[idx2 + slices] = a[idx1 + 2];
t[idx2 + 2 * slices] = a[idx1 + 3];
}
dctSlices.inverse(t, startt, scale);
dctSlices.inverse(t, startt + slices, scale);
dctSlices.inverse(t, startt + 2 * slices, scale);
dctSlices.inverse(t, startt + 3 * slices, scale);
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0 + c;
idx2 = startt + slices + s;
a[idx1] = t[startt + s];
a[idx1 + 1] = t[idx2];
a[idx1 + 2] = t[idx2 + slices];
a[idx1 + 3] = t[idx2 + 2 * slices];
}
}
}
} else if (columns == 2) {
for (int r = n0; r < rows; r += nthreads) {
idx0 = r * rowStride;
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0;
t[startt + s] = a[idx1];
t[startt + slices + s] = a[idx1 + 1];
}
dctSlices.inverse(t, startt, scale);
dctSlices.inverse(t, startt + slices, scale);
for (int s = 0; s < slices; s++) {
idx1 = s * sliceStride + idx0;
a[idx1] = t[startt + s];
a[idx1 + 1] = t[startt + slices + s];
}
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
}
private void ddxt3db_subth(final int isgn, final float[][][] a, final boolean scale) {
final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
int nt = 4 * slices;
if (columns == 2) {
nt >>= 1;
}
Future>[] futures = new Future[nthreads];
for (int i = 0; i < nthreads; i++) {
final int n0 = i;
final int startt = nt * i;
futures[i] = ConcurrencyUtils.submit(new Runnable() {
public void run() {
int idx2;
if (isgn == -1) {
if (columns > 2) {
for (int r = n0; r < rows; r += nthreads) {
for (int c = 0; c < columns; c += 4) {
for (int s = 0; s < slices; s++) {
idx2 = startt + slices + s;
t[startt + s] = a[s][r][c];
t[idx2] = a[s][r][c + 1];
t[idx2 + slices] = a[s][r][c + 2];
t[idx2 + 2 * slices] = a[s][r][c + 3];
}
dctSlices.forward(t, startt, scale);
dctSlices.forward(t, startt + slices, scale);
dctSlices.forward(t, startt + 2 * slices, scale);
dctSlices.forward(t, startt + 3 * slices, scale);
for (int s = 0; s < slices; s++) {
idx2 = startt + slices + s;
a[s][r][c] = t[startt + s];
a[s][r][c + 1] = t[idx2];
a[s][r][c + 2] = t[idx2 + slices];
a[s][r][c + 3] = t[idx2 + 2 * slices];
}
}
}
} else if (columns == 2) {
for (int r = n0; r < rows; r += nthreads) {
for (int s = 0; s < slices; s++) {
t[startt + s] = a[s][r][0];
t[startt + slices + s] = a[s][r][1];
}
dctSlices.forward(t, startt, scale);
dctSlices.forward(t, startt + slices, scale);
for (int s = 0; s < slices; s++) {
a[s][r][0] = t[startt + s];
a[s][r][1] = t[startt + slices + s];
}
}
}
} else {
if (columns > 2) {
for (int r = n0; r < rows; r += nthreads) {
for (int c = 0; c < columns; c += 4) {
for (int s = 0; s < slices; s++) {
idx2 = startt + slices + s;
t[startt + s] = a[s][r][c];
t[idx2] = a[s][r][c + 1];
t[idx2 + slices] = a[s][r][c + 2];
t[idx2 + 2 * slices] = a[s][r][c + 3];
}
dctSlices.inverse(t, startt, scale);
dctSlices.inverse(t, startt + slices, scale);
dctSlices.inverse(t, startt + 2 * slices, scale);
dctSlices.inverse(t, startt + 3 * slices, scale);
for (int s = 0; s < slices; s++) {
idx2 = startt + slices + s;
a[s][r][c] = t[startt + s];
a[s][r][c + 1] = t[idx2];
a[s][r][c + 2] = t[idx2 + slices];
a[s][r][c + 3] = t[idx2 + 2 * slices];
}
}
}
} else if (columns == 2) {
for (int r = n0; r < rows; r += nthreads) {
for (int s = 0; s < slices; s++) {
t[startt + s] = a[s][r][0];
t[startt + slices + s] = a[s][r][1];
}
dctSlices.inverse(t, startt, scale);
dctSlices.inverse(t, startt + slices, scale);
for (int s = 0; s < slices; s++) {
a[s][r][0] = t[startt + s];
a[s][r][1] = t[startt + slices + s];
}
}
}
}
}
});
}
ConcurrencyUtils.waitForCompletion(futures);
}
}