1 Unit JFDctFst;
2
3 { This file contains a fast, not so accurate integer implementation of the
4 forward DCT (Discrete Cosine Transform).
5
6 A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
7 on each column. Direct algorithms are also available, but they are
8 much more complex and seem not to be any faster when reduced to code.
9
10 This implementation is based on Arai, Agui, and Nakajima's algorithm for
11 scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
12 Japanese, but the algorithm is described in the Pennebaker & Mitchell
13 JPEG textbook (see REFERENCES section in file README). The following code
14 is based directly on figure 4-8 in P&M.
While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
16 possible to arrange the computation so that many of the multiplies are
17 simple scalings of the final outputs. These multiplies can then be
18 folded into the multiplications or divisions by the JPEG quantization
19 table entries. The AA&N method leaves only 5 multiplies and 29 adds
20 to be done in the DCT itself.
21 The primary disadvantage of this method is that with fixed-point math,
22 accuracy is lost due to imprecise representation of the scaled
23 quantization values. The smaller the quantization table entry, the less
24 precise the scaled value, so this implementation does worse with high-
25 quality-setting files than with low-quality ones. }
26
27 { Original: jfdctfst.c ; Copyright (C) 1994-1996, Thomas G. Lane. }
28
29
30 interface
31
32 {$I jconfig.inc}
33
34 uses
35 jmorecfg,
36 jinclude,
37 jpeglib,
38 jdct; { Private declarations for DCT subsystem }
39
40
41 { Perform the forward DCT on one block of samples. }
42
43 {GLOBAL}
44 procedure jpeg_fdct_ifast (var data : array of DCTELEM);
45
46 implementation
47
48 { This module is specialized to the case DCTSIZE = 8. }
49
50 {$ifndef DCTSIZE_IS_8}
51 Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }
52 {$endif}
53
54
55 { Scaling decisions are generally the same as in the LL&M algorithm;
56 see jfdctint.c for more details. However, we choose to descale
57 (right shift) multiplication products as soon as they are formed,
58 rather than carrying additional fractional bits into subsequent additions.
59 This compromises accuracy slightly, but it lets us save a few shifts.
60 More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
61 everywhere except in the multiplications proper; this saves a good deal
62 of work on 16-bit-int machines.
63
64 Again to save a few shifts, the intermediate results between pass 1 and
65 pass 2 are not upscaled, but are represented only to integral precision.
66
67 A final compromise is to represent the multiplicative constants to only
68 8 fractional bits, rather than 13. This saves some shifting work on some
69 machines, and may also reduce the cost of multiplication (since there
70 are fewer one-bits in the constants). }
71
const
  { Number of fractional bits kept in the fixed-point multipliers below. }
  CONST_BITS = 8;

  { Fixed-point representation of 1.0, i.e. 2 to the power CONST_BITS. }
  CONST_SCALE = INT32(1) shl CONST_BITS;

  { Multipliers used by the DCT passes, each one a real-valued factor
    scaled by CONST_SCALE and rounded to the nearest integer.  The value
    in braces is the result for CONST_BITS = 8. }
  FIX_0_382683433 = INT32(Round(0.382683433 * CONST_SCALE));  {98}
  FIX_0_541196100 = INT32(Round(0.541196100 * CONST_SCALE));  {139}
  FIX_0_707106781 = INT32(Round(0.707106781 * CONST_SCALE));  {181}
  FIX_1_306562965 = INT32(Round(1.306562965 * CONST_SCALE));  {334}
83
{ Shift an INT32 value right by n bits, dropping n fractional bits.

  x : value that carries n extra fractional bits.
  n : number of bits to remove (MULTIPLY in this unit passes CONST_BITS).

  When USE_ACCURATE_ROUNDING is defined, half of the final unit, that is
  1 shl (n-1), is added before the shift so that the result is rounded.
  Otherwise the low bits are simply discarded: this is a little faster
  but yields an incorrectly rounded result about half the time.

  When RIGHT_SHIFT_IS_UNSIGNED is defined, the upper n bits of a negative
  value are set by hand after the shift, so the overall operation rounds
  towards minus infinity and the rounding bias is correct for either sign.
  NOTE(review): when that symbol is not defined, the code relies on "shr"
  itself preserving the sign of a negative operand -- confirm that the
  setting in jconfig.inc matches the compiler in use. }

function DESCALE(x : INT32; n : int) : INT32;
var
  biased : INT32;    { input, plus the rounding bias when enabled }
  shifted : INT32;   { value after the right shift }
begin
  biased := x;
{$ifdef USE_ACCURATE_ROUNDING}
  biased := biased + (INT32(1) shl (n-1));
{$endif}

  shifted := biased shr n;

{$ifdef RIGHT_SHIFT_IS_UNSIGNED}
  { A logical shift filled the top n bits with zeroes; put the sign back. }
  if biased < 0 then
    shifted := shifted or ((not INT32(0)) shl (32-n));
{$endif}

  DESCALE := shifted;
end;
108
{ Multiply a DCTELEM value by an INT32 fixed-point constant and descale
  the product straight away by CONST_BITS, giving a DCTELEM result.

  X : the sample or intermediate value.
  Y : the multiplier, scaled by CONST_SCALE (one of the FIX_ constants). }

function MULTIPLY(X : DCTELEM; Y : INT32) : DCTELEM;
var
  product : INT32;   { full-width product, still carrying CONST_BITS fraction bits }
begin
  product := X * Y;
  MULTIPLY := DESCALE(product, CONST_BITS);
end;
117
118
{ Perform the forward DCT on one block of samples, in place.

  data : the block, treated as DCTSIZE2 consecutive elements in which each
         run of DCTSIZE elements is one row.  The length of the open array
         is not checked; elements 0..DCTSIZE2-1 are read and overwritten.

  The transform is done as a 1-D pass over every row followed by the same
  1-D pass over every column.  Each multiplication is descaled as soon as
  it is formed (see MULTIPLY), and the values handed from pass 1 to pass 2
  are kept at integral precision. }

{GLOBAL}
procedure jpeg_fdct_ifast (var data : array of DCTELEM);
type
  PWorkspace = ^TWorkspace;
  TWorkspace = array [0..DCTSIZE2-1] of DCTELEM;
var
  blk : PWorkspace;                     { the caller's block, viewed as a flat array }
  row, col, base : int;                 { loop counters and index of the first element }
  s07, s16, s25, s34 : DCTELEM;         { sums of mirrored input pairs }
  d07, d16, d25, d34 : DCTELEM;         { differences of mirrored input pairs }
  ev0, ev1, ev2, ev3 : DCTELEM;         { even-part intermediates }
  od0, od1, od2 : DCTELEM;              { odd-part intermediates }
  z1, z2, z3, z4, z5, z11, z13 : DCTELEM;
begin
  blk := PWorkspace(@data);

  { Pass 1: process rows.  Row number "row" starts at index row*DCTSIZE
    and its elements are adjacent. }

  for row := 0 to DCTSIZE-1 do
  begin
    base := row * DCTSIZE;

    { Load the whole row as sums and differences before storing anything. }
    s07 := blk^[base+0] + blk^[base+7];
    d07 := blk^[base+0] - blk^[base+7];
    s16 := blk^[base+1] + blk^[base+6];
    d16 := blk^[base+1] - blk^[base+6];
    s25 := blk^[base+2] + blk^[base+5];
    d25 := blk^[base+2] - blk^[base+5];
    s34 := blk^[base+3] + blk^[base+4];
    d34 := blk^[base+3] - blk^[base+4];

    { Even part }

    ev0 := s07 + s34;                   { phase 2 }
    ev3 := s07 - s34;
    ev1 := s16 + s25;
    ev2 := s16 - s25;

    blk^[base+0] := ev0 + ev1;          { phase 3 }
    blk^[base+4] := ev0 - ev1;

    z1 := MULTIPLY(ev2 + ev3, FIX_0_707106781);   { c4 }
    blk^[base+2] := ev3 + z1;           { phase 5 }
    blk^[base+6] := ev3 - z1;

    { Odd part }

    od0 := d34 + d25;                   { phase 2 }
    od1 := d25 + d16;
    od2 := d16 + d07;

    { The rotator is modified from fig 4-8 to avoid extra negations. }
    z5 := MULTIPLY(od0 - od2, FIX_0_382683433);   { c6 }
    z2 := MULTIPLY(od0, FIX_0_541196100) + z5;    { c2-c6 }
    z4 := MULTIPLY(od2, FIX_1_306562965) + z5;    { c2+c6 }
    z3 := MULTIPLY(od1, FIX_0_707106781);         { c4 }

    z11 := d07 + z3;                    { phase 5 }
    z13 := d07 - z3;

    blk^[base+5] := z13 + z2;           { phase 6 }
    blk^[base+3] := z13 - z2;
    blk^[base+1] := z11 + z4;
    blk^[base+7] := z11 - z4;
  end;

  { Pass 2: process columns.  Column number "col" starts at index col and
    its elements are DCTSIZE apart. }

  for col := 0 to DCTSIZE-1 do
  begin
    base := col;

    { Load the whole column as sums and differences before storing anything. }
    s07 := blk^[base+DCTSIZE*0] + blk^[base+DCTSIZE*7];
    d07 := blk^[base+DCTSIZE*0] - blk^[base+DCTSIZE*7];
    s16 := blk^[base+DCTSIZE*1] + blk^[base+DCTSIZE*6];
    d16 := blk^[base+DCTSIZE*1] - blk^[base+DCTSIZE*6];
    s25 := blk^[base+DCTSIZE*2] + blk^[base+DCTSIZE*5];
    d25 := blk^[base+DCTSIZE*2] - blk^[base+DCTSIZE*5];
    s34 := blk^[base+DCTSIZE*3] + blk^[base+DCTSIZE*4];
    d34 := blk^[base+DCTSIZE*3] - blk^[base+DCTSIZE*4];

    { Even part }

    ev0 := s07 + s34;                   { phase 2 }
    ev3 := s07 - s34;
    ev1 := s16 + s25;
    ev2 := s16 - s25;

    blk^[base+DCTSIZE*0] := ev0 + ev1;  { phase 3 }
    blk^[base+DCTSIZE*4] := ev0 - ev1;

    z1 := MULTIPLY(ev2 + ev3, FIX_0_707106781);   { c4 }
    blk^[base+DCTSIZE*2] := ev3 + z1;   { phase 5 }
    blk^[base+DCTSIZE*6] := ev3 - z1;

    { Odd part }

    od0 := d34 + d25;                   { phase 2 }
    od1 := d25 + d16;
    od2 := d16 + d07;

    { The rotator is modified from fig 4-8 to avoid extra negations. }
    z5 := MULTIPLY(od0 - od2, FIX_0_382683433);   { c6 }
    z2 := MULTIPLY(od0, FIX_0_541196100) + z5;    { c2-c6 }
    z4 := MULTIPLY(od2, FIX_1_306562965) + z5;    { c2+c6 }
    z3 := MULTIPLY(od1, FIX_0_707106781);         { c4 }

    z11 := d07 + z3;                    { phase 5 }
    z13 := d07 - z3;

    blk^[base+DCTSIZE*5] := z13 + z2;   { phase 6 }
    blk^[base+DCTSIZE*3] := z13 - z2;
    blk^[base+DCTSIZE*1] := z11 + z4;
    blk^[base+DCTSIZE*7] := z11 - z4;
  end;
end;
236
237 end.
238