DEADSOFTWARE

hopefully no more windows
[d2df-editor.git] / src / lib / vampimg / JpegLib / imjfdctint.pas
1 unit imjfdctint;
4 { This file contains a slow-but-accurate integer implementation of the
5 forward DCT (Discrete Cosine Transform).
7 A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
8 on each column. Direct algorithms are also available, but they are
9 much more complex and seem not to be any faster when reduced to code.
11 This implementation is based on an algorithm described in
12 C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
13 Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
14 Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
15 The primary algorithm described there uses 11 multiplies and 29 adds.
16 We use their alternate method with 12 multiplies and 32 adds.
17 The advantage of this method is that no data path contains more than one
18 multiplication; this allows a very simple and accurate implementation in
19 scaled fixed-point arithmetic, with a minimal number of shifts. }
21 { Original : jfdctint.c ; Copyright (C) 1991-1996, Thomas G. Lane. }
23 interface
25 {$I imjconfig.inc}
27 uses
28 imjmorecfg,
29 imjinclude,
30 imjutils,
31 imjpeglib,
32 imjdct; { Private declarations for DCT subsystem }
35 { Perform the forward DCT on one block of samples. }
37 {GLOBAL}
38 procedure jpeg_fdct_islow (var data : array of DCTELEM);
40 implementation
42 { This module is specialized to the case DCTSIZE = 8. }
44 {$ifndef DCTSIZE_IS_8}
45 Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }
46 {$endif}
49 { The poop on this scaling stuff is as follows:
51 Each 1-D DCT step produces outputs which are a factor of sqrt(N)
52 larger than the true DCT outputs. The final outputs are therefore
53 a factor of N larger than desired; since N=8 this can be cured by
54 a simple right shift at the end of the algorithm. The advantage of
55 this arrangement is that we save two multiplications per 1-D DCT,
56 because the y0 and y4 outputs need not be divided by sqrt(N).
57 In the IJG code, this factor of 8 is removed by the quantization step
58 (in jcdctmgr.c), NOT in this module.
60 We have to do addition and subtraction of the integer inputs, which
61 is no problem, and multiplication by fractional constants, which is
62 a problem to do in integer arithmetic. We multiply all the constants
63 by CONST_SCALE and convert them to integer constants (thus retaining
64 CONST_BITS bits of precision in the constants). After doing a
65 multiplication we have to divide the product by CONST_SCALE, with proper
66 rounding, to produce the correct output. This division can be done
67 cheaply as a right shift of CONST_BITS bits. We postpone shifting
68 as long as possible so that partial sums can be added together with
69 full fractional precision.
71 The outputs of the first pass are scaled up by PASS1_BITS bits so that
72 they are represented to better-than-integral precision. These outputs
73 require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
74 with the recommended scaling. (For 12-bit sample data, the intermediate
75 array is INT32 anyway.)
77 To avoid overflow of the 32-bit intermediate results in pass 2, we must
78 have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
79 shows that the values given below are the most effective. }
81 {$ifdef BITS_IN_JSAMPLE_IS_8}
82 const
83 CONST_BITS = 13;
84 PASS1_BITS = 2;
85 {$else}
86 const
87 CONST_BITS = 13;
88 PASS1_BITS = 1; { lose a little precision to avoid overflow }
89 {$endif}
91 const
92 CONST_SCALE = (INT32(1) shl CONST_BITS);
94 const
95 FIX_0_298631336 = INT32(Round(CONST_SCALE * 0.298631336)); {2446}
96 FIX_0_390180644 = INT32(Round(CONST_SCALE * 0.390180644)); {3196}
97 FIX_0_541196100 = INT32(Round(CONST_SCALE * 0.541196100)); {4433}
98 FIX_0_765366865 = INT32(Round(CONST_SCALE * 0.765366865)); {6270}
99 FIX_0_899976223 = INT32(Round(CONST_SCALE * 0.899976223)); {7373}
100 FIX_1_175875602 = INT32(Round(CONST_SCALE * 1.175875602)); {9633}
101 FIX_1_501321110 = INT32(Round(CONST_SCALE * 1.501321110)); {12299}
102 FIX_1_847759065 = INT32(Round(CONST_SCALE * 1.847759065)); {15137}
103 FIX_1_961570560 = INT32(Round(CONST_SCALE * 1.961570560)); {16069}
104 FIX_2_053119869 = INT32(Round(CONST_SCALE * 2.053119869)); {16819}
105 FIX_2_562915447 = INT32(Round(CONST_SCALE * 2.562915447)); {20995}
106 FIX_3_072711026 = INT32(Round(CONST_SCALE * 3.072711026)); {25172}
109 { Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
110 For 8-bit samples with the recommended scaling, all the variable
111 and constant values involved are no more than 16 bits wide, so a
112 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
113 For 12-bit samples, a full 32-bit multiplication will be needed. }
115 {$ifdef BITS_IN_JSAMPLE_IS_8}
117 {MULTIPLY16C16(var,const)}
118 function Multiply(X, Y: int): INT32;
119 begin
120 Multiply := int(X) * INT32(Y);
121 end;
123 {$else}
124 function Multiply(X, Y: INT32): INT32;
125 begin
126 Multiply := X * Y;
127 end;
128 {$endif}
130 { Descale and correctly round an INT32 value that's scaled by N bits.
131 We assume RIGHT_SHIFT rounds towards minus infinity, so adding
132 the fudge factor is correct for either sign of X. }
134 function DESCALE(x : INT32; n : int) : INT32;
135 var
136 shift_temp : INT32;
137 begin
138 {$ifdef RIGHT_SHIFT_IS_UNSIGNED}
139 shift_temp := x + (INT32(1) shl (n-1));
140 if shift_temp < 0 then
141 Descale := (shift_temp shr n) or ((not INT32(0)) shl (32-n))
142 else
143 Descale := (shift_temp shr n);
144 {$else}
145 Descale := (x + (INT32(1) shl (n-1)) shr n;
146 {$endif}
147 end;
150 { Perform the forward DCT on one block of samples. }
152 {GLOBAL}
153 procedure jpeg_fdct_islow (var data : array of DCTELEM);
154 type
155 PWorkspace = ^TWorkspace;
156 TWorkspace = array [0..DCTSIZE2-1] of DCTELEM;
157 var
158 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 : INT32;
159 tmp10, tmp11, tmp12, tmp13 : INT32;
160 z1, z2, z3, z4, z5 : INT32;
161 dataptr : PWorkspace;
162 ctr : int;
163 {SHIFT_TEMPS}
164 begin
166 { Pass 1: process rows. }
167 { Note results are scaled up by sqrt(8) compared to a true DCT; }
168 { furthermore, we scale the results by 2**PASS1_BITS. }
170 dataptr := PWorkspace(@data);
171 for ctr := DCTSIZE-1 downto 0 do
172 begin
173 tmp0 := dataptr^[0] + dataptr^[7];
174 tmp7 := dataptr^[0] - dataptr^[7];
175 tmp1 := dataptr^[1] + dataptr^[6];
176 tmp6 := dataptr^[1] - dataptr^[6];
177 tmp2 := dataptr^[2] + dataptr^[5];
178 tmp5 := dataptr^[2] - dataptr^[5];
179 tmp3 := dataptr^[3] + dataptr^[4];
180 tmp4 := dataptr^[3] - dataptr^[4];
182 { Even part per LL&M figure 1 --- note that published figure is faulty;
183 rotator "sqrt(2)*c1" should be "sqrt(2)*c6". }
185 tmp10 := tmp0 + tmp3;
186 tmp13 := tmp0 - tmp3;
187 tmp11 := tmp1 + tmp2;
188 tmp12 := tmp1 - tmp2;
190 dataptr^[0] := DCTELEM ((tmp10 + tmp11) shl PASS1_BITS);
191 dataptr^[4] := DCTELEM ((tmp10 - tmp11) shl PASS1_BITS);
193 z1 := MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
194 dataptr^[2] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
195 CONST_BITS-PASS1_BITS));
196 dataptr^[6] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
197 CONST_BITS-PASS1_BITS));
199 { Odd part per figure 8 --- note paper omits factor of sqrt(2).
200 cK represents cos(K*pi/16).
201 i0..i3 in the paper are tmp4..tmp7 here. }
203 z1 := tmp4 + tmp7;
204 z2 := tmp5 + tmp6;
205 z3 := tmp4 + tmp6;
206 z4 := tmp5 + tmp7;
207 z5 := MULTIPLY(z3 + z4, FIX_1_175875602); { sqrt(2) * c3 }
209 tmp4 := MULTIPLY(tmp4, FIX_0_298631336); { sqrt(2) * (-c1+c3+c5-c7) }
210 tmp5 := MULTIPLY(tmp5, FIX_2_053119869); { sqrt(2) * ( c1+c3-c5+c7) }
211 tmp6 := MULTIPLY(tmp6, FIX_3_072711026); { sqrt(2) * ( c1+c3+c5-c7) }
212 tmp7 := MULTIPLY(tmp7, FIX_1_501321110); { sqrt(2) * ( c1+c3-c5-c7) }
213 z1 := MULTIPLY(z1, - FIX_0_899976223); { sqrt(2) * (c7-c3) }
214 z2 := MULTIPLY(z2, - FIX_2_562915447); { sqrt(2) * (-c1-c3) }
215 z3 := MULTIPLY(z3, - FIX_1_961570560); { sqrt(2) * (-c3-c5) }
216 z4 := MULTIPLY(z4, - FIX_0_390180644); { sqrt(2) * (c5-c3) }
218 Inc(z3, z5);
219 Inc(z4, z5);
221 dataptr^[7] := DCTELEM(DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS));
222 dataptr^[5] := DCTELEM(DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS));
223 dataptr^[3] := DCTELEM(DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS));
224 dataptr^[1] := DCTELEM(DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS));
226 Inc(DCTELEMPTR(dataptr), DCTSIZE); { advance pointer to next row }
227 end;
229 { Pass 2: process columns.
230 We remove the PASS1_BITS scaling, but leave the results scaled up
231 by an overall factor of 8. }
233 dataptr := PWorkspace(@data);
234 for ctr := DCTSIZE-1 downto 0 do
235 begin
236 tmp0 := dataptr^[DCTSIZE*0] + dataptr^[DCTSIZE*7];
237 tmp7 := dataptr^[DCTSIZE*0] - dataptr^[DCTSIZE*7];
238 tmp1 := dataptr^[DCTSIZE*1] + dataptr^[DCTSIZE*6];
239 tmp6 := dataptr^[DCTSIZE*1] - dataptr^[DCTSIZE*6];
240 tmp2 := dataptr^[DCTSIZE*2] + dataptr^[DCTSIZE*5];
241 tmp5 := dataptr^[DCTSIZE*2] - dataptr^[DCTSIZE*5];
242 tmp3 := dataptr^[DCTSIZE*3] + dataptr^[DCTSIZE*4];
243 tmp4 := dataptr^[DCTSIZE*3] - dataptr^[DCTSIZE*4];
245 { Even part per LL&M figure 1 --- note that published figure is faulty;
246 rotator "sqrt(2)*c1" should be "sqrt(2)*c6". }
248 tmp10 := tmp0 + tmp3;
249 tmp13 := tmp0 - tmp3;
250 tmp11 := tmp1 + tmp2;
251 tmp12 := tmp1 - tmp2;
253 dataptr^[DCTSIZE*0] := DCTELEM (DESCALE(tmp10 + tmp11, PASS1_BITS));
254 dataptr^[DCTSIZE*4] := DCTELEM (DESCALE(tmp10 - tmp11, PASS1_BITS));
256 z1 := MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
257 dataptr^[DCTSIZE*2] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
258 CONST_BITS+PASS1_BITS));
259 dataptr^[DCTSIZE*6] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
260 CONST_BITS+PASS1_BITS));
262 { Odd part per figure 8 --- note paper omits factor of sqrt(2).
263 cK represents cos(K*pi/16).
264 i0..i3 in the paper are tmp4..tmp7 here. }
266 z1 := tmp4 + tmp7;
267 z2 := tmp5 + tmp6;
268 z3 := tmp4 + tmp6;
269 z4 := tmp5 + tmp7;
270 z5 := MULTIPLY(z3 + z4, FIX_1_175875602); { sqrt(2) * c3 }
272 tmp4 := MULTIPLY(tmp4, FIX_0_298631336); { sqrt(2) * (-c1+c3+c5-c7) }
273 tmp5 := MULTIPLY(tmp5, FIX_2_053119869); { sqrt(2) * ( c1+c3-c5+c7) }
274 tmp6 := MULTIPLY(tmp6, FIX_3_072711026); { sqrt(2) * ( c1+c3+c5-c7) }
275 tmp7 := MULTIPLY(tmp7, FIX_1_501321110); { sqrt(2) * ( c1+c3-c5-c7) }
276 z1 := MULTIPLY(z1, - FIX_0_899976223); { sqrt(2) * (c7-c3) }
277 z2 := MULTIPLY(z2, - FIX_2_562915447); { sqrt(2) * (-c1-c3) }
278 z3 := MULTIPLY(z3, - FIX_1_961570560); { sqrt(2) * (-c3-c5) }
279 z4 := MULTIPLY(z4, - FIX_0_390180644); { sqrt(2) * (c5-c3) }
281 Inc(z3, z5);
282 Inc(z4, z5);
284 dataptr^[DCTSIZE*7] := DCTELEM (DESCALE(tmp4 + z1 + z3,
285 CONST_BITS+PASS1_BITS));
286 dataptr^[DCTSIZE*5] := DCTELEM (DESCALE(tmp5 + z2 + z4,
287 CONST_BITS+PASS1_BITS));
288 dataptr^[DCTSIZE*3] := DCTELEM (DESCALE(tmp6 + z2 + z3,
289 CONST_BITS+PASS1_BITS));
290 dataptr^[DCTSIZE*1] := DCTELEM (DESCALE(tmp7 + z1 + z4,
291 CONST_BITS+PASS1_BITS));
293 Inc(DCTELEMPTR(dataptr)); { advance pointer to next column }
294 end;
295 end;
297 end.