src/lib/vampimg/JpegLib/imjfdctint.pas

   1 unit imjfdctint;
   2
   3
   4 { This file contains a slow-but-accurate integer implementation of the
   5   forward DCT (Discrete Cosine Transform).
   6
   7   A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
   8   on each column.  Direct algorithms are also available, but they are
   9   much more complex and seem not to be any faster when reduced to code.
  10
  11   This implementation is based on an algorithm described in
  12     C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
  13     Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
  14     Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
  15   The primary algorithm described there uses 11 multiplies and 29 adds.
  16   We use their alternate method with 12 multiplies and 32 adds.
  17   The advantage of this method is that no data path contains more than one
  18   multiplication; this allows a very simple and accurate implementation in
  19   scaled fixed-point arithmetic, with a minimal number of shifts. }
  20
  21 { Original : jfdctint.c ; Copyright (C) 1991-1996, Thomas G. Lane. }
  22
  23 interface
  24
  25 {$I imjconfig.inc}
  26
  27 uses
  28   imjmorecfg,
  29   imjinclude,
  30   imjutils,
  31   imjpeglib,
  32   imjdct;   { Private declarations for DCT subsystem }
  33
  34
  35 { Perform the forward DCT on one block of samples. }
  36
  37 {GLOBAL}
  38 procedure jpeg_fdct_islow (var data : array of DCTELEM);
  39
  40 implementation
  41
  42 { This module is specialized to the case DCTSIZE = 8. }
  43
  44 {$ifndef DCTSIZE_IS_8}
  45   Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }
  46 {$endif}
  47
  48
  49 { The poop on this scaling stuff is as follows:
  50
  51   Each 1-D DCT step produces outputs which are a factor of sqrt(N)
  52   larger than the true DCT outputs.  The final outputs are therefore
  53   a factor of N larger than desired; since N=8 this can be cured by
  54   a simple right shift at the end of the algorithm.  The advantage of
  55   this arrangement is that we save two multiplications per 1-D DCT,
  56   because the y0 and y4 outputs need not be divided by sqrt(N).
  57   In the IJG code, this factor of 8 is removed by the quantization step
  58   (in jcdctmgr.c), NOT in this module.
  59
  60   We have to do addition and subtraction of the integer inputs, which
  61   is no problem, and multiplication by fractional constants, which is
  62   a problem to do in integer arithmetic.  We multiply all the constants
  63   by CONST_SCALE and convert them to integer constants (thus retaining
  64   CONST_BITS bits of precision in the constants).  After doing a
  65   multiplication we have to divide the product by CONST_SCALE, with proper
  66   rounding, to produce the correct output.  This division can be done
  67   cheaply as a right shift of CONST_BITS bits.  We postpone shifting
  68   as long as possible so that partial sums can be added together with
  69   full fractional precision.
  70
  71   The outputs of the first pass are scaled up by PASS1_BITS bits so that
  72   they are represented to better-than-integral precision.  These outputs
  73   require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
  74   with the recommended scaling.  (For 12-bit sample data, the intermediate
  75   array is INT32 anyway.)
  76
  77   To avoid overflow of the 32-bit intermediate results in pass 2, we must
  78   have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
  79   shows that the values given below are the most effective. }
  80
  81 {$ifdef BITS_IN_JSAMPLE_IS_8}
  82 const
  83   CONST_BITS = 13;
  84   PASS1_BITS = 2;
  85 {$else}
  86 const
  87   CONST_BITS = 13;
  88   PASS1_BITS = 1; { lose a little precision to avoid overflow }
  89 {$endif}
  90
  91 const
  92   CONST_SCALE = (INT32(1) shl CONST_BITS);
  93
  94 const
  95   FIX_0_298631336 = INT32(Round(CONST_SCALE * 0.298631336));  {2446}
  96   FIX_0_390180644 = INT32(Round(CONST_SCALE * 0.390180644));  {3196}
  97   FIX_0_541196100 = INT32(Round(CONST_SCALE * 0.541196100));  {4433}
  98   FIX_0_765366865 = INT32(Round(CONST_SCALE * 0.765366865));  {6270}
  99   FIX_0_899976223 = INT32(Round(CONST_SCALE * 0.899976223));  {7373}
 100   FIX_1_175875602 = INT32(Round(CONST_SCALE * 1.175875602));  {9633}
 101   FIX_1_501321110 = INT32(Round(CONST_SCALE * 1.501321110));  {12299}
 102   FIX_1_847759065 = INT32(Round(CONST_SCALE * 1.847759065));  {15137}
 103   FIX_1_961570560 = INT32(Round(CONST_SCALE * 1.961570560));  {16069}
 104   FIX_2_053119869 = INT32(Round(CONST_SCALE * 2.053119869));  {16819}
 105   FIX_2_562915447 = INT32(Round(CONST_SCALE * 2.562915447));  {20995}
 106   FIX_3_072711026 = INT32(Round(CONST_SCALE * 3.072711026));  {25172}
 107
 108
 109 { Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 110   For 8-bit samples with the recommended scaling, all the variable
 111   and constant values involved are no more than 16 bits wide, so a
 112   16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 113   For 12-bit samples, a full 32-bit multiplication will be needed. }
 114
 115 {$ifdef BITS_IN_JSAMPLE_IS_8}
 116
 117    {MULTIPLY16C16(var,const)}
 118    function Multiply(X, Y: int): INT32;
 119    begin
 120      Multiply := int(X) * INT32(Y);
 121    end;
 122
 123 {$else}
 124    function Multiply(X, Y: INT32): INT32;
 125    begin
 126      Multiply := X * Y;
 127    end;
 128 {$endif}
 129
 130 { Descale and correctly round an INT32 value that's scaled by N bits.
 131   We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 132   the fudge factor is correct for either sign of X. }
 133
 134 function DESCALE(x : INT32; n : int) : INT32;
 135 var
 136   shift_temp : INT32;
 137 begin
 138 {$ifdef RIGHT_SHIFT_IS_UNSIGNED}
 139   shift_temp := x + (INT32(1) shl (n-1));
 140   if shift_temp < 0 then
 141     Descale :=  (shift_temp shr n) or ((not INT32(0)) shl (32-n))
 142   else
 143     Descale :=  (shift_temp shr n);
 144 {$else}
 145   Descale := (x + (INT32(1) shl (n-1)) shr n;
 146 {$endif}
 147 end;
 148
 149
 150 { Perform the forward DCT on one block of samples. }
 151
 152 {GLOBAL}
 153 procedure jpeg_fdct_islow (var data : array of DCTELEM);
 154 type
 155   PWorkspace = ^TWorkspace;
 156   TWorkspace = array [0..DCTSIZE2-1] of DCTELEM;
 157 var
 158   tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 : INT32;
 159   tmp10, tmp11, tmp12, tmp13 : INT32;
 160   z1, z2, z3, z4, z5 : INT32;
 161   dataptr : PWorkspace;
 162   ctr : int;
 163   {SHIFT_TEMPS}
 164 begin
 165
 166   { Pass 1: process rows. }
 167   { Note results are scaled up by sqrt(8) compared to a true DCT; }
 168   { furthermore, we scale the results by 2**PASS1_BITS. }
 169
 170   dataptr := PWorkspace(@data);
 171   for ctr := DCTSIZE-1 downto 0 do
 172   begin
 173     tmp0 := dataptr^[0] + dataptr^[7];
 174     tmp7 := dataptr^[0] - dataptr^[7];
 175     tmp1 := dataptr^[1] + dataptr^[6];
 176     tmp6 := dataptr^[1] - dataptr^[6];
 177     tmp2 := dataptr^[2] + dataptr^[5];
 178     tmp5 := dataptr^[2] - dataptr^[5];
 179     tmp3 := dataptr^[3] + dataptr^[4];
 180     tmp4 := dataptr^[3] - dataptr^[4];
 181
 182     { Even part per LL&M figure 1 --- note that published figure is faulty;
 183       rotator "sqrt(2)*c1" should be "sqrt(2)*c6".  }
 184
 185     tmp10 := tmp0 + tmp3;
 186     tmp13 := tmp0 - tmp3;
 187     tmp11 := tmp1 + tmp2;
 188     tmp12 := tmp1 - tmp2;
 189
 190     dataptr^[0] := DCTELEM ((tmp10 + tmp11) shl PASS1_BITS);
 191     dataptr^[4] := DCTELEM ((tmp10 - tmp11) shl PASS1_BITS);
 192
 193     z1 := MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 194     dataptr^[2] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
 195            CONST_BITS-PASS1_BITS));
 196     dataptr^[6] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
 197            CONST_BITS-PASS1_BITS));
 198
 199     { Odd part per figure 8 --- note paper omits factor of sqrt(2).
 200       cK represents cos(K*pi/16).
 201       i0..i3 in the paper are tmp4..tmp7 here. }
 202
 203     z1 := tmp4 + tmp7;
 204     z2 := tmp5 + tmp6;
 205     z3 := tmp4 + tmp6;
 206     z4 := tmp5 + tmp7;
 207     z5 := MULTIPLY(z3 + z4, FIX_1_175875602); { sqrt(2) * c3 }
 208
 209     tmp4 := MULTIPLY(tmp4, FIX_0_298631336); { sqrt(2) * (-c1+c3+c5-c7) }
 210     tmp5 := MULTIPLY(tmp5, FIX_2_053119869); { sqrt(2) * ( c1+c3-c5+c7) }
 211     tmp6 := MULTIPLY(tmp6, FIX_3_072711026); { sqrt(2) * ( c1+c3+c5-c7) }
 212     tmp7 := MULTIPLY(tmp7, FIX_1_501321110); { sqrt(2) * ( c1+c3-c5-c7) }
 213     z1 := MULTIPLY(z1, - FIX_0_899976223); { sqrt(2) * (c7-c3) }
 214     z2 := MULTIPLY(z2, - FIX_2_562915447); { sqrt(2) * (-c1-c3) }
 215     z3 := MULTIPLY(z3, - FIX_1_961570560); { sqrt(2) * (-c3-c5) }
 216     z4 := MULTIPLY(z4, - FIX_0_390180644); { sqrt(2) * (c5-c3) }
 217
 218     Inc(z3, z5);
 219     Inc(z4, z5);
 220
 221     dataptr^[7] := DCTELEM(DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS));
 222     dataptr^[5] := DCTELEM(DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS));
 223     dataptr^[3] := DCTELEM(DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS));
 224     dataptr^[1] := DCTELEM(DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS));
 225
 226     Inc(DCTELEMPTR(dataptr), DCTSIZE);  { advance pointer to next row }
 227   end;
 228
 229   { Pass 2: process columns.
 230     We remove the PASS1_BITS scaling, but leave the results scaled up
 231     by an overall factor of 8. }
 232
 233   dataptr := PWorkspace(@data);
 234   for ctr := DCTSIZE-1 downto 0 do
 235   begin
 236     tmp0 := dataptr^[DCTSIZE*0] + dataptr^[DCTSIZE*7];
 237     tmp7 := dataptr^[DCTSIZE*0] - dataptr^[DCTSIZE*7];
 238     tmp1 := dataptr^[DCTSIZE*1] + dataptr^[DCTSIZE*6];
 239     tmp6 := dataptr^[DCTSIZE*1] - dataptr^[DCTSIZE*6];
 240     tmp2 := dataptr^[DCTSIZE*2] + dataptr^[DCTSIZE*5];
 241     tmp5 := dataptr^[DCTSIZE*2] - dataptr^[DCTSIZE*5];
 242     tmp3 := dataptr^[DCTSIZE*3] + dataptr^[DCTSIZE*4];
 243     tmp4 := dataptr^[DCTSIZE*3] - dataptr^[DCTSIZE*4];
 244
 245     { Even part per LL&M figure 1 --- note that published figure is faulty;
 246       rotator "sqrt(2)*c1" should be "sqrt(2)*c6". }
 247
 248     tmp10 := tmp0 + tmp3;
 249     tmp13 := tmp0 - tmp3;
 250     tmp11 := tmp1 + tmp2;
 251     tmp12 := tmp1 - tmp2;
 252
 253     dataptr^[DCTSIZE*0] := DCTELEM (DESCALE(tmp10 + tmp11, PASS1_BITS));
 254     dataptr^[DCTSIZE*4] := DCTELEM (DESCALE(tmp10 - tmp11, PASS1_BITS));
 255
 256     z1 := MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 257     dataptr^[DCTSIZE*2] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
 258              CONST_BITS+PASS1_BITS));
 259     dataptr^[DCTSIZE*6] := DCTELEM (DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
 260              CONST_BITS+PASS1_BITS));
 261
 262     { Odd part per figure 8 --- note paper omits factor of sqrt(2).
 263       cK represents cos(K*pi/16).
 264       i0..i3 in the paper are tmp4..tmp7 here. }
 265
 266     z1 := tmp4 + tmp7;
 267     z2 := tmp5 + tmp6;
 268     z3 := tmp4 + tmp6;
 269     z4 := tmp5 + tmp7;
 270     z5 := MULTIPLY(z3 + z4, FIX_1_175875602); { sqrt(2) * c3 }
 271
 272     tmp4 := MULTIPLY(tmp4, FIX_0_298631336); { sqrt(2) * (-c1+c3+c5-c7) }
 273     tmp5 := MULTIPLY(tmp5, FIX_2_053119869); { sqrt(2) * ( c1+c3-c5+c7) }
 274     tmp6 := MULTIPLY(tmp6, FIX_3_072711026); { sqrt(2) * ( c1+c3+c5-c7) }
 275     tmp7 := MULTIPLY(tmp7, FIX_1_501321110); { sqrt(2) * ( c1+c3-c5-c7) }
 276     z1 := MULTIPLY(z1, - FIX_0_899976223); { sqrt(2) * (c7-c3) }
 277     z2 := MULTIPLY(z2, - FIX_2_562915447); { sqrt(2) * (-c1-c3) }
 278     z3 := MULTIPLY(z3, - FIX_1_961570560); { sqrt(2) * (-c3-c5) }
 279     z4 := MULTIPLY(z4, - FIX_0_390180644); { sqrt(2) * (c5-c3) }
 280
 281     Inc(z3, z5);
 282     Inc(z4, z5);
 283
 284     dataptr^[DCTSIZE*7] := DCTELEM (DESCALE(tmp4 + z1 + z3,
 285              CONST_BITS+PASS1_BITS));
 286     dataptr^[DCTSIZE*5] := DCTELEM (DESCALE(tmp5 + z2 + z4,
 287              CONST_BITS+PASS1_BITS));
 288     dataptr^[DCTSIZE*3] := DCTELEM (DESCALE(tmp6 + z2 + z3,
 289              CONST_BITS+PASS1_BITS));
 290     dataptr^[DCTSIZE*1] := DCTELEM (DESCALE(tmp7 + z1 + z4,
 291              CONST_BITS+PASS1_BITS));
 292
 293     Inc(DCTELEMPTR(dataptr)); { advance pointer to next column }
 294   end;
 295 end;
 296
 297 end.