src/lib/vampimg/JpegLib/imjfdctfst.pas

   1 unit imjfdctfst;
   2
   3 { This file contains a fast, not so accurate integer implementation of the
   4   forward DCT (Discrete Cosine Transform).
   5
   6   A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
   7   on each column.  Direct algorithms are also available, but they are
   8   much more complex and seem not to be any faster when reduced to code.
   9
  10   This implementation is based on Arai, Agui, and Nakajima's algorithm for
  11   scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
  12   Japanese, but the algorithm is described in the Pennebaker & Mitchell
  13   JPEG textbook (see REFERENCES section in file README).  The following code
  14   is based directly on figure 4-8 in P&M.
  15   While an 8-point DCT cannot be done in less than 11 multiplies, it is
  16   possible to arrange the computation so that many of the multiplies are
  17   simple scalings of the final outputs.  These multiplies can then be
  18   folded into the multiplications or divisions by the JPEG quantization
  19   table entries.  The AA&N method leaves only 5 multiplies and 29 adds
  20   to be done in the DCT itself.
  21   The primary disadvantage of this method is that with fixed-point math,
  22   accuracy is lost due to imprecise representation of the scaled
  23   quantization values.  The smaller the quantization table entry, the less
  24   precise the scaled value, so this implementation does worse with high-
  25   quality-setting files than with low-quality ones. }
  26
  27 { Original: jfdctfst.c ; Copyright (C) 1994-1996, Thomas G. Lane. }
  28
  29
  30 interface
  31
  32 {$I imjconfig.inc}
  33
  34 uses
  35   imjmorecfg,
  36   imjinclude,
  37   imjpeglib,
  38   imjdct;     { Private declarations for DCT subsystem }
  39
  40
  41 { Perform the forward DCT on one block of samples. }
  42 { The block in data is overwritten in place with the transform output. }
  43 {GLOBAL}
  44 procedure jpeg_fdct_ifast (var data : array of DCTELEM);
  45
  46 implementation
  47
  48 { This module is specialized to the case DCTSIZE = 8. }
  49
  50 {$ifndef DCTSIZE_IS_8}
  51   Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }
  52 {$endif}
  53
  54
  55 { Scaling decisions are generally the same as in the LL&M algorithm;
  56   see jfdctint.c for more details.  However, we choose to descale
  57   (right shift) multiplication products as soon as they are formed,
  58   rather than carrying additional fractional bits into subsequent additions.
  59   This compromises accuracy slightly, but it lets us save a few shifts.
  60   More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
  61   everywhere except in the multiplications proper; this saves a good deal
  62   of work on 16-bit-int machines.
  63
  64   Again to save a few shifts, the intermediate results between pass 1 and
  65   pass 2 are not upscaled, but are represented only to integral precision.
  66
  67   A final compromise is to represent the multiplicative constants to only
  68   8 fractional bits, rather than 13.  This saves some shifting work on some
  69   machines, and may also reduce the cost of multiplication (since there
  70   are fewer one-bits in the constants). }
  71
  72 const
  73   CONST_BITS = 8;   { fractional bits carried by the fixed-point multipliers below }
  74 const
  75   CONST_SCALE = (INT32(1) shl CONST_BITS);   { the value 1.0 in that format, i.e. 2^CONST_BITS }
  76
  77 { Each FIX_ constant is the factor spelled in its name times CONST_SCALE, rounded to an integer. }
  78 const
  79   FIX_0_382683433 = INT32(Round(CONST_SCALE * 0.382683433)); {98, used as c6}
  80   FIX_0_541196100 = INT32(Round(CONST_SCALE * 0.541196100)); {139, used as c2-c6}
  81   FIX_0_707106781 = INT32(Round(CONST_SCALE * 0.707106781)); {181, used as c4}
  82   FIX_1_306562965 = INT32(Round(CONST_SCALE * 1.306562965)); {334, used as c2+c6}
  83
  84 { DESCALE strips n fractional bits from x by shifting right with sign fill.
  85   Under USE_ACCURATE_ROUNDING half an output unit is added beforehand; since
  86   the shift rounds towards minus infinity, that bias suits either sign of x. }
  87
  88 function DESCALE(x : INT32; n : int) : INT32;
  89 var
  90   biased : INT32;   { x, plus the rounding bias when that is compiled in }
  91 begin
  92 { Skipping the bias saves one addition per call, at the price of a result
  93   that is rounded the wrong way about half of the time.  That cheaper path
  94   is what gets compiled unless USE_ACCURATE_ROUNDING is defined. }
  95 {$ifdef USE_ACCURATE_ROUNDING}
  96   biased := x + (INT32(1) shl (n-1));
  97 {$else}
  98   biased := x;
  99 {$endif}
 100
 101 {$ifdef RIGHT_SHIFT_IS_UNSIGNED}
 102   if biased < 0 then   { shr zero-fills here: put ones back into the top n bits }
 103     DESCALE := ((not INT32(0)) shl (32-n)) or (biased shr n)
 104   else
 105 {$endif}
 106     DESCALE := biased shr n;
 107 end;
 108
 109 { Multiply a DCTELEM variable by an INT32 constant, and immediately
 110   descale to yield a DCTELEM result. }
 111 { X: value to scale.  Y: multiplier carrying CONST_BITS fractional bits (a FIX_ constant at every call site here). }
 112 { Returns the product after DESCALE has shifted those CONST_BITS bits back out. }
 113    function MULTIPLY(X : DCTELEM; Y: INT32): DCTELEM;
 114    begin
 115      Multiply := DeScale((X) * (Y), CONST_BITS);   { the shift count is always CONST_BITS }
 116    end;
 117
 118
 119 { Perform the forward DCT on one block of samples. }
 120 { In place: data is read through a DCTSIZE2-element view; rows are transformed first, then columns. }
 121 {GLOBAL}
 122 procedure jpeg_fdct_ifast (var data : array of DCTELEM);   { data: the block, overwritten with the result }
 123 type
 124   PWorkspace = ^TWorkspace;
 125   TWorkspace = array [0..DCTSIZE2-1] of DCTELEM;   { indexable view laid over the caller's block }
 126 var
 127   tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 : DCTELEM;   { sums and differences of mirrored inputs }
 128   tmp10, tmp11, tmp12, tmp13 : DCTELEM;   { phase 2 terms; assigned once in the even part, again in the odd part }
 129   z1, z2, z3, z4, z5, z11, z13 : DCTELEM;   { MULTIPLY results and values derived from them }
 130   dataptr :  PWorkspace;   { start of the row (pass 1) or column (pass 2) being processed }
 131   ctr : int;   { iteration counter only; never used as an index }
 132   {SHIFT_TEMPS}
 133 begin
 134   { Pass 1: process rows. }
 135   { Each iteration transforms the DCTSIZE consecutive elements starting at dataptr. }
 136   dataptr := PWorkspace(@data);   { the length of data is not checked against DCTSIZE2 }
 137   for ctr := DCTSIZE-1 downto 0 do
 138   begin
 139     tmp0 := dataptr^[0] + dataptr^[7];   { all eight inputs are read here, before any output is stored }
 140     tmp7 := dataptr^[0] - dataptr^[7];
 141     tmp1 := dataptr^[1] + dataptr^[6];
 142     tmp6 := dataptr^[1] - dataptr^[6];
 143     tmp2 := dataptr^[2] + dataptr^[5];
 144     tmp5 := dataptr^[2] - dataptr^[5];
 145     tmp3 := dataptr^[3] + dataptr^[4];
 146     tmp4 := dataptr^[3] - dataptr^[4];
 147
 148     { Even part }
 149     { Built from the sums tmp0..tmp3; stores outputs 0, 4, 2 and 6. }
 150     tmp10 := tmp0 + tmp3; { phase 2 }
 151     tmp13 := tmp0 - tmp3;
 152     tmp11 := tmp1 + tmp2;
 153     tmp12 := tmp1 - tmp2;
 154
 155     dataptr^[0] := tmp10 + tmp11; { phase 3 }
 156     dataptr^[4] := tmp10 - tmp11;
 157
 158     z1 := MULTIPLY(tmp12 + tmp13, FIX_0_707106781); { c4 }
 159     dataptr^[2] := tmp13 + z1;  { phase 5 }
 160     dataptr^[6] := tmp13 - z1;
 161
 162     { Odd part }
 163     { Built from the differences tmp4..tmp7; stores outputs 5, 3, 1 and 7. }
 164     tmp10 := tmp4 + tmp5; { phase 2; tmp10..tmp12 are overwritten, their even-part values are no longer read }
 165     tmp11 := tmp5 + tmp6;
 166     tmp12 := tmp6 + tmp7;
 167
 168     { The rotator is modified from fig 4-8 to avoid extra negations. }
 169     z5 := MULTIPLY(tmp10 - tmp12, FIX_0_382683433); { c6 }
 170     z2 := MULTIPLY(tmp10, FIX_0_541196100) + z5; { c2-c6 }
 171     z4 := MULTIPLY(tmp12, FIX_1_306562965) + z5; { c2+c6 }
 172     z3 := MULTIPLY(tmp11, FIX_0_707106781); { c4 }
 173
 174     z11 := tmp7 + z3;   { phase 5 }
 175     z13 := tmp7 - z3;
 176
 177     dataptr^[5] := z13 + z2;  { phase 6 }
 178     dataptr^[3] := z13 - z2;
 179     dataptr^[1] := z11 + z4;
 180     dataptr^[7] := z11 - z4;
 181
 182     Inc(DCTELEMPTR(dataptr), DCTSIZE);  { advance pointer to next row }
 183   end;
 184
 185   { Pass 2: process columns. }
 186   { Same sequence of operations as pass 1, on elements DCTSIZE apart instead of adjacent ones. }
 187   dataptr := PWorkspace(@data);   { back to the first element of the block }
 188   for ctr := DCTSIZE-1 downto 0 do
 189   begin
 190     tmp0 := dataptr^[DCTSIZE*0] + dataptr^[DCTSIZE*7];   { all eight column inputs are read before any store }
 191     tmp7 := dataptr^[DCTSIZE*0] - dataptr^[DCTSIZE*7];
 192     tmp1 := dataptr^[DCTSIZE*1] + dataptr^[DCTSIZE*6];
 193     tmp6 := dataptr^[DCTSIZE*1] - dataptr^[DCTSIZE*6];
 194     tmp2 := dataptr^[DCTSIZE*2] + dataptr^[DCTSIZE*5];
 195     tmp5 := dataptr^[DCTSIZE*2] - dataptr^[DCTSIZE*5];
 196     tmp3 := dataptr^[DCTSIZE*3] + dataptr^[DCTSIZE*4];
 197     tmp4 := dataptr^[DCTSIZE*3] - dataptr^[DCTSIZE*4];
 198
 199     { Even part }
 200     { Built from the sums tmp0..tmp3; stores column entries 0, 4, 2 and 6. }
 201     tmp10 := tmp0 + tmp3; { phase 2 }
 202     tmp13 := tmp0 - tmp3;
 203     tmp11 := tmp1 + tmp2;
 204     tmp12 := tmp1 - tmp2;
 205
 206     dataptr^[DCTSIZE*0] := tmp10 + tmp11; { phase 3 }
 207     dataptr^[DCTSIZE*4] := tmp10 - tmp11;
 208
 209     z1 := MULTIPLY(tmp12 + tmp13, FIX_0_707106781); { c4 }
 210     dataptr^[DCTSIZE*2] := tmp13 + z1; { phase 5 }
 211     dataptr^[DCTSIZE*6] := tmp13 - z1;
 212
 213     { Odd part }
 214     { Built from the differences tmp4..tmp7; stores column entries 5, 3, 1 and 7. }
 215     tmp10 := tmp4 + tmp5; { phase 2; tmp10..tmp12 are overwritten, their even-part values are no longer read }
 216     tmp11 := tmp5 + tmp6;
 217     tmp12 := tmp6 + tmp7;
 218
 219     { The rotator is modified from fig 4-8 to avoid extra negations. }
 220     z5 := MULTIPLY(tmp10 - tmp12, FIX_0_382683433); { c6 }
 221     z2 := MULTIPLY(tmp10, FIX_0_541196100) + z5; { c2-c6 }
 222     z4 := MULTIPLY(tmp12, FIX_1_306562965) + z5; { c2+c6 }
 223     z3 := MULTIPLY(tmp11, FIX_0_707106781); { c4 }
 224
 225     z11 := tmp7 + z3;   { phase 5 }
 226     z13 := tmp7 - z3;
 227
 228     dataptr^[DCTSIZE*5] := z13 + z2; { phase 6 }
 229     dataptr^[DCTSIZE*3] := z13 - z2;
 230     dataptr^[DCTSIZE*1] := z11 + z4;
 231     dataptr^[DCTSIZE*7] := z11 - z4;
 232
 233     Inc(DCTELEMPTR(dataptr)); { advance pointer to next column }
 234   end;
 235 end;
 236
 237 end.