3 { This file contains Huffman entropy encoding routines.
5 Much of the complexity here has to do with supporting output suspension.
6 If the data destination module demands suspension, we want to be able to
7 back up to the start of the current MCU. To do this, we copy state
8 variables into local working storage, and update them back to the
9 permanent JPEG objects only upon successful completion of an MCU. }
11 { Original: jchuff.c; Copyright (C) 1991-1997, Thomas G. Lane. }
13 interface
15 {$I imjconfig.inc}
17 uses
19 imjpeglib,
20 imjdeferr,
21 imjerror,
22 imjutils,
23 imjinclude,
24 imjcomapi;
26 { The legal range of a DCT coefficient is
27 -1024 .. +1023 for 8-bit data;
28 -16384 .. +16383 for 12-bit data.
29 Hence the magnitude should always fit in 10 or 14 bits respectively. }
32 {$ifdef BITS_IN_JSAMPLE_IS_8}
33 const
35 {$else}
36 const
38 {$endif}
40 { Derived data constructed for each Huffman table }
41 { Declarations shared with jcphuff.c }
42 type
47 { If no code has been allocated for a symbol S, ehufsi[S] contains 0 }
49 { for JCHUFF und JCPHUFF }
50 type
54 { Compute the derived values for a Huffman table.
55 Note this is also used by jcphuff.c. }
57 {GLOBAL}
63 { Generate the optimal coding for the given counts, fill htbl.
64 Note this is also used by jcphuff.c. }
66 {GLOBAL}
71 { Module initialization routine for Huffman entropy encoding. }
73 {GLOBAL}
76 implementation
78 { Expanded entropy encoder object for Huffman encoding.
80 The savable_state subrecord contains fields that change within an MCU,
81 but must not be updated permanently until we complete the MCU. }
83 type
88 { last DC coef for each component }
92 type
99 { These fields are NOT loaded into local working state. }
103 { Pointers to derived tables (these workspaces have image lifespan) }
110 {$endif}
115 { Working state while writing an MCU.
116 This struct contains all the fields that are needed by subroutines. }
118 type
127 { Forward declarations }
128 {METHODDEF}
132 {METHODDEF}
134 {$ifdef ENTROPY_OPT_SUPPORTED}
135 {METHODDEF}
140 {METHODDEF}
142 {$endif}
145 { Initialize for a Huffman-compressed scan.
146 If gather_statistics is TRUE, we do not output anything during the scan,
147 just count the Huffman symbols used and generate Huffman code tables. }
149 {METHODDEF}
152 var
156 begin
160 begin
161 {$ifdef ENTROPY_OPT_SUPPORTED}
164 {$else}
166 {$endif}
167 end
168 else
169 begin
175 begin
180 begin
181 {$ifdef ENTROPY_OPT_SUPPORTED}
182 { Check for invalid table indexes }
183 { (make_c_derived_tbl does this in the other path) }
188 { Allocate and zero the statistics tables }
189 { Note that jpeg_gen_optimal_table expects 257 entries in each table! }
200 {$endif}
201 end
202 else
203 begin
204 { Compute derived values for Huffman tables }
205 { We may do this more than once for a table, but it's not expensive }
211 { Initialize DC predictions to 0 }
215 { Initialize bit buffer to empty }
219 { Initialize restart stuff }
225 { Compute the derived values for a Huffman table.
226 This routine also performs some validation checks on the table.
228 Note this is also used by jcphuff.c. }
230 {GLOBAL}
235 var
242 begin
243 { Note that huffsize[] and huffcode[] are filled in code-length order,
244 paralleling the order of the symbols themselves in htbl->huffval[]. }
246 { Find the input Huffman table }
251 else
256 { Allocate a workspace if we haven't already done so. }
263 { Figure C.1: make table of Huffman code length for each symbol }
267 begin
272 begin
281 { Figure C.2: generate the codes themselves }
282 { We also validate that the counts represent a legal Huffman code tree. }
288 begin
290 begin
295 { code is now 1 more than the last code used for codelength si; but
296 it must still fit in si bits, since no code is allowed to be all ones. }
304 { Figure C.3: generate encoding tables }
305 { These are code and size indexed by symbol value }
307 { Set all codeless symbols to have code length 0;
308 this lets us detect duplicate VAL entries here, and later
309 allows emit_bits to detect any attempt to emit such symbols. }
313 { This is also a convenient place to check for out-of-range
314 and duplicated VAL entries. We allow 0..255 for AC symbols
315 but only 0..15 for DC. (We could constrain them further
316 based on data depth and mode, but this seems enough.) }
320 else
324 begin
334 { Outputting bytes to the file }
337 {LOCAL}
339 { Empty the output buffer; return TRUE if successful, FALSE if must suspend }
340 var
342 begin
346 begin
348 exit;
350 { After a successful buffer dump, must reset buffer pointers }
357 { Outputting bits to the file }
359 { Only the right 24 bits of put_buffer are used; the valid bits are
360 left-justified in this part. At most 16 bits can be passed to emit_bits
361 in one call, and we never retain more than 7 bits in put_buffer
362 between calls, so 24 bits are sufficient. }
365 {LOCAL}
369 { Emit some bits; return TRUE if successful, FALSE if must suspend }
370 var
371 { This routine is heavily used, so it's worth coding tightly. }
374 var
376 begin
380 { if size is 0, caller used an invalid Huffman table entry }
385 { mask off any extra bits in code }
390 { align incoming bits }
392 { and merge with old buffer contents }
394 begin
397 {emit_byte(state, c, return FALSE);}
398 { Emit a byte, return FALSE if must suspend. }
404 begin
406 exit;
410 begin
411 {emit_byte(state, 0, return FALSE);}
417 begin
419 exit;
434 {LOCAL}
436 begin
438 begin
440 exit;
448 { Encode a single block's worth of coefficients }
450 {LOCAL}
456 var
460 begin
461 { Encode the DC coefficient difference per section F.1.2.1 }
467 begin
469 { For a negative input, want temp2 := bitwise complement of abs(input) }
470 { This code assumes we are on a two's complement machine }
474 { Find the number of bits needed for the magnitude of the coefficient }
477 begin
482 { Check for out-of-range coefficient values.
483 Since we're encoding a difference, the range limit is twice as much. }
488 { Emit the Huffman-coded symbol for the number of bits }
490 begin
492 exit;
495 { Emit that number of bits of the value, if positive, }
496 { or the complement of its magnitude, if negative. }
499 begin
501 exit;
504 { Encode the AC coefficients per section F.1.2.2 }
509 begin
512 begin
514 end
515 else
516 begin
517 { if run length > 15, must emit special run-length-16 codes ($F0) }
519 begin
521 begin
523 exit;
530 begin
532 { This code assumes we are on a two's complement machine }
536 { Find the number of bits needed for the magnitude of the coefficient }
538 repeat
543 { Check for out-of-range coefficient values }
547 { Emit Huffman symbol for run length / number of bits }
550 begin
552 exit;
555 { Emit that number of bits of the value, if positive, }
556 { or the complement of its magnitude, if negative. }
558 begin
560 exit;
567 { If the last coef(s) were zero, emit an end-of-block code }
570 begin
572 exit;
579 { Emit a restart marker & resynchronize predictions. }
581 {LOCAL}
584 var
586 begin
588 begin
590 exit;
593 {emit_byte(state, $FF, return FALSE);}
594 { Emit a byte, return FALSE if must suspend. }
600 begin
602 exit;
605 {emit_byte(state, JPEG_RST0 + restart_num, return FALSE);}
606 { Emit a byte, return FALSE if must suspend. }
612 begin
614 exit;
617 { Re-initialize DC predictions to 0 }
621 { The restart counter is not updated until we successfully write the MCU. }
627 { Encode and output one MCU's worth of Huffman-compressed coefficients. }
629 {METHODDEF}
632 var
637 begin
639 { Load up working state }
642 {ASSIGN_STATE(state.cur, entropy^.saved);}
646 { Emit restart marker if needed }
648 begin
651 begin
653 exit;
657 { Encode the MCU data blocks }
659 begin
667 begin
669 exit;
671 { Update last_dc_val }
675 { Completed MCU, so update state }
678 {ASSIGN_STATE(entropy^.saved, state.cur);}
681 { Update restart-interval state too }
683 begin
685 begin
698 { Finish up at the end of a Huffman-compressed scan. }
700 {METHODDEF}
702 var
705 begin
708 { Load up working state ... flush_bits needs it }
711 {ASSIGN_STATE(state.cur, entropy^.saved);}
715 { Flush out the last data }
719 { Update state }
722 {ASSIGN_STATE(entropy^.saved, state.cur);}
727 { Huffman coding optimization.
729 We first scan the supplied data and count the number of uses of each symbol
730 that is to be Huffman-coded. (This process MUST agree with the code above.)
731 Then we build a Huffman coding tree for the observed counts.
732 Symbols which are not needed at all for the particular image are not
733 assigned any code, which saves space in the DHT marker as well as in
734 the compressed data. }
736 {$ifdef ENTROPY_OPT_SUPPORTED}
739 { Process a single block's worth of coefficients }
741 {LOCAL}
748 var
752 begin
753 { Encode the DC coefficient difference per section F.1.2.1 }
758 { Find the number of bits needed for the magnitude of the coefficient }
761 begin
766 { Check for out-of-range coefficient values.
767 Since we're encoding a difference, the range limit is twice as much. }
772 { Count the Huffman symbol for the number of bits }
775 { Encode the AC coefficients per section F.1.2.2 }
780 begin
783 begin
785 end
786 else
787 begin
788 { if run length > 15, must emit special run-length-16 codes ($F0) }
790 begin
795 { Find the number of bits needed for the magnitude of the coefficient }
799 { Find the number of bits needed for the magnitude of the coefficient }
801 repeat
807 { Count Huffman symbol for run length / number of bits }
814 { If the last coef(s) were zero, emit an end-of-block code }
820 { Trial-encode one MCU's worth of Huffman-compressed coefficients.
821 No data is actually output, so no suspension return is possible. }
823 {METHODDEF}
826 var
830 begin
832 { Take care of restart intervals if needed }
834 begin
836 begin
837 { Re-initialize DC predictions to 0 }
840 { Update restart state }
847 begin
861 { Generate the best Huffman code table for the given counts, fill htbl.
862 Note this is also used by jcphuff.c.
864 The JPEG standard requires that no symbol be assigned a codeword of all
865 one bits (so that padding bits added at the end of a compressed segment
866 can't look like a valid code). Because of the canonical ordering of
867 codewords, this just means that there must be an unused slot in the
868 longest codeword length category. Section K.2 of the JPEG spec suggests
869 reserving such a slot by pretending that symbol 256 is a valid symbol
870 with count 1. In theory that's not optimal; giving it count zero but
871 including it in the symbol set anyway should give a better Huffman code.
872 But the theoretically better code actually seems to come out worse in
873 practice, because it produces more all-ones bytes (which incur stuffed
874 zero bytes in the final file). In any case the difference is tiny.
876 The JPEG standard requires Huffman codes to be no more than 16 bits long.
877 If some symbols have a very small but nonzero probability, the Huffman tree
878 must be adjusted to meet the code length restriction. We currently use
879 the adjustment method suggested in JPEG section K.2. This method is *not*
880 optimal; it may not choose the best possible limited-length code. But
881 typically only very-low-frequency symbols will be given less-than-optimal
882 lengths, so the code is almost optimal. Experimental comparisons against
883 an optimal limited-length-code algorithm indicate that the difference is
884 microscopic --- usually less than a hundredth of a percent of total size.
885 So the extra complexity of an optimal algorithm doesn't seem worthwhile. }
888 {GLOBAL}
892 const
894 var
901 begin
902 { This algorithm is explained in section K.2 of the JPEG standard }
910 { Including the pseudo-symbol 256 in the Huffman procedure guarantees
911 that no real symbol is given code-value of all ones, because 256
912 will be placed last in the largest codeword category. }
914 { Huffman's basic algorithm to assign optimal code lengths to symbols }
917 begin
918 { Find the smallest nonzero frequency, set c1 := its symbol }
919 { In case of ties, take the larger symbol number }
923 begin
925 begin
931 { Find the next smallest nonzero frequency, set c2 := its symbol }
932 { In case of ties, take the larger symbol number }
936 begin
938 begin
944 { Done if we've merged everything into one frequency }
946 break;
948 { Else merge the two counts/trees }
952 { Increment the codesize of everything in c1's tree branch }
955 begin
962 { Increment the codesize of everything in c2's tree branch }
965 begin
971 { Now count the number of symbols of each code length }
973 begin
975 begin
976 { The JPEG standard seems to think that this can't happen, }
977 { but I'm paranoid... }
985 { JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
986 Huffman procedure assigned any such lengths, we must adjust the coding.
987 Here is what the JPEG spec says about how this next bit works:
988 Since symbols are paired for the longest Huffman code, the symbols are
989 removed from this length category two at a time. The prefix for the pair
990 (which is one bit shorter) is allocated to one of the pair; then,
991 skipping the BITS entry for that prefix length, a code word from the next
992 shortest nonzero BITS entry is converted into a prefix for two code words
993 one bit longer. }
996 begin
998 begin
1010 { Delphi 2: FOR-loop variable 'i' may be undefined after loop }
1013 { Remove the count for the pseudo-symbol 256 from the largest codelength }
1018 { Return final symbol counts (only for lengths 0..16) }
1021 { Return a list of the symbols sorted by code length }
1022 { It's not real clear to me why we don't need to consider the codelength
1023 changes made above, but the JPEG spec seems to think this works. }
1027 begin
1029 begin
1031 begin
1038 { Set sent_table FALSE so updated table will be written to JPEG file. }
1043 { Finish up a statistics-gathering pass and create the new Huffman tables. }
1045 {METHODDEF}
1047 var
1054 begin
1057 { It's important not to apply jpeg_gen_optimal_table more than once
1058 per table, because it clobbers the input frequency counts! }
1064 begin
1069 begin
1077 begin
1090 { Module initialization routine for Huffman entropy encoding. }
1092 {GLOBAL}
1094 var
1097 begin
1104 { Mark tables unallocated }
1106 begin
1109 {$ifdef ENTROPY_OPT_SUPPORTED}
1112 {$endif}