1 /*
2 * @(#)utf.c 1.5 02/09/27
3 *
4 * Copyright 1995-1998 by Sun Microsystems, Inc.,
5 * 901 San Antonio Road, Palo Alto, California, 94303, U.S.A.
6 * All rights reserved.
7 *
8 * This software is the confidential and proprietary information
9 * of Sun Microsystems, Inc. ("Confidential Information"). You
10 * shall not disclose such Confidential Information and shall use
11 * it only in accordance with the terms of the license agreement
12 * you entered into with Sun.
13 * Use is subject to license terms.
14 */
16 /*=========================================================================
17 * SYSTEM: Verifier
18 * SUBSYSTEM: Unicode translators.
19 * FILE: utf.c
20 * OVERVIEW: Routines for Unicode -> UTF and UTF -> unicode translators.
21 *
22 * This file implements the unicode -> UTF and UTF -> unicode translators
23 * needed by the various parts of the compiler and interpreter.
24 *
25 * UTF strings are streams of bytes, in which unicode characters are encoded
26 * as follows:
27 * Unicode UTF
28 * 00000000 0jklmnop 0jklmnop
29 * 00000fgh ijklmnop 110fghij 10klmnop
30 * abcdefgh ijklmnop 1110abcd 10efghij 10klmnop
31 *
32 * Unicode characters with 7 or fewer significant bits MUST be converted
33 * using the first format. Characters with 11 or fewer significant bits MUST
34 * be converted using the second format.
35 *
36 * In JAVA/JAVAC, we deviate slightly from the above.
37 * 1) The null unicode character is represented using the 2-byte format
38 * 2) All UTF strings are null-terminated.
39 * In this way, we do not need to separately maintain a length field for the
40 * UTF string.
41 *
42 * Given a unicode string and its length, convert it to a utf string. Put
43 * the result into the given buffer, whose length is buflength. The utf
44 * string should include a null terminator.
45 *
46 * If both buffer and buflength are 0, then malloc an appropriately sized
47 * buffer for the result.
48 *
49 * AUTHOR: Sheng Liang, Sun Microsystems, Inc.
50 * Edited by Tasneem Sayeed, Sun Microsystems
51 *=======================================================================*/
53 /*=========================================================================
54 * Include files
55 *=======================================================================*/
57 #include <stdio.h>
58 #include <string.h>
59 #include <stdlib.h>
66 {
76 }
87 /* 11 bits or less. */
95 /* possibly full 16 bits. */
104 }
105 }
108 }
110 /* Return the number of bytes that would be needed to hold the unicode
111 * string in utf. This INCLUDES the terminating null byte!
112 */
114 {
120 result_length++;
123 else
125 }
127 }
129 /* Give the number of unicode characters in a utf string */
131 {
136 }
138 /* Convert a utfstring to unicode in the buffer provided. Put at most
139 * max_length characters into the buffer. Whether or not we actually overflow
140 * the space, indicate the actual unicode length.
141 *
142 * Whether or not we overflow the space, return the actual number of
143 * characters that we used.
144 */
146 void
149 {
154 length_remaining--;
155 }
161 }
162 }
165 {
169 }
171 }
185 /* Shouldn't happen. */
189 /* 110xxxxx 10xxxxxx */
195 }
199 /* 1110xxxx 10xxxxxx 10xxxxxx */
209 }
210 }
216 }