1 | /* |
2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #ifndef _SYS_UTFCONV_H_ |
30 | #define _SYS_UTFCONV_H_ |
31 | |
32 | #include <sys/appleapiopts.h> |
33 | #include <sys/cdefs.h> |
34 | |
35 | #ifdef KERNEL |
36 | #ifdef __APPLE_API_UNSTABLE |
37 | |
38 | /* |
39 | * Flag bits for the `flags` argument of the UTF-8 encode/decode routines |
40 | */ |
41 | #define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */ |
42 | #define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */ |
43 | #define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 (NFD) */ |
44 | #define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 (NFC) */ |
45 | #define UTF_ESCAPE_ILLEGAL 0x0010 /* percent escape illegal UTF-8 input */ |
46 | #define UTF_SFM_CONVERSIONS 0x0020 /* use SFM mappings for illegal NTFS chars */ |
47 | |
48 | #define UTF_BIG_ENDIAN \ |
49 | ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) /* 0 when BYTE_ORDER is BIG_ENDIAN */ |
50 | |
51 | #define UTF_LITTLE_ENDIAN \ |
52 | ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) /* 0 when BYTE_ORDER is LITTLE_ENDIAN */ |
53 | |
54 | __BEGIN_DECLS |
55 | |
56 | |
57 | /* |
58 | * unicode_combinable - Test for a combining Unicode character. |
59 | * |
60 | * Similar to __CFUniCharIsNonBaseCharacter, except that Hangul Jamo |
61 | * characters are also included. `character` is a single 16-bit value. |
62 | */ |
63 | |
64 | int unicode_combinable(u_int16_t character); |
65 | |
66 | /* |
67 | * unicode_decomposeable - Test for a precomposed character. |
68 | * |
69 | * Similar to __CFUniCharIsDecomposableCharacter. |
70 | */ |
71 | |
72 | int unicode_decomposeable(u_int16_t character); |
73 | |
74 | |
75 | /* |
76 | * utf8_encodelen - Calculate the UTF-8 encoding length |
77 | * |
78 | * Calculates the size in bytes (excluding a NULL termination byte) of the |
79 | * UTF-8 encoding of ucsp, a Unicode string of ucslen bytes in kernel memory. |
80 | * NOTE(review): altslash presumably as in utf8_encodestr - TODO confirm. |
81 | * |
82 | * FLAGS |
83 | * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime |
84 | * |
85 | * UTF_BIG_ENDIAN: Unicode byte order is always big endian |
86 | * |
87 | * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian |
88 | * |
89 | * UTF_DECOMPOSED: assume fully decomposed output |
90 | * |
91 | * ERRORS |
92 | * None |
93 | */ |
94 | size_t |
95 | utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, |
96 | int flags); |
97 | |
98 | |
99 | /* |
100 | * utf8_encodestr - Encodes a Unicode string into UTF-8 |
101 | * |
102 | * This function takes a Unicode input string, ucsp, of ucslen bytes |
103 | * and produces the UTF-8 output into a buffer of buflen bytes pointed |
104 | * to by utf8p. The size of the output in bytes (not including a NULL |
105 | * termination byte) is returned in utf8len. The output is NULL terminated |
106 | * unless UTF_NO_NULL_TERM is set. Both buffers must reside in kernel memory. |
107 | * |
108 | * If '/' chars are possible in the Unicode input then an alternate |
109 | * (replacement) char must be provided in altslash. |
110 | * |
111 | * FLAGS |
112 | * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime |
113 | * |
114 | * UTF_BIG_ENDIAN: Unicode byte order is always big endian |
115 | * |
116 | * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian |
117 | * |
118 | * UTF_NO_NULL_TERM: do not add null termination to output string |
119 | * |
120 | * UTF_DECOMPOSED: generate fully decomposed output |
121 | * |
122 | * ERRORS |
123 | * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded |
124 | * |
125 | * EINVAL: illegal Unicode char encountered |
126 | */ |
127 | int |
128 | utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, |
129 | size_t * utf8len, size_t buflen, u_int16_t altslash, int flags); |
130 | |
131 | |
132 | /* |
133 | * utf8_decodestr - Decodes a UTF-8 string into Unicode |
134 | * |
135 | * This function takes a UTF-8 input string, utf8p, of utf8len bytes |
136 | * and produces the Unicode output into a buffer of buflen bytes pointed |
137 | * to by ucsp. The size of the output in bytes (not including a NULL |
138 | * termination byte) is returned in ucslen. Both buffers must reside |
139 | * in kernel memory. |
140 | * |
141 | * If '/' chars are allowed in the Unicode output then an alternate |
142 | * (replacement) char must be provided in altslash. |
143 | * |
144 | * FLAGS |
145 | * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime |
146 | * |
147 | * UTF_BIG_ENDIAN: Unicode byte order is always big endian |
148 | * |
149 | * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian |
150 | * |
151 | * UTF_DECOMPOSED: generate fully decomposed output (NFD) |
152 | * |
153 | * UTF_PRECOMPOSED: generate precomposed output (NFC) |
154 | * |
155 | * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input |
156 | * |
157 | * ERRORS |
158 | * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded. |
159 | * |
160 | * EINVAL: illegal UTF-8 sequence encountered. |
161 | */ |
162 | int |
163 | utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, |
164 | size_t *ucslen, size_t buflen, u_int16_t altslash, int flags); |
165 | |
166 | |
167 | /* |
168 | * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) |
169 | * |
170 | * This function takes a UTF-8 input string, instr, of inlen bytes |
171 | * and produces normalized UTF-8 output into a buffer of buflen bytes |
172 | * pointed to by outstr. The size of the output in bytes (not including |
173 | * a NULL termination byte) is returned in outlen. In-place conversions |
174 | * are not supported (instr must not equal outstr). Both buffers must |
175 | * reside in kernel memory. |
176 | * |
177 | * FLAGS |
178 | * UTF_DECOMPOSED: output string will be fully decomposed (NFD) |
179 | * |
180 | * UTF_PRECOMPOSED: output string will be precomposed (NFC) |
181 | * |
182 | * UTF_NO_NULL_TERM: do not add null termination to output string |
183 | * |
184 | * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input |
185 | * |
186 | * ERRORS |
187 | * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes |
188 | * |
189 | * EINVAL: illegal UTF-8 sequence encountered or invalid flags |
190 | */ |
191 | int |
192 | utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, |
193 | size_t *outlen, size_t buflen, int flags); |
194 | |
195 | |
196 | /* |
197 | * utf8_validatestr - validates a UTF-8 string |
198 | * |
199 | * This function takes a UTF-8 input string, utf8p, of utf8len bytes |
200 | * and determines whether it is valid UTF-8. The string must reside in |
201 | * kernel memory. |
202 | * |
203 | * ERRORS |
204 | * EINVAL: illegal UTF-8 sequence encountered. |
205 | */ |
206 | int |
207 | utf8_validatestr(const u_int8_t* utf8p, size_t utf8len); |
208 | |
209 | |
210 | __END_DECLS |
211 | |
212 | #endif /* __APPLE_API_UNSTABLE */ |
213 | #endif /* KERNEL */ |
214 | |
215 | #endif /* !_SYS_UTFCONV_H_ */ |
216 | |