1 {
2 File: CarbonCore/TextCommon.h
3
4 Contains: TextEncoding-related types and constants, and prototypes for related functions
5
6 Copyright: (c) 1995-2017 Apple Inc. All rights reserved.
7
8 Bugs?: For bug reports, consult the following page on
9 the World Wide Web:
10
11 http://bugs.freepascal.org
12
13 }
14 { Pascal Translation Updated: Gale R Paeper, <gpaeper@empirenet.com>, June 2018 }
15
16 {
17 Modified for use with Free Pascal
18 Version 308
19 Please report any bugs to <gpc@microbizz.nl>
20 }
21
22 {$ifc not defined MACOSALLINCLUDE or not MACOSALLINCLUDE}
23 {$mode macpas}
24 {$modeswitch cblocks}
25 {$packenum 1}
26 {$macro on}
27 {$inline on}
28 {$calling mwpascal}
29
30 unit TextCommon;
31 interface
32 {$setc UNIVERSAL_INTERFACES_VERSION := $0400}
33 {$setc GAP_INTERFACES_VERSION := $0308}
34
35 {$ifc not defined USE_CFSTR_CONSTANT_MACROS}
36 {$setc USE_CFSTR_CONSTANT_MACROS := TRUE}
37 {$endc}
38
39 {$ifc defined CPUPOWERPC and defined CPUI386}
40 {$error Conflicting initial definitions for CPUPOWERPC and CPUI386}
41 {$endc}
42 {$ifc defined FPC_BIG_ENDIAN and defined FPC_LITTLE_ENDIAN}
43 {$error Conflicting initial definitions for FPC_BIG_ENDIAN and FPC_LITTLE_ENDIAN}
44 {$endc}
45
46 {$ifc not defined __ppc__ and defined CPUPOWERPC32}
47 {$setc __ppc__ := 1}
48 {$elsec}
49 {$setc __ppc__ := 0}
50 {$endc}
51 {$ifc not defined __ppc64__ and defined CPUPOWERPC64}
52 {$setc __ppc64__ := 1}
53 {$elsec}
54 {$setc __ppc64__ := 0}
55 {$endc}
56 {$ifc not defined __i386__ and defined CPUI386}
57 {$setc __i386__ := 1}
58 {$elsec}
59 {$setc __i386__ := 0}
60 {$endc}
61 {$ifc not defined __x86_64__ and defined CPUX86_64}
62 {$setc __x86_64__ := 1}
63 {$elsec}
64 {$setc __x86_64__ := 0}
65 {$endc}
66 {$ifc not defined __arm__ and defined CPUARM}
67 {$setc __arm__ := 1}
68 {$elsec}
69 {$setc __arm__ := 0}
70 {$endc}
71 {$ifc not defined __arm64__ and defined CPUAARCH64}
72 {$setc __arm64__ := 1}
73 {$elsec}
74 {$setc __arm64__ := 0}
75 {$endc}
76
77 {$ifc defined cpu64}
78 {$setc __LP64__ := 1}
79 {$elsec}
80 {$setc __LP64__ := 0}
81 {$endc}
82
83
84 {$ifc defined __ppc__ and __ppc__ and defined __i386__ and __i386__}
85 {$error Conflicting definitions for __ppc__ and __i386__}
86 {$endc}
87
88 {$ifc defined __ppc__ and __ppc__}
89 {$setc TARGET_CPU_PPC := TRUE}
90 {$setc TARGET_CPU_PPC64 := FALSE}
91 {$setc TARGET_CPU_X86 := FALSE}
92 {$setc TARGET_CPU_X86_64 := FALSE}
93 {$setc TARGET_CPU_ARM := FALSE}
94 {$setc TARGET_CPU_ARM64 := FALSE}
95 {$setc TARGET_OS_MAC := TRUE}
96 {$setc TARGET_OS_IPHONE := FALSE}
97 {$setc TARGET_IPHONE_SIMULATOR := FALSE}
98 {$setc TARGET_OS_EMBEDDED := FALSE}
99 {$elifc defined __ppc64__ and __ppc64__}
100 {$setc TARGET_CPU_PPC := FALSE}
101 {$setc TARGET_CPU_PPC64 := TRUE}
102 {$setc TARGET_CPU_X86 := FALSE}
103 {$setc TARGET_CPU_X86_64 := FALSE}
104 {$setc TARGET_CPU_ARM := FALSE}
105 {$setc TARGET_CPU_ARM64 := FALSE}
106 {$setc TARGET_OS_MAC := TRUE}
107 {$setc TARGET_OS_IPHONE := FALSE}
108 {$setc TARGET_IPHONE_SIMULATOR := FALSE}
109 {$setc TARGET_OS_EMBEDDED := FALSE}
110 {$elifc defined __i386__ and __i386__}
111 {$setc TARGET_CPU_PPC := FALSE}
112 {$setc TARGET_CPU_PPC64 := FALSE}
113 {$setc TARGET_CPU_X86 := TRUE}
114 {$setc TARGET_CPU_X86_64 := FALSE}
115 {$setc TARGET_CPU_ARM := FALSE}
116 {$setc TARGET_CPU_ARM64 := FALSE}
117 {$ifc defined iphonesim}
118 {$setc TARGET_OS_MAC := FALSE}
119 {$setc TARGET_OS_IPHONE := TRUE}
120 {$setc TARGET_IPHONE_SIMULATOR := TRUE}
121 {$elsec}
122 {$setc TARGET_OS_MAC := TRUE}
123 {$setc TARGET_OS_IPHONE := FALSE}
124 {$setc TARGET_IPHONE_SIMULATOR := FALSE}
125 {$endc}
126 {$setc TARGET_OS_EMBEDDED := FALSE}
127 {$elifc defined __x86_64__ and __x86_64__}
128 {$setc TARGET_CPU_PPC := FALSE}
129 {$setc TARGET_CPU_PPC64 := FALSE}
130 {$setc TARGET_CPU_X86 := FALSE}
131 {$setc TARGET_CPU_X86_64 := TRUE}
132 {$setc TARGET_CPU_ARM := FALSE}
133 {$setc TARGET_CPU_ARM64 := FALSE}
134 {$ifc defined iphonesim}
135 {$setc TARGET_OS_MAC := FALSE}
136 {$setc TARGET_OS_IPHONE := TRUE}
137 {$setc TARGET_IPHONE_SIMULATOR := TRUE}
138 {$elsec}
139 {$setc TARGET_OS_MAC := TRUE}
140 {$setc TARGET_OS_IPHONE := FALSE}
141 {$setc TARGET_IPHONE_SIMULATOR := FALSE}
142 {$endc}
143 {$setc TARGET_OS_EMBEDDED := FALSE}
144 {$elifc defined __arm__ and __arm__}
145 {$setc TARGET_CPU_PPC := FALSE}
146 {$setc TARGET_CPU_PPC64 := FALSE}
147 {$setc TARGET_CPU_X86 := FALSE}
148 {$setc TARGET_CPU_X86_64 := FALSE}
149 {$setc TARGET_CPU_ARM := TRUE}
150 {$setc TARGET_CPU_ARM64 := FALSE}
151 {$setc TARGET_OS_MAC := FALSE}
152 {$setc TARGET_OS_IPHONE := TRUE}
153 {$setc TARGET_IPHONE_SIMULATOR := FALSE}
154 {$setc TARGET_OS_EMBEDDED := TRUE}
155 {$elifc defined __arm64__ and __arm64__}
156 {$setc TARGET_CPU_PPC := FALSE}
157 {$setc TARGET_CPU_PPC64 := FALSE}
158 {$setc TARGET_CPU_X86 := FALSE}
159 {$setc TARGET_CPU_X86_64 := FALSE}
160 {$setc TARGET_CPU_ARM := FALSE}
161 {$setc TARGET_CPU_ARM64 := TRUE}
162 {$ifc defined ios}
163 {$setc TARGET_OS_MAC := FALSE}
164 {$setc TARGET_OS_IPHONE := TRUE}
165 {$setc TARGET_OS_EMBEDDED := TRUE}
166 {$elsec}
167 {$setc TARGET_OS_MAC := TRUE}
168 {$setc TARGET_OS_IPHONE := FALSE}
169 {$setc TARGET_OS_EMBEDDED := FALSE}
170 {$endc}
171 {$setc TARGET_IPHONE_SIMULATOR := FALSE}
172 {$elsec}
173 {$error __ppc__ nor __ppc64__ nor __i386__ nor __x86_64__ nor __arm__ nor __arm64__ is defined.}
174 {$endc}
175
176 {$ifc defined __LP64__ and __LP64__ }
177 {$setc TARGET_CPU_64 := TRUE}
178 {$elsec}
179 {$setc TARGET_CPU_64 := FALSE}
180 {$endc}
181
182 {$ifc defined FPC_BIG_ENDIAN}
183 {$setc TARGET_RT_BIG_ENDIAN := TRUE}
184 {$setc TARGET_RT_LITTLE_ENDIAN := FALSE}
185 {$elifc defined FPC_LITTLE_ENDIAN}
186 {$setc TARGET_RT_BIG_ENDIAN := FALSE}
187 {$setc TARGET_RT_LITTLE_ENDIAN := TRUE}
188 {$elsec}
189 {$error Neither FPC_BIG_ENDIAN nor FPC_LITTLE_ENDIAN are defined.}
190 {$endc}
191 {$setc ACCESSOR_CALLS_ARE_FUNCTIONS := TRUE}
192 {$setc CALL_NOT_IN_CARBON := FALSE}
193 {$setc OLDROUTINENAMES := FALSE}
194 {$setc OPAQUE_TOOLBOX_STRUCTS := TRUE}
195 {$setc OPAQUE_UPP_TYPES := TRUE}
196 {$setc OTCARBONAPPLICATION := TRUE}
197 {$setc OTKERNEL := FALSE}
198 {$setc PM_USE_SESSION_APIS := TRUE}
199 {$setc TARGET_API_MAC_CARBON := TRUE}
200 {$setc TARGET_API_MAC_OS8 := FALSE}
201 {$setc TARGET_API_MAC_OSX := TRUE}
202 {$setc TARGET_CARBON := TRUE}
203 {$setc TARGET_CPU_68K := FALSE}
204 {$setc TARGET_CPU_MIPS := FALSE}
205 {$setc TARGET_CPU_SPARC := FALSE}
206 {$setc TARGET_OS_UNIX := FALSE}
207 {$setc TARGET_OS_WIN32 := FALSE}
208 {$setc TARGET_RT_MAC_68881 := FALSE}
209 {$setc TARGET_RT_MAC_CFM := FALSE}
210 {$setc TARGET_RT_MAC_MACHO := TRUE}
211 {$setc TYPED_FUNCTION_POINTERS := TRUE}
212 {$setc TYPE_BOOL := FALSE}
213 {$setc TYPE_EXTENDED := FALSE}
214 {$setc TYPE_LONGLONG := TRUE}
215 uses MacTypes;
216 {$endc} {not MACOSALLINCLUDE}
217
218
219 {$ifc TARGET_OS_MAC}
220
221 {$ALIGN MAC68K}
222
223 {
224 * Generic Text Alignment Constants
225 *
226 * Summary:
227 * These constants are implemented to supplant the old TextEdit
228 * Manager constants ( teFlushDefault, teCenter, teFlushRight,
229 * teFlushLeft ). These constants are used outside the context of the
230 * legacy TextEdit Manager Framework. Use these as you would use the
231 * old TextEdit.h constants to specify how text should be justified
232 * (word aligned.) The new constants use the same values as the
233 * old TextEdit ones, for backwards compatibility.
234 }
235 const
236 {
237 * Flush according to the line direction
238 }
239 kTextFlushDefault = 0;
240
241 {
242 * Center justify (word alignment)
243 }
244 kTextCenter = 1;
245
246 {
247 * Flush right
248 }
249 kTextFlushRight = -1;
250
251 {
252 * Flush left
253 }
254 kTextFlushLeft = -2;
255
256 { TextEncodingBase type & values }
257 { (values 0-32 correspond to the Script Codes defined in Inside Macintosh: Text pages 6-52 and 6-53) }
258 type
259 TextEncodingBase = UInt32;
260 const
261 { Mac OS encodings; values 8 and 32 are declared with the older names that follow this table}
262 kTextEncodingMacRoman = 0;
263 kTextEncodingMacJapanese = 1;
264 kTextEncodingMacChineseTrad = 2;
265 kTextEncodingMacKorean = 3;
266 kTextEncodingMacArabic = 4;
267 kTextEncodingMacHebrew = 5;
268 kTextEncodingMacGreek = 6;
269 kTextEncodingMacCyrillic = 7;
270 kTextEncodingMacDevanagari = 9;
271 kTextEncodingMacGurmukhi = 10;
272 kTextEncodingMacGujarati = 11;
273 kTextEncodingMacOriya = 12;
274 kTextEncodingMacBengali = 13;
275 kTextEncodingMacTamil = 14;
276 kTextEncodingMacTelugu = 15;
277 kTextEncodingMacKannada = 16;
278 kTextEncodingMacMalayalam = 17;
279 kTextEncodingMacSinhalese = 18;
280 kTextEncodingMacBurmese = 19;
281 kTextEncodingMacKhmer = 20;
282 kTextEncodingMacThai = 21;
283 kTextEncodingMacLaotian = 22;
284 kTextEncodingMacGeorgian = 23;
285 kTextEncodingMacArmenian = 24;
286 kTextEncodingMacChineseSimp = 25;
287 kTextEncodingMacTibetan = 26;
288 kTextEncodingMacMongolian = 27;
289 kTextEncodingMacEthiopic = 28;
290 kTextEncodingMacCentralEurRoman = 29;
291 kTextEncodingMacVietnamese = 30;
292 kTextEncodingMacExtArabic = 31; { The entries below (33 and up) use script code 0, smRoman}
293 kTextEncodingMacSymbol = 33;
294 kTextEncodingMacDingbats = 34;
295 kTextEncodingMacTurkish = 35;
296 kTextEncodingMacCroatian = 36;
297 kTextEncodingMacIcelandic = 37;
298 kTextEncodingMacRomanian = 38;
299 kTextEncodingMacCeltic = 39;
300 kTextEncodingMacGaelic = 40;
301 kTextEncodingMacKeyboardGlyphs = 41;
302
303 { The following are older names for backward compatibility}
304 const
305 kTextEncodingMacTradChinese = kTextEncodingMacChineseTrad;
306 kTextEncodingMacRSymbol = 8; { literal value: 8 has no entry in the Mac OS encodings table above}
307 kTextEncodingMacSimpChinese = kTextEncodingMacChineseSimp;
308 kTextEncodingMacGeez = kTextEncodingMacEthiopic;
309 kTextEncodingMacEastEurRoman = kTextEncodingMacCentralEurRoman;
310 kTextEncodingMacUninterp = 32; { literal value: 32 has no entry in the Mac OS encodings table above}
311
312
313 {
314 Beginning in Mac OS 8.5, the following meta-value is used to indicate Unicode in some parts
315 of the Mac OS which previously only expected a Mac OS script code. In some of these places,
316 only 7 bits are available to indicate encoding (script code), so kTextEncodingUnicodeDefault
317 cannot be used. For example, kTextEncodingMacUnicode can be used to indicate Unicode in the
318 7-bit script code field of a Unicode input method's ComponentDescription.componentFlags field;
319 it can also be used to indicate Unicode in the 16-bit script code field of an AppleEvent's
320 typeIntlWritingCode text tag.
321 }
322 const
323 kTextEncodingMacUnicode = $7E; { Meta-value, Unicode as a Mac encoding}
324
325 { Variant Mac OS encodings that use script codes other than 0}
326 const
327 { The following use script code 4, smArabic}
328 kTextEncodingMacFarsi = $8C; { Like MacArabic but uses Farsi digits}
329 { The following use script code 7, smCyrillic}
330 kTextEncodingMacUkrainian = $98; { Meta-value in TEC 1.5 & later; maps to kTextEncodingMacCyrillic variant }
331 { The following use script code 28, smEthiopic}
332 kTextEncodingMacInuit = $EC; { The entry below uses script code 32, smUnimplemented}
333 kTextEncodingMacVT100 = $FC; { VT100/102 font from Comm Toolbox: Latin-1 repertoire + box drawing etc}
334
335 { Special Mac OS encodings}
336 const
337 kTextEncodingMacHFS = $FF; { Meta-value, should never appear in a table.}
338
339 { Unicode & ISO UCS encodings begin at 0x100}
340 const
341 kTextEncodingUnicodeDefault = $0100; { Meta-value, should never appear in a table.}
342 kTextEncodingUnicodeV1_1 = $0101;
343 kTextEncodingISO10646_1993 = $0101; { Code points identical to Unicode 1.1}
344 kTextEncodingUnicodeV2_0 = $0103; { New location for Korean Hangul}
345 kTextEncodingUnicodeV2_1 = $0103; { We treat both Unicode 2.0 and Unicode 2.1 as 2.1}
346 kTextEncodingUnicodeV3_0 = $0104;
347 kTextEncodingUnicodeV3_1 = $0105; { Adds characters requiring surrogate pairs in UTF-16}
348 kTextEncodingUnicodeV3_2 = $0106;
349 kTextEncodingUnicodeV4_0 = $0108;
350 kTextEncodingUnicodeV5_0 = $010A;
351 kTextEncodingUnicodeV5_1 = $010B; { No constant for Unicode 5.2, but leave an opening.}
352 kTextEncodingUnicodeV6_0 = $010D; { Adds many symbols, including emoji support.}
353 kTextEncodingUnicodeV6_1 = $010E; { Adds emoji variation sequences, properties changes.}
354 kTextEncodingUnicodeV6_3 = $0110; { Adds new bidi controls.}
355 kTextEncodingUnicodeV7_0 = $0111; { Adds RUBLE SIGN, symbols from Wingdings/Webdings.}
356 kTextEncodingUnicodeV8_0 = $0112; { Adds LARI SIGN, lowercase Cherokee, emoji modifiers, CJK Ext E, 6 scripts.}
357 kTextEncodingUnicodeV9_0 = $0113; { Adds Tangut and 5 other scripts, 72 emoji.}
358
359 { ISO 8-bit and 7-bit encodings begin at 0x200}
360 const
361 kTextEncodingISOLatin1 = $0201; { ISO 8859-1, Western European}
362 kTextEncodingISOLatin2 = $0202; { ISO 8859-2, Central European}
363 kTextEncodingISOLatin3 = $0203; { ISO 8859-3, South European (Maltese...)}
364 kTextEncodingISOLatin4 = $0204; { ISO 8859-4, North European & some Baltic}
365 kTextEncodingISOLatinCyrillic = $0205; { ISO 8859-5}
366 kTextEncodingISOLatinArabic = $0206; { ISO 8859-6, = ASMO 708, =DOS CP 708}
367 kTextEncodingISOLatinGreek = $0207; { ISO 8859-7}
368 kTextEncodingISOLatinHebrew = $0208; { ISO 8859-8}
369 kTextEncodingISOLatin5 = $0209; { ISO 8859-9, Turkish}
370 kTextEncodingISOLatin6 = $020A; { ISO 8859-10, Nordic }
371 kTextEncodingISOLatin7 = $020D; { ISO 8859-13, Baltic Rim }
372 kTextEncodingISOLatin8 = $020E; { ISO 8859-14, Celtic }
373 kTextEncodingISOLatin9 = $020F; { ISO 8859-15, 8859-1 changed for EURO & CP1252 letters }
374 kTextEncodingISOLatin10 = $0210; { ISO 8859-16, Romanian}
375
376 { MS-DOS & Windows encodings begin at 0x400}
377 const
378 kTextEncodingDOSLatinUS = $0400; { code page 437}
379 kTextEncodingDOSGreek = $0405; { code page 737 (formerly code page 437G)}
380 kTextEncodingDOSBalticRim = $0406; { code page 775}
381 kTextEncodingDOSLatin1 = $0410; { code page 850, "Multilingual"}
382 kTextEncodingDOSGreek1 = $0411; { code page 851}
383 kTextEncodingDOSLatin2 = $0412; { code page 852, Slavic}
384 kTextEncodingDOSCyrillic = $0413; { code page 855, IBM Cyrillic}
385 kTextEncodingDOSTurkish = $0414; { code page 857, IBM Turkish}
386 kTextEncodingDOSPortuguese = $0415; { code page 860}
387 kTextEncodingDOSIcelandic = $0416; { code page 861}
388 kTextEncodingDOSHebrew = $0417; { code page 862}
389 kTextEncodingDOSCanadianFrench = $0418; { code page 863}
390 kTextEncodingDOSArabic = $0419; { code page 864}
391 kTextEncodingDOSNordic = $041A; { code page 865}
392 kTextEncodingDOSRussian = $041B; { code page 866}
393 kTextEncodingDOSGreek2 = $041C; { code page 869, IBM Modern Greek}
394 kTextEncodingDOSThai = $041D; { code page 874, also for Windows}
395 kTextEncodingDOSJapanese = $0420; { code page 932, also for Windows; Shift-JIS with additions}
396 kTextEncodingDOSChineseSimplif = $0421; { code page 936, also for Windows; was EUC-CN, now GBK (EUC-CN extended)}
397 kTextEncodingDOSKorean = $0422; { code page 949, also for Windows; Unified Hangul Code (EUC-KR extended)}
398 kTextEncodingDOSChineseTrad = $0423; { code page 950, also for Windows; Big-5}
399 kTextEncodingWindowsLatin1 = $0500; { code page 1252}
400 kTextEncodingWindowsANSI = $0500; { code page 1252 (alternate name)}
401 kTextEncodingWindowsLatin2 = $0501; { code page 1250, Central Europe}
402 kTextEncodingWindowsCyrillic = $0502; { code page 1251, Slavic Cyrillic}
403 kTextEncodingWindowsGreek = $0503; { code page 1253}
404 kTextEncodingWindowsLatin5 = $0504; { code page 1254, Turkish}
405 kTextEncodingWindowsHebrew = $0505; { code page 1255}
406 kTextEncodingWindowsArabic = $0506; { code page 1256}
407 kTextEncodingWindowsBalticRim = $0507; { code page 1257}
408 kTextEncodingWindowsVietnamese = $0508; { code page 1258}
409 kTextEncodingWindowsKoreanJohab = $0510; { code page 1361, for Windows NT}
410
411 { Various national standards begin at 0x600}
412 const
413 kTextEncodingUS_ASCII = $0600;
414 kTextEncodingANSEL = $0601; { ANSEL (ANSI Z39.47) for library use}
415 kTextEncodingJIS_X0201_76 = $0620; { JIS Roman and 1-byte katakana (halfwidth)}
416 kTextEncodingJIS_X0208_83 = $0621;
417 kTextEncodingJIS_X0208_90 = $0622;
418 kTextEncodingJIS_X0212_90 = $0623;
419 kTextEncodingJIS_C6226_78 = $0624;
420 kTextEncodingShiftJIS_X0213 = $0628; { Shift-JIS format encoding of JIS X0213 planes 1 and 2}
421 kTextEncodingJIS_X0213_MenKuTen = $0629; { JIS X0213 in plane-row-column notation (3 bytes)}
422 kTextEncodingGB_2312_80 = $0630;
423 kTextEncodingGBK_95 = $0631; { annex to GB 13000-93; for Windows 95; EUC-CN extended}
424 kTextEncodingGB_18030_2000 = $0632; { This is actually implemented as GB_18030_2005}
425 kTextEncodingGB_18030_2005 = $0632;
426 kTextEncodingKSC_5601_87 = $0640; { same as KSC 5601-92 without Johab annex}
427 kTextEncodingKSC_5601_92_Johab = $0641; { KSC 5601-92 Johab annex}
428 kTextEncodingCNS_11643_92_P1 = $0651; { CNS 11643-1992 plane 1}
429 kTextEncodingCNS_11643_92_P2 = $0652; { CNS 11643-1992 plane 2}
430 kTextEncodingCNS_11643_92_P3 = $0653; { CNS 11643-1992 plane 3 (was plane 14 in 1986 version)}
431
432 { ISO 2022 collections begin at 0x800}
433 const
434 kTextEncodingISO_2022_JP = $0820; { RFC 1468}
435 kTextEncodingISO_2022_JP_2 = $0821; { RFC 1554}
436 kTextEncodingISO_2022_JP_1 = $0822; { RFC 2237}
437 kTextEncodingISO_2022_JP_3 = $0823; { JIS X0213}
438 kTextEncodingISO_2022_CN = $0830; { RFC 1922}
439 kTextEncodingISO_2022_CN_EXT = $0831; { RFC 1922}
440 kTextEncodingISO_2022_KR = $0840; { RFC 1557}
441
442 { EUC collections begin at 0x900}
443 const
444 kTextEncodingEUC_JP = $0920; { ISO 646, 1-byte katakana, JIS 208, JIS 212}
445 kTextEncodingEUC_CN = $0930; { ISO 646, GB 2312-80}
446 kTextEncodingEUC_TW = $0931; { ISO 646, CNS 11643-1992 Planes 1-16}
447 kTextEncodingEUC_KR = $0940; { RFC 1557: ISO 646, KS C 5601-1987}
448
449 { Misc standards begin at 0xA00}
450 const
451 kTextEncodingShiftJIS = $0A01; { plain Shift-JIS}
452 kTextEncodingKOI8_R = $0A02; { RFC 1489, Russian internet standard}
453 kTextEncodingBig5 = $0A03; { Big-5 (has variants)}
454 kTextEncodingMacRomanLatin1 = $0A04; { Mac OS Roman permuted to align with ISO Latin-1}
455 kTextEncodingHZ_GB_2312 = $0A05; { HZ (RFC 1842, for Chinese mail & news)}
456 kTextEncodingBig5_HKSCS_1999 = $0A06; { Big-5 with Hong Kong special char set supplement}
457 kTextEncodingVISCII = $0A07; { RFC 1456, Vietnamese}
458 kTextEncodingKOI8_U = $0A08; { RFC 2319, Ukrainian}
459 kTextEncodingBig5_E = $0A09; { Taiwan Big-5E standard}
460
461 { Other platform encodings}
462 const
463 kTextEncodingNextStepLatin = $0B01; { NextStep Latin encoding}
464 kTextEncodingNextStepJapanese = $0B02; { NextStep Japanese encoding (variant of EUC-JP)}
465
466 { EBCDIC & IBM host encodings begin at 0xC00}
467 const
468 kTextEncodingEBCDIC_LatinCore = $0C01; { Common base subset of EBCDIC Latin encodings}
469 kTextEncodingEBCDIC_CP037 = $0C02; { code page 037, extended EBCDIC (Latin-1 set) for US,Canada...}
470
471 { Special values}
472 const
473 kTextEncodingMultiRun = $0FFF; { Multi-encoding text with external run info}
474 kTextEncodingUnknown = $FFFF; { Unknown or unspecified }
475
476 { The following are older names for backward compatibility}
477 const
478 kTextEncodingEBCDIC_US = $0C01; { same value as kTextEncodingEBCDIC_LatinCore}
479
480
481 { TextEncodingVariant type & values }
482 type
483 TextEncodingVariant = UInt32;
484 { Default TextEncodingVariant, for any TextEncodingBase}
485 const
486 kTextEncodingDefaultVariant = 0;
487
488 { Variants of kTextEncodingMacRoman }
489 const
490 kMacRomanDefaultVariant = 0; { meta value, maps to 1 or 2 depending on System }
491 kMacRomanCurrencySignVariant = 1; { Mac OS version < 8.5, 0xDB is CURRENCY SIGN}
492 kMacRomanEuroSignVariant = 2; { Mac OS version >= 8.5, 0xDB is EURO SIGN }
493
494 { Variants of kTextEncodingMacCyrillic (for TEC 1.5 and later) }
495 const
496 kMacCyrillicDefaultVariant = 0; { meta value, maps to 1, 2, or 3 depending on System}
497 kMacCyrillicCurrSignStdVariant = 1; { Mac OS < 9.0 (RU,BG), 0xFF = CURRENCY SIGN, 0xA2/0xB6 = CENT / PARTIAL DIFF.}
498 kMacCyrillicCurrSignUkrVariant = 2; { Mac OS < 9.0 (UA,LangKit), 0xFF = CURRENCY SIGN, 0xA2/0xB6 = GHE WITH UPTURN}
499 kMacCyrillicEuroSignVariant = 3; { Mac OS >= 9.0, 0xFF is EURO SIGN, 0xA2/0xB6 = GHE WITH UPTURN}
500
501 { Variants of kTextEncodingMacIcelandic }
502 const
503 kMacIcelandicStdDefaultVariant = 0; { meta value, maps to 2 or 4 depending on System }
504 kMacIcelandicTTDefaultVariant = 1; { meta value, maps to 3 or 5 depending on System }
505 { The following are for Mac OS version < 8.5, 0xDB is CURRENCY SIGN }
506 kMacIcelandicStdCurrSignVariant = 2; { 0xBB/0xBC are fem./masc. ordinal indicators}
507 kMacIcelandicTTCurrSignVariant = 3; { 0xBB/0xBC are fi/fl ligatures}
508 { The following are for Mac OS version >= 8.5, 0xDB is EURO SIGN }
509 kMacIcelandicStdEuroSignVariant = 4; { 0xBB/0xBC are fem./masc. ordinal indicators}
510 kMacIcelandicTTEuroSignVariant = 5; { 0xBB/0xBC are fi/fl ligatures}
511
512 { Variants of kTextEncodingMacCroatian }
513 const
514 kMacCroatianDefaultVariant = 0; { meta value, maps to 1 or 2 depending on System }
515 kMacCroatianCurrencySignVariant = 1; { Mac OS version < 8.5, 0xDB is CURRENCY SIGN }
516 kMacCroatianEuroSignVariant = 2; { Mac OS version >= 8.5, 0xDB is EURO SIGN }
517
518
519 { Variants of kTextEncodingMacRomanian }
520 const
521 kMacRomanianDefaultVariant = 0; { meta value, maps to 1 or 2 depending on System }
522 kMacRomanianCurrencySignVariant = 1; { Mac OS version < 8.5, 0xDB is CURRENCY SIGN }
523 kMacRomanianEuroSignVariant = 2; { Mac OS version >= 8.5, 0xDB is EURO SIGN }
524
525
526 { Variants of kTextEncodingMacJapanese}
527 const
528 kMacJapaneseStandardVariant = 0;
529 kMacJapaneseStdNoVerticalsVariant = 1;
530 kMacJapaneseBasicVariant = 2;
531 kMacJapanesePostScriptScrnVariant = 3;
532 kMacJapanesePostScriptPrintVariant = 4;
533 kMacJapaneseVertAtKuPlusTenVariant = 5;
534
535 { Variants of kTextEncodingMacArabic}
536 const
537 kMacArabicStandardVariant = 0; { 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk (e.g. Cairo)}
538 kMacArabicTrueTypeVariant = 1; { 0xC0 is asterisk, 0x2A & 0xAA are multiply signs (e.g. Baghdad)}
539 kMacArabicThuluthVariant = 2; { 0xC0 is Arabic five-point star, 0x2A & 0xAA are multiply signs}
540 kMacArabicAlBayanVariant = 3; { 8-spoke asterisk, multiply sign, Koranic ligatures & parens}
541
542 { Variants of kTextEncodingMacFarsi}
543 const
544 kMacFarsiStandardVariant = 0; { 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk (e.g. Tehran)}
545 kMacFarsiTrueTypeVariant = 1; { asterisk, multiply signs, Koranic ligatures, geometric shapes}
546
547 { Variants of kTextEncodingMacHebrew}
548 const
549 kMacHebrewStandardVariant = 0;
550 kMacHebrewFigureSpaceVariant = 1;
551
552 { Variants of kTextEncodingMacGreek}
553 const
554 kMacGreekDefaultVariant = 0; { meta value, maps to 1 or 2 depending on System}
555 kMacGreekNoEuroSignVariant = 1; { Mac OS version < 9.2.2, 0x9C is SOFT HYPHEN, 0xFF is undefined}
556 kMacGreekEuroSignVariant = 2; { Mac OS version >= 9.2.2, 0x9C is EURO SIGN, 0xFF is SOFT HYPHEN}
557
558 { Variants of kTextEncodingMacVT100 }
559 const
560 kMacVT100DefaultVariant = 0; { meta value, maps to 1 or 2 depending on System }
561 kMacVT100CurrencySignVariant = 1; { Mac OS version < 8.5, 0xDB is CURRENCY SIGN }
562 kMacVT100EuroSignVariant = 2; { Mac OS version >= 8.5, 0xDB is EURO SIGN }
563
564 { Variants of Unicode & ISO 10646 encodings}
565 const
566 kUnicodeNoSubset = 0;
567 kUnicodeNormalizationFormD = 5; { canonical decomposition (NFD); excludes composed chars}
568 kUnicodeNormalizationFormC = 3; { canonical composition (NFC); uses the composed chars as of Unicode 3.1}
569 kUnicodeHFSPlusDecompVariant = 8; { decomposition for HFS+; doesn't decompose in 2000-2FFF, F900-FAFF, 2F800-2FAFF}
570 kUnicodeHFSPlusCompVariant = 9; { composition based on HFS+ decomposition}
571
572 { Variants of kTextEncodingISOLatin1}
573 const
574 kISOLatin1StandardVariant = 0;
575 kISOLatin1MusicCDVariant = 1;
576
577 {
578 Variants of kTextEncodingISOLatinArabic, kTextEncodingISOLatinHebrew.
579 Per RFC 1556 and ECMA TR/53, there are three ways of handling bidirectional text
580 in the ISO character sets 8859-6 (Arabic) and 8859-8 (Hebrew).
581 1. Implicit or Logical order is "a presentation method in which the direction is
582 determined by an algorithm according to the type of characters and their position
583 relative to the adjacent characters and according to their primary direction." This
584 is the method normally used for Unicode and for the Mac OS and Windows Arabic and
585 Hebrew encodings.
586 2. Visual order assumes the text is already ordered such that it can be displayed
587 in a left-to-right display direction with no further directional processing. This
588 is equivalent to treating all characters as having strong left-right directionality.
589 This is the default assumed for internet Hebrew text encoded in ISO 8859-8, unless
590 the charset label suffix specifically indicates implicit (-i) or explicit (-e)
591 ordering.
592 3. Explicit order is "a presentation method in which the direction is explicitly
593 defined by using control sequences which are interleaved within the text and are
594 used for direction determination."
595 }
596 const
597 kISOLatinArabicImplicitOrderVariant = 0;
598 kISOLatinArabicVisualOrderVariant = 1;
599 kISOLatinArabicExplicitOrderVariant = 2;
600
601 const
602 kISOLatinHebrewImplicitOrderVariant = 0;
603 kISOLatinHebrewVisualOrderVariant = 1;
604 kISOLatinHebrewExplicitOrderVariant = 2;
605
606 { Variants of kTextEncodingWindowsLatin1}
607 const
608 kWindowsLatin1StandardVariant = 0;
609 kWindowsLatin1PalmVariant = 1; { PalmSource variant of cp1252}
610
611 { Variants of kTextEncodingDOSJapanese}
612 const
613 kDOSJapaneseStandardVariant = 0;
614 kDOSJapanesePalmVariant = 1; { PalmSource variant of cp932}
615
616 {
617 Variants of EUC_CN
618 The DOSVariant is like kTextEncodingDOSChineseSimplif, but with the
619 basic EUC_CN part mapped as per kTextEncodingEUC_CN.
620 }
621 const
622 kEUC_CN_BasicVariant = 0;
623 kEUC_CN_DOSVariant = 1;
624
625 {
626 Variants of EUC_KR
627 The DOSVariant is like kTextEncodingDOSKorean, but with the
628 basic EUC_KR part mapped as per kTextEncodingEUC_KR.
629 }
630 const
631 kEUC_KR_BasicVariant = 0;
632 kEUC_KR_DOSVariant = 1;
633
634 {
635 Variants of ShiftJIS
636 The DOSVariant is like kTextEncodingDOSJapanese, but with the
637 basic ShiftJIS part mapped as per kTextEncodingShiftJIS.
638 }
639 const
640 kShiftJIS_BasicVariant = 0;
641 kShiftJIS_DOSVariant = 1;
642 kShiftJIS_MusicCDVariant = 2; { MusicShiftJIS, per RIS-506 (RIAJ)}
643
644 {
645 Variants of Big-5 encoding
646 The DOSVariant is like kTextEncodingDOSChineseTrad, but with the
647 basic Big5 part mapped as per kTextEncodingBig5.
648 }
649 const
650 kBig5_BasicVariant = 0;
651 kBig5_StandardVariant = 1; { 0xC6A1-0xC7FC: kana, Cyrillic, enclosed numerics}
652 kBig5_ETenVariant = 2; { adds kana, Cyrillic, radicals, etc with hi bytes C6-C8,F9}
653 kBig5_DOSVariant = 3;
654
655 { Variants of MacRomanLatin1 }
656 const
657 kMacRomanLatin1DefaultVariant = 0; { meta value, maps to others depending on System}
658 kMacRomanLatin1StandardVariant = 2; { permuted MacRoman, EuroSignVariant}
659 kMacRomanLatin1TurkishVariant = 6; { permuted MacTurkish}
660 kMacRomanLatin1CroatianVariant = 8; { permuted MacCroatian, EuroSignVariant}
661 kMacRomanLatin1IcelandicVariant = 11; { permuted MacIcelandic, StdEuroSignVariant}
662 kMacRomanLatin1RomanianVariant = 14; { permuted MacRomanian, EuroSignVariant}
663
664 { Unicode variants not yet supported (and not fully defined)}
665 const
666 kUnicodeNoCompatibilityVariant = 1;
667 kUnicodeNoCorporateVariant = 4;
668
669 { The following are older names for backward compatibility}
670 const
671 kMacRomanStandardVariant = 0;
672 kMacIcelandicStandardVariant = 0;
673 kMacIcelandicTrueTypeVariant = 1;
674 kJapaneseStandardVariant = 0;
675 kJapaneseStdNoVerticalsVariant = 1;
676 kJapaneseBasicVariant = 2;
677 kJapanesePostScriptScrnVariant = 3;
678 kJapanesePostScriptPrintVariant = 4;
679 kJapaneseVertAtKuPlusTenVariant = 5;
680 kTextEncodingShiftJIS_X0213_00 = $0628; { same value as kTextEncodingShiftJIS_X0213: Shift-JIS format encoding of JIS X0213 planes 1 and 2}
681 { kJapaneseStdNoOneByteKanaVariant = 6, // replaced by kJapaneseNoOneByteKanaOption}
682 { kJapaneseBasicNoOneByteKanaVariant = 7, // replaced by kJapaneseNoOneByteKanaOption }
683 kHebrewStandardVariant = 0;
684 kHebrewFigureSpaceVariant = 1; { The entries below are old Unicode variants. Variant 2 (kUnicodeCanonicalDecompVariant, kUnicodeMaxDecomposedVariant) is ambiguous and means}
685 { different things in different contexts. When normalizing (using ConvertFromUnicodeToText to convert from arbitrary}
686 { Unicode to a normalized form), Unicode variant 2 means the same thing as kUnicodeNormalizationFormD (i.e. NFD).}
687 { However, when converting between Unicode and traditional Mac OS encodings, Unicode variant 2 means the same thing as}
688 { kUnicodeHFSPlusDecompVariant (i.e. the special HFS decomposition which excludes some character ranges from normalization).}
689 { For clarity, please use the less ambiguous constants: kUnicodeNormalizationFormD = 5, kUnicodeHFSPlusDecompVariant = 8.}
690 { }
691 kUnicodeCanonicalDecompVariant = 2; { use kUnicodeNormalizationFormD or kUnicodeHFSPlusDecompVariant}
692 kUnicodeMaxDecomposedVariant = 2; { use kUnicodeNormalizationFormD or kUnicodeHFSPlusDecompVariant}
693 kUnicodeCanonicalCompVariant = 3; { replaced by kUnicodeNormalizationFormC}
694 kUnicodeNoComposedVariant = 3; { this really meant NoComposing; replaced by kUnicodeNormalizationFormC}
695
696 { TextEncodingFormat type & values }
697 type
698 TextEncodingFormat = UInt32;
699 const
700 { Default TextEncodingFormat for any TextEncodingBase}
701 kTextEncodingDefaultFormat = 0; { The entries below are formats for Unicode & ISO 10646}
702 kUnicodeUTF16Format = 0; { UTF16 form (16-bit units), native or external byte order (see below)}
703 kUnicodeUTF7Format = 1; { UTF7 form}
704 kUnicodeUTF8Format = 2; { UTF8 form}
705 kUnicodeUTF32Format = 3; { UTF32 form (32-bit units), native or external byte order (see below)}
706 kUnicodeUTF16BEFormat = 4; { UTF16 form, explicit big-endian byte order, no BOM}
707 kUnicodeUTF16LEFormat = 5; { UTF16 form, explicit little-endian byte order, no BOM}
708 kUnicodeUTF32BEFormat = 6; { UTF32 form, explicit big-endian byte order, no BOM}
709 kUnicodeUTF32LEFormat = 7; { UTF32 form, explicit little-endian byte order, no BOM}
710 kUnicodeSCSUFormat = 8; { Std. Compression Scheme for Unicode, Unicode Tech Std. #6}
711 { Note for kUnicodeUTF16Format and kUnicodeUTF32Format:}
712 { - An array of UTF16Char (UniChar) or UTF32Char is normally understood to use "internal" or}
713 { platform-native byte ordering for kUnicodeUTF16Format and kUnicodeUTF32Format; the array MAY}
714 { begin with byte-order mark (BOM), but the BOM should match the internal ordering.}
715 { - If an array of bytes (such as char *) that can be in various encodings is specified to be}
716 { in Unicode with kUnicodeUTF16Format or kUnicodeUTF32Format (not explicitly BE or LE), then it}
717 { is assumed to use "external" byte ordering, which means: If there is a BOM at the beginning}
718 { of text, the BOM specifies the byte ordering, otherwise big-endian is assumed.}
719 { Synonyms for some Unicode formats}
720 kUnicode16BitFormat = 0;
721 kUnicode32BitFormat = 3;
722
723 { TextEncoding type }
724 type
725 TextEncoding = UInt32;
726 TextEncoding_fix = TextEncoding; { used as field type when a record declaration contains a TextEncoding field identifier }
727 TextEncodingPtr = ^TextEncoding; { when a VAR xx: TextEncoding parameter can be nil, it is changed to xx: TextEncodingPtr }
728 { name part selector for GetTextEncodingName}
729 type
730 TextEncodingNameSelector = UInt32;
731 const
732 kTextEncodingFullName = 0;
733 kTextEncodingBaseName = 1;
734 kTextEncodingVariantName = 2;
735 kTextEncodingFormatName = 3;
736
737 { Types used in conversion }
738 type
739 TextEncodingRun = record
740 offset: ByteOffset;
741 textEncoding: TextEncoding_fix; { TextEncoding_fix is an alias of TextEncoding, used because the field identifier is textEncoding}
742 end;
743 TextEncodingRunPtr = ^TextEncodingRun;
744 type
745 ConstTextEncodingRunPtr = {const} TextEncodingRunPtr; { the const marker is only a comment: plain alias of TextEncodingRunPtr}
746 ScriptCodeRun = record
747 offset: ByteOffset;
748 script: ScriptCode;
749 end;
750 ScriptCodeRunPtr = ^ScriptCodeRun;
751 type
752 ConstScriptCodeRunPtr = {const} ScriptCodeRunPtr; { the const marker is only a comment: plain alias of ScriptCodeRunPtr}
753 TextPtr = UInt8Ptr;
754 ConstTextPtr = {const} UInt8Ptr; { the const marker is only a comment: same underlying type as TextPtr}
755 { Basic types for Unicode characters and strings:}
756 type
757 UniCharArrayPtr = UniCharPtr; { alias of UniCharPtr, used where the pointer addresses an array of UniChars }
758 ConstUniCharArrayPtr = {const} UniCharPtr; { same type as UniCharArrayPtr; the const qualifier survives only as a comment }
759 {
760 UniCharArrayHandle is a handle type to correspond to UniCharArrayPtr,
761 i.e. a handle to an array of UniChars (UInt16s).
762 }
763 type
764 UniCharArrayHandle = ^UniCharArrayPtr; { pointer to a UniCharArrayPtr }
765 {
766 UniCharArrayOffset is used to indicate an edge offset in an array
767 of UniChars (UInt16s).
768 }
769 type
770 UniCharArrayOffset = UNSIGNEDLONG; { width follows the UNSIGNEDLONG type, which is declared outside this section }
771 UniCharArrayOffsetPtr = ^UniCharArrayOffset;
772 { enums for TextEncoding Conversion routines: don't-care values for script, language and region codes (all three are -128) }
773 const
774 kTextScriptDontCare = -128; { pass as the script code when no script code is being provided (see GetTextEncodingFromScriptInfo) }
775 kTextLanguageDontCare = -128; { pass as the language code when none is being provided; also returned by GetScriptInfoFromTextEncoding when the language is ambiguous }
776 kTextRegionDontCare = -128; { pass as the region code when no region code is being provided (see GetTextEncodingFromScriptInfo) }
777
778 { struct for TECGetInfo: format code, TEC version, feature/fix bitmasks, localized names and Text Encodings file versions }
779
780 type
781 TECInfo = record
782 format: UInt16; { format code for this struct (see kTECInfoCurrentFormat) }
783 tecVersion: UInt16; { TEC version in BCD, e.g. 0x0121 for 1.2.1}
784 tecTextConverterFeatures: UInt32; { bitmask indicating TEC features/fixes}
785 tecUnicodeConverterFeatures: UInt32; { bitmask indicating UnicodeConverter features/fixes (the kTEC...Bit and kTEC...Mask constants) }
786 tecTextCommonFeatures: UInt32; { bitmask indicating TextCommon features/fixes}
787 tecTextEncodingsFolderName: Str31; { localized name of Text Encodings folder (pascal string)}
788 tecExtensionFileName: Str31; { localized name of TEC extension (pascal string)}
789 tecLowestTEFileVersion: UInt16; { Lowest version (BCD) of all files in Text Encodings folder}
790 tecHighestTEFileVersion: UInt16; { Highest version (BCD) of all files in Text Encodings folder}
791 end;
792 TECInfoPtr = ^TECInfo;
793 type
794 TECInfoHandle = ^TECInfoPtr; { pointer to a TECInfoPtr; the type of the var parameter of TECGetInfo }
795 { Value for TECInfo format code}
796 const
797 kTECInfoCurrentFormat = 2; { any future formats will just add fields at the end}
798
799 {
800 Defined feature/fix bits for tecUnicodeConverterFeatures field
801 Bit: Meaning if set:
802 ---- ---------------
803 kTECKeepInfoFixBit Unicode Converter no longer ignores other control flags if
804 kUnicodeKeepInfoBit is set. Bug fix in TEC Manager 1.2.1.
805 kTECFallbackTextLengthFixBit Unicode Converter honors the *srcConvLen and *destConvLen
806 returned by caller-supplied fallback handler for any status it
807 returns except for kTECUnmappableElementErr (previously it only
808 honored these values if noErr was returned). Bug fix in TEC
809 Manager 1.2.1.
810 kTECTextRunBitClearFixBit ConvertFromUnicodeToTextRun & ConvertFromUnicodeToScriptCodeRun
811 function correctly if the kUnicodeTextRunBit is set (previously
812 their determination of best target encoding was incorrect). Bug
813 fix in TEC Manager 1.3.
814 kTECTextToUnicodeScanFixBit ConvertFromTextToUnicode uses an improved scanner and maintains
815 some resulting state information, which it uses for mapping.
816 This has several effects:
817 - Improved mapping of 0x30-0x39 digits in Mac OS Arabic, fewer
818 direction overrides when mapping Mac OS Arabic & Hebrew, and
819 improved mapping of certain characters in Indic encodings.
820 - Malformed input produces kTextMalformedInputErr.
821 - ConvertFromTextToUnicode accepts and uses the control flags
822 kUnicodeKeepInfoMask and kUnicodeStringUnterminatedMask.
823 Bug fix and enhancement in TEC Manager 1.3.
824 kTECAddForceASCIIChangesBit Define new control flag bits kUnicodeForceASCIIRangeBit and
825 kUnicodeNoHalfwidthCharsBit for use with
826 ConvertFromTextToUnicode, ConvertFromUnicodeToText, etc.
827 Enhancement in TEC Manager 1.4.
828 kTECPreferredEncodingFixBit CreateUnicodeToTextRunInfo and related functions fix a problem
829 that occurred when a preferred encoding was specified that did
830 not match the System script; the preferred script was not
831 actually placed first in the ordered list of encodings to use.
832 Bug fix in TEC Manager 1.4.
833 kTECAddTextRunHeuristicsBit Define new control flag bit kUnicodeTextRunHeuristicsBit for
834 use with ConvertFromUnicodeToTextRun.
835 kTECAddFallbackInterruptBit Define new option kUnicodeFallbackInterruptSafeMask for use
836 with SetFallbackUnicodeToText. If a client fallback handler is
837 installed without specifying this bit, ConvertFromUnicodeToText
838 will HLock the tables it uses (in case the fallback handler
839 moves memory); otherwise, it won't.
840 }
841
842 const { bit numbers of the feature/fix flags in the tecUnicodeConverterFeatures field of TECInfo }
843 kTECKeepInfoFixBit = 0; { other control flags are no longer ignored when kUnicodeKeepInfoBit is set (TEC Manager 1.2.1) }
844 kTECFallbackTextLengthFixBit = 1; { lengths returned by a caller-supplied fallback handler are honored for any status except kTECUnmappableElementErr (TEC Manager 1.2.1) }
845 kTECTextRunBitClearFixBit = 2; { fixes the choice of best target encoding in ConvertFromUnicodeToTextRun and ConvertFromUnicodeToScriptCodeRun (TEC Manager 1.3) }
846 kTECTextToUnicodeScanFixBit = 3; { ConvertFromTextToUnicode uses an improved scanner and keeps resulting state (TEC Manager 1.3) }
847 kTECAddForceASCIIChangesBit = 4; { control flags kUnicodeForceASCIIRangeBit and kUnicodeNoHalfwidthCharsBit are defined (TEC Manager 1.4) }
848 kTECPreferredEncodingFixBit = 5; { a preferred encoding is placed first in the ordered list of encodings (TEC Manager 1.4) }
849 kTECAddTextRunHeuristicsBit = 6; { control flag kUnicodeTextRunHeuristicsBit is defined for ConvertFromUnicodeToTextRun }
850 kTECAddFallbackInterruptBit = 7; { option kUnicodeFallbackInterruptSafeMask is defined for SetFallbackUnicodeToText }
851
852 const { mask form of each bit above: 1 shifted left by the bit number }
853 kTECKeepInfoFixMask = 1 shl kTECKeepInfoFixBit;
854 kTECFallbackTextLengthFixMask = 1 shl kTECFallbackTextLengthFixBit;
855 kTECTextRunBitClearFixMask = 1 shl kTECTextRunBitClearFixBit;
856 kTECTextToUnicodeScanFixMask = 1 shl kTECTextToUnicodeScanFixBit;
857 kTECAddForceASCIIChangesMask = 1 shl kTECAddForceASCIIChangesBit;
858 kTECPreferredEncodingFixMask = 1 shl kTECPreferredEncodingFixBit;
859 kTECAddTextRunHeuristicsMask = 1 shl kTECAddTextRunHeuristicsBit;
860 kTECAddFallbackInterruptMask = 1 shl kTECAddFallbackInterruptBit;
861
862 {
863 -------------------------------------------------------------------------------------------------
864 CONSTANTS for common and special Unicode code values
865 -------------------------------------------------------------------------------------------------
866 }
867
868 const { untyped 16-bit code values, written in hexadecimal }
869 kUnicodeByteOrderMark = $FEFF; { byte-order mark (BOM) }
870 kUnicodeObjectReplacement = $FFFC; { placeholder for non-text object}
871 kUnicodeReplacementChar = $FFFD; { Unicode replacement for unconvertable input char}
872 kUnicodeSwappedByteOrderMark = $FFFE; { not a Unicode char; byte-swapped version of FEFF}
873 kUnicodeNotAChar = $FFFF; { not a Unicode char; may be used as a terminator}
874
875
876 {
877 -------------------------------------------------------------------------------------------------
878 CONSTANTS & DATA STRUCTURES for Unicode Properties
879 -------------------------------------------------------------------------------------------------
880 }
881
882 type
883 UCCharPropertyType = SInt32; { type of the propType parameter of UCGetCharProperty }
884 const { property selectors; each comment states what kind of value the selector requests }
885 kUCCharPropTypeGenlCategory = 1; { requests enumeration value}
886 kUCCharPropTypeCombiningClass = 2; { requests numeric value 0..255}
887 kUCCharPropTypeBidiCategory = 3; { requests enumeration value}
888 kUCCharPropTypeDecimalDigitValue = 4; { requests numeric value 0..9 for decimal digit chars (get err for others)}
889
890 type
891 UCCharPropertyValue = UInt32; { type of the value UCGetCharProperty returns through its propValue parameter }
892 { General Category enumeration values (requested by kUCCharPropTypeGenlCategory); each comment gives the two-letter category abbreviation and its name }
893 const
894 { Normative categories:}
895 kUCGenlCatOtherNotAssigned = 0; { Cn Other, Not Assigned}
896 kUCGenlCatOtherControl = 1; { Cc Other, Control}
897 kUCGenlCatOtherFormat = 2; { Cf Other, Format}
898 kUCGenlCatOtherSurrogate = 3; { Cs Other, Surrogate}
899 kUCGenlCatOtherPrivateUse = 4; { Co Other, Private Use}
900 kUCGenlCatMarkNonSpacing = 5; { Mn Mark, Non-Spacing}
901 kUCGenlCatMarkSpacingCombining = 6; { Mc Mark, Spacing Combining}
902 kUCGenlCatMarkEnclosing = 7; { Me Mark, Enclosing}
903 kUCGenlCatNumberDecimalDigit = 8; { Nd Number, Decimal Digit}
904 kUCGenlCatNumberLetter = 9; { Nl Number, Letter}
905 kUCGenlCatNumberOther = 10; { No Number, Other}
906 kUCGenlCatSeparatorSpace = 11; { Zs Separator, Space}
907 kUCGenlCatSeparatorLine = 12; { Zl Separator, Line}
908 kUCGenlCatSeparatorParagraph = 13; { Zp Separator, Paragraph}
909 kUCGenlCatLetterUppercase = 14; { Lu Letter, Uppercase}
910 kUCGenlCatLetterLowercase = 15; { Ll Letter, Lowercase}
911 kUCGenlCatLetterTitlecase = 16; { Lt Letter, Titlecase}
912 { Informative categories (the values 19 and 27 are not used in this list):}
913 kUCGenlCatLetterModifier = 17; { Lm Letter, Modifier}
914 kUCGenlCatLetterOther = 18; { Lo Letter, Other}
915 kUCGenlCatPunctConnector = 20; { Pc Punctuation, Connector}
916 kUCGenlCatPunctDash = 21; { Pd Punctuation, Dash}
917 kUCGenlCatPunctOpen = 22; { Ps Punctuation, Open}
918 kUCGenlCatPunctClose = 23; { Pe Punctuation, Close}
919 kUCGenlCatPunctInitialQuote = 24; { Pi Punctuation, Initial quote}
920 kUCGenlCatPunctFinalQuote = 25; { Pf Punctuation, Final quote}
921 kUCGenlCatPunctOther = 26; { Po Punctuation, Other}
922 kUCGenlCatSymbolMath = 28; { Sm Symbol, Math}
923 kUCGenlCatSymbolCurrency = 29; { Sc Symbol, Currency}
924 kUCGenlCatSymbolModifier = 30; { Sk Symbol, Modifier}
925 kUCGenlCatSymbolOther = 31; { So Symbol, Other}
926
927 { Bidirectional Category enumeration values (requested by kUCCharPropTypeBidiCategory); each comment gives the category abbreviation and its name }
928 const
929 kUCBidiCatNotApplicable = 0; { no category applies; for now this is also used for unassigned codes }
930 { Strong types:}
931 kUCBidiCatLeftRight = 1; { L Left-to-Right}
932 kUCBidiCatRightLeft = 2; { R Right-to-Left}
933 { Weak types:}
934 kUCBidiCatEuroNumber = 3; { EN European Number}
935 kUCBidiCatEuroNumberSeparator = 4; { ES European Number Separator}
936 kUCBidiCatEuroNumberTerminator = 5; { ET European Number Terminator}
937 kUCBidiCatArabicNumber = 6; { AN Arabic Number}
938 kUCBidiCatCommonNumberSeparator = 7; { CS Common Number Separator}
939 { Separators:}
940 kUCBidiCatBlockSeparator = 8; { B Paragraph Separator (was Block Separator)}
941 kUCBidiCatSegmentSeparator = 9; { S Segment Separator}
942 { Neutrals:}
943 kUCBidiCatWhitespace = 10; { WS Whitespace}
944 kUCBidiCatOtherNeutral = 11; { ON Other Neutrals (unassigned codes could use this)}
945 { New categories for Unicode 3.0}
946 kUCBidiCatRightLeftArabic = 12; { AL Right-to-Left Arabic (was Arabic Letter)}
947 kUCBidiCatLeftRightEmbedding = 13; { LRE Left-to-Right Embedding}
948 kUCBidiCatRightLeftEmbedding = 14; { RLE Right-to-Left Embedding}
949 kUCBidiCatLeftRightOverride = 15; { LRO Left-to-Right Override}
950 kUCBidiCatRightLeftOverride = 16; { RLO Right-to-Left Override}
951 kUCBidiCatPopDirectionalFormat = 17; { PDF Pop Directional Format}
952 kUCBidiCatNonSpacingMark = 18; { NSM Non-Spacing Mark}
953 kUCBidiCatBoundaryNeutral = 19; { BN Boundary Neutral}
954 { New categories for Unicode 6.3}
955 kUCBidiCatLeftRightIsolate = 20; { LRI Left-to-Right Isolate}
956 kUCBidiCatRightLeftIsolate = 21; { RLI Right-to-Left Isolate}
957 kUCBidiCatFirstStrongIsolate = 22; { FSI First Strong Isolate}
958 kUCBidiCatPopDirectionalIsolate = 23; { PDI Pop Directional Isolate}
959
960
961 {
962 -------------------------------------------------------------------------------------------------
963 Prototypes for TextEncoding functions
964 -------------------------------------------------------------------------------------------------
965 }
966
967
968 {
969 * CreateTextEncoding()
970 * Summary: returns a TextEncoding for the given encoding base, variant and format; binds to the external symbol _CreateTextEncoding.
971 * Availability:
972 * Mac OS X: in version 10.0 and later in CoreServices.framework
973 * CarbonLib: in CarbonLib 1.0 and later
974 * Non-Carbon CFM: in TextCommon 1.1 and later
975 }
976 function CreateTextEncoding( encodingBase: TextEncodingBase; encodingVariant: TextEncodingVariant; encodingFormat: TextEncodingFormat ): TextEncoding; external name '_CreateTextEncoding';
977 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
978
979
980 {
981 * GetTextEncodingBase()
982 * Summary: returns the TextEncodingBase of the given TextEncoding; binds to the external symbol _GetTextEncodingBase.
983 * Availability:
984 * Mac OS X: in version 10.0 and later in CoreServices.framework
985 * CarbonLib: in CarbonLib 1.0 and later
986 * Non-Carbon CFM: in TextCommon 1.1 and later
987 }
988 function GetTextEncodingBase( encoding: TextEncoding ): TextEncodingBase; external name '_GetTextEncodingBase';
989 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
990
991
992 {
993 * GetTextEncodingVariant()
994 * Summary: returns the TextEncodingVariant of the given TextEncoding; binds to the external symbol _GetTextEncodingVariant.
995 * Availability:
996 * Mac OS X: in version 10.0 and later in CoreServices.framework
997 * CarbonLib: in CarbonLib 1.0 and later
998 * Non-Carbon CFM: in TextCommon 1.1 and later
999 }
1000 function GetTextEncodingVariant( encoding: TextEncoding ): TextEncodingVariant; external name '_GetTextEncodingVariant';
1001 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1002
1003
1004 {
1005 * GetTextEncodingFormat()
1006 * Summary: returns the TextEncodingFormat of the given TextEncoding; binds to the external symbol _GetTextEncodingFormat.
1007 * Availability:
1008 * Mac OS X: in version 10.0 and later in CoreServices.framework
1009 * CarbonLib: in CarbonLib 1.0 and later
1010 * Non-Carbon CFM: in TextCommon 1.1 and later
1011 }
1012 function GetTextEncodingFormat( encoding: TextEncoding ): TextEncodingFormat; external name '_GetTextEncodingFormat';
1013 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1014
1015
1016 {
1017 * ResolveDefaultTextEncoding()
1018 * Summary: takes a TextEncoding and returns a TextEncoding; presumably replaces default values with concrete ones - confirm against the TEC documentation.
1019 * Availability:
1020 * Mac OS X: in version 10.0 and later in CoreServices.framework
1021 * CarbonLib: in CarbonLib 1.0 and later
1022 * Non-Carbon CFM: in TextCommon 1.1 and later
1023 }
1024 function ResolveDefaultTextEncoding( encoding: TextEncoding ): TextEncoding; external name '_ResolveDefaultTextEncoding';
1025 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1026
1027
1028 {
1029 * GetTextEncodingName()
1030 * Summary: returns an OSStatus; presumably stores the name part chosen by iNamePartSelector in the buffer oEncodingName (iOutputBufLen bytes) and its length in oNameLength - confirm. oActualRegion and oActualEncoding can be nil.
1031 * Availability:
1032 * Mac OS X: in version 10.0 and later in CoreServices.framework
1033 * CarbonLib: in CarbonLib 1.0 and later
1034 * Non-Carbon CFM: in TextCommon 1.1 and later
1035 }
1036 function GetTextEncodingName( iEncoding: TextEncoding; iNamePartSelector: TextEncodingNameSelector; iPreferredRegion: RegionCode; iPreferredEncoding: TextEncoding; iOutputBufLen: ByteCount; var oNameLength: ByteCount; oActualRegion: RegionCodePtr { can be NULL }; oActualEncoding: TextEncodingPtr { can be NULL }; oEncodingName: TextPtr ): OSStatus; external name '_GetTextEncodingName';
1037 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1038
1039
1040 {
1041 * TECGetInfo()
1042 * Summary: returns an OSStatus and hands back a TECInfoHandle through the var parameter tecInfo (see the TECInfo record). NOTE(review): who disposes of the handle is not stated here.
1043 * Availability:
1044 * Mac OS X: in version 10.0 and later in CoreServices.framework
1045 * CarbonLib: in CarbonLib 1.0 and later
1046 * Non-Carbon CFM: in TextCommon 1.2.1 and later
1047 }
1048 function TECGetInfo( var tecInfo: TECInfoHandle ): OSStatus; external name '_TECGetInfo';
1049 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1050
1051
1052 {
1053 * UpgradeScriptInfoToTextEncoding()
1054 * Summary: converts a script code, language code, region code and font name to a text encoding, returned in oEncoding; GetTextEncodingFromScriptInfo is the variant without a font name.
1055 * Availability:
1056 * Mac OS X: in version 10.0 and later in CoreServices.framework
1057 * CarbonLib: in CarbonLib 1.0 and later
1058 * Non-Carbon CFM: in TextCommon 1.1 and later
1059 }
1060 function UpgradeScriptInfoToTextEncoding( iTextScriptID: ScriptCode; iTextLanguageID: LangCode; iRegionID: RegionCode; iTextFontname: StringPtr; var oEncoding: TextEncoding ): OSStatus; external name '_UpgradeScriptInfoToTextEncoding';
1061 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1062
1063
1064 {
1065 * RevertTextEncodingToScriptInfo()
1066 * Summary: converts a text encoding to a script code (oTextScriptID) and, optionally, a language code and a font name; oTextLanguageID and oTextFontname can be nil. GetScriptInfoFromTextEncoding is the variant without a font name.
1067 * Availability:
1068 * Mac OS X: in version 10.0 and later in CoreServices.framework
1069 * CarbonLib: in CarbonLib 1.0 and later
1070 * Non-Carbon CFM: in TextCommon 1.1 and later
1071 }
1072 function RevertTextEncodingToScriptInfo( iEncoding: TextEncoding; var oTextScriptID: ScriptCode; oTextLanguageID: LangCodePtr { can be NULL }; oTextFontname: StringPtr { can be NULL } ): OSStatus; external name '_RevertTextEncodingToScriptInfo';
1073 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1074
1075
1076 {
1077 * GetTextEncodingFromScriptInfo()
1078 *
1079 * Summary:
1080 * Converts any combination of a Mac OS script code, a language
1081 * code, and a region code to a text encoding.
1082 *
1083 * Discussion:
1084 * This function is almost identical to
1085 * UpgradeScriptInfoToTextEncoding except it doesn't take a font
1086 * name and it is available in CoreServices.
1087 *
1088 * Parameters:
1089 *
1090 * iTextScriptID:
1091 * A valid Script Manager script code. The Mac OS Script Manager
1092 * defines constants for script codes using this format: smXxx. To
1093 * designate the system script, specify the meta-value of
1094 * smSystemScript. To indicate that you do not want to provide a
1095 * script code for this parameter, specify the constant
1096 * kTextScriptDontCare.
1097 *
1098 * iTextLanguageID:
1099 * A valid Script Manager language code. The Mac OS Script Manager
1100 * defines constants for language codes using this format:
1101 * langXxx. To indicate that you do not want to provide a language
1102 * code for this parameter, specify the constant
1103 * kTextLanguageDontCare.
1104 *
1105 * iTextRegionID:
1106 * A valid Script Manager region code. The Mac OS Script Manager
1107 * defines constants for region codes using this format: verXxx.
1108 * To indicate that you do not want to provide a region code for
1109 * this parameter, specify the constant kTextRegionDontCare.
1110 *
1111 * oEncoding:
1112 * A pointer to a value of type TextEncoding. On return, this
1113 * value holds the text encoding specification that the function
1114 * created from the other values you provided.
1115 *
1116 * Availability:
1117 * Mac OS X: in version 10.2 and later in CoreServices.framework
1118 * CarbonLib: not available in CarbonLib 1.x, is available on Mac OS X version 10.2 and later
1119 * Non-Carbon CFM: not available
1120 }
1121 function GetTextEncodingFromScriptInfo( iTextScriptID: ScriptCode; iTextLanguageID: LangCode; iTextRegionID: RegionCode; var oEncoding: TextEncoding ): OSStatus; external name '_GetTextEncodingFromScriptInfo';
1122 (* __OSX_AVAILABLE_STARTING(__MAC_10_2, __IPHONE_NA) *)
1123
1124
1125 {
1126 * GetScriptInfoFromTextEncoding()
1127 *
1128 * Summary:
1129 * Converts the given Mac OS text encoding specification to the
1130 * corresponding script code and, if possible, language code.
1131 *
1132 * Discussion:
1133 * This function is almost identical to
1134 * RevertTextEncodingToScriptInfo except it doesn't return a font
1135 * name and it is available in CoreServices.
1136 *
1137 * Parameters:
1138 *
1139 * iEncoding:
1140 * The text encoding specification to be converted.
1141 *
1142 * oTextScriptID:
1143 * A pointer to a value of type ScriptCode. On return, a Mac OS
1144 * script code that corresponds to the text encoding specification
1145 * you identified in the iEncoding parameter. If you do not pass a
1146 * pointer for this parameter, the function returns a paramErr
1147 * result code.
1148 *
1149 * oTextLanguageID:
1150 * A pointer to a value of type LangCode. On input, if you do not
1151 * want the function to return the language code, specify NULL as
1152 * the value of this parameter. On return, the appropriate
1153 * language code, if the language can be unambiguously derived
1154 * from the text encoding specification, for example, Japanese,
1155 * and you did not set the parameter to NULL. If you do not
1156 * specify NULL on input and the language is ambiguous--that is,
1157 * the function cannot accurately derive it from the text encoding
1158 * specification--the function returns a value of
1159 * kTextLanguageDontCare.
1160 *
1161 * Availability:
1162 * Mac OS X: in version 10.2 and later in CoreServices.framework
1163 * CarbonLib: not available in CarbonLib 1.x, is available on Mac OS X version 10.2 and later
1164 * Non-Carbon CFM: not available
1165 }
1166 function GetScriptInfoFromTextEncoding( iEncoding: TextEncoding; var oTextScriptID: ScriptCode; oTextLanguageID: LangCodePtr { can be NULL } ): OSStatus; external name '_GetScriptInfoFromTextEncoding';
1167 (* __OSX_AVAILABLE_STARTING(__MAC_10_2, __IPHONE_NA) *)
1168
1169
1170 {
1171 * NearestMacTextEncodings()
1172 * Summary: returns an OSStatus; for generalEncoding it hands back a best and an alternate Mac text encoding through the var parameters bestMacEncoding and alternateMacEncoding.
1173 * Availability:
1174 * Mac OS X: in version 10.0 and later in CoreServices.framework
1175 * CarbonLib: in CarbonLib 1.0 and later
1176 * Non-Carbon CFM: in TextCommon 1.5 and later
1177 }
1178 function NearestMacTextEncodings( generalEncoding: TextEncoding; var bestMacEncoding: TextEncoding; var alternateMacEncoding: TextEncoding ): OSStatus; external name '_NearestMacTextEncodings';
1179 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1180
1181
1182 {
1183 * UCGetCharProperty()
1184 * Summary: returns an OSStatus; propType (a kUCCharPropType constant) selects the property, whose value is handed back in propValue, for the text at charPtr of length textLength.
1185 * Availability:
1186 * Mac OS X: in version 10.0 and later in CoreServices.framework
1187 * CarbonLib: in CarbonLib 1.0 and later
1188 * Non-Carbon CFM: in TextCommon 1.5 and later
1189 }
1190 function UCGetCharProperty( charPtr: ConstUniCharPtr; textLength: UniCharCount; propType: UCCharPropertyType; var propValue: UCCharPropertyValue ): OSStatus; external name '_UCGetCharProperty';
1191 (* __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA) *)
1192
1193
1194 {
1195 -------------------------------------------------------------------------------------------------
1196 Surrogate pair utilities
1197 -------------------------------------------------------------------------------------------------
1198 }
1199
1200
1201 // surrogate ranges: first and last 16-bit code value of the high and the low surrogate range
1202 const
1203 kUCHighSurrogateRangeStart = $D800; { first value of the high-surrogate range }
1204 kUCHighSurrogateRangeEnd = $DBFF; { last value of the high-surrogate range }
1205 kUCLowSurrogateRangeStart = $DC00; { first value of the low-surrogate range; directly follows the high range }
1206 kUCLowSurrogateRangeEnd = $DFFF; { last value of the low-surrogate range }
1207
1208 {$endc} {TARGET_OS_MAC}
1209 {$ifc not defined MACOSALLINCLUDE or not MACOSALLINCLUDE}
1210
1211 end.
1212 {$endc} {not MACOSALLINCLUDE}
1213