1 #ifndef __UTF8_H__
2 #define __UTF8_H__
3 
4 /* utf8.h -- convert characters to/from UTF-8
5 
6   (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
7   See tidy.h for the copyright notice.
8 
9   CVS Info :
10 
11     $Author: arnaud02 $
12     $Date: 2006/09/12 15:14:44 $
13     $Revision: 1.5 $
14 
15 */
16 
17 #include "platform.h"
18 #include "buffio.h"
19 
20 /* UTF-8 encoding/decoding support
21 ** Does not convert character "codepoints", i.e. to/from ISO/IEC 10646.
22 */
23 
/* Decode a UTF-8 byte sequence into a single character value.
**
**   c              - out: receives the decoded character value.
**   firstByte      - first byte of the sequence, already read by the caller.
**   successorBytes - read-only string argument; presumably an in-memory
**                    source for the bytes following firstByte (TODO confirm).
**   inp            - input source argument; presumably an alternative source
**                    for the following bytes (TODO confirm which one wins
**                    when both are supplied).
**   count          - in/out pointer; presumably a byte count (TODO confirm
**                    direction and meaning against utf8.c).
**
** NOTE(review): the meaning of the int return value is not visible in this
** header -- check the implementation before relying on it.
*/
24 int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
25                                 TidyInputSource* inp, int* count );
26 
/* Encode a single character value as a UTF-8 byte sequence.
**
**   c         - character value to encode.
**   encodebuf - writable string argument; presumably an in-memory
**               destination for the encoded bytes (TODO confirm required
**               capacity and whether it may be NULL).
**   outp      - output sink argument; presumably an alternative destination
**               for the encoded bytes (TODO confirm which one wins when both
**               are supplied).
**   count     - in/out pointer; presumably the number of bytes produced
**               (TODO confirm against utf8.c).
**
** NOTE(review): the meaning of the int return value is not visible in this
** header -- check the implementation before relying on it.
*/
27 int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
28                                 TidyOutputSink* outp, int* count );
29 
30 
/* String-based convenience helpers.
**
** GetUTF8: reads from the read-only string `str` and stores a character
**   value through `ch`.
**   NOTE(review): the uint return is presumably a byte count related to the
**   sequence consumed -- confirm the exact convention (total bytes vs. bytes
**   after the first) in utf8.c before using it to advance a pointer.
**
** PutUTF8: writes the encoding of `c` into the writable buffer `buf`.
**   NOTE(review): the returned tmbstr is presumably a position within `buf`
**   (e.g. just past the bytes written) -- confirm; the required capacity of
**   `buf` is not visible in this header.
*/
31 uint  TY_(GetUTF8)( ctmbstr str, uint *ch );
32 tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
33 
/* Unicode byte order mark (BOM) constants.
**
** UNICODE_BOM_BE and UNICODE_BOM_LE are 16-bit values that are byte-swapped
** images of each other (0xFEFF vs. 0xFFFE). UNICODE_BOM is an alias for the
** big-endian value, which this header treats as the default.
** UNICODE_BOM_UTF8 packs the three bytes EF BB BF into one integer constant;
** it is a 24-bit value, not a character value.
** NOTE(review): how callers assemble input bytes before comparing against
** these constants is not visible here -- check the call sites.
*/
34 #define UNICODE_BOM_BE   0xFEFF   /* big-endian (default) UNICODE BOM */
35 #define UNICODE_BOM      UNICODE_BOM_BE
36 #define UNICODE_BOM_LE   0xFFFE   /* little-endian UNICODE BOM */
37 #define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
38 
39 
/* UTF-16 related predicates on a single tchar value.
**
** IsValidUTF16FromUCS4: presumably reports whether the UCS-4 value `ucs4`
**   can be represented in UTF-16 (TODO confirm the exact range tested).
** IsHighSurrogate / IsLowSurrogate: presumably report whether `ch` lies in
**   the high (leading) or low (trailing) surrogate range respectively
**   (TODO confirm the bounds used in utf8.c).
*/
40 Bool    TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
41 Bool    TY_(IsHighSurrogate)( tchar ch );
42 Bool    TY_(IsLowSurrogate)( tchar ch );
43 
/* Predicates on "combined" characters.
**
** NOTE(review): "combined" presumably refers to a value obtained by
** combining a surrogate pair (see CombineSurrogatePair), not to Unicode
** combining marks -- confirm in utf8.c. The difference between the plain
** and the "Valid" variant is not visible in this header.
*/
44 Bool    TY_(IsCombinedChar)( tchar ch );
45 Bool    TY_(IsValidCombinedChar)( tchar ch );
46 
/* Conversion between a surrogate pair and a single tchar value.
**
** CombineSurrogatePair: takes a `high` and a `low` value and returns one
**   tchar. NOTE(review): whether the inputs are validated as surrogates is
**   not visible here -- check before passing unchecked values.
** SplitSurrogatePair: takes one tchar (`utf16`) and writes two values
**   through `high` and `low`. NOTE(review): the Bool result presumably
**   signals whether the split succeeded -- confirm, and confirm whether
**   *high / *low are written on failure.
*/
47 tchar   TY_(CombineSurrogatePair)( tchar high, tchar low );
48 Bool    TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low );
49 
50 
51 
52 #endif /* __UTF8_H__ */
53