1 #ifndef __UTF8_H__ 2 #define __UTF8_H__ 3 4 /* utf8.h -- convert characters to/from UTF-8 5 6 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 7 See tidyp.h for the copyright notice. 8 9 */ 10 11 #include "platform.h" 12 #include "buffio.h" 13 14 /* UTF-8 encoding/decoding support 15 ** Does not convert character "codepoints", i.e. to/from 10646. 16 */ 17 18 int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes, 19 TidyInputSource* inp, int* count ); 20 21 int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf, 22 TidyOutputSink* outp, int* count ); 23 24 25 uint TY_(GetUTF8)( ctmbstr str, uint *ch ); 26 tmbstr TY_(PutUTF8)( tmbstr buf, uint c ); 27 28 #define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */ 29 #define UNICODE_BOM UNICODE_BOM_BE 30 #define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */ 31 #define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */ 32 33 34 Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 ); 35 Bool TY_(IsHighSurrogate)( tchar ch ); 36 Bool TY_(IsLowSurrogate)( tchar ch ); 37 38 Bool TY_(IsCombinedChar)( tchar ch ); 39 Bool TY_(IsValidCombinedChar)( tchar ch ); 40 41 tchar TY_(CombineSurrogatePair)( tchar high, tchar low ); 42 Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low ); 43 44 45 46 #endif /* __UTF8_H__ */ 47