1 2 /*************************************************************************** 3 * __ __ _ ___________ * 4 * \ \ / /| |____ ____| * 5 * \ \ / / | | | | * 6 * \ \ /\ / / | | | | * 7 * \ \/ \/ / | | | | * 8 * \ /\ / | | | | * 9 * \/ \/ |_| |_| * 10 * * 11 * Wiimms ISO Tools * 12 * http://wit.wiimm.de/ * 13 * * 14 *************************************************************************** 15 * * 16 * This file is part of the WIT project. * 17 * Visit http://wit.wiimm.de/ for project details and sources. * 18 * * 19 * Copyright (c) 2009-2013 by Dirk Clemens <wiimm@wiimm.de> * 20 * * 21 *************************************************************************** 22 * * 23 * This program is free software; you can redistribute it and/or modify * 24 * it under the terms of the GNU General Public License as published by * 25 * the Free Software Foundation; either version 2 of the License, or * 26 * (at your option) any later version. * 27 * * 28 * This program is distributed in the hope that it will be useful, * 29 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 31 * GNU General Public License for more details. * 32 * * 33 * See file gpl-2.0.txt or http://www.gnu.org/licenses/gpl-2.0.txt * 34 * * 35 ***************************************************************************/ 36 37 #ifndef __LIBDC_UTF8_H 38 #define __LIBDC_UTF8_H 1 39 40 #include "types.h" 41 42 /////////////////////////////////////////////////////////////////////////////// 43 ///// this software is taken from dcLib2 and now published under GPL2. 
///// 44 /////////////////////////////////////////////////////////////////////////////// 45 46 typedef enum dcUnicodeConsts 47 { 48 DC_UNICODE_MAX_UTF8_1 = 0x7f, 49 DC_UNICODE_MAX_UTF8_2 = 0x7ff, 50 DC_UNICODE_MAX_UTF8_3 = 0xffff, 51 DC_UNICODE_MAX_UTF8_4 = 0x1fffff, 52 53 DC_UNICODE_CODE_MASK = 0x1fffff, 54 55 } dcUnicodeConsts; 56 57 /////////////////////////////////////////////////////////////////////////////// 58 59 typedef enum dcUTF8Mode 60 { 61 DC_UTF8_ILLEGAL = 0x0000, // Illegale UTF8 Zeichen-Kombination 62 63 DC_UTF8_1CHAR = 0x0001, // Das Zeichen ist ein Einzelzeichen 64 65 DC_UTF8_2CHAR = 0x0002, // Beginn einer 2-Zeichen Sequenz 66 DC_UTF8_CONT_22 = 0x0004, // ein Fortsetzungszeichen an Pos 2 einer 2-er Sequenz 67 68 DC_UTF8_3CHAR = 0x0008, // Beginn einer 3-Zeichen Sequenz 69 DC_UTF8_CONT_23 = 0x0010, // ein Fortsetzungszeichen an Pos 2 einer 3-er Sequenz 70 DC_UTF8_CONT_33 = 0x0020, // ein Fortsetzungszeichen an Pos 3 einer 3-er Sequenz 71 72 DC_UTF8_4CHAR = 0x0040, // Beginn einer 4-Zeichen Sequenz 73 DC_UTF8_CONT_24 = 0x0080, // ein Fortsetzungszeichen an Pos 2 einer 4-er Sequenz 74 DC_UTF8_CONT_34 = 0x0100, // ein Fortsetzungszeichen an Pos 3 einer 4-er Sequenz 75 DC_UTF8_CONT_44 = 0x0200, // ein Fortsetzungszeichen an Pos 4 einer 4-er Sequenz 76 77 DC_UTF8_CONT_ANY = 0x0400, // ein Fortsetzungszeichen an beliebger Stelle 78 79 DC_UTF8_1CHAR_POSSIBLE = 0x0800, // als Einzelzeichen darstellbar 80 DC_UTF8_2CHAR_POSSIBLE = 0x1000, // als 2-er Sequenz darstellbar 81 DC_UTF8_3CHAR_POSSIBLE = 0x2000, // als 3-er Sequenz darstellbar 82 DC_UTF8_4CHAR_POSSIBLE = 0x4000, // als 4-er Sequenz darstellbar 83 } dcUTF8Mode; 84 85 /////////////////////////////////////////////////////////////////////////////// 86 87 extern const unsigned short TableUTF8Mode[256]; 88 dcUTF8Mode CheckUTF8Mode ( unsigned char ch ); 89 90 int GetUTF8CharLength ( ulong code ); 91 char * NextUTF8Char ( ccp str ); 92 char * NextUTF8CharE ( ccp str, ccp end ); 93 char * PrevUTF8Char ( ccp 
str ); 94 char * PrevUTF8CharB ( ccp str, ccp begin ); 95 ulong GetUTF8Char ( ccp str ); 96 ulong ScanUTF8Char ( ccp * str ); 97 ulong ScanUTF8CharE ( ccp * str, ccp end ); 98 ulong ScanUTF8CharInc ( ccp * str ); 99 ulong ScanUTF8CharIncE ( ccp * str, ccp end ); 100 ulong GetUTF8AnsiChar ( ccp str ); 101 ulong ScanUTF8AnsiChar ( ccp * str ); 102 ulong ScanUTF8AnsiCharE ( ccp * str, ccp end ); 103 int ScanUTF8Length ( ccp str, ccp end ); 104 char * PrintUTF8Char ( char * buf, ulong code ); 105 106 /////////////////////////////////////////////////////////////////////////////// 107 108 typedef struct dcUnicodeTripel 109 { 110 ulong code1; 111 ulong code2; 112 ulong code3; 113 } dcUnicodeTripel; 114 115 extern const dcUnicodeTripel TableUnicodeDecomp[]; 116 const dcUnicodeTripel * DecomposeUnicode ( ulong code ); 117 118 /////////////////////////////////////////////////////////////////////////////// 119 120 #endif //__LIBDC_UTF8_H 121 122