1 
2 /***************************************************************************
3  *                    __            __ _ ___________                       *
4  *                    \ \          / /| |____   ____|                      *
5  *                     \ \        / / | |    | |                           *
6  *                      \ \  /\  / /  | |    | |                           *
7  *                       \ \/  \/ /   | |    | |                           *
8  *                        \  /\  /    | |    | |                           *
9  *                         \/  \/     |_|    |_|                           *
10  *                                                                         *
11  *                           Wiimms ISO Tools                              *
12  *                         http://wit.wiimm.de/                            *
13  *                                                                         *
14  ***************************************************************************
15  *                                                                         *
16  *   This file is part of the WIT project.                                 *
17  *   Visit http://wit.wiimm.de/ for project details and sources.           *
18  *                                                                         *
19  *   Copyright (c) 2009-2013 by Dirk Clemens <wiimm@wiimm.de>              *
20  *                                                                         *
21  ***************************************************************************
22  *                                                                         *
23  *   This program is free software; you can redistribute it and/or modify  *
24  *   it under the terms of the GNU General Public License as published by  *
25  *   the Free Software Foundation; either version 2 of the License, or     *
26  *   (at your option) any later version.                                   *
27  *                                                                         *
28  *   This program is distributed in the hope that it will be useful,       *
29  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
30  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
31  *   GNU General Public License for more details.                          *
32  *                                                                         *
33  *   See file gpl-2.0.txt or http://www.gnu.org/licenses/gpl-2.0.txt       *
34  *                                                                         *
35  ***************************************************************************/
36 
37 #ifndef __LIBDC_UTF8_H
38 #define __LIBDC_UTF8_H 1
39 
40 #include "types.h"
41 
42 ///////////////////////////////////////////////////////////////////////////////
/////   this software is taken from dcLib2 and now published under GPL2.  /////
44 ///////////////////////////////////////////////////////////////////////////////
45 
// Numeric limits used by the UTF-8 helpers.
// DC_UNICODE_MAX_UTF8_<n> is the largest value that fits into the payload
// bits of an n-byte UTF-8 sequence (7, 11, 16 and 21 bits respectively).
typedef enum dcUnicodeConsts
{
    DC_UNICODE_MAX_UTF8_1	= 0x7f,		//  7 payload bits
    DC_UNICODE_MAX_UTF8_2	= 0x7ff,	// 11 payload bits
    DC_UNICODE_MAX_UTF8_3	= 0xffff,	// 16 payload bits
    DC_UNICODE_MAX_UTF8_4	= 0x1fffff,	// 21 payload bits

    // 21-bit mask; same value as DC_UNICODE_MAX_UTF8_4.
    DC_UNICODE_CODE_MASK	= 0x1fffff,

} dcUnicodeConsts;
56 
57 ///////////////////////////////////////////////////////////////////////////////
58 
// Classification flags for a single byte of UTF-8 text.
// Every flag is a distinct bit, so several of them can be OR-ed together
// into one value.
typedef enum dcUTF8Mode
{
	DC_UTF8_ILLEGAL		= 0x0000, // illegal UTF-8 byte combination

	DC_UTF8_1CHAR		= 0x0001, // the byte is a complete single character

	DC_UTF8_2CHAR		= 0x0002, // start of a 2-byte sequence
	DC_UTF8_CONT_22		= 0x0004, // continuation byte at pos 2 of a 2-byte sequence

	DC_UTF8_3CHAR		= 0x0008, // start of a 3-byte sequence
	DC_UTF8_CONT_23		= 0x0010, // continuation byte at pos 2 of a 3-byte sequence
	DC_UTF8_CONT_33		= 0x0020, // continuation byte at pos 3 of a 3-byte sequence

	DC_UTF8_4CHAR		= 0x0040, // start of a 4-byte sequence
	DC_UTF8_CONT_24		= 0x0080, // continuation byte at pos 2 of a 4-byte sequence
	DC_UTF8_CONT_34		= 0x0100, // continuation byte at pos 3 of a 4-byte sequence
	DC_UTF8_CONT_44		= 0x0200, // continuation byte at pos 4 of a 4-byte sequence

	DC_UTF8_CONT_ANY	= 0x0400, // continuation byte at any position

	DC_UTF8_1CHAR_POSSIBLE	= 0x0800, // representable as a single byte
	DC_UTF8_2CHAR_POSSIBLE	= 0x1000, // representable as a 2-byte sequence
	DC_UTF8_3CHAR_POSSIBLE	= 0x2000, // representable as a 3-byte sequence
	DC_UTF8_4CHAR_POSSIBLE	= 0x4000, // representable as a 4-byte sequence
} dcUTF8Mode;
84 
85 ///////////////////////////////////////////////////////////////////////////////
86 
87 extern const unsigned short TableUTF8Mode[256];
88 dcUTF8Mode CheckUTF8Mode ( unsigned char ch );
89 
90 int	GetUTF8CharLength	( ulong code );
91 char *	NextUTF8Char		( ccp str );
92 char *	NextUTF8CharE		( ccp str, ccp end );
93 char *	PrevUTF8Char		( ccp str );
94 char *  PrevUTF8CharB		( ccp str, ccp begin );
95 ulong	GetUTF8Char		( ccp str );
96 ulong	ScanUTF8Char		( ccp * str );
97 ulong	ScanUTF8CharE		( ccp * str, ccp end );
98 ulong	ScanUTF8CharInc		( ccp * str );
99 ulong	ScanUTF8CharIncE	( ccp * str, ccp end );
100 ulong	GetUTF8AnsiChar		( ccp str );
101 ulong	ScanUTF8AnsiChar	( ccp * str );
102 ulong	ScanUTF8AnsiCharE	( ccp * str, ccp end );
103 int	ScanUTF8Length		( ccp str, ccp end );
104 char *	PrintUTF8Char		( char * buf, ulong code );
105 
106 ///////////////////////////////////////////////////////////////////////////////
107 
108 typedef struct dcUnicodeTripel
109 {
110 	ulong code1;
111 	ulong code2;
112 	ulong code3;
113 } dcUnicodeTripel;
114 
115 extern const dcUnicodeTripel TableUnicodeDecomp[];
116 const dcUnicodeTripel * DecomposeUnicode ( ulong code );
117 
118 ///////////////////////////////////////////////////////////////////////////////
119 
120 #endif //__LIBDC_UTF8_H
121 
122