1 // utf8.h
2 //
3 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
9 
10 #ifndef _CELUTIL_UTF8_
11 #define _CELUTIL_UTF8_
12 
13 #include <string>
14 
// UTF-8 byte sequences, written with octal escapes, for a few non-ASCII
// symbols. Because each macro expands to a string literal, it can be placed
// directly next to other string literals and the compiler will join them.
#define UTF8_DEGREE_SIGN         "\302\260"  // bytes C2 B0 = U+00B0
#define UTF8_MULTIPLICATION_SIGN "\303\227"  // bytes C3 97 = U+00D7
#define UTF8_SUPERSCRIPT_1       "\302\271"  // bytes C2 B9 = U+00B9
#define UTF8_SUPERSCRIPT_2       "\302\262"  // bytes C2 B2 = U+00B2
#define UTF8_SUPERSCRIPT_3       "\302\263"  // bytes C2 B3 = U+00B3
20 
21 
// UTF-8 decoding, encoding and comparison routines. Only the declarations
// are visible in this header, so the notes below are read off the signatures
// and should be confirmed against the implementation file.

// Decodes from a std::string. ch is an output parameter.
// NOTE(review): pos is presumably the byte offset at which the character
// starts, and the bool result presumably reports whether decoding
// succeeded -- confirm.
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);
// Variant taking a raw character buffer together with an explicit length.
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);
// Encodes ch into the caller-supplied buffer s.
// NOTE(review): the int result is presumably the number of bytes written,
// and s presumably needs room for the longest encoding (UTF8EncodedSize in
// this header can report up to 6 bytes) -- confirm.
int UTF8Encode(wchar_t ch, char* s);
// Compares two strings and returns an int. UTF8StringOrderingPredicate in
// this header uses that result to decide whether s0 is ordered before s1.
// NOTE(review): presumably a three-way (negative / zero / positive style)
// result -- confirm the exact values returned.
int UTF8StringCompare(const std::string& s0, const std::string& s1);
// Variant with an extra length argument.
// NOTE(review): length presumably limits how much of the strings is
// compared; whether it counts bytes or characters is not visible here.
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t length);
27 
28 class UTF8StringOrderingPredicate
29 {
30  public:
operator()31     bool operator()(const std::string& s0, const std::string& s1) const
32     {
33         return UTF8StringCompare(s0, s1) == -1;
34     }
35 };
36 
37 
// Returns a length for s as an int.
// NOTE(review): presumably the number of UTF-8 encoded characters rather
// than the number of bytes -- the implementation is not visible in this
// header, so confirm before relying on it.
int UTF8Length(const std::string& s);
39 
// Returns how many bytes (1 to 6) are needed to encode ch in UTF-8. The
// thresholds follow the original six-byte UTF-8 scheme, so values at or
// above 0x200000 report 5 or 6 bytes. Where wchar_t is a signed type, a
// negative ch satisfies the first test and reports 1.
inline int UTF8EncodedSize(wchar_t ch)
{
    // Each limit is the first value that no longer fits in the sequence
    // length returned on that line.
    if (ch < 0x80)      return 1;   //  7 payload bits
    if (ch < 0x800)     return 2;   // 11 payload bits
    if (ch < 0x10000)   return 3;   // 16 payload bits
    if (ch < 0x200000)  return 4;   // 21 payload bits
    if (ch < 0x4000000) return 5;   // 26 payload bits

    return 6;                       // everything larger
}
55 
// Given the lead byte of a UTF-8 sequence, returns the total length of that
// sequence in bytes (1 to 6, original six-byte UTF-8 scheme). Any value that
// matches none of the lead-byte patterns -- for example a continuation byte
// (10xxxxxx), 0xfe or 0xff -- is reported as length 1.
//
// Apart from the initial "< 0x80" test, only the low eight bits of ch take
// part in the comparisons, so a lead byte that arrived sign-extended from a
// signed char is still classified by its bit pattern.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;                           // 0xxxxxxx: single byte

    if ((ch & 0xe0) == 0xc0) return 2;      // 110xxxxx
    if ((ch & 0xf0) == 0xe0) return 3;      // 1110xxxx
    if ((ch & 0xf8) == 0xf0) return 4;      // 11110xxx
    if ((ch & 0xfc) == 0xf8) return 5;      // 111110xx
    if ((ch & 0xfe) == 0xfc) return 6;      // 1111110x

    // Not a recognised lead byte: fall back to a length of one.
    return 1;
}
75 
// Greek letter abbreviation replacement. Only the declarations are visible
// here. NOTE(review): judging by the name, these presumably rewrite Greek
// letter abbreviations found in the input (possibly into UTF-8 Greek
// characters) -- confirm against the implementation.

// std::string form: takes the input by const reference and returns the
// result as a new string.
std::string ReplaceGreekLetterAbbr(const std::string&);
// Buffer form: src/srcLength describe the input, dst/dstSize the
// caller-supplied output buffer.
// NOTE(review): the unsigned result is presumably the number of bytes
// written to dst; truncation and null-termination behaviour are not visible
// from this header -- confirm.
unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
78 
// Greek alphabet support: an enumeration of the 24 letters plus storage for
// letter names and abbreviations.
//
// The constructor and destructor are private, so code outside the class
// cannot create or destroy Greek objects itself.
// NOTE(review): together with the static `instance` pointer this looks like
// a single shared object set up by the implementation file -- confirm there.
class Greek
{
 public:
    // Letter identifiers, numbered consecutively in alphabet order starting
    // at Alpha = 1 and ending at Omega = 24. Only the first value is
    // spelled out; the rest follow by the enum's automatic increment.
    enum Letter {
        Alpha = 1,
        Beta,       //  2
        Gamma,      //  3
        Delta,      //  4
        Epsilon,    //  5
        Zeta,       //  6
        Eta,        //  7
        Theta,      //  8
        Iota,       //  9
        Kappa,      // 10
        Lambda,     // 11
        Mu,         // 12
        Nu,         // 13
        Xi,         // 14
        Omicron,    // 15
        Pi,         // 16
        Rho,        // 17
        Sigma,      // 18
        Tau,        // 19
        Upsilon,    // 20
        Phi,        // 21
        Chi,        // 22
        Psi,        // 23
        Omega       // 24
    };

    // Takes a string and returns a reference to a string.
    // NOTE(review): presumably maps a letter name or abbreviation to its
    // canonical abbreviation; the lifetime of the referenced string and the
    // result for unrecognised input are not visible here -- confirm.
    static const std::string& canonicalAbbreviation(const std::string&);

    // Pointer to a Greek object shared through the class.
    static Greek* instance;

    // NOTE(review): names and abbrevs presumably each point to an array of
    // nLetters strings owned by this object -- confirm in the implementation.
    int nLetters;
    std::string* names;
    std::string* abbrevs;

 private:
    Greek();
    ~Greek();
};
121 
122 #endif // _CELUTIL_UTF8_
123