// utf8.h
//
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
9
10 #ifndef _CELUTIL_UTF8_
11 #define _CELUTIL_UTF8_
12
13 #include <string>
14
// UTF-8 encodings of a few non-ASCII symbols, spelled with octal escapes so
// that the header itself stays pure ASCII. They are kept as macros (rather
// than constants) so they can be pasted into adjacent string literals, e.g.
// "45" UTF8_DEGREE_SIGN.
#define UTF8_DEGREE_SIGN         "\302\260"  // U+00B0 DEGREE SIGN
#define UTF8_MULTIPLICATION_SIGN "\303\227"  // U+00D7 MULTIPLICATION SIGN
#define UTF8_SUPERSCRIPT_1       "\302\271"  // U+00B9 SUPERSCRIPT ONE
#define UTF8_SUPERSCRIPT_2       "\302\262"  // U+00B2 SUPERSCRIPT TWO
#define UTF8_SUPERSCRIPT_3       "\302\263"  // U+00B3 SUPERSCRIPT THREE
20
21
// UTF-8 decoding, encoding and comparison entry points. Only the
// declarations are visible in this header, so the behavioral notes marked
// NOTE(review) are inferred from the signatures and must be confirmed
// against the implementation file.

// Decode one character of str, starting at offset pos, into ch.
// NOTE(review): presumably pos is a byte offset and the result is false
// when no valid sequence could be decoded there -- confirm.
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);

// Same as above for a raw character buffer; length gives the size of str.
// NOTE(review): presumably length is counted in bytes -- confirm.
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);

// Encode ch as UTF-8 into the buffer s.
// NOTE(review): presumably returns the number of bytes written and expects
// s to have room for the whole sequence (UTF8EncodedSize reports up to 6
// bytes); whether a terminating NUL is appended is not visible here.
int UTF8Encode(wchar_t ch, char* s);

// Three-way comparison of two UTF-8 strings. UTF8StringOrderingPredicate
// uses the result to decide whether s0 orders before s1.
// NOTE(review): the exact ordering rules (case folding, accent handling)
// are defined by the implementation -- confirm before relying on them.
int UTF8StringCompare(const std::string& s0, const std::string& s1);

// Variant of the comparison above limited by length.
// NOTE(review): presumably length caps the number of characters compared;
// whether it counts bytes or decoded characters needs confirming.
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t length);
27
28 class UTF8StringOrderingPredicate
29 {
30 public:
operator()31 bool operator()(const std::string& s0, const std::string& s1) const
32 {
33 return UTF8StringCompare(s0, s1) == -1;
34 }
35 };
36
37
// Length of the UTF-8 string s.
// NOTE(review): presumably the number of encoded characters rather than the
// number of bytes (s.length() already gives the latter); how malformed
// input is counted is not visible from this header -- confirm.
int UTF8Length(const std::string& s);
39
// Return the number of bytes required to encode the character ch in UTF-8.
//
// The thresholds follow the payload capacity of each sequence length:
//   1 byte  : up to  7 bits (ch < 0x80)
//   2 bytes : up to 11 bits (ch < 0x800)
//   3 bytes : up to 16 bits (ch < 0x10000)
//   4 bytes : up to 21 bits (ch < 0x200000)
//   5 bytes : up to 26 bits (ch < 0x4000000)
//   6 bytes : everything larger
//
// The 5- and 6-byte forms belong to the original, extended UTF-8 scheme;
// current UTF-8 (RFC 3629) stops at 4 bytes. Where wchar_t is a signed
// type, a negative ch satisfies the first test and yields 1.
inline int UTF8EncodedSize(wchar_t ch)
{
    if (ch < 0x80)
        return 1;
    if (ch < 0x800)
        return 2;
    if (ch < 0x10000)
        return 3;
    if (ch < 0x200000)
        return 4;
    if (ch < 0x4000000)
        return 5;
    return 6;
}
55
// Return the total length in bytes of a UTF-8 sequence, judged from its
// first byte ch.
//
// Lead-byte bit patterns recognised:
//   0xxxxxxx -> 1      1110xxxx -> 3      111110xx -> 5
//   110xxxxx -> 2      11110xxx -> 4      1111110x -> 6
//
// Any other value (a continuation byte 10xxxxxx, or 0xFE / 0xFF) is
// reported as length 1, so a caller stepping through a buffer by the
// returned amount always advances by at least one byte.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;               // plain ASCII
    if ((ch & 0xe0) == 0xc0)
        return 2;
    if ((ch & 0xf0) == 0xe0)
        return 3;
    if ((ch & 0xf8) == 0xf0)
        return 4;
    if ((ch & 0xfc) == 0xf8)
        return 5;
    if ((ch & 0xfe) == 0xfc)
        return 6;

    // Not a valid lead byte: fall back to a single byte.
    return 1;
}
75
// Greek letter abbreviation substitution. Only the declarations are visible
// in this header; the notes marked NOTE(review) are inferred from the names
// and signatures and must be confirmed against the implementation.

// Return a copy of the argument with Greek letter abbreviations replaced.
// NOTE(review): presumably an abbreviation is replaced by the UTF-8
// encoding of the corresponding Greek letter -- confirm which positions in
// the string are examined and which abbreviations are recognised.
std::string ReplaceGreekLetterAbbr(const std::string&);

// Buffer-based variant: reads srcLength characters from src and writes the
// result into dst, whose capacity is dstSize.
// NOTE(review): presumably the return value is the length of the text
// written to dst; truncation and NUL-termination behavior are not visible
// from this header -- confirm before sizing buffers.
unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
78
// Table of Greek letters and their abbreviations.
//
// The constructor and destructor are private, so objects of this class can
// only be created and destroyed by the class's own member functions; the
// static `instance` pointer is how the rest of the program reaches the
// table.
// NOTE(review): presumably `instance` is created on first use inside
// canonicalAbbreviation() -- confirm against the implementation.
class Greek
{
private:
    Greek();
    ~Greek();

    // The class holds raw pointers to arrays, so an implicit member-wise
    // copy would alias them. Copying is disabled by declaring the copy
    // operations private and leaving them unimplemented.
    Greek(const Greek&);
    Greek& operator=(const Greek&);

public:
    // Greek alphabet in its usual order, numbered from 1 (Alpha) to
    // 24 (Omega).
    enum Letter {
        Alpha   =  1,
        Beta    =  2,
        Gamma   =  3,
        Delta   =  4,
        Epsilon =  5,
        Zeta    =  6,
        Eta     =  7,
        Theta   =  8,
        Iota    =  9,
        Kappa   = 10,
        Lambda  = 11,
        Mu      = 12,
        Nu      = 13,
        Xi      = 14,
        Omicron = 15,
        Pi      = 16,
        Rho     = 17,
        Sigma   = 18,
        Tau     = 19,
        Upsilon = 20,
        Phi     = 21,
        Chi     = 22,
        Psi     = 23,
        Omega   = 24
    };

    // Map a Greek letter name or abbreviation to its canonical abbreviation.
    // NOTE(review): what is returned for an unrecognised argument, and the
    // lifetime of the referenced string, are not visible here -- confirm.
    static const std::string& canonicalAbbreviation(const std::string&);

    static Greek* instance;   // shared table object

    // NOTE(review): presumably `names` and `abbrevs` each point to an array
    // of `nLetters` strings owned by this object -- confirm.
    int nLetters;
    std::string* names;
    std::string* abbrevs;
};
121
122 #endif // _CELUTIL_UTF8_
123