1 // Copyright (c) 1994, 1997 James Clark
2 // See the file COPYING for copying permission.
3
4 #ifndef UnivCharsetDesc_INCLUDED
5 #define UnivCharsetDesc_INCLUDED 1
6 #ifdef __GNUG__
7 #pragma interface
8 #endif
9
10 #include <stddef.h>
11 #include "types.h"
12 #include "constant.h"
13 #include "CharMap.h"
14 #include "RangeMap.h"
15 #include "Boolean.h"
16 #include "ISet.h"
17
18 #ifdef SP_NAMESPACE
19 namespace SP_NAMESPACE {
20 #endif
21
22 class SP_API UnivCharsetDesc {
23 public:
24 struct SP_API Range {
25 WideChar descMin;
26 // Note that this is a count, as in the SGML declaration,
27 // rather than a maximum.
28 unsigned long count;
29 UnivChar univMin;
30 };
31 enum {
32 zero = 48,
33 A = 65,
34 a = 97,
35 tab = 9,
36 rs = 10,
37 re = 13,
38 space = 32,
39 exclamation = 33,
40 lessThan = 60,
41 greaterThan = 62
42 };
43 UnivCharsetDesc();
44 UnivCharsetDesc(const Range *, size_t);
UnivCharsetDesc(const UnivCharsetDesc & x)45 inline UnivCharsetDesc(const UnivCharsetDesc& x) :
46 charMap_(x.charMap_), rangeMap_(x.rangeMap_) {}
~UnivCharsetDesc()47 inline ~UnivCharsetDesc() {}
48 void set(const Range *, size_t);
49 Boolean descToUniv(WideChar from, UnivChar &to) const;
50 Boolean descToUniv(WideChar from, UnivChar &to, WideChar &alsoMax) const;
51 // Return 0 for no matches, 1 for 1, 2 for more than 1
52 unsigned univToDesc(UnivChar from, WideChar &to, ISet<WideChar> &toSet)
53 const;
54 unsigned univToDesc(UnivChar from, WideChar &to, ISet<WideChar> &toSet,
55 WideChar &count)
56 const;
57 void addRange(WideChar descMin, WideChar descMax, UnivChar univMin);
58 void addBaseRange(const UnivCharsetDesc &baseSet,
59 WideChar descMin,
60 WideChar descMax,
61 WideChar baseMin,
62 ISet<WideChar> &baseMissing);
63 private:
noDesc(Unsigned32 n)64 static Boolean noDesc(Unsigned32 n) {
65 return (n & (unsigned(1) << 31));
66 }
extractChar(Unsigned32 n,Char ch)67 static UnivChar extractChar(Unsigned32 n, Char ch) {
68 return UnivChar((n + ch) & ((unsigned(1) << 31) - 1));
69 }
wrapChar(UnivChar univ,Char ch)70 static Unsigned32 wrapChar(UnivChar univ, Char ch) {
71 return Unsigned32((univ - ch) & ((unsigned(1) << 31) - 1));
72 }
73 // For characters <= charMax.<
74 CharMap<Unsigned32> charMap_;
75 // For characters > charMax.
76 RangeMap<WideChar,UnivChar> rangeMap_;
77 friend class UnivCharsetDescIter;
78 };
79
80 class SP_API UnivCharsetDescIter {
81 public:
82 UnivCharsetDescIter(const UnivCharsetDesc &);
83 Boolean next(WideChar &descMin, WideChar &descMax, UnivChar &univMin);
84 void skipTo(WideChar);
85 private:
86 const CharMap<Unsigned32> *charMap_;
87 Char nextChar_;
88 Boolean doneCharMap_;
89 RangeMapIter<WideChar,UnivChar> rangeMapIter_;
90 };
91
92 inline
descToUniv(WideChar from,UnivChar & to)93 Boolean UnivCharsetDesc::descToUniv(WideChar from, UnivChar &to) const
94 {
95 if (from > charMax) {
96 WideChar tem;
97 return rangeMap_.map(from, to, tem);
98 }
99 else {
100 Unsigned32 tem = charMap_[from];
101 if (noDesc(tem))
102 return 0;
103 else {
104 to = extractChar(tem, from);
105 return 1;
106 }
107 }
108 }
109
110 inline
descToUniv(WideChar from,UnivChar & to,WideChar & alsoMax)111 Boolean UnivCharsetDesc::descToUniv(WideChar from, UnivChar &to,
112 WideChar &alsoMax) const
113 {
114 if (from > charMax)
115 return rangeMap_.map(from, to, alsoMax);
116 else {
117 Char max;
118 Unsigned32 tem = charMap_.getRange(from, max);
119 alsoMax = max;
120 if (noDesc(tem))
121 return 0;
122 else {
123 to = extractChar(tem, from);
124 return 1;
125 }
126 }
127 }
128
129 inline
univToDesc(UnivChar from,WideChar & to,ISet<WideChar> & toSet)130 unsigned UnivCharsetDesc::univToDesc(UnivChar from, WideChar &to,
131 ISet<WideChar> &toSet) const
132 {
133 WideChar tem;
134 return univToDesc(from, to, toSet, tem);
135 }
136
137 inline
skipTo(WideChar ch)138 void UnivCharsetDescIter::skipTo(WideChar ch)
139 {
140 if (ch > charMax)
141 doneCharMap_ = 1;
142 else
143 nextChar_ = ch;
144 }
145
146 #ifdef SP_NAMESPACE
147 }
148 #endif
149
150 #endif /* not UnivCharsetDesc_INCLUDED */
151