1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3
4 #ifdef __GNUG__
5 #pragma implementation
6 #endif
7 #include "splib.h"
8 #include "types.h"
9 #include "macros.h"
10 #include "StringOf.h"
11 #include "Trie.h"
12 #include "TrieBuilder.h"
13 #include "Priority.h"
14 #include <stdlib.h>
15
16 #ifdef SP_NAMESPACE
17 namespace SP_NAMESPACE {
18 #endif
19
~Trie()20 Trie::~Trie()
21 {
22 if (next_)
23 delete [] next_;
24 }
25
Trie(const Trie & t)26 Trie::Trie(const Trie &t)
27 : nCodes_(t.nCodes_),
28 token_(t.token_),
29 tokenLength_(t.tokenLength_),
30 priority_(t.priority_),
31 blank_(t.blank_)
32 {
33 if (t.next_) {
34 next_ = new Trie[nCodes_];
35 for (int i = 0; i < nCodes_; i++)
36 next_[i] = t.next_[i];
37 }
38 else
39 next_ = 0;
40 }
41
operator =(const Trie & t)42 Trie &Trie::operator=(const Trie &t)
43 {
44 if (next_)
45 delete [] next_;
46 nCodes_ = t.nCodes_;
47 token_ = t.token_;
48 tokenLength_ = t.tokenLength_;
49 priority_ = t.priority_;
50 blank_ = t.blank_;
51 if (t.next_) {
52 next_ = new Trie[nCodes_];
53 for (int i = 0; i < nCodes_; i++)
54 next_[i] = t.next_[i];
55 }
56 else
57 next_ = 0;
58 return *this;
59 }
60
TrieBuilder(int nCodes)61 TrieBuilder::TrieBuilder(int nCodes)
62 : nCodes_(nCodes), root_(new Trie)
63 {
64 root_->token_ = 0;
65 root_->tokenLength_ = 0;
66 root_->priority_ = Priority::data;
67 root_->nCodes_ = nCodes;
68 }
69
recognize(const String<EquivCode> & chars,Token t,Priority::Type priority,TokenVector & ambiguities)70 void TrieBuilder::recognize(const String<EquivCode> &chars,
71 Token t,
72 Priority::Type priority,
73 TokenVector &ambiguities)
74 {
75 setToken(extendTrie(root_.pointer(), chars), chars.size(), t, priority,
76 ambiguities);
77 }
78
recognize(const String<EquivCode> & chars,const String<EquivCode> & set,Token t,Priority::Type priority,TokenVector & ambiguities)79 void TrieBuilder::recognize(const String<EquivCode> &chars,
80 const String<EquivCode> &set,
81 Token t,
82 Priority::Type priority,
83 TokenVector &ambiguities)
84 {
85 Trie *trie = extendTrie(root_.pointer(), chars);
86
87 for (size_t i = 0; i < set.size(); i++)
88 setToken(forceNext(trie, set[i]), chars.size() + 1, t, priority,
89 ambiguities);
90 }
91
recognizeB(const String<EquivCode> & chars,int bSequenceLength,size_t maxBlankSequence,const String<EquivCode> & blankCodes,const String<EquivCode> & chars2,Token token,TokenVector & ambiguities)92 void TrieBuilder::recognizeB(const String<EquivCode> &chars,
93 int bSequenceLength,
94 size_t maxBlankSequence,
95 const String<EquivCode> &blankCodes,
96 const String<EquivCode> &chars2,
97 Token token,
98 TokenVector &ambiguities)
99 {
100 doB(extendTrie(root_.pointer(), chars),
101 chars.size(),
102 bSequenceLength,
103 maxBlankSequence,
104 blankCodes,
105 chars2,
106 token,
107 Priority::blank(bSequenceLength),
108 ambiguities);
109 }
110
recognizeEE(EquivCode code,Token t)111 void TrieBuilder::recognizeEE(EquivCode code, Token t)
112 {
113 Trie *trie = forceNext(root_.pointer(), code);
114 trie->tokenLength_ = 0; // it has length 0 in the buffer
115 trie->token_ = t;
116 trie->priority_ = Priority::data;
117 }
118
doB(Trie * trie,int tokenLength,int minBLength,size_t maxLength,const String<EquivCode> & blankCodes,const String<EquivCode> & chars2,Token token,Priority::Type pri,TokenVector & ambiguities)119 void TrieBuilder::doB(Trie *trie,
120 int tokenLength,
121 int minBLength,
122 size_t maxLength,
123 const String<EquivCode> &blankCodes,
124 const String<EquivCode> &chars2,
125 Token token,
126 Priority::Type pri,
127 TokenVector &ambiguities)
128 {
129 if (minBLength == 0 && trie->next_ == 0) {
130 if (!trie->blank_) {
131 BlankTrie *b = new BlankTrie;
132 trie->blank_ = b;
133 b->maxBlanksToScan_ = maxLength;
134 b->additionalLength_ = tokenLength;
135 b->codeIsBlank_.assign(nCodes_, 0);
136 for (size_t i = 0; i < blankCodes.size(); i++)
137 b->codeIsBlank_[blankCodes[i]] = 1;
138 b->tokenLength_ = 0;
139 b->token_ = 0;
140 b->priority_ = Priority::data;
141 b->nCodes_ = nCodes_;
142 }
143 else {
144 // A B sequence is not allowed to be adjacent to a character
145 // that can occur in a blank sequence, so maxLength will be
146 // the same at a node, no matter how we got there.
147 ASSERT(trie->blank_->maxBlanksToScan_ == maxLength);
148 ASSERT(trie->blank_->additionalLength_ == tokenLength);
149 }
150 if (chars2.size() == 0)
151 setToken(trie, tokenLength, token, pri, ambiguities);
152 else
153 setToken(extendTrie(trie->blank_.pointer(), chars2),
154 chars2.size(),
155 token,
156 pri,
157 ambiguities);
158 }
159 else {
160 if (minBLength == 0)
161 setToken(extendTrie(trie, chars2), tokenLength + chars2.size(),
162 token, pri, ambiguities);
163 for (size_t i = 0; i < blankCodes.size(); i++)
164 doB(forceNext(trie, blankCodes[i]),
165 tokenLength + 1,
166 minBLength == 0 ? 0 : minBLength - 1,
167 maxLength - 1,
168 blankCodes,
169 chars2,
170 token,
171 pri,
172 ambiguities);
173 }
174 }
175
extendTrie(Trie * trie,const String<EquivCode> & s)176 Trie *TrieBuilder::extendTrie(Trie *trie, const String<EquivCode> &s)
177 {
178 for (size_t i = 0; i < s.size(); i++)
179 trie = forceNext(trie, s[i]);
180 return trie;
181 }
182
setToken(Trie * trie,int tokenLength,Token token,Priority::Type pri,TokenVector & ambiguities)183 void TrieBuilder::setToken(Trie *trie,
184 int tokenLength,
185 Token token,
186 Priority::Type pri,
187 TokenVector &ambiguities)
188 {
189 if (tokenLength > trie->tokenLength_
190 || (tokenLength == trie->tokenLength_
191 && pri > trie->priority_)) {
192 trie->tokenLength_ = tokenLength;
193 trie->token_ = token;
194 trie->priority_ = pri;
195 }
196 else if (trie->tokenLength_ == tokenLength
197 && trie->priority_ == pri
198 && trie->token_ != token
199 && trie->token_ != 0) {
200 ambiguities.push_back(Token(trie->token_));
201 ambiguities.push_back(token);
202 }
203 if (trie->hasNext()) {
204 for (int i = 0; i < nCodes_; i++)
205 setToken(&trie->next_[i], tokenLength, token, pri, ambiguities);
206 }
207 }
208
copyInto(Trie * into,const Trie * from,int additionalLength)209 void TrieBuilder::copyInto(Trie *into, const Trie *from, int additionalLength)
210 {
211 if (from->token_ != 0) {
212 TokenVector ambiguities;
213 setToken(into, from->tokenLength_ + additionalLength, from->token_,
214 from->priority_, ambiguities);
215 ASSERT(ambiguities.size() == 0);
216 }
217 if (from->hasNext())
218 for (int i = 0; i < nCodes_; i++)
219 copyInto(forceNext(into, i), &from->next_[i], additionalLength);
220 }
221
forceNext(Trie * trie,EquivCode c)222 Trie *TrieBuilder::forceNext(Trie *trie, EquivCode c)
223 {
224 if (!trie->hasNext()) {
225 trie->next_ = new Trie[nCodes_];
226 if (trie->blank_) {
227 trie->blank_->additionalLength_ += 1;
228 trie->blank_->maxBlanksToScan_ -= 1;
229 }
230 Owner<BlankTrie> blankOwner(trie->blank_.extract());
231 const BlankTrie *b = blankOwner.pointer();
232 for (int i = 0; i < nCodes_; i++) {
233 Trie *p = &trie->next_[i];
234 if (b && b->codeIsBlank(i))
235 trie->next_[i].blank_ = (blankOwner
236 ? blankOwner.extract()
237 : new BlankTrie(*b));
238 p->token_ = trie->token_;
239 p->tokenLength_ = trie->tokenLength_;
240 p->priority_ = trie->priority_;
241 p->nCodes_ = nCodes_;
242 }
243 if (b)
244 // -1 because 1 was added above
245 copyInto(trie, b, b->additionalLength_ - 1);
246 }
247 return &trie->next_[c];
248 }
249
250 #ifdef SP_NAMESPACE
251 }
252 #endif
253