1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
4 */
5
6 #include "misc/MurmurHash.h"
7 #include "Lexer.h"
8 #include "Exceptions.h"
9 #include "Vocabulary.h"
10
11 #include "misc/IntervalSet.h"
12
13 using namespace antlr4;
14 using namespace antlr4::misc;
15
16 IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
17 IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
18
19 IntervalSet const IntervalSet::EMPTY_SET;
20
IntervalSet()21 IntervalSet::IntervalSet() : _intervals() {
22 }
23
IntervalSet(const IntervalSet & set)24 IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
25 _intervals = set._intervals;
26 }
27
IntervalSet(IntervalSet && set)28 IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
29 }
30
IntervalSet(std::vector<Interval> && intervals)31 IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
32 }
33
operator =(const IntervalSet & other)34 IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
35 _intervals = other._intervals;
36 return *this;
37 }
38
operator =(IntervalSet && other)39 IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
40 _intervals = move(other._intervals);
41 return *this;
42 }
43
of(ssize_t a)44 IntervalSet IntervalSet::of(ssize_t a) {
45 return IntervalSet({ Interval(a, a) });
46 }
47
of(ssize_t a,ssize_t b)48 IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
49 return IntervalSet({ Interval(a, b) });
50 }
51
clear()52 void IntervalSet::clear() {
53 _intervals.clear();
54 }
55
add(ssize_t el)56 void IntervalSet::add(ssize_t el) {
57 add(el, el);
58 }
59
add(ssize_t a,ssize_t b)60 void IntervalSet::add(ssize_t a, ssize_t b) {
61 add(Interval(a, b));
62 }
63
add(const Interval & addition)64 void IntervalSet::add(const Interval &addition) {
65 if (addition.b < addition.a) {
66 return;
67 }
68
69 // find position in list
70 for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) {
71 Interval r = *iterator;
72 if (addition == r) {
73 return;
74 }
75
76 if (addition.adjacent(r) || !addition.disjoint(r)) {
77 // next to each other, make a single larger interval
78 Interval bigger = addition.Union(r);
79 *iterator = bigger;
80
81 // make sure we didn't just create an interval that
82 // should be merged with next interval in list
83 while (iterator + 1 != _intervals.end()) {
84 Interval next = *++iterator;
85 if (!bigger.adjacent(next) && bigger.disjoint(next)) {
86 break;
87 }
88
89 // if we bump up against or overlap next, merge
90 iterator = _intervals.erase(iterator);// remove this one
91 --iterator; // move backwards to what we just set
92 *iterator = bigger.Union(next); // set to 3 merged ones
93 // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result
94 }
95 return;
96 }
97
98 if (addition.startsBeforeDisjoint(r)) {
99 // insert before r
100 //--iterator;
101 _intervals.insert(iterator, addition);
102 return;
103 }
104
105 // if disjoint and after r, a future iteration will handle it
106 }
107
108 // ok, must be after last interval (and disjoint from last interval)
109 // just add it
110 _intervals.push_back(addition);
111 }
112
Or(const std::vector<IntervalSet> & sets)113 IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
114 IntervalSet result;
115 for (auto &s : sets) {
116 result.addAll(s);
117 }
118 return result;
119 }
120
addAll(const IntervalSet & set)121 IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
122 // walk set and add each interval
123 for (auto const& interval : set._intervals) {
124 add(interval);
125 }
126 return *this;
127 }
128
complement(ssize_t minElement,ssize_t maxElement) const129 IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const {
130 return complement(IntervalSet::of(minElement, maxElement));
131 }
132
complement(const IntervalSet & vocabulary) const133 IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
134 return vocabulary.subtract(*this);
135 }
136
subtract(const IntervalSet & other) const137 IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
138 return subtract(*this, other);
139 }
140
subtract(const IntervalSet & left,const IntervalSet & right)141 IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
142 if (left.isEmpty()) {
143 return IntervalSet();
144 }
145
146 if (right.isEmpty()) {
147 // right set has no elements; just return the copy of the current set
148 return left;
149 }
150
151 IntervalSet result(left);
152 size_t resultI = 0;
153 size_t rightI = 0;
154 while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
155 Interval &resultInterval = result._intervals[resultI];
156 const Interval &rightInterval = right._intervals[rightI];
157
158 // operation: (resultInterval - rightInterval) and update indexes
159
160 if (rightInterval.b < resultInterval.a) {
161 rightI++;
162 continue;
163 }
164
165 if (rightInterval.a > resultInterval.b) {
166 resultI++;
167 continue;
168 }
169
170 Interval beforeCurrent;
171 Interval afterCurrent;
172 if (rightInterval.a > resultInterval.a) {
173 beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1);
174 }
175
176 if (rightInterval.b < resultInterval.b) {
177 afterCurrent = Interval(rightInterval.b + 1, resultInterval.b);
178 }
179
180 if (beforeCurrent.a > -1) { // -1 is the default value
181 if (afterCurrent.a > -1) {
182 // split the current interval into two
183 result._intervals[resultI] = beforeCurrent;
184 result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
185 resultI++;
186 rightI++;
187 } else {
188 // replace the current interval
189 result._intervals[resultI] = beforeCurrent;
190 resultI++;
191 }
192 } else {
193 if (afterCurrent.a > -1) {
194 // replace the current interval
195 result._intervals[resultI] = afterCurrent;
196 rightI++;
197 } else {
198 // remove the current interval (thus no need to increment resultI)
199 result._intervals.erase(result._intervals.begin() + resultI);
200 }
201 }
202 }
203
204 // If rightI reached right.intervals.size(), no more intervals to subtract from result.
205 // If resultI reached result.intervals.size(), we would be subtracting from an empty set.
206 // Either way, we are done.
207 return result;
208 }
209
Or(const IntervalSet & a) const210 IntervalSet IntervalSet::Or(const IntervalSet &a) const {
211 IntervalSet result;
212 result.addAll(*this);
213 result.addAll(a);
214 return result;
215 }
216
And(const IntervalSet & other) const217 IntervalSet IntervalSet::And(const IntervalSet &other) const {
218 IntervalSet intersection;
219 size_t i = 0;
220 size_t j = 0;
221
222 // iterate down both interval lists looking for nondisjoint intervals
223 while (i < _intervals.size() && j < other._intervals.size()) {
224 Interval mine = _intervals[i];
225 Interval theirs = other._intervals[j];
226
227 if (mine.startsBeforeDisjoint(theirs)) {
228 // move this iterator looking for interval that might overlap
229 i++;
230 } else if (theirs.startsBeforeDisjoint(mine)) {
231 // move other iterator looking for interval that might overlap
232 j++;
233 } else if (mine.properlyContains(theirs)) {
234 // overlap, add intersection, get next theirs
235 intersection.add(mine.intersection(theirs));
236 j++;
237 } else if (theirs.properlyContains(mine)) {
238 // overlap, add intersection, get next mine
239 intersection.add(mine.intersection(theirs));
240 i++;
241 } else if (!mine.disjoint(theirs)) {
242 // overlap, add intersection
243 intersection.add(mine.intersection(theirs));
244
245 // Move the iterator of lower range [a..b], but not
246 // the upper range as it may contain elements that will collide
247 // with the next iterator. So, if mine=[0..115] and
248 // theirs=[115..200], then intersection is 115 and move mine
249 // but not theirs as theirs may collide with the next range
250 // in thisIter.
251 // move both iterators to next ranges
252 if (mine.startsAfterNonDisjoint(theirs)) {
253 j++;
254 } else if (theirs.startsAfterNonDisjoint(mine)) {
255 i++;
256 }
257 }
258 }
259
260 return intersection;
261 }
262
contains(size_t el) const263 bool IntervalSet::contains(size_t el) const {
264 return contains(symbolToNumeric(el));
265 }
266
contains(ssize_t el) const267 bool IntervalSet::contains(ssize_t el) const {
268 if (_intervals.empty())
269 return false;
270
271 if (el < _intervals[0].a) // list is sorted and el is before first interval; not here
272 return false;
273
274 for (auto &interval : _intervals) {
275 if (el >= interval.a && el <= interval.b) {
276 return true; // found in this interval
277 }
278 }
279 return false;
280 }
281
isEmpty() const282 bool IntervalSet::isEmpty() const {
283 return _intervals.empty();
284 }
285
getSingleElement() const286 ssize_t IntervalSet::getSingleElement() const {
287 if (_intervals.size() == 1) {
288 if (_intervals[0].a == _intervals[0].b) {
289 return _intervals[0].a;
290 }
291 }
292
293 return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work?
294 }
295
getMaxElement() const296 ssize_t IntervalSet::getMaxElement() const {
297 if (_intervals.empty()) {
298 return Token::INVALID_TYPE;
299 }
300
301 return _intervals.back().b;
302 }
303
getMinElement() const304 ssize_t IntervalSet::getMinElement() const {
305 if (_intervals.empty()) {
306 return Token::INVALID_TYPE;
307 }
308
309 return _intervals[0].a;
310 }
311
getIntervals() const312 std::vector<Interval> const& IntervalSet::getIntervals() const {
313 return _intervals;
314 }
315
hashCode() const316 size_t IntervalSet::hashCode() const {
317 size_t hash = MurmurHash::initialize();
318 for (auto &interval : _intervals) {
319 hash = MurmurHash::update(hash, interval.a);
320 hash = MurmurHash::update(hash, interval.b);
321 }
322
323 return MurmurHash::finish(hash, _intervals.size() * 2);
324 }
325
operator ==(const IntervalSet & other) const326 bool IntervalSet::operator == (const IntervalSet &other) const {
327 if (_intervals.empty() && other._intervals.empty())
328 return true;
329
330 if (_intervals.size() != other._intervals.size())
331 return false;
332
333 return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin());
334 }
335
toString() const336 std::string IntervalSet::toString() const {
337 return toString(false);
338 }
339
toString(bool elemAreChar) const340 std::string IntervalSet::toString(bool elemAreChar) const {
341 if (_intervals.empty()) {
342 return "{}";
343 }
344
345 std::stringstream ss;
346 size_t effectiveSize = size();
347 if (effectiveSize > 1) {
348 ss << "{";
349 }
350
351 bool firstEntry = true;
352 for (auto &interval : _intervals) {
353 if (!firstEntry)
354 ss << ", ";
355 firstEntry = false;
356
357 ssize_t a = interval.a;
358 ssize_t b = interval.b;
359 if (a == b) {
360 if (a == -1) {
361 ss << "<EOF>";
362 } else if (elemAreChar) {
363 ss << "'" << static_cast<char>(a) << "'";
364 } else {
365 ss << a;
366 }
367 } else {
368 if (elemAreChar) {
369 ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'";
370 } else {
371 ss << a << ".." << b;
372 }
373 }
374 }
375 if (effectiveSize > 1) {
376 ss << "}";
377 }
378
379 return ss.str();
380 }
381
toString(const std::vector<std::string> & tokenNames) const382 std::string IntervalSet::toString(const std::vector<std::string> &tokenNames) const {
383 return toString(dfa::Vocabulary::fromTokenNames(tokenNames));
384 }
385
toString(const dfa::Vocabulary & vocabulary) const386 std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const {
387 if (_intervals.empty()) {
388 return "{}";
389 }
390
391 std::stringstream ss;
392 size_t effectiveSize = size();
393 if (effectiveSize > 1) {
394 ss << "{";
395 }
396
397 bool firstEntry = true;
398 for (auto &interval : _intervals) {
399 if (!firstEntry)
400 ss << ", ";
401 firstEntry = false;
402
403 ssize_t a = interval.a;
404 ssize_t b = interval.b;
405 if (a == b) {
406 ss << elementName(vocabulary, a);
407 } else {
408 for (ssize_t i = a; i <= b; i++) {
409 if (i > a) {
410 ss << ", ";
411 }
412 ss << elementName(vocabulary, i);
413 }
414 }
415 }
416 if (effectiveSize > 1) {
417 ss << "}";
418 }
419
420 return ss.str();
421 }
422
elementName(const std::vector<std::string> & tokenNames,ssize_t a) const423 std::string IntervalSet::elementName(const std::vector<std::string> &tokenNames, ssize_t a) const {
424 return elementName(dfa::Vocabulary::fromTokenNames(tokenNames), a);
425 }
426
elementName(const dfa::Vocabulary & vocabulary,ssize_t a) const427 std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const {
428 if (a == -1) {
429 return "<EOF>";
430 } else if (a == -2) {
431 return "<EPSILON>";
432 } else {
433 return vocabulary.getDisplayName(a);
434 }
435 }
436
size() const437 size_t IntervalSet::size() const {
438 size_t result = 0;
439 for (auto &interval : _intervals) {
440 result += size_t(interval.b - interval.a + 1);
441 }
442 return result;
443 }
444
toList() const445 std::vector<ssize_t> IntervalSet::toList() const {
446 std::vector<ssize_t> result;
447 for (auto &interval : _intervals) {
448 ssize_t a = interval.a;
449 ssize_t b = interval.b;
450 for (ssize_t v = a; v <= b; v++) {
451 result.push_back(v);
452 }
453 }
454 return result;
455 }
456
toSet() const457 std::set<ssize_t> IntervalSet::toSet() const {
458 std::set<ssize_t> result;
459 for (auto &interval : _intervals) {
460 ssize_t a = interval.a;
461 ssize_t b = interval.b;
462 for (ssize_t v = a; v <= b; v++) {
463 result.insert(v);
464 }
465 }
466 return result;
467 }
468
get(size_t i) const469 ssize_t IntervalSet::get(size_t i) const {
470 size_t index = 0;
471 for (auto &interval : _intervals) {
472 ssize_t a = interval.a;
473 ssize_t b = interval.b;
474 for (ssize_t v = a; v <= b; v++) {
475 if (index == i) {
476 return v;
477 }
478 index++;
479 }
480 }
481 return -1;
482 }
483
remove(size_t el)484 void IntervalSet::remove(size_t el) {
485 remove(symbolToNumeric(el));
486 }
487
remove(ssize_t el)488 void IntervalSet::remove(ssize_t el) {
489 for (size_t i = 0; i < _intervals.size(); ++i) {
490 Interval &interval = _intervals[i];
491 ssize_t a = interval.a;
492 ssize_t b = interval.b;
493 if (el < a) {
494 break; // list is sorted and el is before this interval; not here
495 }
496
497 // if whole interval x..x, rm
498 if (el == a && el == b) {
499 _intervals.erase(_intervals.begin() + (long)i);
500 break;
501 }
502 // if on left edge x..b, adjust left
503 if (el == a) {
504 interval.a++;
505 break;
506 }
507 // if on right edge a..x, adjust right
508 if (el == b) {
509 interval.b--;
510 break;
511 }
512 // if in middle a..x..b, split interval
513 if (el > a && el < b) { // found in this interval
514 ssize_t oldb = interval.b;
515 interval.b = el - 1; // [a..x-1]
516 add(el + 1, oldb); // add [x+1..b]
517
518 break; // ml: not in the Java code but I believe we also should stop searching here, as we found x.
519 }
520 }
521 }
522