1 /* Copyright 2009-2016 Francesco Biscani (bluescarni@gmail.com)
2
3 This file is part of the Piranha library.
4
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11
12 or
13
14 * the GNU General Public License as published by the Free Software
15 Foundation; either version 3 of the License, or (at your option) any
16 later version.
17
18 or both in parallel, as here.
19
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23 for more details.
24
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library. If not,
27 see https://www.gnu.org/licenses/. */
28
29 #ifndef PIRANHA_SYMBOL_SET_HPP
30 #define PIRANHA_SYMBOL_SET_HPP
31
32 #include <algorithm>
33 #include <initializer_list>
34 #include <iterator>
35 #include <limits>
36 #include <set>
37 #include <stdexcept>
38 #include <type_traits>
39 #include <unordered_map>
40 #include <utility>
41 #include <vector>
42
43 #include "config.hpp"
44 #include "detail/init_data.hpp"
45 #include "detail/symbol_set_fwd.hpp"
46 #include "exceptions.hpp"
47 #include "symbol.hpp"
48 #include "type_traits.hpp"
49
50 namespace piranha
51 {
52
53 namespace detail
54 {
55
56 // Requirements for types that can be used in positions_map.
57 template <typename T>
58 struct is_pmappable {
59 static const bool value = std::is_copy_constructible<T>::value && std::is_move_constructible<T>::value
60 && std::is_move_assignable<T>::value && std::is_destructible<T>::value;
61 };
62
63 template <typename T>
64 const bool is_pmappable<T>::value;
65 }
66
67 /// Symbol set.
68 /**
69 * This class represents an ordered set of piranha::symbol. The individual piranha::symbol instances
70 * can be accessed via iterators or the index operator.
71 *
72 * ## Exception safety guarantee ##
73 *
74 * This class provides the strong exception safety guarantee for all operations.
75 *
76 * ## Move semantics ##
77 *
78 * Move construction and move assignment will leave the moved-from object in a state equivalent to a
79 * default-constructed object.
80 */
81 class symbol_set
82 {
check() const83 bool check() const
84 {
85 // Check for sorted range.
86 if (!std::is_sorted(begin(), end())) {
87 return false;
88 }
89 if (size() < 2u) {
90 return true;
91 }
92 // Check for duplicates.
93 for (size_type i = 0u; i < size() - 1u; ++i) {
94 if (m_values[i] == m_values[i + 1u]) {
95 return false;
96 }
97 }
98 return true;
99 }
100 // Enabler for ctor from iterator.
101 template <typename Iterator, typename Symbol>
102 using it_ctor_enabler = typename std::
103 enable_if<is_input_iterator<Iterator>::value
104 && std::is_constructible<Symbol, decltype(*(std::declval<const Iterator &>()))>::value,
105 int>::type;
106
107 public:
108 /// Size type.
109 typedef std::vector<symbol>::size_type size_type;
110 /// Positions class.
111 /**
112 * This is a small utility class that can be used to determine the positions,
113 * in a piranha::symbol_set \p a, of the symbols in the piranha::symbol_set \p b.
114 * The positions are stored internally in an \p std::vector which is guaranteed
115 * to be sorted in ascending order and which can be accessed via the iterators
116 * provided by this class. If a symbol which is present in \p b is not present in
117 * \p a, then it will be ignored.
118 *
119 * For instance, if the set \p a contains the symbols <tt>[B,C,D,E]</tt> and \p b
120 * contains <tt>[A,B,D,F]</tt>, then an instance of this class constructed passing
121 * \p a and \p b as parameters will contain the vector <tt>[0,2]</tt>.
122 */
123 class positions
124 {
125 public:
126 /// Value type.
127 /**
128 * The positions are represented using the size type of piranha::symbol_set.
129 */
130 using value_type = size_type;
131 /// Const iterator.
132 using const_iterator = std::vector<value_type>::const_iterator;
133 explicit positions(const symbol_set &, const symbol_set &);
134 /// Deleted copy constructor.
135 positions(const positions &) = delete;
136 /// Defaulted move constructor.
137 positions(positions &&) = default;
138
139 private:
140 positions &operator=(const positions &) = delete;
141 positions &operator=(positions &&) = delete;
142
143 public:
144 /// Begin iterator.
145 /**
146 * @return an iterator to the begin of the internal vector.
147 */
begin() const148 const_iterator begin() const
149 {
150 return m_values.begin();
151 }
152 /// End iterator.
153 /**
154 * @return an iterator to the end of the internal vector.
155 */
end() const156 const_iterator end() const
157 {
158 return m_values.end();
159 }
160 /// Last element.
161 /**
162 * @return a const reference to the last element.
163 */
back() const164 const value_type &back() const
165 {
166 piranha_assert(m_values.size());
167 return m_values.back();
168 }
169 /// Size.
170 /**
171 * @return the size of the internal vector.
172 */
size() const173 std::vector<value_type>::size_type size() const
174 {
175 return m_values.size();
176 }
177
178 private:
179 std::vector<value_type> m_values;
180 };
181 /// Positions map class.
182 /**
183 * This class is similar to piranha::symbol_set::positions. In addition to storing the positions
184 * of the symbols from a map \p b with respect to a reference piranha::symbol_set \p a, it will also store
185 * the instances of \p T that were originally mapped to the symbols in \p b.
186 *
187 * For instance, if \p T is \p int, \p a contains the symbols <tt>[B,C,D,E]</tt> and \p b is the map
188 * <tt>[(A,10),(B,20),(D,30),(F,40)]</tt>, then an instance of this class constructed passing
189 * \p a and \p b as parameters will contain the vector <tt>[(0,20),(2,30)]</tt>. The internal vector
190 * is guaranteed to be sorted according to the position of the symbols.
191 *
192 * ## Type requirements ##
193 *
194 * \p T must be destructible, copy-constructible, move-constructible and move-assignable.
195 */
196 template <typename T>
197 class positions_map
198 {
199 #if !defined(PIRANHA_DOXYGEN_INVOKED)
200 PIRANHA_TT_CHECK(detail::is_pmappable, T);
201 #endif
202 public:
203 /// Value type.
204 /**
205 * The stored value type is the <tt>(position,T)</tt> pair.
206 */
207 using value_type = std::pair<size_type, T>;
208 /// Const iterator.
209 using const_iterator = typename std::vector<value_type>::const_iterator;
210 explicit positions_map(const symbol_set &, const std::unordered_map<symbol, T> &);
211 /// Deleted copy constructor.
212 positions_map(const positions_map &) = delete;
213 /// Defaulted move constructor.
214 positions_map(positions_map &&) = default;
215
216 private:
217 positions_map &operator=(const positions_map &) = delete;
218 positions_map &operator=(positions_map &&) = delete;
219
220 public:
221 /// Begin iterator.
222 /**
223 * @return an iterator to the begin of the internal vector.
224 */
begin() const225 const_iterator begin() const
226 {
227 return m_pairs.begin();
228 }
229 /// End iterator.
230 /**
231 * @return an iterator to the end of the internal vector.
232 */
end() const233 const_iterator end() const
234 {
235 return m_pairs.end();
236 }
237 /// Size.
238 /**
239 * @return the size of the internal vector.
240 */
size() const241 typename std::vector<value_type>::size_type size() const
242 {
243 return m_pairs.size();
244 }
245 /// Last element.
246 /**
247 * @return a const reference to the last element.
248 */
back() const249 const value_type &back() const
250 {
251 piranha_assert(m_pairs.size());
252 return m_pairs.back();
253 }
254
255 private:
256 std::vector<value_type> m_pairs;
257 };
258 /// Const iterator.
259 typedef std::vector<symbol>::const_iterator const_iterator;
260 /// Defaulted default constructor.
261 /**
262 * Will construct an empty set.
263 */
264 symbol_set() = default;
265 /// Defaulted copy constructor.
266 /**
267 * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
268 */
269 symbol_set(const symbol_set &) = default;
270 /// Defaulted move constructor.
271 symbol_set(symbol_set &&) = default;
272 /// Constructor from initializer list of piranha::symbol.
273 /**
274 * Each symbol in the list will be added via add() to the set.
275 *
276 * @param l list of symbols used for construction.
277 *
278 * @throws unspecified any exception thrown by add().
279 */
symbol_set(std::initializer_list<symbol> l)280 explicit symbol_set(std::initializer_list<symbol> l)
281 {
282 // NOTE: for these types of initialisations from other containers
283 // it might make sense eventually to avoid all these adds, and do
284 // the sorting and elimination of duplicates in one pass to lower
285 // algorithmic complexity.
286 for (const auto &s : l) {
287 add(s);
288 }
289 }
290 /// Constructor from range.
291 /**
292 * \note
293 * This constructor is enabled only if \p Iterator is an input iterator and
294 * piranha::symbol is constructible from its value type.
295 *
296 * The set will be initialised with symbols constructed from the elements of the range.
297 *
298 * @param begin begin iterator.
299 * @param end end iterator.
300 *
301 * @throws unspecified any exception thrown by operations on standard containers or by
302 * the invoked constructor of piranha::symbol.
303 */
304 template <typename Iterator, it_ctor_enabler<Iterator, symbol> = 0>
symbol_set(const Iterator & begin,const Iterator & end)305 explicit symbol_set(const Iterator &begin, const Iterator &end)
306 {
307 // NOTE: this is one possible way of doing this, probably a sorted vector
308 // with std::unique will be faster. Also, this can be shared with the ctor
309 // from init list which can be furtherly generalised.
310 std::set<symbol> s_set;
311 for (Iterator it = begin; it != end; ++it) {
312 s_set.emplace(*it);
313 }
314 std::copy(s_set.begin(), s_set.end(), std::back_inserter(m_values));
315 }
316 /// Copy assignment operator.
317 /**
318 * @param other set to be assigned to \p this.
319 *
320 * @return reference to \p this.
321 *
322 * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
323 */
operator =(const symbol_set & other)324 symbol_set &operator=(const symbol_set &other)
325 {
326 if (likely(this != &other)) {
327 symbol_set tmp(other);
328 *this = std::move(tmp);
329 }
330 return *this;
331 }
332 /// Move assignment operator.
333 /**
334 * @param other assignment argument.
335 *
336 * @return reference to \p this.
337 */
operator =(symbol_set && other)338 symbol_set &operator=(symbol_set &&other) noexcept
339 {
340 // NOTE: here the idea is that in principle we want to be able to move-assign self,
341 // and we don't want to rely on std::vector to support this. Hence, the explicit check.
342 if (likely(this != &other)) {
343 m_values = std::move(other.m_values);
344 }
345 return *this;
346 }
347 /// Trivial destructor.
~symbol_set()348 ~symbol_set()
349 {
350 // NOTE: here we should replace with bidirectional tt, if we ever implement it.
351 PIRANHA_TT_CHECK(is_forward_iterator, const_iterator);
352 piranha_assert(run_destruction_checks());
353 }
354 /// Index operator.
355 /**
356 * @param n index of the element to be accessed.
357 *
358 * @return const reference to the element at index \p n.
359 */
operator [](const size_type & n) const360 const symbol &operator[](const size_type &n) const
361 {
362 piranha_assert(n < size());
363 return m_values[n];
364 }
365 /// Begin const iterator.
366 /**
367 * @return const iterator to the first element of the set, or end() if the set is empty.
368 */
begin() const369 const_iterator begin() const
370 {
371 return m_values.begin();
372 }
373 /// End const iterator.
374 /**
375 * @return const iterator to the element past the last element of the set.
376 */
end() const377 const_iterator end() const
378 {
379 return m_values.end();
380 }
381 /// Add symbol to the set.
382 /**
383 * The insertion of \p s will preserve the order of the set.
384 *
385 * @param s piranha::symbol to be inserted.
386 *
387 * @throws std::invalid_argument if \p s is already present in the set.
388 * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
389 */
add(const symbol & s)390 void add(const symbol &s)
391 {
392 // Copy it to provide strong exception safety.
393 std::vector<symbol> new_values;
394 new_values.reserve(size() + size_type(1u));
395 std::copy(begin(), end(), std::back_inserter(new_values));
396 const auto it = std::lower_bound(new_values.begin(), new_values.end(), s);
397 if (unlikely(it != new_values.end() && *it == s)) {
398 piranha_throw(std::invalid_argument, "symbol already present in this set");
399 }
400 new_values.insert(it, s);
401 // Move in the new args vector.
402 m_values = std::move(new_values);
403 piranha_assert(check());
404 }
405 /// Add symbol to the set.
406 /**
407 * Equivalent to constructing a piranha::symbol from \p name and then invoking the other overload of this method.
408 *
409 * @param name name of the piranha::symbol to be inserted.
410 *
411 * @throws unspecified any exception thrown by the other overload of this method or by the construction
412 * of piranha::symbol from \p std::string.
413 */
add(const std::string & name)414 void add(const std::string &name)
415 {
416 add(symbol(name));
417 }
418 /// Remove symbol from the set.
419 /**
420 * The removal of \p s will preserve the order of the set.
421 *
422 * @param s piranha::symbol to be removed.
423 *
424 * @throws std::invalid_argument if \p s is not present in the set.
425 * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
426 */
remove(const symbol & s)427 void remove(const symbol &s)
428 {
429 // Operate on a copy to provide strong exception safety.
430 std::vector<symbol> new_values;
431 std::remove_copy_if(begin(), end(), std::back_inserter(new_values),
432 [&s](const symbol &sym) { return sym == s; });
433 if (new_values.size() == size()) {
434 piranha_throw(std::invalid_argument, "symbol is not present in this set");
435 }
436 m_values = std::move(new_values);
437 piranha_assert(check());
438 }
439 /// Remove symbol from the set.
440 /**
441 * Equivalent to constructing a piranha::symbol from \p name and then invoking the other overload of this method.
442 *
443 * @param name name of the piranha::symbol to be removed.
444 *
445 * @throws unspecified any exception thrown by the other overload of this method or by the construction
446 * of piranha::symbol from \p std::string.
447 */
remove(const std::string & name)448 void remove(const std::string &name)
449 {
450 remove(symbol(name));
451 }
452 /// Set size.
453 /**
454 * @return the number of elements in the set.
455 */
size() const456 size_type size() const
457 {
458 return m_values.size();
459 }
460 /// Merge with other set.
461 /**
462 * @param other merge argument.
463 *
464 * @return a new set containing the union of the elements present in \p this and \p other.
465 *
466 * @throws unspecified any exception thrown by \p std::vector::push_back().
467 */
merge(const symbol_set & other) const468 symbol_set merge(const symbol_set &other) const
469 {
470 symbol_set retval;
471 retval.m_values.reserve(other.size() + size());
472 auto bi_it = std::back_insert_iterator<std::vector<symbol>>(retval.m_values);
473 std::set_union(begin(), end(), other.begin(), other.end(), bi_it);
474 piranha_assert(retval.check());
475 return retval;
476 }
477 /// Set difference.
478 /**
479 * @param other difference argument.
480 *
481 * @return a new set containing the elements of \p this which are not present in \p other.
482 *
483 * @throws unspecified any exception thrown by \p std::vector::push_back().
484 */
diff(const symbol_set & other) const485 symbol_set diff(const symbol_set &other) const
486 {
487 symbol_set retval;
488 std::set_difference(begin(), end(), other.begin(), other.end(), std::back_inserter(retval.m_values));
489 piranha_assert(retval.check());
490 return retval;
491 }
492 /// Equality operator.
493 /**
494 * @param other comparison argument.
495 *
496 * @return \p true if \p this and \p other contain exactly the same symbols, \p false otherwise.
497 */
operator ==(const symbol_set & other) const498 bool operator==(const symbol_set &other) const
499 {
500 return m_values == other.m_values;
501 }
502 /// Inequality operator.
503 /**
504 * @param other comparison argument.
505 *
506 * @return opposite of operator==().
507 */
operator !=(const symbol_set & other) const508 bool operator!=(const symbol_set &other) const
509 {
510 return !(*this == other);
511 }
512 /// Index of symbol.
513 /**
514 * This method will return the index in the set of the input symbol \p s. If \p s is not in the set,
515 * the size of the set is returned.
516 *
517 * @param s piranha::symbol whose index will be computed.
518 *
519 * @return the index of \p s in the set.
520 *
521 * @throws std::overflow_error if the size of the set is larger than an implementation-defined value.
522 */
index_of(const symbol & s) const523 size_type index_of(const symbol &s) const
524 {
525 // Need to check for potential overflows here.
526 using diff_type = std::iterator_traits<const_iterator>::difference_type;
527 using udiff_type = std::make_unsigned<diff_type>::type;
528 // This is not an exact check, it is conservative by 1 unit in order to simplify the logic.
529 if (unlikely(m_values.size() > static_cast<udiff_type>(std::numeric_limits<diff_type>::max()))) {
530 piranha_throw(std::overflow_error, "potential overflow in the computaion of the "
531 "index of a symbol");
532 }
533 const auto it = std::lower_bound(m_values.begin(), m_values.end(), s);
534 if (it != m_values.end() && *it == s) {
535 auto retval = it - m_values.begin();
536 piranha_assert(retval >= diff_type(0));
537 return static_cast<size_type>(retval);
538 }
539 return m_values.size();
540 }
541
542 private:
run_destruction_checks() const543 bool run_destruction_checks() const
544 {
545 // Run destruction checks only if we are not in the shutdown phase.
546 if (shutdown()) {
547 return true;
548 }
549 return check();
550 }
551
552 private:
553 std::vector<symbol> m_values;
554 };
555
556 /// Constructor from sets.
557 /**
558 * The internal positions vector will contain the positions in \p a of the elements
559 * of \p b appearing in the set \p a.
560 *
561 * @param a first set.
562 * @param b second set.
563 *
564 * @throws unspecified any exception thrown by memory errors in standard containers.
565 */
566 // NOTE: it might make sense here in the future to have a constructor from a range of strings instead.
567 // The rationale would be that like this we would avoid going through string -> symbol conversions
568 // when using this class from series.
positions(const symbol_set & a,const symbol_set & b)569 inline symbol_set::positions::positions(const symbol_set &a, const symbol_set &b)
570 {
571 size_type ia = 0u, ib = 0u;
572 while (true) {
573 if (ia == a.size() || ib == b.size()) {
574 break;
575 }
576 if (a[ia] == b[ib]) {
577 m_values.push_back(ia);
578 ++ia;
579 ++ib;
580 } else if (a[ia] < b[ib]) {
581 ++ia;
582 } else {
583 ++ib;
584 }
585 }
586 }
587
588 /// Constructor from map and set.
589 /**
590 * The internal vector of pairs will contain the positions in \p a of the mapped values
591 * of \p map appearing in the set \p a, and the mapped values themselves.
592 *
593 * @param a reference set.
594 * @param map input map.
595 *
596 * @throws unspecified any exception thrown by memory errors in standard containers, by
597 * the copy constructor of \p T, or by <tt>std::stable_sort()</tt>.
598 */
599 template <typename T>
positions_map(const symbol_set & a,const std::unordered_map<symbol,T> & map)600 inline symbol_set::positions_map<T>::positions_map(const symbol_set &a, const std::unordered_map<symbol, T> &map)
601 {
602 for (const auto &p : map) {
603 const auto idx = a.index_of(p.first);
604 if (idx != a.size()) {
605 m_pairs.push_back(std::make_pair(idx, p.second));
606 }
607 }
608 std::stable_sort(m_pairs.begin(), m_pairs.end(),
609 [](const value_type &p1, const value_type &p2) { return p1.first < p2.first; });
610 // Check that there are no duplicate positions.
611 piranha_assert(std::unique(m_pairs.begin(), m_pairs.end(),
612 [](const value_type &p1, const value_type &p2) { return p1.first == p2.first; })
613 == m_pairs.end());
614 }
615 }
616
617 #endif
618