1 /* Copyright 2009-2016 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8   * the GNU Lesser General Public License as published by the Free
9     Software Foundation; either version 3 of the License, or (at your
10     option) any later version.
11 
12 or
13 
14   * the GNU General Public License as published by the Free Software
15     Foundation; either version 3 of the License, or (at your option) any
16     later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library.  If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_SYMBOL_SET_HPP
30 #define PIRANHA_SYMBOL_SET_HPP
31 
32 #include <algorithm>
33 #include <initializer_list>
34 #include <iterator>
35 #include <limits>
36 #include <set>
37 #include <stdexcept>
38 #include <type_traits>
39 #include <unordered_map>
40 #include <utility>
41 #include <vector>
42 
43 #include "config.hpp"
44 #include "detail/init_data.hpp"
45 #include "detail/symbol_set_fwd.hpp"
46 #include "exceptions.hpp"
47 #include "symbol.hpp"
48 #include "type_traits.hpp"
49 
50 namespace piranha
51 {
52 
53 namespace detail
54 {
55 
56 // Requirements for types that can be used in positions_map.
57 template <typename T>
58 struct is_pmappable {
59     static const bool value = std::is_copy_constructible<T>::value && std::is_move_constructible<T>::value
60                               && std::is_move_assignable<T>::value && std::is_destructible<T>::value;
61 };
62 
63 template <typename T>
64 const bool is_pmappable<T>::value;
65 }
66 
67 /// Symbol set.
68 /**
69  * This class represents an ordered set of piranha::symbol. The individual piranha::symbol instances
70  * can be accessed via iterators or the index operator.
71  *
72  * ## Exception safety guarantee ##
73  *
74  * This class provides the strong exception safety guarantee for all operations.
75  *
76  * ## Move semantics ##
77  *
78  * Move construction and move assignment will leave the moved-from object in a state equivalent to a
79  * default-constructed object.
80  */
81 class symbol_set
82 {
check() const83     bool check() const
84     {
85         // Check for sorted range.
86         if (!std::is_sorted(begin(), end())) {
87             return false;
88         }
89         if (size() < 2u) {
90             return true;
91         }
92         // Check for duplicates.
93         for (size_type i = 0u; i < size() - 1u; ++i) {
94             if (m_values[i] == m_values[i + 1u]) {
95                 return false;
96             }
97         }
98         return true;
99     }
100     // Enabler for ctor from iterator.
101     template <typename Iterator, typename Symbol>
102     using it_ctor_enabler = typename std::
103         enable_if<is_input_iterator<Iterator>::value
104                       && std::is_constructible<Symbol, decltype(*(std::declval<const Iterator &>()))>::value,
105                   int>::type;
106 
107 public:
108     /// Size type.
109     typedef std::vector<symbol>::size_type size_type;
110     /// Positions class.
111     /**
112      * This is a small utility class that can be used to determine the positions,
113      * in a piranha::symbol_set \p a, of the symbols in the piranha::symbol_set \p b.
114      * The positions are stored internally in an \p std::vector which is guaranteed
115      * to be sorted in ascending order and which can be accessed via the iterators
116      * provided by this class. If a symbol which is present in \p b is not present in
117      * \p a, then it will be ignored.
118      *
119      * For instance, if the set \p a contains the symbols <tt>[B,C,D,E]</tt> and \p b
120      * contains <tt>[A,B,D,F]</tt>, then an instance of this class constructed passing
121      * \p a and \p b as parameters will contain the vector <tt>[0,2]</tt>.
122      */
123     class positions
124     {
125     public:
126         /// Value type.
127         /**
128          * The positions are represented using the size type of piranha::symbol_set.
129          */
130         using value_type = size_type;
131         /// Const iterator.
132         using const_iterator = std::vector<value_type>::const_iterator;
133         explicit positions(const symbol_set &, const symbol_set &);
134         /// Deleted copy constructor.
135         positions(const positions &) = delete;
136         /// Defaulted move constructor.
137         positions(positions &&) = default;
138 
139     private:
140         positions &operator=(const positions &) = delete;
141         positions &operator=(positions &&) = delete;
142 
143     public:
144         /// Begin iterator.
145         /**
146          * @return an iterator to the begin of the internal vector.
147          */
begin() const148         const_iterator begin() const
149         {
150             return m_values.begin();
151         }
152         /// End iterator.
153         /**
154          * @return an iterator to the end of the internal vector.
155          */
end() const156         const_iterator end() const
157         {
158             return m_values.end();
159         }
160         /// Last element.
161         /**
162          * @return a const reference to the last element.
163          */
back() const164         const value_type &back() const
165         {
166             piranha_assert(m_values.size());
167             return m_values.back();
168         }
169         /// Size.
170         /**
171          * @return the size of the internal vector.
172          */
size() const173         std::vector<value_type>::size_type size() const
174         {
175             return m_values.size();
176         }
177 
178     private:
179         std::vector<value_type> m_values;
180     };
181     /// Positions map class.
182     /**
183      * This class is similar to piranha::symbol_set::positions. In addition to storing the positions
184      * of the symbols from a map \p b with respect to a reference piranha::symbol_set \p a, it will also store
185      * the instances of \p T that were originally mapped to the symbols in \p b.
186      *
187      * For instance, if \p T is \p int, \p a contains the symbols <tt>[B,C,D,E]</tt> and \p b is the map
188      * <tt>[(A,10),(B,20),(D,30),(F,40)]</tt>, then an instance of this class constructed passing
189      * \p a and \p b as parameters will contain the vector <tt>[(0,20),(2,30)]</tt>. The internal vector
190      * is guaranteed to be sorted according to the position of the symbols.
191      *
192      * ## Type requirements ##
193      *
194      * \p T must be destructible, copy-constructible, move-constructible and move-assignable.
195      */
196     template <typename T>
197     class positions_map
198     {
199 #if !defined(PIRANHA_DOXYGEN_INVOKED)
200         PIRANHA_TT_CHECK(detail::is_pmappable, T);
201 #endif
202     public:
203         /// Value type.
204         /**
205          * The stored value type is the <tt>(position,T)</tt> pair.
206          */
207         using value_type = std::pair<size_type, T>;
208         /// Const iterator.
209         using const_iterator = typename std::vector<value_type>::const_iterator;
210         explicit positions_map(const symbol_set &, const std::unordered_map<symbol, T> &);
211         /// Deleted copy constructor.
212         positions_map(const positions_map &) = delete;
213         /// Defaulted move constructor.
214         positions_map(positions_map &&) = default;
215 
216     private:
217         positions_map &operator=(const positions_map &) = delete;
218         positions_map &operator=(positions_map &&) = delete;
219 
220     public:
221         /// Begin iterator.
222         /**
223          * @return an iterator to the begin of the internal vector.
224          */
begin() const225         const_iterator begin() const
226         {
227             return m_pairs.begin();
228         }
229         /// End iterator.
230         /**
231          * @return an iterator to the end of the internal vector.
232          */
end() const233         const_iterator end() const
234         {
235             return m_pairs.end();
236         }
237         /// Size.
238         /**
239          * @return the size of the internal vector.
240          */
size() const241         typename std::vector<value_type>::size_type size() const
242         {
243             return m_pairs.size();
244         }
245         /// Last element.
246         /**
247          * @return a const reference to the last element.
248          */
back() const249         const value_type &back() const
250         {
251             piranha_assert(m_pairs.size());
252             return m_pairs.back();
253         }
254 
255     private:
256         std::vector<value_type> m_pairs;
257     };
258     /// Const iterator.
259     typedef std::vector<symbol>::const_iterator const_iterator;
260     /// Defaulted default constructor.
261     /**
262      * Will construct an empty set.
263      */
264     symbol_set() = default;
265     /// Defaulted copy constructor.
266     /**
267      * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
268      */
269     symbol_set(const symbol_set &) = default;
270     /// Defaulted move constructor.
271     symbol_set(symbol_set &&) = default;
272     /// Constructor from initializer list of piranha::symbol.
273     /**
274      * Each symbol in the list will be added via add() to the set.
275      *
276      * @param l list of symbols used for construction.
277      *
278      * @throws unspecified any exception thrown by add().
279      */
symbol_set(std::initializer_list<symbol> l)280     explicit symbol_set(std::initializer_list<symbol> l)
281     {
282         // NOTE: for these types of initialisations from other containers
283         // it might make sense eventually to avoid all these adds, and do
284         // the sorting and elimination of duplicates in one pass to lower
285         // algorithmic complexity.
286         for (const auto &s : l) {
287             add(s);
288         }
289     }
290     /// Constructor from range.
291     /**
292      * \note
293      * This constructor is enabled only if \p Iterator is an input iterator and
294      * piranha::symbol is constructible from its value type.
295      *
296      * The set will be initialised with symbols constructed from the elements of the range.
297      *
298      * @param begin begin iterator.
299      * @param end end iterator.
300      *
301      * @throws unspecified any exception thrown by operations on standard containers or by
302      * the invoked constructor of piranha::symbol.
303      */
304     template <typename Iterator, it_ctor_enabler<Iterator, symbol> = 0>
symbol_set(const Iterator & begin,const Iterator & end)305     explicit symbol_set(const Iterator &begin, const Iterator &end)
306     {
307         // NOTE: this is one possible way of doing this, probably a sorted vector
308         // with std::unique will be faster. Also, this can be shared with the ctor
309         // from init list which can be furtherly generalised.
310         std::set<symbol> s_set;
311         for (Iterator it = begin; it != end; ++it) {
312             s_set.emplace(*it);
313         }
314         std::copy(s_set.begin(), s_set.end(), std::back_inserter(m_values));
315     }
316     /// Copy assignment operator.
317     /**
318      * @param other set to be assigned to \p this.
319      *
320      * @return reference to \p this.
321      *
322      * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
323      */
operator =(const symbol_set & other)324     symbol_set &operator=(const symbol_set &other)
325     {
326         if (likely(this != &other)) {
327             symbol_set tmp(other);
328             *this = std::move(tmp);
329         }
330         return *this;
331     }
332     /// Move assignment operator.
333     /**
334      * @param other assignment argument.
335      *
336      * @return reference to \p this.
337      */
operator =(symbol_set && other)338     symbol_set &operator=(symbol_set &&other) noexcept
339     {
340         // NOTE: here the idea is that in principle we want to be able to move-assign self,
341         // and we don't want to rely on std::vector to support this. Hence, the explicit check.
342         if (likely(this != &other)) {
343             m_values = std::move(other.m_values);
344         }
345         return *this;
346     }
347     /// Trivial destructor.
~symbol_set()348     ~symbol_set()
349     {
350         // NOTE: here we should replace with bidirectional tt, if we ever implement it.
351         PIRANHA_TT_CHECK(is_forward_iterator, const_iterator);
352         piranha_assert(run_destruction_checks());
353     }
354     /// Index operator.
355     /**
356      * @param n index of the element to be accessed.
357      *
358      * @return const reference to the element at index \p n.
359      */
operator [](const size_type & n) const360     const symbol &operator[](const size_type &n) const
361     {
362         piranha_assert(n < size());
363         return m_values[n];
364     }
365     /// Begin const iterator.
366     /**
367      * @return const iterator to the first element of the set, or end() if the set is empty.
368      */
begin() const369     const_iterator begin() const
370     {
371         return m_values.begin();
372     }
373     /// End const iterator.
374     /**
375      * @return const iterator to the element past the last element of the set.
376      */
end() const377     const_iterator end() const
378     {
379         return m_values.end();
380     }
381     /// Add symbol to the set.
382     /**
383      * The insertion of \p s will preserve the order of the set.
384      *
385      * @param s piranha::symbol to be inserted.
386      *
387      * @throws std::invalid_argument if \p s is already present in the set.
388      * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
389      */
add(const symbol & s)390     void add(const symbol &s)
391     {
392         // Copy it to provide strong exception safety.
393         std::vector<symbol> new_values;
394         new_values.reserve(size() + size_type(1u));
395         std::copy(begin(), end(), std::back_inserter(new_values));
396         const auto it = std::lower_bound(new_values.begin(), new_values.end(), s);
397         if (unlikely(it != new_values.end() && *it == s)) {
398             piranha_throw(std::invalid_argument, "symbol already present in this set");
399         }
400         new_values.insert(it, s);
401         // Move in the new args vector.
402         m_values = std::move(new_values);
403         piranha_assert(check());
404     }
405     /// Add symbol to the set.
406     /**
407      * Equivalent to constructing a piranha::symbol from \p name and then invoking the other overload of this method.
408      *
409      * @param name name of the piranha::symbol to be inserted.
410      *
411      * @throws unspecified any exception thrown by the other overload of this method or by the construction
412      * of piranha::symbol from \p std::string.
413      */
add(const std::string & name)414     void add(const std::string &name)
415     {
416         add(symbol(name));
417     }
418     /// Remove symbol from the set.
419     /**
420      * The removal of \p s will preserve the order of the set.
421      *
422      * @param s piranha::symbol to be removed.
423      *
424      * @throws std::invalid_argument if \p s is not present in the set.
425      * @throws unspecified any exception thrown by memory allocation errors in \p std::vector.
426      */
remove(const symbol & s)427     void remove(const symbol &s)
428     {
429         // Operate on a copy to provide strong exception safety.
430         std::vector<symbol> new_values;
431         std::remove_copy_if(begin(), end(), std::back_inserter(new_values),
432                             [&s](const symbol &sym) { return sym == s; });
433         if (new_values.size() == size()) {
434             piranha_throw(std::invalid_argument, "symbol is not present in this set");
435         }
436         m_values = std::move(new_values);
437         piranha_assert(check());
438     }
439     /// Remove symbol from the set.
440     /**
441      * Equivalent to constructing a piranha::symbol from \p name and then invoking the other overload of this method.
442      *
443      * @param name name of the piranha::symbol to be removed.
444      *
445      * @throws unspecified any exception thrown by the other overload of this method or by the construction
446      * of piranha::symbol from \p std::string.
447      */
remove(const std::string & name)448     void remove(const std::string &name)
449     {
450         remove(symbol(name));
451     }
452     /// Set size.
453     /**
454      * @return the number of elements in the set.
455      */
size() const456     size_type size() const
457     {
458         return m_values.size();
459     }
460     /// Merge with other set.
461     /**
462      * @param other merge argument.
463      *
464      * @return a new set containing the union of the elements present in \p this and \p other.
465      *
466      * @throws unspecified any exception thrown by \p std::vector::push_back().
467      */
merge(const symbol_set & other) const468     symbol_set merge(const symbol_set &other) const
469     {
470         symbol_set retval;
471         retval.m_values.reserve(other.size() + size());
472         auto bi_it = std::back_insert_iterator<std::vector<symbol>>(retval.m_values);
473         std::set_union(begin(), end(), other.begin(), other.end(), bi_it);
474         piranha_assert(retval.check());
475         return retval;
476     }
477     /// Set difference.
478     /**
479      * @param other difference argument.
480      *
481      * @return a new set containing the elements of \p this which are not present in \p other.
482      *
483      * @throws unspecified any exception thrown by \p std::vector::push_back().
484      */
diff(const symbol_set & other) const485     symbol_set diff(const symbol_set &other) const
486     {
487         symbol_set retval;
488         std::set_difference(begin(), end(), other.begin(), other.end(), std::back_inserter(retval.m_values));
489         piranha_assert(retval.check());
490         return retval;
491     }
492     /// Equality operator.
493     /**
494      * @param other comparison argument.
495      *
496      * @return \p true if \p this and \p other contain exactly the same symbols, \p false otherwise.
497      */
operator ==(const symbol_set & other) const498     bool operator==(const symbol_set &other) const
499     {
500         return m_values == other.m_values;
501     }
502     /// Inequality operator.
503     /**
504      * @param other comparison argument.
505      *
506      * @return opposite of operator==().
507      */
operator !=(const symbol_set & other) const508     bool operator!=(const symbol_set &other) const
509     {
510         return !(*this == other);
511     }
512     /// Index of symbol.
513     /**
514      * This method will return the index in the set of the input symbol \p s. If \p s is not in the set,
515      * the size of the set is returned.
516      *
517      * @param s piranha::symbol whose index will be computed.
518      *
519      * @return the index of \p s in the set.
520      *
521      * @throws std::overflow_error if the size of the set is larger than an implementation-defined value.
522      */
index_of(const symbol & s) const523     size_type index_of(const symbol &s) const
524     {
525         // Need to check for potential overflows here.
526         using diff_type = std::iterator_traits<const_iterator>::difference_type;
527         using udiff_type = std::make_unsigned<diff_type>::type;
528         // This is not an exact check, it is conservative by 1 unit in order to simplify the logic.
529         if (unlikely(m_values.size() > static_cast<udiff_type>(std::numeric_limits<diff_type>::max()))) {
530             piranha_throw(std::overflow_error, "potential overflow in the computaion of the "
531                                                "index of a symbol");
532         }
533         const auto it = std::lower_bound(m_values.begin(), m_values.end(), s);
534         if (it != m_values.end() && *it == s) {
535             auto retval = it - m_values.begin();
536             piranha_assert(retval >= diff_type(0));
537             return static_cast<size_type>(retval);
538         }
539         return m_values.size();
540     }
541 
542 private:
run_destruction_checks() const543     bool run_destruction_checks() const
544     {
545         // Run destruction checks only if we are not in the shutdown phase.
546         if (shutdown()) {
547             return true;
548         }
549         return check();
550     }
551 
552 private:
553     std::vector<symbol> m_values;
554 };
555 
556 /// Constructor from sets.
557 /**
558  * The internal positions vector will contain the positions in \p a of the elements
559  * of \p b appearing in the set \p a.
560  *
561  * @param a first set.
562  * @param b second set.
563  *
564  * @throws unspecified any exception thrown by memory errors in standard containers.
565  */
566 // NOTE: it might make sense here in the future to have a constructor from a range of strings instead.
567 // The rationale would be that like this we would avoid going through string -> symbol conversions
568 // when using this class from series.
positions(const symbol_set & a,const symbol_set & b)569 inline symbol_set::positions::positions(const symbol_set &a, const symbol_set &b)
570 {
571     size_type ia = 0u, ib = 0u;
572     while (true) {
573         if (ia == a.size() || ib == b.size()) {
574             break;
575         }
576         if (a[ia] == b[ib]) {
577             m_values.push_back(ia);
578             ++ia;
579             ++ib;
580         } else if (a[ia] < b[ib]) {
581             ++ia;
582         } else {
583             ++ib;
584         }
585     }
586 }
587 
588 /// Constructor from map and set.
589 /**
590  * The internal vector of pairs will contain the positions in \p a of the mapped values
591  * of \p map appearing in the set \p a, and the mapped values themselves.
592  *
593  * @param a reference set.
594  * @param map input map.
595  *
596  * @throws unspecified any exception thrown by memory errors in standard containers, by
597  * the copy constructor of \p T, or by <tt>std::stable_sort()</tt>.
598  */
599 template <typename T>
positions_map(const symbol_set & a,const std::unordered_map<symbol,T> & map)600 inline symbol_set::positions_map<T>::positions_map(const symbol_set &a, const std::unordered_map<symbol, T> &map)
601 {
602     for (const auto &p : map) {
603         const auto idx = a.index_of(p.first);
604         if (idx != a.size()) {
605             m_pairs.push_back(std::make_pair(idx, p.second));
606         }
607     }
608     std::stable_sort(m_pairs.begin(), m_pairs.end(),
609                      [](const value_type &p1, const value_type &p2) { return p1.first < p2.first; });
610     // Check that there are no duplicate positions.
611     piranha_assert(std::unique(m_pairs.begin(), m_pairs.end(),
612                                [](const value_type &p1, const value_type &p2) { return p1.first == p2.first; })
613                    == m_pairs.end());
614 }
615 }
616 
617 #endif
618