1 /* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2 
3 /*
4  *  Main authors:
5  *     Guido Tack <guido.tack@monash.edu>
6  */
7 
8 /* This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11 
12 #pragma once
13 
14 #include <minizinc/gc.hh>
15 
16 #include <algorithm>
17 #include <cstring>
18 #include <functional>
19 #include <iostream>
20 #include <string>
21 #include <unordered_map>
22 
23 namespace MiniZinc {
24 
25 class ASTStringData;
26 
27 /**
28  * \brief Handle for an interned garbage collected string
29  */
30 class ASTString {
31 protected:
32   /// String
33   ASTStringData* _s = nullptr;
34 
35 public:
36   /// Default constructor
37   ASTString() = default;
38   /// Constructor
39   ASTString(const std::string& s);
40   /// Constructor
ASTString(ASTStringData * s)41   ASTString(ASTStringData* s) : _s(s){};
42   /// Copy constructor
43   ASTString(const ASTString& s) = default;
44   /// Assignment operator
45   ASTString& operator=(const ASTString& s) = default;
46 
47   /// Size of the string
48   size_t size() const;
49   /// Underlying C string object
50   const char* c_str() const;  // NOLINT(readability-identifier-naming)
51   /// Underlying string implementation
aststr() const52   ASTStringData* aststr() const { return _s; }
53 
54   /// Return if string is equal to \a s
55   bool operator==(const ASTString& s) const;
56   /// Return if string is not equal to \a s
57   bool operator!=(const ASTString& s) const;
58   /// Return if string is less than \a s
59   bool operator<(const ASTString& s) const;
60 
61   /// Return if string is equal to \a s
62   bool operator==(const std::string& s) const;
63   /// Return if string is not equal to \a s
64   bool operator!=(const std::string& s) const;
65 
66   /// Return if string ends with \a s
67   bool endsWith(const std::string& s) const;
68 
69   /// Return if string begins with \a s
70   bool beginsWith(const std::string& s) const;
71 
72   /// Returns a substring [pos, pos+count).
73   std::string substr(size_t pos = 0, size_t count = std::string::npos) const;
74   // Finds the last character equal to one of characters in the given character sequence.
75   size_t findLastOf(char ch, size_t pos = std::string::npos) const noexcept;
76   // Finds the first character equal to the given character sequence.
77   size_t find(char ch, size_t pos = 0) const noexcept;
78 
79   /// Return Levenshtein distance to \a s
80   int levenshteinDistance(const ASTString& other) const;
81 
82   /// Compute hash value of string
83   size_t hash() const;
84 
85   /// Mark string during garbage collection
86   void mark() const;
87 };
88 
89 /**
90  * \brief Print String \a s
91  */
92 template <class Char, class Traits>
operator <<(std::basic_ostream<Char,Traits> & os,const ASTString & s)93 std::basic_ostream<Char, Traits>& operator<<(std::basic_ostream<Char, Traits>& os,
94                                              const ASTString& s) {
95   return s.size() == 0 ? os : (os << s.c_str());
96 }
97 
98 }  // namespace MiniZinc
99 
100 namespace std {
101 template <>
102 struct hash<MiniZinc::ASTString> {
103 public:
104   size_t operator()(const MiniZinc::ASTString& s) const;
105 };
106 
107 template <>
108 struct equal_to<MiniZinc::ASTString> {
109 public:
110   bool operator()(const MiniZinc::ASTString& s0, const MiniZinc::ASTString& s1) const;
111 };
112 template <>
113 struct less<MiniZinc::ASTString> {
114 public:
115   bool operator()(const MiniZinc::ASTString& s0, const MiniZinc::ASTString& s1) const;
116 };
117 }  // namespace std
118 
119 namespace MiniZinc {
120 
121 struct CStringHash {
122 public:
123   // FIXME: This is not an amazing hash function
operator ()MiniZinc::CStringHash124   size_t operator()(const std::pair<const char*, size_t>& s) const {
125     size_t result = 0;
126     const size_t prime = 31;
127     for (size_t i = 0; i < s.second; ++i) {
128       result = s.first[i] + (result * prime);
129     }
130     return result;
131   }
132 };
133 struct CStringEquals {
134 public:
operator ()MiniZinc::CStringEquals135   bool operator()(const std::pair<const char*, size_t>& s0,
136                   const std::pair<const char*, size_t>& s1) const {
137     return s0.second == s1.second && (strncmp(s0.first, s1.first, s0.second) == 0);
138   }
139 };
140 
141 /**
142  * \brief Garbage collected interned string
143  */
144 class ASTStringData : public ASTChunk {
145   friend class GC::Heap;
146 
147 protected:
148   /// Interning Hash Map
149   using Interner = std::unordered_map<std::pair<const char*, size_t>, ASTStringData*, CStringHash,
150                                       CStringEquals>;
151   static Interner& interner();
152   /// Constructor
153   ASTStringData(const std::string& s);
154 
155 public:
156   /// Allocate and initialise as \a s
157   static ASTStringData* a(const std::string& s);
158   /// Return underlying C-style string
159   // NOLINTNEXTLINE(readability-identifier-naming)
c_str() const160   const char* c_str() const { return _data + sizeof(size_t); }
161   /// Return size of string
size() const162   size_t size() const {
163     return static_cast<unsigned int>(_size) - static_cast<unsigned int>(sizeof(size_t)) - 1;
164   }
165   /// Access character at position \a i
operator [](unsigned int i)166   char operator[](unsigned int i) {
167     assert(i < size());
168     return _data[sizeof(size_t) + i];
169   }
170   /// Return hash value of string
hash() const171   size_t hash() const { return reinterpret_cast<const size_t*>(_data)[0]; }
172   /// Mark for garbage collection
mark() const173   void mark() const { _gcMark = 1; }
174 
175 protected:
176   /// GC Destructor
destroy() const177   void destroy() const {
178     assert(interner().find({this->c_str(), this->size()}) != interner().end());
179     interner().erase({this->c_str(), this->size()});
180   };
181 };
182 
ASTString(const std::string & s)183 inline ASTString::ASTString(const std::string& s) : _s(ASTStringData::a(s)) {}
184 
size() const185 inline size_t ASTString::size() const { return _s != nullptr ? _s->size() : 0; }
186 // NOLINTNEXTLINE(readability-identifier-naming)
c_str() const187 inline const char* ASTString::c_str() const { return _s != nullptr ? _s->c_str() : nullptr; }
mark() const188 inline void ASTString::mark() const {
189   if (_s != nullptr) {
190     _s->mark();
191   }
192 }
193 
operator ==(const ASTString & s) const194 inline bool ASTString::operator==(const ASTString& s) const { return _s == s._s; }
operator !=(const ASTString & s) const195 inline bool ASTString::operator!=(const ASTString& s) const { return _s != s._s; }
operator <(const ASTString & s) const196 inline bool ASTString::operator<(const ASTString& s) const {
197   if (size() == 0) {
198     return 0 < s.size();
199   }
200   unsigned int size = std::min(_s->size(), s.size());
201   int cmp = strncmp(_s->c_str(), s.c_str(), size);
202   if (cmp == 0) {
203     return _s->size() < s.size();
204   }
205   return cmp < 0;
206 }
operator ==(const std::string & s) const207 inline bool ASTString::operator==(const std::string& s) const {
208   return size() == s.size() && (size() == 0 || strncmp(_s->c_str(), s.c_str(), size()) == 0);
209 }
operator !=(const std::string & s) const210 inline bool ASTString::operator!=(const std::string& s) const { return !(*this == s); }
endsWith(const std::string & s) const211 inline bool ASTString::endsWith(const std::string& s) const {
212   return size() >= s.size() &&
213          (size() == 0 || strncmp(_s->c_str() + size() - s.size(), s.c_str(), s.size()) == 0);
214 }
beginsWith(const std::string & s) const215 inline bool ASTString::beginsWith(const std::string& s) const {
216   return size() >= s.size() && (size() == 0 || strncmp(_s->c_str(), s.c_str(), s.size()) == 0);
217 }
218 
substr(size_t pos,size_t count) const219 inline std::string ASTString::substr(size_t pos, size_t count) const {
220   if (pos > size()) {
221     throw std::out_of_range("ASTString::substr pos out of range");
222   }
223   if (count == std::string::npos) {
224     return std::string(c_str() + pos, size() - pos);
225   }
226   return std::string(c_str() + pos, std::min(size() - pos, count));
227 }
findLastOf(char ch,size_t pos) const228 inline size_t ASTString::findLastOf(char ch, size_t pos) const noexcept {
229   const char* str = c_str();
230   for (int i = std::min(size() - 1, pos); i >= 0; --i) {
231     if (str[i] == ch) {
232       return i;
233     }
234   }
235   return std::string::npos;
236 }
237 
find(char ch,size_t pos) const238 inline size_t ASTString::find(char ch, size_t pos) const noexcept {
239   if (pos >= size()) {
240     return std::string::npos;
241   }
242   const char* str = c_str();
243   for (int i = pos; i < size(); ++i) {
244     if (str[i] == ch) {
245       return i;
246     }
247   }
248   return std::string::npos;
249 }
250 
hash() const251 inline size_t ASTString::hash() const { return _s != nullptr ? _s->hash() : 0; }
252 
253 }  // namespace MiniZinc
254 
255 namespace std {
operator ()(const MiniZinc::ASTString & s) const256 inline size_t hash<MiniZinc::ASTString>::operator()(const MiniZinc::ASTString& s) const {
257   return s.hash();
258 }
operator ()(const MiniZinc::ASTString & s0,const MiniZinc::ASTString & s1) const259 inline bool equal_to<MiniZinc::ASTString>::operator()(const MiniZinc::ASTString& s0,
260                                                       const MiniZinc::ASTString& s1) const {
261   return s0 == s1;
262 }
operator ()(const MiniZinc::ASTString & s0,const MiniZinc::ASTString & s1) const263 inline bool less<MiniZinc::ASTString>::operator()(const MiniZinc::ASTString& s0,
264                                                   const MiniZinc::ASTString& s1) const {
265   return s0 < s1;
266 }
267 }  // namespace std
268