1 /* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2
3 /*
4 * Main authors:
5 * Guido Tack <guido.tack@monash.edu>
6 */
7
8 /* This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11
12 #pragma once
13
14 #include <minizinc/gc.hh>
15
16 #include <algorithm>
17 #include <cstring>
18 #include <functional>
19 #include <iostream>
20 #include <string>
21 #include <unordered_map>
22
23 namespace MiniZinc {
24
25 class ASTStringData;
26
27 /**
28 * \brief Handle for an interned garbage collected string
29 */
30 class ASTString {
31 protected:
32 /// String
33 ASTStringData* _s = nullptr;
34
35 public:
36 /// Default constructor
37 ASTString() = default;
38 /// Constructor
39 ASTString(const std::string& s);
40 /// Constructor
ASTString(ASTStringData * s)41 ASTString(ASTStringData* s) : _s(s){};
42 /// Copy constructor
43 ASTString(const ASTString& s) = default;
44 /// Assignment operator
45 ASTString& operator=(const ASTString& s) = default;
46
47 /// Size of the string
48 size_t size() const;
49 /// Underlying C string object
50 const char* c_str() const; // NOLINT(readability-identifier-naming)
51 /// Underlying string implementation
aststr() const52 ASTStringData* aststr() const { return _s; }
53
54 /// Return if string is equal to \a s
55 bool operator==(const ASTString& s) const;
56 /// Return if string is not equal to \a s
57 bool operator!=(const ASTString& s) const;
58 /// Return if string is less than \a s
59 bool operator<(const ASTString& s) const;
60
61 /// Return if string is equal to \a s
62 bool operator==(const std::string& s) const;
63 /// Return if string is not equal to \a s
64 bool operator!=(const std::string& s) const;
65
66 /// Return if string ends with \a s
67 bool endsWith(const std::string& s) const;
68
69 /// Return if string begins with \a s
70 bool beginsWith(const std::string& s) const;
71
72 /// Returns a substring [pos, pos+count).
73 std::string substr(size_t pos = 0, size_t count = std::string::npos) const;
74 // Finds the last character equal to one of characters in the given character sequence.
75 size_t findLastOf(char ch, size_t pos = std::string::npos) const noexcept;
76 // Finds the first character equal to the given character sequence.
77 size_t find(char ch, size_t pos = 0) const noexcept;
78
79 /// Return Levenshtein distance to \a s
80 int levenshteinDistance(const ASTString& other) const;
81
82 /// Compute hash value of string
83 size_t hash() const;
84
85 /// Mark string during garbage collection
86 void mark() const;
87 };
88
89 /**
90 * \brief Print String \a s
91 */
92 template <class Char, class Traits>
operator <<(std::basic_ostream<Char,Traits> & os,const ASTString & s)93 std::basic_ostream<Char, Traits>& operator<<(std::basic_ostream<Char, Traits>& os,
94 const ASTString& s) {
95 return s.size() == 0 ? os : (os << s.c_str());
96 }
97
98 } // namespace MiniZinc
99
100 namespace std {
101 template <>
102 struct hash<MiniZinc::ASTString> {
103 public:
104 size_t operator()(const MiniZinc::ASTString& s) const;
105 };
106
107 template <>
108 struct equal_to<MiniZinc::ASTString> {
109 public:
110 bool operator()(const MiniZinc::ASTString& s0, const MiniZinc::ASTString& s1) const;
111 };
112 template <>
113 struct less<MiniZinc::ASTString> {
114 public:
115 bool operator()(const MiniZinc::ASTString& s0, const MiniZinc::ASTString& s1) const;
116 };
117 } // namespace std
118
119 namespace MiniZinc {
120
121 struct CStringHash {
122 public:
123 // FIXME: This is not an amazing hash function
operator ()MiniZinc::CStringHash124 size_t operator()(const std::pair<const char*, size_t>& s) const {
125 size_t result = 0;
126 const size_t prime = 31;
127 for (size_t i = 0; i < s.second; ++i) {
128 result = s.first[i] + (result * prime);
129 }
130 return result;
131 }
132 };
133 struct CStringEquals {
134 public:
operator ()MiniZinc::CStringEquals135 bool operator()(const std::pair<const char*, size_t>& s0,
136 const std::pair<const char*, size_t>& s1) const {
137 return s0.second == s1.second && (strncmp(s0.first, s1.first, s0.second) == 0);
138 }
139 };
140
141 /**
142 * \brief Garbage collected interned string
143 */
144 class ASTStringData : public ASTChunk {
145 friend class GC::Heap;
146
147 protected:
148 /// Interning Hash Map
149 using Interner = std::unordered_map<std::pair<const char*, size_t>, ASTStringData*, CStringHash,
150 CStringEquals>;
151 static Interner& interner();
152 /// Constructor
153 ASTStringData(const std::string& s);
154
155 public:
156 /// Allocate and initialise as \a s
157 static ASTStringData* a(const std::string& s);
158 /// Return underlying C-style string
159 // NOLINTNEXTLINE(readability-identifier-naming)
c_str() const160 const char* c_str() const { return _data + sizeof(size_t); }
161 /// Return size of string
size() const162 size_t size() const {
163 return static_cast<unsigned int>(_size) - static_cast<unsigned int>(sizeof(size_t)) - 1;
164 }
165 /// Access character at position \a i
operator [](unsigned int i)166 char operator[](unsigned int i) {
167 assert(i < size());
168 return _data[sizeof(size_t) + i];
169 }
170 /// Return hash value of string
hash() const171 size_t hash() const { return reinterpret_cast<const size_t*>(_data)[0]; }
172 /// Mark for garbage collection
mark() const173 void mark() const { _gcMark = 1; }
174
175 protected:
176 /// GC Destructor
destroy() const177 void destroy() const {
178 assert(interner().find({this->c_str(), this->size()}) != interner().end());
179 interner().erase({this->c_str(), this->size()});
180 };
181 };
182
ASTString(const std::string & s)183 inline ASTString::ASTString(const std::string& s) : _s(ASTStringData::a(s)) {}
184
size() const185 inline size_t ASTString::size() const { return _s != nullptr ? _s->size() : 0; }
186 // NOLINTNEXTLINE(readability-identifier-naming)
c_str() const187 inline const char* ASTString::c_str() const { return _s != nullptr ? _s->c_str() : nullptr; }
mark() const188 inline void ASTString::mark() const {
189 if (_s != nullptr) {
190 _s->mark();
191 }
192 }
193
operator ==(const ASTString & s) const194 inline bool ASTString::operator==(const ASTString& s) const { return _s == s._s; }
operator !=(const ASTString & s) const195 inline bool ASTString::operator!=(const ASTString& s) const { return _s != s._s; }
operator <(const ASTString & s) const196 inline bool ASTString::operator<(const ASTString& s) const {
197 if (size() == 0) {
198 return 0 < s.size();
199 }
200 unsigned int size = std::min(_s->size(), s.size());
201 int cmp = strncmp(_s->c_str(), s.c_str(), size);
202 if (cmp == 0) {
203 return _s->size() < s.size();
204 }
205 return cmp < 0;
206 }
operator ==(const std::string & s) const207 inline bool ASTString::operator==(const std::string& s) const {
208 return size() == s.size() && (size() == 0 || strncmp(_s->c_str(), s.c_str(), size()) == 0);
209 }
operator !=(const std::string & s) const210 inline bool ASTString::operator!=(const std::string& s) const { return !(*this == s); }
endsWith(const std::string & s) const211 inline bool ASTString::endsWith(const std::string& s) const {
212 return size() >= s.size() &&
213 (size() == 0 || strncmp(_s->c_str() + size() - s.size(), s.c_str(), s.size()) == 0);
214 }
beginsWith(const std::string & s) const215 inline bool ASTString::beginsWith(const std::string& s) const {
216 return size() >= s.size() && (size() == 0 || strncmp(_s->c_str(), s.c_str(), s.size()) == 0);
217 }
218
substr(size_t pos,size_t count) const219 inline std::string ASTString::substr(size_t pos, size_t count) const {
220 if (pos > size()) {
221 throw std::out_of_range("ASTString::substr pos out of range");
222 }
223 if (count == std::string::npos) {
224 return std::string(c_str() + pos, size() - pos);
225 }
226 return std::string(c_str() + pos, std::min(size() - pos, count));
227 }
findLastOf(char ch,size_t pos) const228 inline size_t ASTString::findLastOf(char ch, size_t pos) const noexcept {
229 const char* str = c_str();
230 for (int i = std::min(size() - 1, pos); i >= 0; --i) {
231 if (str[i] == ch) {
232 return i;
233 }
234 }
235 return std::string::npos;
236 }
237
find(char ch,size_t pos) const238 inline size_t ASTString::find(char ch, size_t pos) const noexcept {
239 if (pos >= size()) {
240 return std::string::npos;
241 }
242 const char* str = c_str();
243 for (int i = pos; i < size(); ++i) {
244 if (str[i] == ch) {
245 return i;
246 }
247 }
248 return std::string::npos;
249 }
250
hash() const251 inline size_t ASTString::hash() const { return _s != nullptr ? _s->hash() : 0; }
252
253 } // namespace MiniZinc
254
255 namespace std {
operator ()(const MiniZinc::ASTString & s) const256 inline size_t hash<MiniZinc::ASTString>::operator()(const MiniZinc::ASTString& s) const {
257 return s.hash();
258 }
operator ()(const MiniZinc::ASTString & s0,const MiniZinc::ASTString & s1) const259 inline bool equal_to<MiniZinc::ASTString>::operator()(const MiniZinc::ASTString& s0,
260 const MiniZinc::ASTString& s1) const {
261 return s0 == s1;
262 }
operator ()(const MiniZinc::ASTString & s0,const MiniZinc::ASTString & s1) const263 inline bool less<MiniZinc::ASTString>::operator()(const MiniZinc::ASTString& s0,
264 const MiniZinc::ASTString& s1) const {
265 return s0 < s1;
266 }
267 } // namespace std
268