1 /* 2 * String.h 3 * Apto 4 * 5 * Created by David on 2/10/11. 6 * Copyright 2011 David Michael Bryson. All rights reserved. 7 * http://programerror.com/software/apto 8 * 9 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 10 * following conditions are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the 13 * following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 15 * following disclaimer in the documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of David Michael Bryson, nor the names of contributors may be used to endorse or promote 17 * products derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY DAVID MICHAEL BRYSON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * DISCLAIMED. IN NO EVENT SHALL DAVID MICHAEL BRYSON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 25 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 * Authors: David M. Bryson <david@programerror.com> 28 * 29 */ 30 31 #ifndef AptoCoreString_h 32 #define AptoCoreString_h 33 34 #include "apto/core/Definitions.h" 35 #include "apto/core/RefCount.h" 36 #include "apto/core/SmartPtr.h" 37 #include "apto/core/TypeUtil.h" 38 39 #include <cassert> 40 #include <string.h> 41 42 43 namespace Apto { 44 45 // Basic String 46 // -------------------------------------------------------------------------------------------------------------- 47 48 template <class ThreadingModel = SingleThreaded> class BasicString 49 { 50 public: 51 typedef char ValueType; 52 class Iterator; 53 class ConstIterator; 54 55 protected: 56 class StringRep; 57 typedef SmartPtr<StringRep, InternalRCObject> StringRepPtr; 58 59 protected: 60 StringRepPtr m_value; 61 BasicString(StringRepPtr rep)62 BasicString(StringRepPtr rep) : m_value(rep) { ; } 63 64 public: 65 // Construction 66 BasicString(const char* str = "") 67 : m_value((str) ? new StringRep(static_cast<int>(strlen(str)), str) : new StringRep(0)) { assert(m_value); } BasicString(int size,const char * str)68 BasicString(int size, const char* str) : m_value(new StringRep(size, str)) { assert(m_value); } BasicString(const BasicString & rhs)69 BasicString(const BasicString& rhs) : m_value(rhs.m_value) { ; } BasicString(const BasicString<T1> & rhs)70 template <class T1> BasicString(const BasicString<T1>& rhs) : m_value(new StringRep(rhs.GetSize(), rhs.GetData())) { ; } 71 ~BasicString()72 ~BasicString() { ; } 73 74 75 // Property Access GetSize()76 inline int GetSize() const { return m_value->GetSize(); } GetData()77 inline const char* GetData() const { return m_value->GetRep(); } GetCString()78 inline const char* GetCString() const { return m_value->GetRep(); } 79 inline operator const char*() const { return m_value->GetRep(); } 80 81 82 // Assignment 83 inline BasicString& operator=(const BasicString& rhs) { m_value = rhs.m_value; return *this; } 84 template <class T1> inline BasicString& operator=(const BasicString<T1>& rhs) 85 { 86 m_value = new StringRep(rhs.GetSize(), rhs.GetData()); 87 assert(m_value); 88 return *this; 89 } 90 91 inline BasicString& operator=(const char* rhs) 92 { 93 assert(rhs); 94 m_value = StringRepPtr(new StringRep(static_cast<int>(strlen(rhs)), rhs)); 95 assert(m_value); 96 return *this; 97 } 98 99 100 // Comparison Operators Compare(const char * str)101 int Compare(const char* str) const 102 { 103 assert(str); 104 int i; 105 for (i = 0; i < GetSize() && str[i] != '\0' && (*this)[i] == str[i]; i++) ; 106 107 if (i == GetSize() && str[i] == '\0') return 0; 108 if (i < GetSize() && str[i] < (*this)[i]) return 1; 109 return -1; 110 } 111 112 bool operator==(const BasicString& rhs) const 113 { 114 if (rhs.GetSize() != GetSize()) return false; 115 for (int i = 0; i < GetSize(); i++) if ((*this)[i] != rhs[i]) return false; 116 return true; 117 } 118 inline bool operator==(const char* rhs) const { return Compare(rhs) == 0; } 119 120 template <class R> 121 inline bool operator!=(const BasicString<R>& rhs) const { return !operator==(rhs); } 122 inline bool operator!=(const char* rhs) const { return Compare(rhs) != 0; } 123 124 template <class R> 125 inline bool operator<(const BasicString<R>& rhs) const { return Compare(rhs) < 0; } 126 inline bool operator<(const char* rhs) const { return Compare(rhs) < 0; } 127 128 template <class R> 129 inline bool operator>(const BasicString<R>& rhs) const { return Compare(rhs) > 0; } 130 inline bool operator>(const char* rhs) const { return Compare(rhs) > 0; } 131 132 template <class R> 133 inline bool operator<=(const BasicString<R>& rhs) const { return Compare(rhs) <= 0; } 134 inline bool operator<=(const char* rhs) const { return Compare(rhs) <= 0; } 135 136 template <class R> 137 inline bool operator>=(const BasicString<R>& rhs) const { return Compare(rhs) >= 0; } 138 inline bool operator>=(const char* rhs) const { return Compare(rhs) >= 0; } 139 140 141 // Character access 142 inline char operator[](int index) const { return m_value->operator[](index); } 143 144 145 // Concatenation 146 inline BasicString& operator+=(const char c) { return append(1, &c); } 147 inline BasicString& operator+=(const char* str) { return append(static_cast<int>(strlen(str)), str); } 148 inline BasicString& operator+=(const BasicString& str) { return append(str.GetSize(), str.GetData()); } 149 inline BasicString operator+(const char c) { return concat(1, &c); } 150 inline BasicString operator+(const char* str) { return concat(static_cast<int>(strlen(str)), str); } 151 inline BasicString operator+(const BasicString& str) { return concat(str.GetSize(), str.GetData()); } 152 153 154 // Substring extraction 155 inline BasicString Substring(int idx = 0, int length = -1) const 156 { 157 assert(idx >= 0); 158 assert(idx < GetSize()); 159 assert(length <= (GetSize() - idx)); 160 return BasicString((length >= 0) ? length : (GetSize() - idx), m_value->GetRep() + idx); 161 } 162 Pop(const char delimeter)163 BasicString Pop(const char delimeter) 164 { 165 StringRepPtr value = m_value; 166 for (int i = 0; i < GetSize(); i++) { 167 if (m_value->operator[](i) == delimeter) { 168 *this = Substring(i + 1); 169 return BasicString(i, value->GetRep()); 170 } 171 } 172 BasicString rtn(*this); 173 *this = BasicString(); 174 return rtn; 175 } 176 177 178 // Various Character Inspection Utility Methods IsLetter(int idx)179 inline bool IsLetter(int idx) const { return IsUpper(idx) || IsLower(idx); } IsLower(int idx)180 inline bool IsLower(int idx) const { return ((*this)[idx] >= 'a' && (*this)[idx] <= 'z'); } IsUpper(int idx)181 inline bool IsUpper(int idx) const { return ((*this)[idx] >= 'A' && (*this)[idx] <= 'Z'); } IsNumber(int idx)182 inline bool IsNumber(int idx) const { return (*this)[idx] >= '0' && (*this)[idx] <= '9'; } IsWhitespace(int idx)183 inline bool IsWhitespace(int idx) const 184 { 185 return (*this)[idx] == ' ' || // space 186 (*this)[idx] == '\f' || // form feed 187 (*this)[idx] == '\n' || // newline 188 (*this)[idx] == '\r' || // carriage return 189 (*this)[idx] == '\t' || // horizontal tab 190 (*this)[idx] == '\v'; // vertical tab 191 } 192 193 194 // Iterators Begin()195 Iterator Begin() { return Iterator(m_value); } Begin()196 ConstIterator Begin() const { return ConstIterator(m_value); } 197 198 protected: 199 // Internal Support Methods append(int size,const char * str)200 BasicString& append(int size, const char* str) 201 { 202 assert(size == 0 || str != NULL); 203 StringRepPtr newstr(new StringRep(size + GetSize())); 204 assert(newstr); 205 for (int i = 0; i < GetSize(); i++) newstr->operator[](i) = m_value->operator[](i); 206 for (int i = 0; i < size; i++) newstr->operator[](i + GetSize()) = str[i]; 207 m_value = newstr; 208 return (*this); 209 } 210 concat(int size,const char * str)211 BasicString concat(int size, const char* str) 212 { 213 if (size == 0) return BasicString(*this); 214 assert(str != NULL); 215 StringRepPtr newstr(new StringRep(size + GetSize())); 216 for (int i = 0; i < GetSize(); i++) newstr->operator[](i) = m_value->operator[](i); 217 for (int i = 0; i < size; i++) newstr->operator[](i + GetSize()) = str[i]; 218 return BasicString(newstr); 219 } 220 221 222 class StringRep : public TypeSelect<ThreadingModel::UseThreadSafe, MTRefCountObject, RefCountObject>::Result 223 { 224 private: 225 int m_size; 226 char* m_data; 227 228 public: m_size(size)229 inline explicit StringRep(int size = 0) : m_size(size), m_data(new char[size + 1]) 230 { 231 assert(m_data); 232 m_data[0] = '\0'; 233 m_data[size] = '\0'; 234 } StringRep(int size,const char * str)235 inline StringRep(int size, const char* str) : m_size(size), m_data(new char[size + 1]) 236 { 237 assert(m_data); 238 memcpy(m_data, str, m_size); 239 m_data[size] = '\0'; 240 } StringRep(const StringRep & rhs)241 inline StringRep(const StringRep& rhs) : m_size(rhs.m_size), m_data(new char[m_size + 1]) 242 { 243 assert(m_data); 244 memcpy(m_data, rhs.m_data, m_size); 245 m_data[m_size] = '\0'; 246 } 247 ~StringRep()248 ~StringRep() { delete [] m_data; } 249 GetSize()250 inline int GetSize() const { return m_size; } GetRep()251 inline const char* GetRep() const { return m_data; } 252 253 char operator[](int index) const { return m_data[index]; } 254 char& operator[](int index) { return m_data[index]; } 255 }; 256 257 258 public: 259 class Iterator 260 { 261 friend class BasicString<ThreadingModel>; 262 263 private: 264 StringRepPtr m_value; 265 int m_index; 266 Iterator(StringRepPtr value)267 inline Iterator(StringRepPtr value) : m_value(value), m_index(-1) { ; } 268 269 public: Get()270 inline const char* Get() 271 { 272 return (m_index < 0 || m_index >= m_value->GetSize()) ? NULL : &m_value->operator[](m_index); 273 } 274 Next()275 inline const char* Next() 276 { 277 if (m_index == m_value->GetSize()) return NULL; 278 return (++m_index == m_value->GetSize()) ? NULL : &m_value->operator[](m_index); 279 } 280 }; 281 282 class ConstIterator 283 { 284 friend class BasicString<ThreadingModel>; 285 286 private: 287 StringRepPtr m_value; 288 int m_index; 289 ConstIterator(StringRepPtr value)290 inline ConstIterator(StringRepPtr value) : m_value(value), m_index(-1) { ; } 291 292 public: Get()293 inline const char* Get() 294 { 295 return (m_index < 0 || m_index >= m_value->GetSize()) ? NULL : &m_value->operator[](m_index); 296 } 297 Next()298 inline const char* Next() 299 { 300 if (m_index == m_value->GetSize()) return NULL; 301 return (++m_index == m_value->GetSize()) ? NULL : &m_value->operator[](m_index); 302 } 303 }; 304 }; 305 306 template <class ThreadingModel> 307 inline bool operator==(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs == lhs; } 308 template <class ThreadingModel> 309 inline bool operator!=(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs != lhs; } 310 311 template <class ThreadingModel> 312 inline bool operator<(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs > lhs; } 313 template <class ThreadingModel> 314 inline bool operator>(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs < lhs; } 315 316 template <class ThreadingModel> 317 inline bool operator<=(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs >= lhs; } 318 template <class ThreadingModel> 319 inline bool operator>=(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs <= lhs; } 320 321 322 // Basic String Hashing Support 323 // -------------------------------------------------------------------------------------------------------------- 324 325 // HASH_TYPE = BasicString<ThreadingModel> 326 // We hash a string simply by adding up the individual character values in 327 // that string and modding by the hash size. For most applications this 328 // will work fine (and reasonably fast!) but some patterns will cause all 329 // strings to go into the same cell. For example, "ABC"=="CBA"=="BBB". 330 template <class T, int HashFactor> class HashKey; 331 template <class ThreadingModel, int HashFactor> class HashKey<BasicString<ThreadingModel>, HashFactor> 332 { 333 public: Hash(const BasicString<ThreadingModel> & key)334 static int Hash(const BasicString<ThreadingModel>& key) 335 { 336 unsigned int out_hash = 0; 337 for (int i = 0; i < key.GetSize(); i++) 338 out_hash += (unsigned int) key[i]; 339 return out_hash % HashFactor; 340 } 341 }; 342 343 344 // Apto::String 345 // -------------------------------------------------------------------------------------------------------------- 346 347 typedef BasicString<> String; 348 }; 349 350 #endif 351