1 /*
2  *  String.h
3  *  Apto
4  *
5  *  Created by David on 2/10/11.
6  *  Copyright 2011 David Michael Bryson. All rights reserved.
7  *  http://programerror.com/software/apto
8  *
9  *  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
10  *  following conditions are met:
11  *
12  *  1.  Redistributions of source code must retain the above copyright notice, this list of conditions and the
13  *      following disclaimer.
14  *  2.  Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
15  *      following disclaimer in the documentation and/or other materials provided with the distribution.
16  *  3.  Neither the name of David Michael Bryson, nor the names of contributors may be used to endorse or promote
17  *      products derived from this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY DAVID MICHAEL BRYSON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  *  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  *  DISCLAIMED. IN NO EVENT SHALL DAVID MICHAEL BRYSON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  *  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24  *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25  *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  *  Authors: David M. Bryson <david@programerror.com>
28  *
29  */
30 
31 #ifndef AptoCoreString_h
32 #define AptoCoreString_h
33 
34 #include "apto/core/Definitions.h"
35 #include "apto/core/RefCount.h"
36 #include "apto/core/SmartPtr.h"
37 #include "apto/core/TypeUtil.h"
38 
39 #include <cassert>
40 #include <string.h>
41 
42 
43 namespace Apto {
44 
45   // Basic String
46   // --------------------------------------------------------------------------------------------------------------
47 
48   template <class ThreadingModel = SingleThreaded> class BasicString
49   {
50   public:
51     typedef char ValueType;
52     class Iterator;
53     class ConstIterator;
54 
55   protected:
56     class StringRep;
57     typedef SmartPtr<StringRep, InternalRCObject> StringRepPtr;
58 
59   protected:
60     StringRepPtr m_value;
61 
BasicString(StringRepPtr rep)62     BasicString(StringRepPtr rep) : m_value(rep) { ; }
63 
64   public:
65     // Construction
66     BasicString(const char* str = "")
67       : m_value((str) ? new StringRep(static_cast<int>(strlen(str)), str) : new StringRep(0)) { assert(m_value); }
BasicString(int size,const char * str)68     BasicString(int size, const char* str) : m_value(new StringRep(size, str)) { assert(m_value); }
BasicString(const BasicString & rhs)69     BasicString(const BasicString& rhs) : m_value(rhs.m_value) { ; }
BasicString(const BasicString<T1> & rhs)70     template <class T1> BasicString(const BasicString<T1>& rhs) : m_value(new StringRep(rhs.GetSize(), rhs.GetData())) { ; }
71 
~BasicString()72     ~BasicString() { ; }
73 
74 
75     // Property Access
GetSize()76     inline int GetSize() const { return m_value->GetSize(); }
GetData()77     inline const char* GetData() const { return m_value->GetRep(); }
GetCString()78     inline const char* GetCString() const { return m_value->GetRep(); }
79     inline operator const char*() const { return m_value->GetRep(); }
80 
81 
82     // Assignment
83     inline BasicString& operator=(const BasicString& rhs) { m_value = rhs.m_value; return *this; }
84     template <class T1> inline BasicString& operator=(const BasicString<T1>& rhs)
85     {
86       m_value = new StringRep(rhs.GetSize(), rhs.GetData());
87       assert(m_value);
88       return *this;
89     }
90 
91     inline BasicString& operator=(const char* rhs)
92     {
93       assert(rhs);
94       m_value = StringRepPtr(new StringRep(static_cast<int>(strlen(rhs)), rhs));
95       assert(m_value);
96       return *this;
97     }
98 
99 
100     // Comparison Operators
Compare(const char * str)101     int Compare(const char* str) const
102     {
103       assert(str);
104       int i;
105       for (i = 0; i < GetSize() && str[i] != '\0' && (*this)[i] == str[i]; i++) ;
106 
107       if (i == GetSize() && str[i] == '\0') return 0;
108       if (i < GetSize() && str[i] < (*this)[i]) return 1;
109       return -1;
110     }
111 
112     bool operator==(const BasicString& rhs) const
113     {
114       if (rhs.GetSize() != GetSize()) return false;
115       for (int i = 0; i < GetSize(); i++) if ((*this)[i] != rhs[i]) return false;
116       return true;
117     }
118     inline bool operator==(const char* rhs) const { return Compare(rhs) == 0; }
119 
120     template <class R>
121     inline bool operator!=(const BasicString<R>& rhs) const { return !operator==(rhs); }
122     inline bool operator!=(const char* rhs) const { return Compare(rhs) != 0; }
123 
124     template <class R>
125     inline bool operator<(const BasicString<R>& rhs) const { return Compare(rhs) < 0; }
126     inline bool operator<(const char* rhs) const { return Compare(rhs) < 0; }
127 
128     template <class R>
129     inline bool operator>(const BasicString<R>& rhs) const { return Compare(rhs) > 0; }
130     inline bool operator>(const char* rhs) const { return Compare(rhs) > 0; }
131 
132     template <class R>
133     inline bool operator<=(const BasicString<R>& rhs) const { return Compare(rhs) <= 0; }
134     inline bool operator<=(const char* rhs) const { return Compare(rhs) <= 0; }
135 
136     template <class R>
137     inline bool operator>=(const BasicString<R>& rhs) const { return Compare(rhs) >= 0; }
138     inline bool operator>=(const char* rhs) const { return Compare(rhs) >= 0; }
139 
140 
141     // Character access
142     inline char operator[](int index) const { return m_value->operator[](index); }
143 
144 
145     // Concatenation
146     inline BasicString& operator+=(const char c) { return append(1, &c); }
147     inline BasicString& operator+=(const char* str) { return append(static_cast<int>(strlen(str)), str); }
148     inline BasicString& operator+=(const BasicString& str) { return append(str.GetSize(), str.GetData()); }
149     inline BasicString operator+(const char c) { return concat(1, &c); }
150     inline BasicString operator+(const char* str) { return concat(static_cast<int>(strlen(str)), str); }
151     inline BasicString operator+(const BasicString& str) { return concat(str.GetSize(), str.GetData()); }
152 
153 
154     // Substring extraction
155     inline BasicString Substring(int idx = 0, int length = -1) const
156     {
157       assert(idx >= 0);
158       assert(idx < GetSize());
159       assert(length <= (GetSize() - idx));
160       return BasicString((length >= 0) ? length : (GetSize() - idx), m_value->GetRep() + idx);
161     }
162 
Pop(const char delimeter)163     BasicString Pop(const char delimeter)
164     {
165       StringRepPtr value = m_value;
166       for (int i = 0; i < GetSize(); i++) {
167         if (m_value->operator[](i) == delimeter) {
168           *this = Substring(i + 1);
169           return BasicString(i, value->GetRep());
170         }
171       }
172       BasicString rtn(*this);
173       *this = BasicString();
174       return rtn;
175     }
176 
177 
178     // Various Character Inspection Utility Methods
IsLetter(int idx)179     inline bool IsLetter(int idx) const { return IsUpper(idx) || IsLower(idx); }
IsLower(int idx)180     inline bool IsLower(int idx) const { return ((*this)[idx] >= 'a' && (*this)[idx] <= 'z'); }
IsUpper(int idx)181     inline bool IsUpper(int idx) const { return ((*this)[idx] >= 'A' && (*this)[idx] <= 'Z'); }
IsNumber(int idx)182     inline bool IsNumber(int idx) const { return (*this)[idx] >= '0' && (*this)[idx] <= '9'; }
IsWhitespace(int idx)183     inline bool IsWhitespace(int idx) const
184     {
185       return (*this)[idx] == ' ' ||   // space
186              (*this)[idx] == '\f' ||  // form feed
187              (*this)[idx] == '\n' ||  // newline
188              (*this)[idx] == '\r' ||  // carriage return
189              (*this)[idx] == '\t' ||  // horizontal tab
190              (*this)[idx] == '\v';    // vertical tab
191     }
192 
193 
194     // Iterators
Begin()195     Iterator Begin() { return Iterator(m_value); }
Begin()196     ConstIterator Begin() const { return ConstIterator(m_value); }
197 
198   protected:
199     // Internal Support Methods
append(int size,const char * str)200     BasicString& append(int size, const char* str)
201     {
202       assert(size == 0 || str != NULL);
203       StringRepPtr newstr(new StringRep(size + GetSize()));
204       assert(newstr);
205       for (int i = 0; i < GetSize(); i++) newstr->operator[](i) = m_value->operator[](i);
206       for (int i = 0; i < size; i++) newstr->operator[](i + GetSize()) = str[i];
207       m_value = newstr;
208       return (*this);
209     }
210 
concat(int size,const char * str)211     BasicString concat(int size, const char* str)
212     {
213       if (size == 0) return BasicString(*this);
214       assert(str != NULL);
215       StringRepPtr newstr(new StringRep(size + GetSize()));
216       for (int i = 0; i < GetSize(); i++) newstr->operator[](i) = m_value->operator[](i);
217       for (int i = 0; i < size; i++) newstr->operator[](i + GetSize()) = str[i];
218       return BasicString(newstr);
219     }
220 
221 
222     class StringRep : public TypeSelect<ThreadingModel::UseThreadSafe, MTRefCountObject, RefCountObject>::Result
223     {
224     private:
225       int m_size;
226       char* m_data;
227 
228     public:
m_size(size)229       inline explicit StringRep(int size = 0) : m_size(size), m_data(new char[size + 1])
230       {
231         assert(m_data);
232         m_data[0] = '\0';
233         m_data[size] = '\0';
234       }
StringRep(int size,const char * str)235       inline StringRep(int size, const char* str) : m_size(size), m_data(new char[size + 1])
236       {
237         assert(m_data);
238         memcpy(m_data, str, m_size);
239         m_data[size] = '\0';
240       }
StringRep(const StringRep & rhs)241       inline StringRep(const StringRep& rhs) : m_size(rhs.m_size), m_data(new char[m_size + 1])
242       {
243         assert(m_data);
244         memcpy(m_data, rhs.m_data, m_size);
245         m_data[m_size] = '\0';
246       }
247 
~StringRep()248       ~StringRep() { delete [] m_data; }
249 
GetSize()250       inline int GetSize() const { return m_size; }
GetRep()251       inline const char* GetRep() const { return m_data; }
252 
253       char operator[](int index) const { return m_data[index]; }
254       char& operator[](int index) { return m_data[index]; }
255     };
256 
257 
258   public:
259     class Iterator
260     {
261       friend class BasicString<ThreadingModel>;
262 
263     private:
264       StringRepPtr m_value;
265       int m_index;
266 
Iterator(StringRepPtr value)267       inline Iterator(StringRepPtr value) : m_value(value), m_index(-1) { ; }
268 
269     public:
Get()270       inline const char* Get()
271       {
272         return (m_index < 0 || m_index >= m_value->GetSize()) ? NULL : &m_value->operator[](m_index);
273       }
274 
Next()275       inline const char* Next()
276       {
277         if (m_index == m_value->GetSize()) return NULL;
278         return (++m_index == m_value->GetSize()) ? NULL : &m_value->operator[](m_index);
279       }
280     };
281 
282     class ConstIterator
283     {
284       friend class BasicString<ThreadingModel>;
285 
286     private:
287       StringRepPtr m_value;
288       int m_index;
289 
ConstIterator(StringRepPtr value)290       inline ConstIterator(StringRepPtr value) : m_value(value), m_index(-1) { ; }
291 
292     public:
Get()293       inline const char* Get()
294       {
295         return (m_index < 0 || m_index >= m_value->GetSize()) ? NULL : &m_value->operator[](m_index);
296       }
297 
Next()298       inline const char* Next()
299       {
300         if (m_index == m_value->GetSize()) return NULL;
301         return (++m_index == m_value->GetSize()) ? NULL : &m_value->operator[](m_index);
302       }
303     };
304   };
305 
306   template <class ThreadingModel>
307   inline bool operator==(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs == lhs; }
308   template <class ThreadingModel>
309   inline bool operator!=(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs != lhs; }
310 
311   template <class ThreadingModel>
312   inline bool operator<(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs > lhs; }
313   template <class ThreadingModel>
314   inline bool operator>(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs < lhs; }
315 
316   template <class ThreadingModel>
317   inline bool operator<=(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs >= lhs; }
318   template <class ThreadingModel>
319   inline bool operator>=(const char* lhs, const BasicString<ThreadingModel>& rhs) { return rhs <= lhs; }
320 
321 
322   // Basic String Hashing Support
323   // --------------------------------------------------------------------------------------------------------------
324 
325   // HASH_TYPE = BasicString<ThreadingModel>
326   // We hash a string simply by adding up the individual character values in
327   // that string and modding by the hash size.  For most applications this
328   // will work fine (and reasonably fast!) but some patterns will cause all
329   // strings to go into the same cell.  For example, "ABC"=="CBA"=="BBB".
330   template <class T, int HashFactor> class HashKey;
331   template <class ThreadingModel, int HashFactor> class HashKey<BasicString<ThreadingModel>, HashFactor>
332   {
333   public:
Hash(const BasicString<ThreadingModel> & key)334     static int Hash(const BasicString<ThreadingModel>& key)
335     {
336       unsigned int out_hash = 0;
337       for (int i = 0; i < key.GetSize(); i++)
338         out_hash += (unsigned int) key[i];
339       return out_hash % HashFactor;
340     }
341   };
342 
343 
344   // Apto::String
345   // --------------------------------------------------------------------------------------------------------------
346 
347   typedef BasicString<> String;
348 };
349 
350 #endif
351