1 
2 //=============================================================================
3 //
4 // Adventure Game Studio (AGS)
5 //
6 // Copyright (C) 1999-2011 Chris Jones and 2011-20xx others
7 // The full list of copyright holders can be found in the Copyright.txt
8 // file, which is part of this source code distribution.
9 //
10 // The AGS source code is provided under the Artistic License 2.0.
11 // A copy of this license can be found in the file License.txt and at
12 // http://www.opensource.org/licenses/artistic-license-2.0.php
13 //
14 //=============================================================================
15 //
16 // String class with simple memory management and copy-on-write behavior.
17 //
18 // String objects do reference counting and share data buffer on assignment.
19 // The reallocation and copying is done only when the string is modified.
// This means that passing a string object by value is about as efficient as
// passing it by reference.
22 //
23 // The copying of memory inside buffer is reduced to minimum. If the string is
24 // truncated, it is not aligned to buffer head each time, instead the c-str
25 // pointer is advanced, or null-terminator is put on the new place. Similarly,
26 // when string is enlarged and new characters are prepended or appended, only
27 // c-str pointer and null-terminator's position are changed, if there's enough
28 // space before and after meaningful string data.
29 //
30 // The class provides means to reserve large amount of buffer space before
31 // making modifications, as well as compacting buffer to minimal size.
32 //
33 // For all methods that expect C-string as parameter - if the null pointer is
34 // passed in place of C-string it is treated in all aspects as a valid empty
35 // string.
36 //
37 //=============================================================================
38 #ifndef __AGS_CN_UTIL__STRING_H
39 #define __AGS_CN_UTIL__STRING_H
40 
41 #include <stdarg.h>
42 #include "core/types.h"
43 #include "debug/assert.h"
44 
namespace AGS
{
namespace Common
{

// Forward declaration: streams are only referenced through pointers here
class Stream;

// String class whose data is reached through a shared header (see Header
// below) carrying a reference count, capacity, length and the C-string
// pointer. A String object itself stores just one pointer.
//
// For all methods that take a C-string: a null pointer is meant to be
// treated as a valid empty string.
//
// Several methods use static_cast<size_t>(-1), i.e. the largest size_t
// value, as a default 'count' / 'from' / 'length' argument; presumably it
// means "as much as available" - confirm against the implementation.
class String
{
public:
    // Standard constructor: initialize empty string
    String();
    // Copy constructor
    String(const String&);
    // Initialize with C-string
    String(const char *cstr);
    // Initialize by copying up to N chars from C-string
    String(const char *cstr, size_t length);
    // Initialize by filling N chars with certain value
    String(char c, size_t count);
    ~String();

    // Get underlying C-string for reading; never returns null: a string
    // without allocated header yields an empty literal
    inline const char *GetCStr() const
    {
        return _meta ? _meta->CStr : "";
    }
    // Get character count (0 if there is no header)
    inline size_t GetLength() const
    {
        return _meta ? _meta->Length : 0;
    }
    // Know if the string is empty (has no meaningful characters)
    inline bool IsEmpty() const
    {
        return !_meta || _meta->Length == 0;
    }

    // Those getters are for tests only, hence ifdef _DEBUG
#ifdef _DEBUG
    // Raw pointer held by the string object; it shares storage with the
    // header pointer through the union declared below
    inline const char *GetData() const
    {
        return _data;
    }

    // Available space in characters, as recorded in the header
    inline size_t GetCapacity() const
    {
        return _meta ? _meta->Capacity : 0;
    }

    // Current reference count, as recorded in the header
    inline size_t GetRefCount() const
    {
        return _meta ? _meta->RefCount : 0;
    }
#endif

    // Read() method implies that string length is initially unknown.
    // max_chars parameter determines the buffer size limit.
    // If stop_at_limit flag is set, it will read only up to the max_chars.
    // Otherwise (by default) hitting the limit won't stop stream reading;
    // the data will be read until null-terminator or EOS is met, and buffer
    // will contain only leftmost part of the longer string that fits in.
    // This method is better fit for reading from binary streams.
    void    Read(Stream *in, size_t max_chars = 5000000, bool stop_at_limit = false);
    // ReadCount() reads up to N characters from stream, ignoring null-
    // terminator. This method is better fit for reading from text
    // streams, or when the length of string is known beforehand.
    void    ReadCount(Stream *in, size_t count);
    // Write() puts the null-terminated string into the stream.
    void    Write(Stream *out) const;
    // WriteCount() writes N characters to stream, filling the remaining
    // space with null-terminators when needed.
    void    WriteCount(Stream *out, size_t count) const;

    // Static counterpart of Write() taking a plain C-string
    // NOTE(review): exact output format is defined in the implementation
    static void WriteString(const char *cstr, Stream *out);

    //-------------------------------------------------------------------------
    // String analysis methods
    //-------------------------------------------------------------------------

    // Compares with given C-string
    int     Compare(const char *cstr) const;
    int     CompareNoCase(const char *cstr) const;
    // Compares the leftmost part of this string with given C-string
    int     CompareLeft(const char *cstr, size_t count = static_cast<size_t>(-1)) const;
    int     CompareLeftNoCase(const char *cstr, size_t count = static_cast<size_t>(-1)) const;
    // Compares any part of this string with given C-string
    int     CompareMid(const char *cstr, size_t from, size_t count = static_cast<size_t>(-1)) const;
    int     CompareMidNoCase(const char *cstr, size_t from, size_t count = static_cast<size_t>(-1)) const;
    // Compares the rightmost part of this string with given C-string
    int     CompareRight(const char *cstr, size_t count = static_cast<size_t>(-1)) const;
    int     CompareRightNoCase(const char *cstr, size_t count = static_cast<size_t>(-1)) const;

    // These functions search for character or substring inside this string
    // and return the index of the (first) character, or -1 if nothing found.
    // The return type is unsigned, so "-1" here is the largest size_t value;
    // compare the result against static_cast<size_t>(-1).
    size_t  FindChar(char c, size_t from = 0) const;
    size_t  FindCharReverse(char c, size_t from = static_cast<size_t>(-1)) const;
    size_t  FindString(const char *cstr, size_t from = 0) const;

    // Section methods treat string as a sequence of 'fields', separated by
    // special character. They search for a substring consisting of all such
    // 'fields' from the 'first' to the 'last', inclusive; the bounding
    // separators are optionally included too.
    // Section indexes are zero-based. The first (0th) section is always
    // located before the first separator and the last section is always
    // located after the last separator, meaning that if the outermost
    // character in string is separator char, there's still an empty trailing
    // field beyond that.
    // This also means that there's always at least one section in any string,
    // even if there are no separating chars.
    bool    FindSection(char separator, size_t first, size_t last, bool exclude_first_sep, bool exclude_last_sep,
                        size_t &from, size_t &to) const;

    // Get Nth character with bounds check (as opposed to subscript operator);
    // returns 0 when the index is out of range
    inline char GetAt(size_t index) const
    {
        return (_meta && index < _meta->Length) ? _meta->CStr[index] : 0;
    }
    // Get the last character, or 0 if the string is empty
    inline char GetLast() const
    {
        return (_meta && _meta->Length > 0) ? _meta->CStr[_meta->Length - 1] : 0;
    }

    //-------------------------------------------------------------------------
    // Value cast methods
    //-------------------------------------------------------------------------

    int     ToInt() const;

    //-------------------------------------------------------------------------
    // Factory methods
    //-------------------------------------------------------------------------

    static String FromFormat(const char *fcstr, ...);
    static String FromFormatV(const char *fcstr, va_list argptr);
    // Reads stream until null-terminator or EOS
    static String FromStream(Stream *in, size_t max_chars = 5000000, bool stop_at_limit = false);
    // Reads up to N chars from stream
    static String FromStreamCount(Stream *in, size_t count);

    // Creates a lowercased copy of the string
    String  Lower() const;
    // Creates an uppercased copy of the string
    String  Upper() const;

    // Extract N leftmost characters as a new string
    String  Left(size_t count) const;
    // Extract up to N characters starting from given index
    String  Mid(size_t from, size_t count = static_cast<size_t>(-1)) const;
    // Extract N rightmost characters
    String  Right(size_t count) const;

    // Extract leftmost part, separated by the given char; if no separator was
    // found returns the whole string
    String  LeftSection(char separator, bool exclude_separator = true) const;
    // Extract rightmost part, separated by the given char; if no separator was
    // found returns the whole string
    String  RightSection(char separator, bool exclude_separator = true) const;
    // Extract the range of Xth to Yth fields, separated by the given character
    String  Section(char separator, size_t first, size_t last,
                              bool exclude_first_sep = true, bool exclude_last_sep = true) const;

    //-------------------------------------------------------------------------
    // String modification methods
    //-------------------------------------------------------------------------

    // Ensure string has at least space to store N chars;
    // this does not change string contents, nor length
    void    Reserve(size_t max_length);
    // Ensure string has at least space to store N additional chars
    void    ReserveMore(size_t more_length);
    // Make string's buffer as small as possible to hold current data
    void    Compact();

    // Append* methods add content at the string's end, increasing its length
    // Add C-string at string's end
    void    Append(const char *cstr);
    // Add single character at string's end
    void    AppendChar(char c);
    // Clip* methods decrease the string, removing defined part
    // Cuts off leftmost N characters
    void    ClipLeft(size_t count);
    // Cuts out N characters starting from given index
    void    ClipMid(size_t from, size_t count = static_cast<size_t>(-1));
    // Cuts off rightmost N characters
    void    ClipRight(size_t count);
    // Cuts off leftmost part, separated by the given char; if no separator was
    // found cuts whole string, leaving empty string
    void    ClipLeftSection(char separator, bool include_separator = true);
    // Cuts off rightmost part, separated by the given char; if no separator
    // was found cuts whole string, leaving empty string
    void    ClipRightSection(char separator, bool include_separator = true);
    // Cuts out the range of Xth to Yth fields separated by the given character
    void    ClipSection(char separator, size_t first, size_t last,
                              bool include_first_sep = true, bool include_last_sep = true);
    // Sets string length to zero
    void    Empty();
    // Makes a new string by filling N chars with certain value
    void    FillString(char c, size_t count);
    // Makes a new string by putting in parameters according to format string
    void    Format(const char *fcstr, ...);
    void    FormatV(const char *fcstr, va_list argptr);
    // Decrement ref counter and deallocate data if must.
    // Free() should be called only when buffer is not needed anymore;
    // if string must be truncated to zero length, but retain the allocated
    // memory, call Empty() instead.
    void    Free();
    // Convert string to lowercase equivalent
    void    MakeLower();
    // Convert string to uppercase equivalent
    void    MakeUpper();
    // Prepend* methods add content before the string's head, increasing its length
    // Add C-string before string's head
    void    Prepend(const char *cstr);
    // Add single character before string's head
    void    PrependChar(char c);
    // Replaces all occurrences of one character with another character
    void    Replace(char what, char with);
    // Replaces particular substring with another substring; new substring
    // may have different length
    void    ReplaceMid(size_t from, size_t count, const char *cstr);
    // Overwrite the Nth character of the string; does not change string's length
    void    SetAt(size_t index, char c);
    // Makes a new string by copying up to N chars from C-string
    void    SetString(const char *cstr, size_t length = static_cast<size_t>(-1));
    // For all Trim functions, if given character value is 0, all whitespace
    // characters (space, tabs, CRLF) are removed.
    // Remove heading and trailing characters from the string
    void    Trim(char c = 0);
    // Remove heading characters from the string
    void    TrimLeft(char c = 0);
    // Remove trailing characters from the string
    void    TrimRight(char c = 0);
    // Truncate* methods decrease the string to the part of itself
    // Truncate the string to the leftmost N characters
    void    TruncateToLeft(size_t count);
    // Truncate the string to the middle N characters
    void    TruncateToMid(size_t from, size_t count = static_cast<size_t>(-1));
    // Truncate the string to the rightmost N characters
    void    TruncateToRight(size_t count);
    // Truncate the string to the leftmost part, separated by the given char;
    // if no separator was found leaves string unchanged
    void    TruncateToLeftSection(char separator, bool exclude_separator = true);
    // Truncate the string to the rightmost part, separated by the given char;
    // if no separator was found leaves string unchanged
    void    TruncateToRightSection(char separator, bool exclude_separator = true);
    // Truncate the string to range of Xth to Yth fields separated by the
    // given character
    void    TruncateToSection(char separator, size_t first, size_t last,
                              bool exclude_first_sep = true, bool exclude_last_sep = true);

    //-------------------------------------------------------------------------
    // Operators
    //-------------------------------------------------------------------------

    // Implicit conversion to a read-only C-string (same as GetCStr())
    inline operator const char *() const
    {
        return GetCStr();
    }
    // Assign String by sharing data reference
    String &operator=(const String&);
    // Assign C-string by copying contents
    String &operator=(const char *cstr);
    // Unchecked element access: the index is validated by assert only, so
    // the caller must guarantee index < GetLength(); use GetAt() when a
    // bounds check is required
    inline char operator[](size_t index) const
    {
        assert(_meta && index < _meta->Length);
        return _meta->CStr[index];
    }
    // Comparison operators are defined in terms of Compare()
    inline bool operator==(const char *cstr) const
    {
        return Compare(cstr) == 0;
    }
    inline bool operator!=(const char *cstr) const
    {
        return Compare(cstr) != 0;
    }
    inline bool operator <(const char *cstr) const
    {
        return Compare(cstr) < 0;
    }

private:
    // Creates new empty string with buffer enough to fit given length
    void    Create(size_t buffer_length);
    // Release string and copy data to the new buffer
    void    Copy(size_t buffer_length, size_t offset = 0);
    // Aligns data at given offset
    void    Align(size_t offset);

    // Ensure this string is a compact independent copy, with ref counter = 1
    void    BecomeUnique();
    // Ensure this string is independent, and there's enough space before
    // or after the current string data
    void    ReserveAndShift(bool left, size_t more_length);

    // Bookkeeping record describing the string's buffer
    struct Header
    {
        Header();

        size_t  RefCount;   // reference count
        // Capacity and Length do not include null-terminator
        size_t  Capacity;   // available space, in characters
        size_t  Length;     // used space
        char    *CStr;      // pointer to string data start
    };

    // Both members name the same stored pointer, viewed either as raw bytes
    // or as the header.
    // NOTE(review): presumably the header sits at the start of the allocated
    // block and is followed by the characters - confirm in implementation.
    union
    {
        char    *_data;
        Header  *_meta;
    };

    // Static buffer shared by all strings; its size is derived from the
    // length constant (one extra element, presumably for the null-terminator)
    // so the two can never get out of sync.
    static const size_t _internalBufferLength = 3000;
    static char _internalBuffer[_internalBufferLength + 1];
};

} // namespace Common
} // namespace AGS
363 
364 #endif // __AGS_CN_UTIL__STRING_H
365