1 /*
2 * cString.h
3 * Avida
4 *
5 * Called "cstringh" prior to 12/7/05.
6 * Copyright 1999-2011 Michigan State University. All rights reserved.
7 * Copyright 1993-2003 California Institute of Technology.
8 *
9 *
10 * This file is part of Avida.
11 *
12 * Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14 *
15 * Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License along with Avida.
19 * If not, see <http://www.gnu.org/licenses/>.
20 *
21 */
22
23 #ifndef cString_h
24 #define cString_h
25
26 #include <cstdlib>
27 #include <iostream>
28 #include <cstdarg>
29 #include <string>
30 #include <cstring>
31 #include <cassert>
32
33 #include "cRCObject.h"
34 #include "tRCPtr.h"
35
// Length limit for strings. Not referenced anywhere in this header;
// NOTE(review): presumably used as a buffer size by the implementation file -- confirm.
#define MAX_STRING_LENGTH 4096
// Backslash character. NOTE(review): presumably the continuation character
// that cString::IsContinueLine() looks for -- confirm against its definition.
#define CONTINUE_LINE_CHAR '\\'
38
39 /**
40 * A multipurpose string class with many convenient methods of
41 * manipulating and comparing strings.
42 **/
43
44 class cString
45 {
46 protected:
// Called before any in-place modification: when value->SetExclusive()
// fails, 'value' is replaced by a freshly allocated copy of its data.
// The definition lives at the bottom of this header.
inline void CopyOnWrite();
48
49 // -- Contained Classes --
50 private:
51 // Declarations (only needed)
52 class cStringData;
53
54 // cCharProxy -- To detect rvalue vs lvalue ---------------------
55 class cCharProxy
56 {
57 private:
58 cString& string;
59 int index;
60
61 public:
cCharProxy(cString & _string,int _index)62 cCharProxy(cString& _string, int _index) : string(_string), index(_index) { ; }
63
64 inline cCharProxy& operator=(char c); // lvalue
65 inline cCharProxy& operator+=(char c); // lvalue
66 inline cCharProxy& operator-=(char c); // lvalue
67 inline cCharProxy& operator++(); // lvalue (prefix)
68 inline char operator++(int dummy); // lvalue (postfix)
69 inline cCharProxy& operator--(); // lvalue (prefix)
70 inline char operator--(int dummy); // lvalue (postfix)
71 inline operator char () const; // rvalue
72 };
73 friend class cCharProxy; // Telling rvalue vs lvalue ....
74
75 // cStringData -- Holds the actual data and is reference count --
76 class cStringData : public cRCObject
77 {
78 // NOTE: Terminating NULL is always there (you can't assign!!)
79 private:
80 int m_size; // size of data (NOT INCLUDING TRAILING NULL)
81 char* m_data;
82
83 cStringData(); // @not_implemented
84
85 public:
86 explicit cStringData(int in_size);
87 cStringData(int in_size, const char* in);
88 cStringData(const cStringData& in);
89
~cStringData()90 ~cStringData()
91 {
92 delete [] m_data;
93 }
94
95 cStringData& operator=(const cStringData& in)
96 {
97 delete [] m_data;
98 m_size = in.GetSize();
99 m_data = new char [m_size + 1];
100 assert(m_data != NULL); // Memory Allocation Error: Out of Memory
101 for(int i = 0; i < m_size; ++i) m_data[i] = in[i];
102 m_data[m_size] = '\0';
103 return (*this);
104 }
105
GetSize()106 int GetSize() const { return m_size; }
GetData()107 const char* GetData() const { return m_data; }
108
109 char operator[] (int index) const
110 {
111 assert(index >= 0); // Lower Bounds Error
112 assert(index <= m_size); // Upper Bounds Error
113 return m_data[index];
114 }
115
116 char& operator[](int index)
117 {
118 assert(index >= 0); // Lower Bounds Error
119 assert(index <= m_size); // Upper Bounds Error
120 assert(index != m_size); // Cannot Change Terminating NULL
121 return m_data[index];
122 }
123 };
124
125 public:
126 cString(const char* in_str = "")
127 {
128 if (in_str) {
129 value = new cStringData(strlen(in_str), in_str);
130 } else {
131 value = new cStringData(0, "");
132 }
133 assert( value ); // Memory Allocation Error: Out of Memory
134 }
cString(const char * in,int in_size)135 cString(const char* in, int in_size) : value(new cStringData(in_size, in))
136 {
137 assert(in_size >= 0);
138 assert( in != NULL ); // NULL input string
139 assert( value ); // Memory Allocation Error: Out of Memory
140 }
cString(const int size)141 explicit cString(const int size) : value(new cStringData(size))
142 {
143 assert( value ); // Memory Allocation Error: Out of Memory
144 }
cString(const cString & in_str)145 cString(const cString& in_str) :value(in_str.value) { ; }
146
// Nothing to do explicitly; the 'value' member cleans up after itself.
~cString()
{
}
148
149
150 // Cast to const char *
151 operator const char* () const { return value->GetData(); }
152
153 // Assignment Operators
154 cString& operator=(const cString& in_str) { value = in_str.value; return *this; }
155 cString& operator=(const char* in)
156 {
157 assert( in != NULL ); // NULL input string
158 value = new cStringData(strlen(in),in);
159 assert(value); // Memory Allocation Error: Out of Memory
160 return *this;
161 }
162
163
164 /**
165 * Get the size of the string (not including the terminating '\0').
166 **/
GetSize()167 int GetSize() const { return value->GetSize(); }
168
169 /**
170 * Get the string
171 **/
GetData()172 const char* GetData() const { return value->GetData(); }
173
174
175 // Comparisons
176 int Compare(const char * in) const; // strcmp like function
177 bool operator== (const char * in) const { return (Compare(in)==0); }
178 bool operator== (const cString & in) const; // A bit optimized
179 bool operator!= (const char * in) const { return !(*this==in); }
180 bool operator< (const char * in) const { return (Compare(in)<0); }
181 bool operator> (const char * in) const { return (Compare(in)>0); }
182 bool operator<= (const char * in) const { return (Compare(in)<=0); }
183 bool operator>= (const char * in) const { return (Compare(in)>=0); }
184
185 // Concatenation
186 cString & operator+= (const char in) { return AppendStr(1,&in); }
187 cString & operator+= (const char * in){ return AppendStr(strlen(in),in); }
188 cString & operator+= (const cString & in){return AppendStr(in.GetSize(),in);}
189 cString operator+ (const char in_char){ return (cString(*this) += in_char); }
190 cString operator+ (const char * in) { return (cString(*this) += in); }
191 cString operator+ (const cString & in){ return (cString(*this) += in); }
192
193
// Additional modifiers

// Set the string from a format string plus arguments, given either as a
// variable argument list or as an already started va_list.
// NOTE(review): presumably printf-style formatting -- confirm in the
// implementation file.
cString& Set(const char* fmt, ...);
cString& Set(const char* fmt, va_list args);
197
198 cString& Insert(const char in, int pos = 0, int excise = 0) { return InsertStr(1, &in, pos, excise); }
199 cString& Insert(const char* in, int pos = 0, int excise = 0) { return InsertStr(strlen(in), in, pos, excise); }
200 cString& Insert(const cString& in, int pos = 0, int excise = 0) { return InsertStr(in.GetSize(), in, pos, excise); }
201
202
203 // Removes 'size' characters from 'pos' (default size = to end of string)
204 cString& Clip(int pos, int size = -1 /*end of string*/ )
205 { if( size < 0 ) size = GetSize() - pos; return InsertStr(0, NULL, pos, size); }
ClipFront(int size)206 cString& ClipFront(int size) { /* Clip off first 'clip_size' chars */ return InsertStr(0, NULL, 0, size); }
ClipEnd(int size)207 cString& ClipEnd(int size) { /* Clip off last 'clip_size' chars */ return InsertStr(0, NULL, GetSize() - size, size); }
208
/**
 * Find and replace a substring in the string with a different substring.
 * If the substring is not found, the string object is not changed.
 *
 * @return The position at which the substring was found, or -1 if it wasn't found.
 * @param old_st The substring that is going to be replaced.
 * @param new_st The replacement.
 * @param pos The position at which the search should start.
 **/
int Replace(const cString& old_st, const cString& new_st, int pos = 0);
219
/**
 * Remove and return the part of the string up to the delimiter character.
 *
 * @return The removed text.
 * @param delim The delimiter character.
 **/
cString Pop(const char delim);

/**
 * Remove the first word.
 *
 * @return The removed word.
 **/
cString PopWord();
228
229 /**
230 * Remove the first line.
231 *
232 * @return The removed line.
233 **/
PopLine()234 cString PopLine() { return Pop('\n'); }
235
/**
 * Remove beginning whitespace.
 *
 * @return The number of characters removed.
 **/
int LeftJustify();

/**
 * Remove ending whitespace.
 *
 * @return The number of characters removed.
 **/
int RightJustify();

/**
 * Remove beginning and ending whitespace.
 **/
void Trim();

/**
 * Reverse the order of the characters in the string.
 **/
void Reverse();
259
/**
 * Convert the string to lowercase.
 *
 * @return A cString reference (NOTE(review): presumably *this -- confirm).
 **/
cString& ToLower();

/**
 * Convert the string to uppercase.
 *
 * @return A cString reference (NOTE(review): presumably *this -- confirm).
 **/
cString& ToUpper();
269
/**
 * Replace all blocks of whitespace with a single space (' ').
 *
 * @see cString::IsWhitespace()
 **/
void CompressWhitespace();

/**
 * Get rid of all(!) whitespace.
 *
 * @see cString::IsWhitespace()
 **/
void RemoveWhitespace();

/**
 * Get rid of all occurrences of a specific character.
 *
 * @param out_char The character to remove.
 * @see cString::RemoveWhitespace()
 **/
void RemoveChar(char out_char);

/**
 * Get rid of one character at a specific location.
 *
 * @param pos The position of the character to remove.
 **/
void RemovePos(int pos);
295
296
// Parse for and replace escape sequences within the string.
// NOTE(review): the set of recognized sequences is defined in the
// implementation file; the returned reference is presumably *this.
cString& ParseEscapeSequences();
299
300
301 // Individal Char Access
302 inline char operator[] (int index) const { return static_cast<char>((*value)[index]); }
303 cCharProxy operator[] (int index) { return cCharProxy(*this,index); }
304
305
306 /**
307 * Convert string to int.
308 *
309 * @return The integer value corresponding to the string.
310 **/
AsInt()311 int AsInt() const { return static_cast<int>(strtol(*this, NULL, 0)); }
312
313 /**
314 * Convert string to double.
315 *
316 * @return The double value corresponding to the string.
317 **/
AsDouble()318 double AsDouble() const { return strtod(*this, NULL); }
319
320 // Accessors & Information
321 /**
322 * Tests whether the string is empty.
323 **/
IsEmpty()324 bool IsEmpty() const { return GetSize() == 0; } // Can just call GetSize
325
/**
 * Test if the continuation character is at the end of the line.
 * If it is, strip off the character and return true; otherwise
 * return false.
 **/
bool IsContinueLine();
332
/**
 * Counts whitespace, beginning at the given position.
 *
 * @return The number of characters counted.
 * @param start The index at which counting should begin.
 **/
int CountWhitespace(int start=0) const;

/**
 * Counts non-whitespace, beginning at the given position.
 *
 * @return The number of characters counted.
 * @param start The index at which counting should begin.
 **/
int CountWordsize(int start=0) const;

/**
 * Counts until the first occurrence of '\n', beginning at the
 * given position.
 *
 * @return The number of characters counted.
 * @param start The index at which counting should begin.
 **/
int CountLinesize(int start=0) const;

/**
 * Counts the number of lines in a string.
 **/
int CountNumLines() const;

/**
 * Counts the number of separate words in a string.
 **/
int CountNumWords() const;
364
/**
 * Get a specific word from a string.
 *
 * @return The requested word.
 * @param word_id The number of the word, counted from the beginning of
 * the string, starting with 0.
 **/
cString GetWord(int word_id=0) const;

/**
 * Get the next word after the specified position. Any leading whitespace
 * is removed.
 *
 * @return The word that was found.
 * @param start The position at which the function should start
 * searching for a word.
 **/
cString GetWordAt(int start=0) const;
381
382 /**
383 * Test if a character is whitespace. Currently, as whitespace count
384 * ' ', '\r', '\t', '\n'.
385 *
386 * @param pos The position of the character to test.
387 **/
IsWhitespace(int pos)388 bool IsWhitespace(int pos) const {
389 return ( (*this)[pos] == ' ' || (*this)[pos] == '\t' ||
390 (*this)[pos] == '\r' || (*this)[pos] == '\n' );
391 }
392
393 /**
394 * Test if a character is a capital letter.
395 *
396 * @param pos The position of the character to test.
397 **/
IsUpperLetter(int pos)398 bool IsUpperLetter(int pos) const {
399 return ((*this)[pos] >= 'A' && (*this)[pos] <= 'Z');
400 }
401
402 /**
403 * Test if a character is not a capital letter.
404 *
405 * @param pos The position of the character to test.
406 **/
IsLowerLetter(int pos)407 bool IsLowerLetter(int pos) const {
408 return ((*this)[pos] >= 'a' && (*this)[pos] <= 'z');
409 }
410
411 /**
412 * Test if a character is a letter.
413 *
414 * @param pos The position of the character to test.
415 **/
IsLetter(int pos)416 bool IsLetter(int pos) const {
417 return IsUpperLetter(pos) || IsLowerLetter(pos);
418 }
419
420 /**
421 * Test if a character is a number (this includes expressions
422 * such as -3.4e5).
423 *
424 * @param pos The position of the character to test.
425 **/
IsNumber(int pos)426 bool IsNumber(int pos) const {
427 return ( ( (*this)[pos] >= '0' && (*this)[pos] <= '9' ) ||
428 (*this)[pos] == '-' || (*this)[pos] == '+' ||
429 (*this)[pos] == '.' || (*this)[pos] == 'e' ||
430 (*this)[pos] == 'E' );
431 }
432
433 /**
434 * Test if a character is a numeral (0, 1, ..., 9).
435 *
436 * @param pos The position of the character to test.
437 **/
IsNumeric(int pos)438 bool IsNumeric(int pos) const {
439 return ((*this)[pos] >= '0' && (*this)[pos] <= '9');
440 }
441
442 /**
443 * Test if a character is either a numeral or a letter.
444 *
445 * @param pos The position of the character to test.
446 **/
IsAlphaNumeric(int pos)447 bool IsAlphaNumeric(int pos) const {
448 return IsLetter(pos) || IsNumber(pos);
449 }
450
/**
 * Test whether the complete string consists only of whitespace.
 **/
bool IsWhitespace() const;

/**
 * Test whether the complete string consists only of uppercase letters.
 **/
bool IsUpperLetter() const;

/**
 * Test whether the complete string consists only of lowercase letters.
 **/
bool IsLowerLetter() const;

/**
 * Test whether the complete string consists only of letters.
 **/
bool IsLetter() const;

/**
 * Test whether the complete string can be seen as a number.
 **/
bool IsNumber() const;

/**
 * Test whether the complete string consists only of numerals.
 **/
bool IsNumeric() const;

/**
 * Test whether the complete string consists only of letters or numerals.
 **/
bool IsAlphaNumeric() const;
485
/**
 * Search for a single character.
 *
 * @return The first occurrence after pos, or -1 if not found
 * @param in_char The character to look for.
 * @param pos The position at which the search should start.
 **/
int Find(char in_char, int pos=0) const;
492
493 /**
494 * Search for a substring.
495 *
496 * @return The first occurence after pos, or -1 if not found
497 **/
498 int Find(const char * in, int pos=0) const{
499 return FindStr(in, strlen(in), pos); }
500
501 /**
502 * Search for a substring.
503 *
504 * @return The first occurence after pos, or -1 if not found
505 **/
506 int Find(const cString & in, int pos=0) const{
507 return FindStr(in, in.GetSize(), pos); }
508
/**
 * Search for a word.
 *
 * @return The first occurrence after pos, or -1 if not found
 * @param in The word to look for.
 * @param pos The position at which the search should start.
 **/
int FindWord(const cString & in, int pos=0) const;
515
516
/**
 * Cut out a substring.
 *
 * @return The substring.
 * @param start The beginning of the substring in the string.
 * @param size The number of characters in the substring.
 **/
cString Substring(int start, int size) const ;

/**
 * Determine if in_string is a substring of this string.
 *
 * @return Is this a substring?
 * @param in_string The string to test.
 * @param start The beginning of the substring in the string.
 **/
bool IsSubstring(const cString & in_string, int start) const;

/**
 * Clip a portion of the string and output it.
 *
 * @return Removed substring.
 * @param pos The position to start the ejection.
 * @param excise Number of sites to eject.
 **/
cString EjectStr(int pos, int excise);
543
544
545 // {{{ -- INTERNALS -------------------------------------------------------
546 protected:
547 // -- Internal Functions --
548
// Methods that take input string size (unsafe to call from outside).
// The public operators and the Insert/Clip/Find wrappers compute the
// length themselves and forward to these.
cString& AppendStr(const int in_size, const char* in); // Optimized
cString& InsertStr(const int in_size, const char* in, int pos, int excise=0);
int FindStr(const char* in_string, const int in_size, int pos) const;
553
// -- Internal Data --
protected:
// Handle to the cStringData that holds the characters. Copying a cString
// copies this handle; CopyOnWrite() replaces it before a modification.
tRCPtr<cStringData> value;
557
558 // }}} End Internals
559 };
560
561
562 // {{{ ** External cString Functions **
563
// iostream input and output of a cString (declared here; not defined in this header)
std::istream& operator >> (std::istream& in, cString& string);
std::ostream& operator << (std::ostream& out, const cString& string);
567
568 // }}}
569
570 // -- INLINE INCLUDES --
571
CopyOnWrite()572 void cString::CopyOnWrite()
573 {
574 if (!value->SetExclusive()) { // if it is shared
575 value = new cStringData(*value); // make own copy of value
576 }
577 }
578
579 cString::cCharProxy & cString::cCharProxy::operator= (char c){ // lvalue
580 string.CopyOnWrite();
581 (*(string.value))[index] = c;
582 return *this;
583 }
584
585 cString::cCharProxy & cString::cCharProxy::operator+= (char c){ // lvalue
586 string.CopyOnWrite();
587 (*(string.value))[index] += c;
588 return *this;
589 }
590
591 cString::cCharProxy & cString::cCharProxy::operator-= (char c){ // lvalue
592 string.CopyOnWrite();
593 (*(string.value))[index] -= c;
594 return *this;
595 }
596
597 cString::cCharProxy & cString::cCharProxy::operator++ (){ // lvalue (prefix)
598 string.CopyOnWrite();
599 ++(*(string.value))[index];
600 return *this;
601 }
602
603 char cString::cCharProxy::operator++ (int dummy){ // lvalue (postfix)
604 (void)dummy;
605 char rv = (*(string.value))[index];
606 string.CopyOnWrite();
607 ++(*(string.value))[index];
608 return rv;
609 }
610
611 cString::cCharProxy & cString::cCharProxy::operator-- (){ // lvalue (prefix)
612 string.CopyOnWrite();
613 --(*(string.value))[index];
614 return *this;
615 }
616
617 char cString::cCharProxy::operator-- (int dummy){ // lvalue (postfix)
618 (void)dummy;
619 char rv = (*(string.value))[index];
620 string.CopyOnWrite();
621 --(*(string.value))[index];
622 return rv;
623 }
624
625 cString::cCharProxy::operator char () const { // rvalue
626 return static_cast<char>((*(string.value))[index]);
627 }
628
629 #endif
630