1 /*
2  *  cString.h
3  *  Avida
4  *
5  *  Called "cstringh" prior to 12/7/05.
6  *  Copyright 1999-2011 Michigan State University. All rights reserved.
7  *  Copyright 1993-2003 California Institute of Technology.
8  *
9  *
10  *  This file is part of Avida.
11  *
12  *  Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13  *  as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14  *
15  *  Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Lesser General Public License along with Avida.
19  *  If not, see <http://www.gnu.org/licenses/>.
20  *
21  */
22 
23 #ifndef cString_h
24 #define cString_h
25 
26 #include <cstdlib>
27 #include <iostream>
28 #include <cstdarg>
29 #include <string>
30 #include <cstring>
31 #include <cassert>
32 
33 #include "cRCObject.h"
34 #include "tRCPtr.h"
35 
// Upper bound on string length. Not referenced in this header; presumably a
// buffer limit used by the implementation file -- TODO confirm.
#define MAX_STRING_LENGTH 4096
// Backslash character. Not referenced in this header; presumably the marker
// that cString::IsContinueLine() looks for at the end of a line -- TODO confirm.
#define CONTINUE_LINE_CHAR '\\'
38 
39 /**
40  * A multipurpose string class with many convenient methods of
41  * manipulating and comparing strings.
42  **/
43 
44 class cString
45 {
46 protected:
47   inline void CopyOnWrite();
48 
49   // -- Contained Classes --
50 private:
51   // Declarations (only needed)
52   class cStringData;
53 
54   // cCharProxy -- To detect rvalue vs lvalue ---------------------
55   class cCharProxy
56   {
57   private:
58     cString& string;
59     int index;
60 
61   public:
cCharProxy(cString & _string,int _index)62     cCharProxy(cString& _string, int _index) : string(_string), index(_index) { ; }
63 
64     inline cCharProxy& operator=(char c);     // lvalue
65     inline cCharProxy& operator+=(char c);    // lvalue
66     inline cCharProxy& operator-=(char c);    // lvalue
67     inline cCharProxy& operator++();          // lvalue (prefix)
68     inline char        operator++(int dummy); // lvalue (postfix)
69     inline cCharProxy& operator--();          // lvalue (prefix)
70     inline char        operator--(int dummy); // lvalue (postfix)
71     inline operator char () const;            // rvalue
72   };
73   friend class cCharProxy;  // Telling rvalue vs lvalue ....
74 
75   // cStringData -- Holds the actual data and is reference count --
76   class cStringData : public cRCObject
77   {
78     // NOTE: Terminating NULL is always there (you can't assign!!)
79   private:
80     int m_size;   // size of data (NOT INCLUDING TRAILING NULL)
81     char* m_data;
82 
83     cStringData(); // @not_implemented
84 
85   public:
86     explicit cStringData(int in_size);
87     cStringData(int in_size, const char* in);
88     cStringData(const cStringData& in);
89 
~cStringData()90     ~cStringData()
91     {
92       delete [] m_data;
93     }
94 
95     cStringData& operator=(const cStringData& in)
96     {
97       delete [] m_data;
98       m_size = in.GetSize();
99       m_data = new char [m_size + 1];
100       assert(m_data != NULL);   // Memory Allocation Error: Out of Memory
101       for(int i = 0; i < m_size; ++i)  m_data[i] = in[i];
102       m_data[m_size] = '\0';
103       return (*this);
104     }
105 
GetSize()106     int GetSize() const { return m_size; }
GetData()107     const char* GetData() const { return m_data; }
108 
109     char operator[] (int index) const
110     {
111       assert(index >= 0);    // Lower Bounds Error
112       assert(index <= m_size); // Upper Bounds Error
113       return m_data[index];
114     }
115 
116     char& operator[](int index)
117     {
118       assert(index >= 0);     // Lower Bounds Error
119       assert(index <= m_size);  // Upper Bounds Error
120       assert(index != m_size);  // Cannot Change Terminating NULL
121       return m_data[index];
122     }
123   };
124 
125 public:
126   cString(const char* in_str = "")
127   {
128     if (in_str) {
129       value = new cStringData(strlen(in_str), in_str);
130     } else {
131       value = new cStringData(0, "");
132     }
133     assert( value );  // Memory Allocation Error: Out of Memory
134   }
cString(const char * in,int in_size)135   cString(const char* in, int in_size) : value(new cStringData(in_size, in))
136   {
137     assert(in_size >= 0);
138     assert( in != NULL );     // NULL input string
139     assert( value );  // Memory Allocation Error: Out of Memory
140   }
cString(const int size)141   explicit cString(const int size) : value(new cStringData(size))
142   {
143     assert( value );    // Memory Allocation Error: Out of Memory
144   }
cString(const cString & in_str)145   cString(const cString& in_str) :value(in_str.value) { ; }
146 
~cString()147   ~cString() { ; }
148 
149 
150   // Cast to const char *
151   operator const char* () const { return value->GetData(); }
152 
153   // Assignment Operators
154   cString& operator=(const cString& in_str) { value = in_str.value; return *this; }
155   cString& operator=(const char* in)
156   {
157     assert( in != NULL ); // NULL input string
158     value = new cStringData(strlen(in),in);
159     assert(value);  // Memory Allocation Error: Out of Memory
160     return *this;
161   }
162 
163 
164   /**
165    * Get the size of the string (not including the terminating '\0').
166    **/
GetSize()167   int GetSize() const { return value->GetSize(); }
168 
169 	/**
170 	 *  Get the string
171 	 **/
GetData()172 	const char* GetData() const { return value->GetData(); }
173 
174 
175   // Comparisons
176   int Compare(const char * in) const;  // strcmp like function
177   bool operator== (const char * in)    const { return (Compare(in)==0); }
178   bool operator== (const cString & in) const;  // A bit optimized
179   bool operator!= (const char * in)    const { return !(*this==in); }
180   bool operator<  (const char * in)    const { return (Compare(in)<0); }
181   bool operator>  (const char * in)    const { return (Compare(in)>0); }
182   bool operator<= (const char * in)    const { return (Compare(in)<=0); }
183   bool operator>= (const char * in)    const { return (Compare(in)>=0); }
184 
185   // Concatenation
186   cString & operator+= (const char in)  { return AppendStr(1,&in); }
187   cString & operator+= (const char * in){ return AppendStr(strlen(in),in); }
188   cString & operator+= (const cString & in){return AppendStr(in.GetSize(),in);}
189   cString operator+ (const char in_char){ return (cString(*this) += in_char); }
190   cString operator+ (const char * in)   { return (cString(*this) += in); }
191   cString operator+ (const cString & in){ return (cString(*this) += in); }
192 
193 
194   // Additional modifiers
195   cString& Set(const char* fmt, ...);
196   cString& Set(const char* fmt, va_list args);
197 
198   cString& Insert(const char in, int pos = 0, int excise = 0) { return InsertStr(1, &in, pos, excise); }
199   cString& Insert(const char* in, int pos = 0, int excise = 0) { return InsertStr(strlen(in), in, pos, excise); }
200   cString& Insert(const cString& in, int pos = 0, int excise = 0) { return InsertStr(in.GetSize(), in, pos, excise); }
201 
202 
203   // Removes 'size' characters from 'pos' (default size = to end of string)
204   cString& Clip(int pos, int size = -1 /*end of string*/ )
205     { if( size < 0 ) size = GetSize() - pos; return InsertStr(0, NULL, pos, size); }
ClipFront(int size)206   cString& ClipFront(int size) { /* Clip off first 'clip_size' chars */ return InsertStr(0, NULL, 0, size); }
ClipEnd(int size)207   cString& ClipEnd(int size) { /* Clip off last 'clip_size' chars */ return InsertStr(0, NULL, GetSize() - size, size); }
208 
209   /**
210    * Find and replace a substring in the string with a different substring.
211    * If the substring is not found, the string object is not changed.
212    *
213    * @return The position at which the substring was found, or -1 if it wasn't found.
214    * @param old_string The substring that is going to be replaced.
215    * @param new_string The replacement.
216    * @param pos The position at which the search should start.
217    **/
218   int Replace(const cString& old_st, const cString& new_st, int pos = 0);
219 
220   cString Pop(const char delim);  // Remove and return up to delim char
221 
222   /**
223    * Remove the first word.
224    *
225    * @return The removed word.
226    **/
227   cString PopWord();
228 
229   /**
230    * Remove the first line.
231    *
232    * @return The removed line.
233    **/
PopLine()234   cString PopLine() { return Pop('\n'); }
235 
236   /**
237    * Remove begining whitespace.
238    *
239    * @return The number of characters removed.
240    **/
241   int LeftJustify();
242 
243   /**
244    * Remove ending whitespace.
245    *
246    * @return The number of characters removed.
247    **/
248   int RightJustify();
249 
250   /**
251     * Remove beginning and ending whitespace.
252    **/
253   void Trim();
254 
255   /**
256    * Reverse the order of the characters in the string.
257    **/
258   void Reverse();
259 
260   /**
261    * Convert the string to lowercase.
262    **/
263   cString& ToLower();
264 
265   /**
266    * Convert the string to uppercase.
267    **/
268   cString& ToUpper();
269 
270   /**
271    * Replace all blocks of whitespace with a single space (' ').
272    *
273    * @see cString::IsWhitespace()
274    **/
275   void CompressWhitespace();
276 
277   /**
278    * Get rid of all(!) whitespace.
279    *
280    * @see cString::IsWhitespace()
281    **/
282   void RemoveWhitespace();
283 
284   /**
285    * Get rid of all occurances of a specific character.
286    *
287    * @see cString::RemoveWhitespace()
288    **/
289   void RemoveChar(char out_char);
290 
291   /**
292    * Get rid of one character at a specific location
293    **/
294   void RemovePos(int pos);
295 
296 
297   // Parse for and replace escape sequences within the string
298   cString& ParseEscapeSequences();
299 
300 
301   // Individal Char Access
302   inline char operator[] (int index) const { return static_cast<char>((*value)[index]); }
303   cCharProxy operator[] (int index) { return cCharProxy(*this,index); }
304 
305 
306   /**
307    * Convert string to int.
308    *
309    * @return The integer value corresponding to the string.
310    **/
AsInt()311   int AsInt() const { return static_cast<int>(strtol(*this, NULL, 0)); }
312 
313   /**
314    * Convert string to double.
315    *
316    * @return The double value corresponding to the string.
317    **/
AsDouble()318   double AsDouble() const { return strtod(*this, NULL); }
319 
320   // Accessors & Information
321   /**
322    * Tests whether the string is empty.
323    **/
IsEmpty()324   bool IsEmpty() const { return GetSize() == 0; } // Can just call GetSize
325 
326   /**
327    * Test if the continuation character is at the end of the line
328    * if it is strip off the charecter and return true else return
329    * false
330    **/
331   bool IsContinueLine();
332 
333   /**
334    * Counts whitespace, beginning at the given position.
335    *
336    * @param start The index at which counting should begin.
337    **/
338   int CountWhitespace(int start=0) const;
339 
340   /**
341    * Counts non-whitespace, beginning at the given position.
342    *
343    * @param start The index at which counting should begin.
344    **/
345   int CountWordsize(int start=0) const;
346 
347   /**
348    * Counts until the first occurrence of '\n', beginning at the
349    * given position.
350    *
351    * @param start The index at which counting should begin.
352    **/
353   int CountLinesize(int start=0) const;
354 
355   /**
356    * Counts the number of lines in a string.
357    **/
358   int CountNumLines() const;
359 
360   /**
361    * Counts the number of separate words in a string.
362    **/
363   int CountNumWords() const;
364 
365   /**
366    * Get a specific word from a string.
367    *
368    * @param word_id The number of the word, counted from the beginning of
369    * the string, starting with 0.
370    **/
371   cString GetWord(int word_id=0) const;
372 
373   /**
374    * Get the next word after the specified position. Any leading whitespace
375    * is removed.
376    *
377    * @param start The position at which the function should start
378    * searching for a word.
379    **/
380   cString GetWordAt(int start=0) const;
381 
382   /**
383    * Test if a character is whitespace. Currently, as whitespace count
384    * ' ', '\r', '\t', '\n'.
385    *
386    * @param pos The position of the character to test.
387    **/
IsWhitespace(int pos)388   bool IsWhitespace(int pos) const {
389     return ( (*this)[pos] == ' '  || (*this)[pos] == '\t' ||
390 	     (*this)[pos] == '\r' || (*this)[pos] == '\n' );
391   }
392 
393   /**
394    * Test if a character is a capital letter.
395    *
396    * @param pos The position of the character to test.
397    **/
IsUpperLetter(int pos)398   bool IsUpperLetter(int pos) const {
399     return ((*this)[pos] >= 'A' && (*this)[pos] <= 'Z');
400   }
401 
402   /**
403    * Test if a character is not a capital letter.
404    *
405    * @param pos The position of the character to test.
406    **/
IsLowerLetter(int pos)407   bool IsLowerLetter(int pos) const {
408     return ((*this)[pos] >= 'a' && (*this)[pos] <= 'z');
409   }
410 
411   /**
412    * Test if a character is a letter.
413    *
414    * @param pos The position of the character to test.
415    **/
IsLetter(int pos)416   bool IsLetter(int pos) const {
417     return IsUpperLetter(pos) || IsLowerLetter(pos);
418   }
419 
420   /**
421    * Test if a character is a number (this includes expressions
422    * such as -3.4e5).
423    *
424    * @param pos The position of the character to test.
425    **/
IsNumber(int pos)426   bool IsNumber(int pos) const {
427     return ( ( (*this)[pos] >= '0' && (*this)[pos] <= '9' ) ||
428 	     (*this)[pos] == '-' || (*this)[pos] == '+' ||
429 	     (*this)[pos] == '.' || (*this)[pos] == 'e' ||
430 	     (*this)[pos] == 'E' );
431   }
432 
433   /**
434    * Test if a character is a numeral (0, 1, ..., 9).
435    *
436    * @param pos The position of the character to test.
437    **/
IsNumeric(int pos)438   bool IsNumeric(int pos) const {
439     return ((*this)[pos] >= '0' && (*this)[pos] <= '9');
440   }
441 
442   /**
443    * Test if a character is either a numeral or a letter.
444    *
445    * @param pos The position of the character to test.
446    **/
IsAlphaNumeric(int pos)447   bool IsAlphaNumeric(int pos) const {
448     return IsLetter(pos) || IsNumber(pos);
449   }
450 
451   /**
452    * Test whether the complete string consits only of whitespace.
453    **/
454   bool IsWhitespace() const;
455 
456   /**
457    * Test whether the complete string consits only of uppercase letters.
458    **/
459   bool IsUpperLetter() const;
460 
461   /**
462    * Test whether the complete string consits only of lowercase letters.
463    **/
464   bool IsLowerLetter() const;
465 
466   /**
467    * Test whether the complete string consits only of letters.
468    **/
469   bool IsLetter() const;
470 
471   /**
472    * Test whether the complete string can be seen as a number.
473    **/
474   bool IsNumber() const;
475 
476   /**
477    * Test whether the complete string consits only of numerals.
478    **/
479   bool IsNumeric() const;
480 
481   /**
482    * Test whether the complete string consits only of letters or numerals.
483    **/
484   bool IsAlphaNumeric() const;
485 
486   /**
487    * Search for a single character.
488    *
489    * @return The first occurence after pos, or -1 if not found
490    **/
491   int Find(char in_char, int pos=0) const;
492 
493   /**
494    * Search for a substring.
495    *
496    * @return The first occurence after pos, or -1 if not found
497    **/
498   int Find(const char * in, int pos=0) const{
499     return FindStr(in, strlen(in), pos); }
500 
501   /**
502    * Search for a substring.
503    *
504    * @return The first occurence after pos, or -1 if not found
505    **/
506   int Find(const cString & in, int pos=0) const{
507     return FindStr(in, in.GetSize(), pos); }
508 
509   /**
510    * Search for a word.
511    *
512    * @return The first occurence after pos, or -1 if not found
513    **/
514   int FindWord(const cString & in, int pos=0) const;
515 
516 
517   /**
518    * Cut out a substring.
519    *
520    * @return The substring.
521    * @param start The beginning of the substring in the string.
522    * @param size The number of characters in the substring.
523    **/
524   cString Substring(int start, int size) const ;
525 
526   /**
527    * Determine if in_string is a substring of this string.
528    *
529    * @return Is this a substring?
530    * @param in_string the string to test.
531    * @param start The beginning of the substring in the string.
532    **/
533   bool IsSubstring(const cString & in_string, int start) const;
534 
535   /**
536    * Clip a portion of the string and output it.
537    *
538    * @return Removed substring.
539    * @param pos the position to start the ejection.
540    * @param excise number of sites to eject.
541    **/
542   cString EjectStr(int pos, int excise);
543 
544 
545   // {{{ -- INTERNALS -------------------------------------------------------
546 protected:
547   // -- Internal Functions --
548 
549   // Methods that take input string size (unsafe to call from outside)
550   cString& AppendStr(const int in_size, const char* in);  // Optimized
551   cString& InsertStr(const int in_size, const char* in, int pos, int excise=0);
552   int FindStr(const char* in_string, const int in_size, int pos) const;
553 
554   // -- Internal Data --
555 protected:
556   tRCPtr<cStringData> value;
557 
558 // }}} End Internals
559 };
560 
561 
562 // {{{ ** External cString Functions **
563 
// iostream input and output for cString (declared here; the definitions
// are not part of this header)
std::istream& operator >> (std::istream& in, cString& string);
std::ostream& operator << (std::ostream& out, const cString& string);
567 
568 // }}}
569 
570 // -- INLINE INCLUDES --
571 
CopyOnWrite()572 void cString::CopyOnWrite()
573 {
574   if (!value->SetExclusive()) {  // if it is shared
575     value = new cStringData(*value);  // make own copy of value
576   }
577 }
578 
579 cString::cCharProxy & cString::cCharProxy::operator= (char c){  // lvalue
580   string.CopyOnWrite();
581   (*(string.value))[index] = c;
582   return *this;
583 }
584 
585 cString::cCharProxy & cString::cCharProxy::operator+= (char c){  // lvalue
586   string.CopyOnWrite();
587   (*(string.value))[index] += c;
588   return *this;
589 }
590 
591 cString::cCharProxy & cString::cCharProxy::operator-= (char c){  // lvalue
592   string.CopyOnWrite();
593   (*(string.value))[index] -= c;
594   return *this;
595 }
596 
597 cString::cCharProxy & cString::cCharProxy::operator++ (){  // lvalue (prefix)
598   string.CopyOnWrite();
599   ++(*(string.value))[index];
600   return *this;
601 }
602 
603 char cString::cCharProxy::operator++ (int dummy){  // lvalue (postfix)
604   (void)dummy;
605   char rv = (*(string.value))[index];
606   string.CopyOnWrite();
607   ++(*(string.value))[index];
608   return rv;
609 }
610 
611 cString::cCharProxy & cString::cCharProxy::operator-- (){  // lvalue (prefix)
612   string.CopyOnWrite();
613   --(*(string.value))[index];
614   return *this;
615 }
616 
617 char cString::cCharProxy::operator-- (int dummy){  // lvalue (postfix)
618   (void)dummy;
619   char rv = (*(string.value))[index];
620   string.CopyOnWrite();
621   --(*(string.value))[index];
622   return rv;
623 }
624 
625 cString::cCharProxy::operator char () const {  // rvalue
626   return static_cast<char>((*(string.value))[index]);
627 }
628 
629 #endif
630