1 /*
2  *  cString.cc
3  *  Avida
4  *
5  *  Called "string.cc" prior to 12/7/05.
6  *  Copyright 1999-2011 Michigan State University. All rights reserved.
7  *  Copyright 1993-2003 California Institute of Technology.
8  *
9  *
10  *  This file is part of Avida.
11  *
12  *  Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13  *  as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14  *
15  *  Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Lesser General Public License along with Avida.
19  *  If not, see <http://www.gnu.org/licenses/>.
20  *
21  */
22 
23 #include "cString.h"
24 
25 #include <cstdio>
26 
27 
28 using namespace std;
29 
30 
31 // ** class cStringData **
32 // -- Constructors --
cStringData(int in_size)33 cString::cStringData::cStringData(int in_size) : m_size(in_size), m_data(new char[m_size + 1])
34 {
35   assert(m_data != NULL); // Memory Allocation Error: Out of Memory
36   m_data[0] = '\0';
37   m_data[m_size] = '\0';
38 }
39 
cStringData(int in_size,const char * in)40 cString::cStringData::cStringData(int in_size, const char* in) : m_size(in_size), m_data(new char[m_size + 1])
41 {
42   assert(m_data != NULL); // Memory Allocation Error: Out of Memory
43   for (int i = 0; i < m_size; i++) m_data[i] = in[i];
44   m_data[m_size] = '\0';
45 }
46 
cStringData(const cStringData & in)47 cString::cStringData::cStringData(const cStringData& in) : cRCObject(*this), m_size(in.GetSize()), m_data(new char[m_size + 1])
48 {
49   assert(m_data != NULL); // Memory Allocation Error: Out of Memory
50   for (int i = 0; i < m_size; i++)  m_data[i] = in[i];
51   m_data[m_size] = '\0';
52 }
53 
54 
55 
56 // ** class cString **
57 
58 
59 // -- Comparisons --
60 
operator ==(const cString & in) const61 bool cString::operator==(const cString& in) const {
62   // Compares sizes first since we have that info anyway
63   int i = -1;
64   if (GetSize() == in.GetSize()) {
65     for (i = 0; i<GetSize() && (*this)[i] == in[i]; ++i) ;
66   }
67   return (i == GetSize());  // i == GetSize if all chars matched
68 }
69 
70 
Compare(const char * in) const71 int cString::Compare(const char * in) const
72 {
73   // -1 (*this<in), 0 (*this==in), 1 (*this>in) ... just like strcmp()
74   assert(in!=NULL);  // NULL input string
75   int i;
76   for (i = 0; i < GetSize() && in[i]!='\0' && (*this)[i] == in[i]; i++) ;
77 
78   if (i == GetSize() && in[i] == '\0') return 0;      // *this == in
79 
80   // They're not equal!
81   if (i < GetSize() && (*this)[i] > in[i]) return 1;  // *this > in
82   return -1;                                          // *this < in
83 }
84 
IsContinueLine()85 bool cString::IsContinueLine()
86 {
87   bool found = false;
88   bool goodstufffound = false;
89   int j = GetSize() - 1;
90 
91   // Scan the line from the end.  If the last non-whitespace line is
92   // continueation character the line expects a line to be concatenated on
93   // to it
94 
95   while ((j >= 0) && !found && !goodstufffound) {
96     if (!IsWhitespace(j)) {
97       if ((*this)[j] == CONTINUE_LINE_CHAR) {
98         found = true;
99         RemovePos(j);
100       } else {
101         goodstufffound = true;
102       }
103     }
104     j--;
105   }
106   return (found);
107 }
108 
109 // -- Information --
110 
CountWhitespace(int start) const111 int cString::CountWhitespace(int start) const
112 {
113   assert(start >= 0); // Negative Index into String
114   if (start >= GetSize()) return 0;
115   int count = 0;
116   while (start+count<GetSize() && IsWhitespace(start+count)) count++;
117   return count;
118 }
119 
120 
CountWordsize(int start) const121 int cString::CountWordsize(int start) const {
122   assert(start >= 0); // Negative Index into String
123   if( start>=GetSize() )  return 0;
124   int count = 0;
125   while( start+count<GetSize() && !IsWhitespace(start+count) )
126     ++count;
127   return count;
128 }
129 
130 
CountLinesize(int start) const131 int cString::CountLinesize(int start) const
132 {
133   assert(start >= 0); // Negative Index into String
134   if( start>=GetSize() )  return 0;
135   int count = 0;
136   while( start+count<GetSize() && (*this)[start+count]!='\n' )
137     count++;
138   return count;
139 }
140 
141 
CountNumLines() const142 int cString::CountNumLines() const
143 {
144   int num_lines = 1;
145   for( int i=0; i<GetSize(); ++i ){
146     if( (*this)[i] == '\n' )  num_lines++;
147   }
148   return num_lines;
149 }
150 
151 
CountNumWords() const152 int cString::CountNumWords() const
153 {
154   int num_words = 0;
155   int pos = CountWhitespace();     // Skip initial whitespace.
156   while( pos<GetSize() ) {
157     pos += CountWordsize(pos);
158     pos += CountWhitespace(pos);
159     num_words++;
160   }
161   return num_words;
162 }
163 
164 
165 // -- Search --
Find(char in_char,int start) const166 int cString::Find(char in_char, int start) const
167 {
168   int pos = start;
169   assert (pos >= 0);         // Negative Position: setting to 0
170   assert (pos <= GetSize()); // Position Past End of String: setting to end.
171   if (pos <= 0) pos = 0;
172   else if (pos > GetSize()) pos = GetSize();
173 
174   while( pos < GetSize() ) {
175     if( (*this)[pos] == in_char) return pos; // Found!
176     pos++;
177   }
178   return -1; // Not Found
179 }
180 
181 
FindWord(const cString & in,int pos) const182 int cString::FindWord(const cString & in, int pos) const
183 {
184   assert (pos >= 0);         // Negative Position: setting to 0
185   assert (pos <= GetSize()); // Position Past End of String: setting to end.
186   if (pos <= 0) pos = 0;
187   else if (pos > GetSize()) pos = GetSize();
188 
189   // While there is enough space to find
190   while (pos != -1 && pos + in.GetSize() < GetSize()) {
191     cerr << in << " " << pos << endl;
192     if( (pos=Find(in, pos)) >= 0 ){      // try to find it
193       // if it's got whitespace on both sides, it's a word
194       if( ( pos==0 || IsWhitespace(pos-1) )
195          && ( pos==GetSize()-1 || IsWhitespace(pos+in.GetSize()) ) ){
196         return pos;
197       } else {
198         pos++; // go on and look further down
199       }
200     }
201   }
202   return -1;
203 }
204 
205 
GetWord(int word_id) const206 cString cString::GetWord(int word_id) const
207 {
208   // Find positon of word
209   int pos = 0;
210   int cur_word = 0;
211   while( pos<GetSize() && cur_word<word_id ) {  // If this isn't the word
212     pos += CountWhitespace(pos);                 // Skip leading whitespace
213     pos += CountWordsize(pos);                   // Skip this word
214     cur_word++;
215   }
216   // Return GetWordAt position... (it will skip any leading whitespace)
217   return GetWordAt(pos);
218 }
219 
220 
GetWordAt(int start) const221 cString cString::GetWordAt(int start) const
222 {
223   int pos = start + CountWhitespace(start);  // Skip past initial whitespace.
224   int word_size = CountWordsize(pos);        // Get size of word
225   cString new_string(word_size);             // Allocate new_string that size
226   for (int i = 0; i < word_size; i++) {      // Copy the chars to new_string
227     new_string[i] = (*this)[pos + i];
228   }
229   return new_string;
230 }
231 
IsWhitespace() const232 bool cString::IsWhitespace() const
233 {
234   for( int i=0; i < GetSize(); ++i){
235     if ( IsWhitespace(i) == false ) return false;
236   }
237   return true;
238 }
239 
IsUpperLetter() const240 bool cString::IsUpperLetter() const
241 {
242   for (int i = 0; i < GetSize(); ++i) {
243     if ( IsUpperLetter(i) == false ) return false;
244   }
245   return true;
246 }
247 
IsLowerLetter() const248 bool cString::IsLowerLetter() const
249 {
250   for (int i = 0; i < GetSize(); ++i) {
251     if ( IsLowerLetter(i) == false ) return false;
252   }
253   return true;
254 }
255 
IsLetter() const256 bool cString::IsLetter() const
257 {
258   for (int i = 0; i < GetSize(); ++i) {
259     if ( IsLetter(i) == false ) return false;
260   }
261   return true;
262 }
263 
IsNumber() const264 bool cString::IsNumber() const
265 {
266   for (int i = 0; i < GetSize(); ++i) {
267     if ( IsNumber(i) == false ) return false;
268   }
269   return true;
270 }
271 
IsNumeric() const272 bool cString::IsNumeric() const
273 {
274   for (int i = 0; i < GetSize(); ++i) {
275     if ( IsNumber(i) == false ) return false;
276   }
277   return true;
278 }
279 
IsAlphaNumeric() const280 bool cString::IsAlphaNumeric() const
281 {
282   for (int i = 0; i < GetSize(); ++i) {
283     if ( IsAlphaNumeric(i) == false ) return false;
284   }
285   return true;
286 }
287 
288 
289 
Substring(int start,int size) const290 cString cString::Substring(int start, int size) const
291 {
292   assert(size >= 0); // Non-Positive Size
293   assert(start >= 0); // Negative Position
294   assert(start + size <= GetSize()); // Position+Size Past End of String
295 
296   cString new_string(size);
297   for (int i=0; i<size; i++) {
298     new_string[i] = (*this)[i+start];
299   }
300   return new_string;
301 }
302 
IsSubstring(const cString & in_string,int start) const303 bool cString::IsSubstring(const cString & in_string, int start) const
304 {
305   assert (start >= 0); // Negative start position
306 
307   // If the potential sub-string won't fit, return false;
308   if ( start + in_string.GetSize() > GetSize() ) return false;
309 
310   // Otherwise, check character by character.
311   for (int i = 0; i < in_string.GetSize(); i++) {
312     if ( (*this)[i+start] != in_string[i] ) return false;
313   }
314 
315   return true;
316 }
317 
318 
319 // -- Modifiers --
320 
Set(const char * fmt,...)321 cString& cString::Set(const char * fmt, ...)
322 {
323   va_list argp;
324   char buf[MAX_STRING_LENGTH];
325   va_start(argp, fmt);
326   vsprintf(buf, fmt, argp);
327   va_end(argp);
328   (*this) = buf;
329   return *this;
330 }
331 
Set(const char * fmt,va_list args)332 cString& cString::Set(const char * fmt, va_list args)
333 {
334   char buf[MAX_STRING_LENGTH];
335   vsprintf(buf, fmt, args);
336   (*this) = buf;
337   return *this;
338 }
339 
340 
Replace(const cString & old_st,const cString & new_st,int pos)341 int cString::Replace(const cString & old_st, const cString & new_st, int pos)
342 {
343   int location;
344   if( (location=Find(old_st, pos)) >= 0 ){ // If we find the old_st
345     Insert(new_st, location, old_st.GetSize());
346   }
347   return location;
348 }
349 
350 
LeftJustify()351 int cString::LeftJustify()
352 {
353   int ws_count = CountWhitespace();
354   if( ws_count>0 )
355     InsertStr(0, NULL, 0, ws_count);
356   return ws_count;
357 }
358 
359 
RightJustify()360 int cString::RightJustify()
361 {
362   int ws_count = 0;
363   while( GetSize()-ws_count-1>0 && IsWhitespace(GetSize()-ws_count-1) )
364     ws_count++;
365   if( ws_count>0 )
366     InsertStr(0, NULL, GetSize()-ws_count, ws_count);
367   return ws_count;
368 }
369 
Trim()370 void cString::Trim()
371 {
372   // Trim front
373   int ws_count = CountWhitespace();
374   if (ws_count > 0) InsertStr(0, NULL, 0, ws_count);
375 
376   // Trim trailing
377   ws_count = 0;
378   while (GetSize() - ws_count - 1 > 0 && IsWhitespace(GetSize() - ws_count - 1)) ws_count++;
379   if (ws_count > 0) InsertStr(0, NULL, GetSize() - ws_count, ws_count);
380 }
381 
382 
Pop(const char delim)383 cString cString::Pop(const char delim)
384 {
385   int pos;
386   cString rv("");
387   if( (pos=Find(delim)) >= 0 ){ // if it is found at all
388     if( pos > 0 ){  // if the first char is not delim, return substring
389       rv = Substring(0,pos);
390     }
391     // Trim off the front
392     InsertStr(0, NULL, 0, pos+1);
393   }
394 
395   // If the deliminator is *not* found, return the whole string.
396   else {
397     rv = *this;
398     *this = "";
399   }
400   return rv;
401 }
402 
403 
PopWord()404 cString cString::PopWord()
405 {
406   // If there is nothing here, there is nothing to be popped.
407   if (GetSize() == 0) return "";
408 
409   const int start_pos = CountWhitespace();
410   const int word_size = CountWordsize(start_pos);
411 
412   // If the string is not all whitespace, save the word we cut off...
413   cString rv("");
414   if (word_size > 0) rv = Substring(start_pos, word_size);
415 
416   // Trim off the front
417   const int word_end = start_pos + word_size;
418   const int new_start = word_end + CountWhitespace(word_end);
419   InsertStr(0, NULL, 0, new_start);  // Insert null in place of old word.
420 
421   return rv;
422 }
423 
424 
ToLower()425 cString & cString::ToLower()
426 {
427   for (int pos = 0; pos < GetSize(); pos++) {
428     if( (*this)[pos] >= 'A' && (*this)[pos] <= 'Z' )
429       (*this)[pos] += 'a' - 'A';
430   }
431 
432   return *this;
433 }
434 
435 
ToUpper()436 cString& cString::ToUpper()
437 {
438   for (int pos = 0; pos < GetSize(); pos++) {
439     if( (*this)[pos] >= 'a' && (*this)[pos] <= 'z' )
440       (*this)[pos] += 'A' - 'a';
441   }
442 
443   return *this;
444 }
445 
446 
Reverse()447 void cString::Reverse()
448 {
449   cString new_st(GetSize());
450   for( int i=0; i<GetSize(); ++i ){
451     // new_st[i] = (*this)[GetSize()-i-1];       // @CAO Problem in new gcc
452     new_st[i] = value->GetData()[GetSize()-i-1];
453   }
454   (*this) = new_st;
455 }
456 
457 
CompressWhitespace()458 void cString::CompressWhitespace()
459 {
460   // Eats initial whitespace
461   // 2 pass method...
462   int i;
463   int pos = 0;
464   int start_pos = CountWhitespace();
465   int new_size = 0;
466   bool ws = false;
467 
468   // count the number of characters that we will need in the new string
469   for( i=start_pos; i<GetSize(); ++i ){
470     if( IsWhitespace(i) ){ // if it whitespace...
471       if( ws == false ){     // if we arn't already in a whitespace block
472         ws = true;             // we are now in a whitespace block
473       }
474     }else{                 // it isn't whitespace, so count
475       if( ws==true ){        // if there was a whitespace block
476         ++new_size;            // inc once for the block
477         ws = false;
478       }
479       ++new_size;
480     }
481   }
482 
483   cString new_st(new_size);  // Allocate new string
484 
485   // Copy over the characters
486   // pos will be the location in new_st, while i is the index into this
487   ws = false;
488   for( i=start_pos; i<GetSize(); ++i ){
489     if( IsWhitespace(i) ){ // if it whitespace...
490       if( ws == false ){     // if we arn't already in a whitespace block
491         ws = true;             // we are now in a whitespace block
492       }
493     }else{                 // it isn't whitespace, so count
494       if( ws==true ){        // if there was a whitespace block
495         new_st[pos] = ' ';     // put a space in for the whitespace block
496         ++pos;                 // inc once for the block
497         ws = false;
498       }
499       // new_st[pos] = (*this)[i]; // copy it & increment pos  @CAO prob in gcc
500       new_st[pos] = value->GetData()[i]; // copy it & increment pos
501       ++pos;
502     }
503   }
504 
505   (*this) = new_st;  // assign the new_st to this
506 }
507 
508 
RemoveWhitespace()509 void cString::RemoveWhitespace()
510 {
511   int i;
512   int new_size = 0;
513   for (i = 0; i < GetSize(); ++i) {  // count new size
514     if (!IsWhitespace(i)) ++new_size;
515   }
516   cString new_st(new_size);      // allocate new string
517   int pos = 0;
518   for (i = 0; i < GetSize(); ++i) {  // count new size
519     if (!IsWhitespace(i)) {
520       new_st[pos] = value->GetData()[i]; // copy it & increment pos   @CAO prob in GCC
521       ++pos;
522     }
523   }
524   (*this) = new_st;  // assign the new_st to this
525 }
526 
527 
RemoveChar(char out_char)528 void cString::RemoveChar(char out_char)
529 {
530   int i;
531   int new_size = 0;
532   for (i = 0; i < GetSize(); ++i) {  // count new size
533     if (value->GetData()[i] != out_char) ++new_size;
534   }
535   cString new_st(new_size);      // allocate new string
536   int pos = 0;
537   for(i = 0; i < GetSize(); ++i) {  // count new size
538     if (value->GetData()[i] != out_char ) {
539       new_st[pos] = value->GetData()[i]; // copy it & increment pos   @CAO prob in GCC
540       ++pos;
541     }
542   }
543   (*this) = new_st;  // assign the new_st to this
544 }
545 
RemovePos(int pos)546 void cString::RemovePos(int pos){
547   int i;
548   int new_size = GetSize() - 1;
549   cString new_st(new_size);      // allocate new string
550   int newpos = 0;
551   for( i=0; i<GetSize(); ++i ){  // count new size
552     if( i != pos ){
553       new_st[newpos++] = value->GetData()[i]; // copy it & increment pos
554     }
555   }
556   (*this) = new_st;  // assign the new_st to this
557 }
558 
559 
560 
561 
ParseEscapeSequences()562 cString& cString::ParseEscapeSequences()
563 {
564   int o_sz = GetSize();
565   char* newstr = new char[o_sz];
566   int sz = 0;
567 
568   for (int i = 0; i < o_sz; i++) {
569     if ((*value)[i] == '\\') {
570       i++;
571       if (i == o_sz) break;
572 
573       switch ((*value)[i]) {
574         case 'b': newstr[sz++] = '\b'; break;
575         case 'f': newstr[sz++] = '\f'; break;
576         case 'n': newstr[sz++] = '\n'; break;
577         case 'r': newstr[sz++] = '\r'; break;
578         case 't': newstr[sz++] = '\t'; break;
579         case '\\':
580         case '\x22':
581         case '\'':
582         case '?':
583           newstr[sz++] = (*value)[i]; break;
584       }
585     } else {
586       newstr[sz++] = (*value)[i];
587     }
588   }
589   newstr[sz] = '\0';
590 
591   (*this) = newstr;
592   delete [] newstr;
593 
594   return *this;
595 }
596 
597 
598 // -- Internal Methods --
599 
AppendStr(const int in_size,const char * in)600 cString & cString::AppendStr(const int in_size, const char * in)
601 {
602   assert (in_size == 0 || in != NULL); // NULL input string
603 
604   // Allocate a new string
605   tRCPtr<cStringData> new_value(new cStringData(GetSize() + in_size));
606   assert (new_value);       // Memory Allocation Error: Out of Memory
607   for(int i=0; i<GetSize(); ++i ) { // Copy self up to pos
608     (*new_value)[i] = this->operator[](i);
609   }
610   for(int i=0; i<in_size; ++i ) {   // Copy in
611     assert(in[i] != '\0');          // Input String Contains '\\0' or too Short
612     (*new_value)[i+GetSize()] = in[i];
613   }
614   value = new_value;             // Reassign data to new data
615   return(*this);
616 }
617 
618 
InsertStr(const int in_size,const char * in,int pos,int excise)619 cString & cString::InsertStr(const int in_size, const char * in,
620                              int pos, int excise )
621 {
622   // Inserts 'in' (of length 'in_size') at postition 'pos'
623   // Also excises 'excise' characters from 'pos'
624   // If 'in_size'==0 then 'in' can == NULL and only excise happens
625 
626   // Validate inputs:
627   assert (in_size >= 0);               // Negative input size
628   assert (pos >= 0);                   // Negative position
629   assert (pos <= GetSize());           // Position past end of string
630   assert (excise >= 0);                // Negative excise
631   assert (excise <= GetSize()-pos);    // Excise number too large
632   assert (excise > 0 || in_size > 0);  // Make sure a change is made!
633   assert (in_size == 0 || in != NULL); // NULL input string
634 
635   // Allocate a new string
636   const int new_size = GetSize() + in_size - excise;
637   tRCPtr<cStringData> new_value(new cStringData(new_size));
638   assert (new_value);  // Memory Allocation Error: Out of Memory
639 
640   for(int i = 0; i < pos; ++i ){             // Copy self up to pos
641     (*new_value)[i] = this->operator[](i);
642   }
643   for(int i = 0; i < in_size; ++i ){         // Copy in
644     assert( in[i] != '\0');  // Input String Contains '\\0' or too Short
645     (*new_value)[i+pos] = in[i];
646   }
647   for(int i=pos+excise; i<GetSize(); ++i ){  // Copy rest of self
648     (*new_value)[i+in_size-excise] = this->operator[](i);
649   }
650 
651   value = new_value;                      // Reassign data to new data
652   return *this;
653 }
654 
655 
EjectStr(int pos,int excise)656 cString cString::EjectStr(int pos, int excise )
657 {
658   // Delete excise characters at pos and return the substring.
659 
660   // Validate inputs:
661   assert (pos >= 0);                   // Negative position
662   assert (pos <= GetSize());           // Position past end of string
663   assert (excise > 0);                 // Must excise something...
664   assert (excise <= GetSize()-pos);    // Excise number too large
665 
666   // Collect substring to output.
667   cString out_string(Substring(pos, excise));
668 
669   // Allocate a new string
670   const int new_size = GetSize() - excise;
671   tRCPtr<cStringData> new_value(new cStringData(new_size));
672   assert (new_value);  // Memory Allocation Error: Out of Memory
673 
674   for(int i = 0; i < pos; i++){             // Copy self up to pos
675     (*new_value)[i] = this->operator[](i);
676   }
677   for(int i=pos+excise; i<GetSize(); ++i ){  // Copy post-excise self
678     (*new_value)[i-excise] = this->operator[](i);
679   }
680 
681   value = new_value;                      // Reassign data to new data
682   return out_string;
683 }
684 
685 
FindStr(const char * in,const int in_size,int pos) const686 int cString::FindStr(const char * in, const int in_size, int pos) const
687 {
688   assert (pos>=0);         // Negative position
689   assert (pos<=GetSize()); // Position past end of string
690 
691   while (pos < GetSize()) {
692     if( GetSize()-pos < in_size ) return -1; // Too near this string's end.
693     if( (*this)[pos] == in[0] ){
694       // see if we have found the string...
695       int i;
696       for( i = 1; i < in_size; i++ ){
697         assert (pos+i < GetSize()); // Reached end of (*this) in Find
698         assert (in[i] != '\0');     // Reached end of 'in' in Find
699         if( (*this)[pos + i] != in[i] ) break; // Match failure!
700       }
701       // If we have made it fully through the loop, we have found a match!
702       if( i == in_size ) return pos;
703     }
704     pos++;
705   }
706   return -1;
707 }
708 
709 
710 
711 // {{{ ** External cString Functions **
712 
operator >>(istream & in,cString & string)713 istream & operator >> (istream & in, cString & string)
714 {
715   char buf[MAX_STRING_LENGTH];
716   in>>buf;
717   string=buf;
718   return in;
719 }
720 
operator <<(ostream & out,const cString & string)721 ostream& operator << (ostream& out, const cString& string)
722 {
723   out << static_cast<const char*>(string);
724   return out;
725 }
726