1 /*
2 * cString.cc
3 * Avida
4 *
5 * Called "string.cc" prior to 12/7/05.
6 * Copyright 1999-2011 Michigan State University. All rights reserved.
7 * Copyright 1993-2003 California Institute of Technology.
8 *
9 *
10 * This file is part of Avida.
11 *
12 * Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14 *
15 * Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License along with Avida.
19 * If not, see <http://www.gnu.org/licenses/>.
20 *
21 */
22
23 #include "cString.h"
24
25 #include <cstdio>
26
27
28 using namespace std;
29
30
31 // ** class cStringData **
32 // -- Constructors --
cStringData(int in_size)33 cString::cStringData::cStringData(int in_size) : m_size(in_size), m_data(new char[m_size + 1])
34 {
35 assert(m_data != NULL); // Memory Allocation Error: Out of Memory
36 m_data[0] = '\0';
37 m_data[m_size] = '\0';
38 }
39
cStringData(int in_size,const char * in)40 cString::cStringData::cStringData(int in_size, const char* in) : m_size(in_size), m_data(new char[m_size + 1])
41 {
42 assert(m_data != NULL); // Memory Allocation Error: Out of Memory
43 for (int i = 0; i < m_size; i++) m_data[i] = in[i];
44 m_data[m_size] = '\0';
45 }
46
cStringData(const cStringData & in)47 cString::cStringData::cStringData(const cStringData& in) : cRCObject(*this), m_size(in.GetSize()), m_data(new char[m_size + 1])
48 {
49 assert(m_data != NULL); // Memory Allocation Error: Out of Memory
50 for (int i = 0; i < m_size; i++) m_data[i] = in[i];
51 m_data[m_size] = '\0';
52 }
53
54
55
56 // ** class cString **
57
58
59 // -- Comparisons --
60
operator ==(const cString & in) const61 bool cString::operator==(const cString& in) const {
62 // Compares sizes first since we have that info anyway
63 int i = -1;
64 if (GetSize() == in.GetSize()) {
65 for (i = 0; i<GetSize() && (*this)[i] == in[i]; ++i) ;
66 }
67 return (i == GetSize()); // i == GetSize if all chars matched
68 }
69
70
Compare(const char * in) const71 int cString::Compare(const char * in) const
72 {
73 // -1 (*this<in), 0 (*this==in), 1 (*this>in) ... just like strcmp()
74 assert(in!=NULL); // NULL input string
75 int i;
76 for (i = 0; i < GetSize() && in[i]!='\0' && (*this)[i] == in[i]; i++) ;
77
78 if (i == GetSize() && in[i] == '\0') return 0; // *this == in
79
80 // They're not equal!
81 if (i < GetSize() && (*this)[i] > in[i]) return 1; // *this > in
82 return -1; // *this < in
83 }
84
IsContinueLine()85 bool cString::IsContinueLine()
86 {
87 bool found = false;
88 bool goodstufffound = false;
89 int j = GetSize() - 1;
90
91 // Scan the line from the end. If the last non-whitespace line is
92 // continueation character the line expects a line to be concatenated on
93 // to it
94
95 while ((j >= 0) && !found && !goodstufffound) {
96 if (!IsWhitespace(j)) {
97 if ((*this)[j] == CONTINUE_LINE_CHAR) {
98 found = true;
99 RemovePos(j);
100 } else {
101 goodstufffound = true;
102 }
103 }
104 j--;
105 }
106 return (found);
107 }
108
109 // -- Information --
110
CountWhitespace(int start) const111 int cString::CountWhitespace(int start) const
112 {
113 assert(start >= 0); // Negative Index into String
114 if (start >= GetSize()) return 0;
115 int count = 0;
116 while (start+count<GetSize() && IsWhitespace(start+count)) count++;
117 return count;
118 }
119
120
CountWordsize(int start) const121 int cString::CountWordsize(int start) const {
122 assert(start >= 0); // Negative Index into String
123 if( start>=GetSize() ) return 0;
124 int count = 0;
125 while( start+count<GetSize() && !IsWhitespace(start+count) )
126 ++count;
127 return count;
128 }
129
130
CountLinesize(int start) const131 int cString::CountLinesize(int start) const
132 {
133 assert(start >= 0); // Negative Index into String
134 if( start>=GetSize() ) return 0;
135 int count = 0;
136 while( start+count<GetSize() && (*this)[start+count]!='\n' )
137 count++;
138 return count;
139 }
140
141
CountNumLines() const142 int cString::CountNumLines() const
143 {
144 int num_lines = 1;
145 for( int i=0; i<GetSize(); ++i ){
146 if( (*this)[i] == '\n' ) num_lines++;
147 }
148 return num_lines;
149 }
150
151
CountNumWords() const152 int cString::CountNumWords() const
153 {
154 int num_words = 0;
155 int pos = CountWhitespace(); // Skip initial whitespace.
156 while( pos<GetSize() ) {
157 pos += CountWordsize(pos);
158 pos += CountWhitespace(pos);
159 num_words++;
160 }
161 return num_words;
162 }
163
164
165 // -- Search --
Find(char in_char,int start) const166 int cString::Find(char in_char, int start) const
167 {
168 int pos = start;
169 assert (pos >= 0); // Negative Position: setting to 0
170 assert (pos <= GetSize()); // Position Past End of String: setting to end.
171 if (pos <= 0) pos = 0;
172 else if (pos > GetSize()) pos = GetSize();
173
174 while( pos < GetSize() ) {
175 if( (*this)[pos] == in_char) return pos; // Found!
176 pos++;
177 }
178 return -1; // Not Found
179 }
180
181
FindWord(const cString & in,int pos) const182 int cString::FindWord(const cString & in, int pos) const
183 {
184 assert (pos >= 0); // Negative Position: setting to 0
185 assert (pos <= GetSize()); // Position Past End of String: setting to end.
186 if (pos <= 0) pos = 0;
187 else if (pos > GetSize()) pos = GetSize();
188
189 // While there is enough space to find
190 while (pos != -1 && pos + in.GetSize() < GetSize()) {
191 cerr << in << " " << pos << endl;
192 if( (pos=Find(in, pos)) >= 0 ){ // try to find it
193 // if it's got whitespace on both sides, it's a word
194 if( ( pos==0 || IsWhitespace(pos-1) )
195 && ( pos==GetSize()-1 || IsWhitespace(pos+in.GetSize()) ) ){
196 return pos;
197 } else {
198 pos++; // go on and look further down
199 }
200 }
201 }
202 return -1;
203 }
204
205
GetWord(int word_id) const206 cString cString::GetWord(int word_id) const
207 {
208 // Find positon of word
209 int pos = 0;
210 int cur_word = 0;
211 while( pos<GetSize() && cur_word<word_id ) { // If this isn't the word
212 pos += CountWhitespace(pos); // Skip leading whitespace
213 pos += CountWordsize(pos); // Skip this word
214 cur_word++;
215 }
216 // Return GetWordAt position... (it will skip any leading whitespace)
217 return GetWordAt(pos);
218 }
219
220
GetWordAt(int start) const221 cString cString::GetWordAt(int start) const
222 {
223 int pos = start + CountWhitespace(start); // Skip past initial whitespace.
224 int word_size = CountWordsize(pos); // Get size of word
225 cString new_string(word_size); // Allocate new_string that size
226 for (int i = 0; i < word_size; i++) { // Copy the chars to new_string
227 new_string[i] = (*this)[pos + i];
228 }
229 return new_string;
230 }
231
IsWhitespace() const232 bool cString::IsWhitespace() const
233 {
234 for( int i=0; i < GetSize(); ++i){
235 if ( IsWhitespace(i) == false ) return false;
236 }
237 return true;
238 }
239
IsUpperLetter() const240 bool cString::IsUpperLetter() const
241 {
242 for (int i = 0; i < GetSize(); ++i) {
243 if ( IsUpperLetter(i) == false ) return false;
244 }
245 return true;
246 }
247
IsLowerLetter() const248 bool cString::IsLowerLetter() const
249 {
250 for (int i = 0; i < GetSize(); ++i) {
251 if ( IsLowerLetter(i) == false ) return false;
252 }
253 return true;
254 }
255
IsLetter() const256 bool cString::IsLetter() const
257 {
258 for (int i = 0; i < GetSize(); ++i) {
259 if ( IsLetter(i) == false ) return false;
260 }
261 return true;
262 }
263
IsNumber() const264 bool cString::IsNumber() const
265 {
266 for (int i = 0; i < GetSize(); ++i) {
267 if ( IsNumber(i) == false ) return false;
268 }
269 return true;
270 }
271
IsNumeric() const272 bool cString::IsNumeric() const
273 {
274 for (int i = 0; i < GetSize(); ++i) {
275 if ( IsNumber(i) == false ) return false;
276 }
277 return true;
278 }
279
IsAlphaNumeric() const280 bool cString::IsAlphaNumeric() const
281 {
282 for (int i = 0; i < GetSize(); ++i) {
283 if ( IsAlphaNumeric(i) == false ) return false;
284 }
285 return true;
286 }
287
288
289
Substring(int start,int size) const290 cString cString::Substring(int start, int size) const
291 {
292 assert(size >= 0); // Non-Positive Size
293 assert(start >= 0); // Negative Position
294 assert(start + size <= GetSize()); // Position+Size Past End of String
295
296 cString new_string(size);
297 for (int i=0; i<size; i++) {
298 new_string[i] = (*this)[i+start];
299 }
300 return new_string;
301 }
302
IsSubstring(const cString & in_string,int start) const303 bool cString::IsSubstring(const cString & in_string, int start) const
304 {
305 assert (start >= 0); // Negative start position
306
307 // If the potential sub-string won't fit, return false;
308 if ( start + in_string.GetSize() > GetSize() ) return false;
309
310 // Otherwise, check character by character.
311 for (int i = 0; i < in_string.GetSize(); i++) {
312 if ( (*this)[i+start] != in_string[i] ) return false;
313 }
314
315 return true;
316 }
317
318
319 // -- Modifiers --
320
Set(const char * fmt,...)321 cString& cString::Set(const char * fmt, ...)
322 {
323 va_list argp;
324 char buf[MAX_STRING_LENGTH];
325 va_start(argp, fmt);
326 vsprintf(buf, fmt, argp);
327 va_end(argp);
328 (*this) = buf;
329 return *this;
330 }
331
Set(const char * fmt,va_list args)332 cString& cString::Set(const char * fmt, va_list args)
333 {
334 char buf[MAX_STRING_LENGTH];
335 vsprintf(buf, fmt, args);
336 (*this) = buf;
337 return *this;
338 }
339
340
Replace(const cString & old_st,const cString & new_st,int pos)341 int cString::Replace(const cString & old_st, const cString & new_st, int pos)
342 {
343 int location;
344 if( (location=Find(old_st, pos)) >= 0 ){ // If we find the old_st
345 Insert(new_st, location, old_st.GetSize());
346 }
347 return location;
348 }
349
350
LeftJustify()351 int cString::LeftJustify()
352 {
353 int ws_count = CountWhitespace();
354 if( ws_count>0 )
355 InsertStr(0, NULL, 0, ws_count);
356 return ws_count;
357 }
358
359
RightJustify()360 int cString::RightJustify()
361 {
362 int ws_count = 0;
363 while( GetSize()-ws_count-1>0 && IsWhitespace(GetSize()-ws_count-1) )
364 ws_count++;
365 if( ws_count>0 )
366 InsertStr(0, NULL, GetSize()-ws_count, ws_count);
367 return ws_count;
368 }
369
Trim()370 void cString::Trim()
371 {
372 // Trim front
373 int ws_count = CountWhitespace();
374 if (ws_count > 0) InsertStr(0, NULL, 0, ws_count);
375
376 // Trim trailing
377 ws_count = 0;
378 while (GetSize() - ws_count - 1 > 0 && IsWhitespace(GetSize() - ws_count - 1)) ws_count++;
379 if (ws_count > 0) InsertStr(0, NULL, GetSize() - ws_count, ws_count);
380 }
381
382
Pop(const char delim)383 cString cString::Pop(const char delim)
384 {
385 int pos;
386 cString rv("");
387 if( (pos=Find(delim)) >= 0 ){ // if it is found at all
388 if( pos > 0 ){ // if the first char is not delim, return substring
389 rv = Substring(0,pos);
390 }
391 // Trim off the front
392 InsertStr(0, NULL, 0, pos+1);
393 }
394
395 // If the deliminator is *not* found, return the whole string.
396 else {
397 rv = *this;
398 *this = "";
399 }
400 return rv;
401 }
402
403
PopWord()404 cString cString::PopWord()
405 {
406 // If there is nothing here, there is nothing to be popped.
407 if (GetSize() == 0) return "";
408
409 const int start_pos = CountWhitespace();
410 const int word_size = CountWordsize(start_pos);
411
412 // If the string is not all whitespace, save the word we cut off...
413 cString rv("");
414 if (word_size > 0) rv = Substring(start_pos, word_size);
415
416 // Trim off the front
417 const int word_end = start_pos + word_size;
418 const int new_start = word_end + CountWhitespace(word_end);
419 InsertStr(0, NULL, 0, new_start); // Insert null in place of old word.
420
421 return rv;
422 }
423
424
ToLower()425 cString & cString::ToLower()
426 {
427 for (int pos = 0; pos < GetSize(); pos++) {
428 if( (*this)[pos] >= 'A' && (*this)[pos] <= 'Z' )
429 (*this)[pos] += 'a' - 'A';
430 }
431
432 return *this;
433 }
434
435
ToUpper()436 cString& cString::ToUpper()
437 {
438 for (int pos = 0; pos < GetSize(); pos++) {
439 if( (*this)[pos] >= 'a' && (*this)[pos] <= 'z' )
440 (*this)[pos] += 'A' - 'a';
441 }
442
443 return *this;
444 }
445
446
Reverse()447 void cString::Reverse()
448 {
449 cString new_st(GetSize());
450 for( int i=0; i<GetSize(); ++i ){
451 // new_st[i] = (*this)[GetSize()-i-1]; // @CAO Problem in new gcc
452 new_st[i] = value->GetData()[GetSize()-i-1];
453 }
454 (*this) = new_st;
455 }
456
457
CompressWhitespace()458 void cString::CompressWhitespace()
459 {
460 // Eats initial whitespace
461 // 2 pass method...
462 int i;
463 int pos = 0;
464 int start_pos = CountWhitespace();
465 int new_size = 0;
466 bool ws = false;
467
468 // count the number of characters that we will need in the new string
469 for( i=start_pos; i<GetSize(); ++i ){
470 if( IsWhitespace(i) ){ // if it whitespace...
471 if( ws == false ){ // if we arn't already in a whitespace block
472 ws = true; // we are now in a whitespace block
473 }
474 }else{ // it isn't whitespace, so count
475 if( ws==true ){ // if there was a whitespace block
476 ++new_size; // inc once for the block
477 ws = false;
478 }
479 ++new_size;
480 }
481 }
482
483 cString new_st(new_size); // Allocate new string
484
485 // Copy over the characters
486 // pos will be the location in new_st, while i is the index into this
487 ws = false;
488 for( i=start_pos; i<GetSize(); ++i ){
489 if( IsWhitespace(i) ){ // if it whitespace...
490 if( ws == false ){ // if we arn't already in a whitespace block
491 ws = true; // we are now in a whitespace block
492 }
493 }else{ // it isn't whitespace, so count
494 if( ws==true ){ // if there was a whitespace block
495 new_st[pos] = ' '; // put a space in for the whitespace block
496 ++pos; // inc once for the block
497 ws = false;
498 }
499 // new_st[pos] = (*this)[i]; // copy it & increment pos @CAO prob in gcc
500 new_st[pos] = value->GetData()[i]; // copy it & increment pos
501 ++pos;
502 }
503 }
504
505 (*this) = new_st; // assign the new_st to this
506 }
507
508
RemoveWhitespace()509 void cString::RemoveWhitespace()
510 {
511 int i;
512 int new_size = 0;
513 for (i = 0; i < GetSize(); ++i) { // count new size
514 if (!IsWhitespace(i)) ++new_size;
515 }
516 cString new_st(new_size); // allocate new string
517 int pos = 0;
518 for (i = 0; i < GetSize(); ++i) { // count new size
519 if (!IsWhitespace(i)) {
520 new_st[pos] = value->GetData()[i]; // copy it & increment pos @CAO prob in GCC
521 ++pos;
522 }
523 }
524 (*this) = new_st; // assign the new_st to this
525 }
526
527
RemoveChar(char out_char)528 void cString::RemoveChar(char out_char)
529 {
530 int i;
531 int new_size = 0;
532 for (i = 0; i < GetSize(); ++i) { // count new size
533 if (value->GetData()[i] != out_char) ++new_size;
534 }
535 cString new_st(new_size); // allocate new string
536 int pos = 0;
537 for(i = 0; i < GetSize(); ++i) { // count new size
538 if (value->GetData()[i] != out_char ) {
539 new_st[pos] = value->GetData()[i]; // copy it & increment pos @CAO prob in GCC
540 ++pos;
541 }
542 }
543 (*this) = new_st; // assign the new_st to this
544 }
545
RemovePos(int pos)546 void cString::RemovePos(int pos){
547 int i;
548 int new_size = GetSize() - 1;
549 cString new_st(new_size); // allocate new string
550 int newpos = 0;
551 for( i=0; i<GetSize(); ++i ){ // count new size
552 if( i != pos ){
553 new_st[newpos++] = value->GetData()[i]; // copy it & increment pos
554 }
555 }
556 (*this) = new_st; // assign the new_st to this
557 }
558
559
560
561
ParseEscapeSequences()562 cString& cString::ParseEscapeSequences()
563 {
564 int o_sz = GetSize();
565 char* newstr = new char[o_sz];
566 int sz = 0;
567
568 for (int i = 0; i < o_sz; i++) {
569 if ((*value)[i] == '\\') {
570 i++;
571 if (i == o_sz) break;
572
573 switch ((*value)[i]) {
574 case 'b': newstr[sz++] = '\b'; break;
575 case 'f': newstr[sz++] = '\f'; break;
576 case 'n': newstr[sz++] = '\n'; break;
577 case 'r': newstr[sz++] = '\r'; break;
578 case 't': newstr[sz++] = '\t'; break;
579 case '\\':
580 case '\x22':
581 case '\'':
582 case '?':
583 newstr[sz++] = (*value)[i]; break;
584 }
585 } else {
586 newstr[sz++] = (*value)[i];
587 }
588 }
589 newstr[sz] = '\0';
590
591 (*this) = newstr;
592 delete [] newstr;
593
594 return *this;
595 }
596
597
598 // -- Internal Methods --
599
AppendStr(const int in_size,const char * in)600 cString & cString::AppendStr(const int in_size, const char * in)
601 {
602 assert (in_size == 0 || in != NULL); // NULL input string
603
604 // Allocate a new string
605 tRCPtr<cStringData> new_value(new cStringData(GetSize() + in_size));
606 assert (new_value); // Memory Allocation Error: Out of Memory
607 for(int i=0; i<GetSize(); ++i ) { // Copy self up to pos
608 (*new_value)[i] = this->operator[](i);
609 }
610 for(int i=0; i<in_size; ++i ) { // Copy in
611 assert(in[i] != '\0'); // Input String Contains '\\0' or too Short
612 (*new_value)[i+GetSize()] = in[i];
613 }
614 value = new_value; // Reassign data to new data
615 return(*this);
616 }
617
618
InsertStr(const int in_size,const char * in,int pos,int excise)619 cString & cString::InsertStr(const int in_size, const char * in,
620 int pos, int excise )
621 {
622 // Inserts 'in' (of length 'in_size') at postition 'pos'
623 // Also excises 'excise' characters from 'pos'
624 // If 'in_size'==0 then 'in' can == NULL and only excise happens
625
626 // Validate inputs:
627 assert (in_size >= 0); // Negative input size
628 assert (pos >= 0); // Negative position
629 assert (pos <= GetSize()); // Position past end of string
630 assert (excise >= 0); // Negative excise
631 assert (excise <= GetSize()-pos); // Excise number too large
632 assert (excise > 0 || in_size > 0); // Make sure a change is made!
633 assert (in_size == 0 || in != NULL); // NULL input string
634
635 // Allocate a new string
636 const int new_size = GetSize() + in_size - excise;
637 tRCPtr<cStringData> new_value(new cStringData(new_size));
638 assert (new_value); // Memory Allocation Error: Out of Memory
639
640 for(int i = 0; i < pos; ++i ){ // Copy self up to pos
641 (*new_value)[i] = this->operator[](i);
642 }
643 for(int i = 0; i < in_size; ++i ){ // Copy in
644 assert( in[i] != '\0'); // Input String Contains '\\0' or too Short
645 (*new_value)[i+pos] = in[i];
646 }
647 for(int i=pos+excise; i<GetSize(); ++i ){ // Copy rest of self
648 (*new_value)[i+in_size-excise] = this->operator[](i);
649 }
650
651 value = new_value; // Reassign data to new data
652 return *this;
653 }
654
655
EjectStr(int pos,int excise)656 cString cString::EjectStr(int pos, int excise )
657 {
658 // Delete excise characters at pos and return the substring.
659
660 // Validate inputs:
661 assert (pos >= 0); // Negative position
662 assert (pos <= GetSize()); // Position past end of string
663 assert (excise > 0); // Must excise something...
664 assert (excise <= GetSize()-pos); // Excise number too large
665
666 // Collect substring to output.
667 cString out_string(Substring(pos, excise));
668
669 // Allocate a new string
670 const int new_size = GetSize() - excise;
671 tRCPtr<cStringData> new_value(new cStringData(new_size));
672 assert (new_value); // Memory Allocation Error: Out of Memory
673
674 for(int i = 0; i < pos; i++){ // Copy self up to pos
675 (*new_value)[i] = this->operator[](i);
676 }
677 for(int i=pos+excise; i<GetSize(); ++i ){ // Copy post-excise self
678 (*new_value)[i-excise] = this->operator[](i);
679 }
680
681 value = new_value; // Reassign data to new data
682 return out_string;
683 }
684
685
FindStr(const char * in,const int in_size,int pos) const686 int cString::FindStr(const char * in, const int in_size, int pos) const
687 {
688 assert (pos>=0); // Negative position
689 assert (pos<=GetSize()); // Position past end of string
690
691 while (pos < GetSize()) {
692 if( GetSize()-pos < in_size ) return -1; // Too near this string's end.
693 if( (*this)[pos] == in[0] ){
694 // see if we have found the string...
695 int i;
696 for( i = 1; i < in_size; i++ ){
697 assert (pos+i < GetSize()); // Reached end of (*this) in Find
698 assert (in[i] != '\0'); // Reached end of 'in' in Find
699 if( (*this)[pos + i] != in[i] ) break; // Match failure!
700 }
701 // If we have made it fully through the loop, we have found a match!
702 if( i == in_size ) return pos;
703 }
704 pos++;
705 }
706 return -1;
707 }
708
709
710
711 // {{{ ** External cString Functions **
712
operator >>(istream & in,cString & string)713 istream & operator >> (istream & in, cString & string)
714 {
715 char buf[MAX_STRING_LENGTH];
716 in>>buf;
717 string=buf;
718 return in;
719 }
720
operator <<(ostream & out,const cString & string)721 ostream& operator << (ostream& out, const cString& string)
722 {
723 out << static_cast<const char*>(string);
724 return out;
725 }
726