1 /*
2 
3  HyPhy - Hypothesis Testing Using Phylogenies.
4 
5  Copyright (C) 1997-now
6  Core Developers:
7  Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8  Art FY Poon    (apoon42@uwo.ca)
9  Steven Weaver (sweaver@temple.edu)
10 
11  Module Developers:
12  Lance Hepler (nlhepler@gmail.com)
13  Martin Smith (martin.audacis@gmail.com)
14 
15  Significant contributions from:
16  Spencer V Muse (muse@stat.ncsu.edu)
17  Simon DW Frost (sdf22@cam.ac.uk)
18 
19  Permission is hereby granted, free of charge, to any person obtaining a
20  copy of this software and associated documentation files (the
21  "Software"), to deal in the Software without restriction, including
22  without limitation the rights to use, copy, modify, merge, publish,
23  distribute, sublicense, and/or sell copies of the Software, and to
24  permit persons to whom the Software is furnished to do so, subject to
25  the following conditions:
26 
27  The above copyright notice and this permission notice shall be included
28  in all copies or substantial portions of the Software.
29 
30  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38  */
39 
40 #include <string.h>
41 #include <stdio.h>
42 #include <ctype.h>
43 #include <time.h>
44 
45 #include "global_things.h"
46 #include "hy_strings.h"
47 #include "batchlan.h"
48 #include "mersenne_twister.h"
49 #include "function_templates.h"
50 #include "hy_string_buffer.h"
51 
52 
53 _String   compileDate = __DATE__;
54 
55 using namespace hy_global;
56 
/**
 * Byte-indexed classification table for identifier characters.
 *
 * Every entry of `valid_chars` holds one of three codes:
 *   0 - the byte may not appear in an identifier;
 *   1 - the byte may appear, but not in the first position (decimal digits);
 *   2 - the byte may appear in any position (letters and '_').
 *
 * The single instance `hy_Valid_ID_Chars` is defined together with the type.
 */
struct _hy_Valid_ID_Chars_Type {
  unsigned char valid_chars[256];

  /** @return true if `c` carries code 2, i.e. may be the first identifier character. */
  inline bool is_valid_first (unsigned char c) const {
    return 2 == valid_chars[c];
  }

  /** @return true if `c` carries any non-zero code, i.e. may appear in an identifier. */
  inline bool is_valid (unsigned char c) const {
    return valid_chars[c] != 0;
  }

  /** Fill the table: clear everything, then tag digits with 1 and letters / '_' with 2. */
  _hy_Valid_ID_Chars_Type (void) {
    // assign `code` to every byte value in the closed range [first, last]
    auto tag_range = [this] (int first, int last, unsigned char code) -> void {
      for (int byte = first; byte <= last; ++byte) {
        valid_chars[byte] = code;
      }
    };

    tag_range (0,   255, 0);
    tag_range ('0', '9', 1);
    tag_range ('a', 'z', 2);
    tag_range ('A', 'Z', 2);
    tag_range ('_', '_', 2);
  }
} hy_Valid_ID_Chars;
85 
86 
87 /*
88 ==============================================================
89 Constructors/Destructors/Copiers
90 ==============================================================
91 */
92 
_String(void)93 _String::_String (void) {
94   _String::Initialize();
95 }
96 
97 //=============================================================
98 
Initialize(bool)99 void _String::Initialize (bool) {
100     s_length = 0UL;
101     s_data = nil;
102 }
103 
104 //=============================================================
105 
Clear(void)106 void _String::Clear (void) {
107   s_length = 0UL;
108   if (s_data) {
109     free (s_data);
110     s_data = nil;
111   }
112 }
113 
114 //=============================================================
115 
_String(long const number)116 _String::_String(long const number) {
117     char s[64];
118     s_length = snprintf(s, sizeof(s), "%ld", number);
119     AllocateAndCopyString (s, s_length);
120 }
121 
122 //=============================================================
123 
_String(const unsigned long sL,char * buffer)124 _String::_String(const unsigned long sL, char *buffer) {
125     s_length = sL;
126     if (buffer) {
127         s_data = buffer;
128         AddAReference();
129     } else {
130         s_data = (char*) MemAllocate(sL + 1L, true);
131     }
132     s_data[sL] = (char)0;
133 }
134 
135 //=============================================================
136 
_String(const hyFloat val,const char * format)137 _String::_String(const hyFloat val, const char *format) {
138     char s_val[128];
139     s_length = snprintf(s_val, 128, format ? format : PRINTF_FORMAT_STRING, val);
140     AllocateAndCopyString (s_val, s_length);
141 }
142 
143 //=============================================================
144 
_String(const hyFloat val,unsigned char digits)145 _String::_String(const hyFloat val, unsigned char digits) {
146     char format_str[64];
147     if (digits > 0) {
148         snprintf(format_str, 64, "%%.%dg", MIN(digits, 20));
149     } else {
150         snprintf(format_str, 64, "%%g");
151     }
152     char s_val[128];
153     s_length = snprintf(s_val, 128, format_str, val);
154     AllocateAndCopyString (s_val, s_length);
155 }
156 
157 //=============================================================
158 
/** Copy constructor: start from the empty state, then deep-copy the contents of `s`. */
_String::_String(const _String &s) {
    // reset first so that Duplicate sees no stale buffer to free
    this->_String::Initialize ();
    this->_String::Duplicate (&s);
}
163 
164 //=============================================================
165 
_String(_String && s)166 _String::_String(_String &&s) {
167   s_length = s.s_length;
168   s_data = s.s_data;
169   s.Initialize();
170 }
171 
172   //=============================================================
173 
_String(_StringBuffer && s)174 _String::_String(_StringBuffer &&s) {
175   s_length = s.s_length;
176   s.TrimSpace();
177   s_data = s.s_data;
178   s._String::Initialize();
179   s.Initialize();
180 }
181 
182 
183 //=============================================================
184 
_String(_String * s,bool dynamic)185 _String::_String(_String *s, bool dynamic) {
186     if (s->CanFreeMe ()) {
187         s_data       = s->s_data;
188         s_length     = s->s_length;
189         s->s_data    = nil;
190         if (dynamic) {
191             DeleteObject (s);
192         }
193     } else {
194         AllocateAndCopyString (s->s_data, s->s_length);
195         if (dynamic) {
196             s->RemoveAReference();
197         }
198     }
199 }
200 
201 
202 //=============================================================
/**
 * Construct a copy of the substring source[start..end] (both ends inclusive).
 *
 * The range is first clamped by source.NormalizeRange(); if the source is
 * empty or the clamped range contains no characters, the result is an empty
 * string that still owns a one-byte, NUL-terminated buffer.
 */
_String::_String(const _String &source, long start, long end) {
    long const span = source.s_length ? source.NormalizeRange (start, end) : 0L;

    if (span > 0L) {
        AllocateAndCopyString (source.s_data + start, span);
    } else {
        s_length  = 0UL;
        s_data    = (char *)MemAllocate(1UL);
        s_data[0] = '\0';
    }
}
220 
221 //=============================================================
222 
_String(const char * c_string)223 _String::_String(const char *c_string) {
224     AllocateAndCopyString (c_string, strlen(c_string));
225 }
226 
227 //=============================================================
_String(const wchar_t * wc_string)228 _String::_String(const wchar_t *wc_string) {
229     unsigned long allocated = wcslen (wc_string);
230     s_length = 0UL;
231     s_data = (char *)MemAllocate(allocated + 1UL);
232     for (unsigned long cid = 0UL; cid < allocated; cid ++) {
233         int this_char = wctob (wc_string[cid]);
234         if (this_char != WEOF) {
235             s_data[s_length++] = (char) this_char;
236         }
237     }
238     if (s_length != allocated) {
239         s_data = (char *)MemReallocate((char *)s_data, (s_length+1) * sizeof(char));
240     }
241     s_data[s_length] = '\0';
242 }
243 
244 //=============================================================
_String(const char c)245 _String::_String(const char c) {
246     s_length = 1UL;
247     s_data = (char *)MemAllocate(2UL);
248     s_data[0] = c;
249     s_data[1] = '\0';
250 }
251 
252 //=============================================================
/**
 * Construct a string made of `copies` back-to-back repetitions of `str`.
 * The result is empty when either `copies` or the length of `str` is zero.
 */
_String::_String (const _String& str, unsigned long copies) {
    unsigned long const unit = str.s_length;

    s_length = copies * unit;
    s_data   = (char*)MemAllocate (s_length+1UL);

    if (s_length > 0UL) {
        char * write_at = s_data;
        for (unsigned long done = 0UL; done < copies; done++, write_at += unit) {
            memcpy (write_at, str.s_data, unit);
        }
    }

    s_data[s_length]='\0';
}
263 
264 //=============================================================
_String(FILE * file,long read_this_many)265 _String::_String(FILE * file, long read_this_many) {
266     _String::Initialize ();
267     if (file) {
268         if (read_this_many < 0) {
269           fseek(file, 0, SEEK_END);
270           s_length = (unsigned long) ftell(file);
271           rewind(file);
272         } else {
273           s_length = read_this_many;
274         }
275         s_data = (char *)MemAllocate(s_length + 1UL);
276         unsigned long read_items = fread(s_data, 1, s_length, file);
277         if (read_items < s_length) {
278           s_data = (char*)MemReallocate(s_data,read_items+1);
279           s_length = read_items;
280         }
281         s_data[s_length] = '\0';
282     }
283 }
284 
285 //=============================================================
~_String(void)286 _String::~_String(void) {
287     if (CanFreeMe()) {
288         if (s_data) {
289             free (s_data);
290             s_data = nil;
291         }
292         s_length = 0UL;
293     } else {
294         RemoveAReference();
295     }
296 }
297 
298 //=============================================================
makeDynamic(void) const299 BaseRef _String::makeDynamic (void) const {
300     _String * r = new _String;
301     r->Duplicate(this);
302     return r;
303 }
304 
305 //=============================================================
Duplicate(BaseRefConst ref)306 void    _String::Duplicate (BaseRefConst ref) {
307     if (s_data) {
308         free (s_data);
309     }
310 
311     _String const * s = (_String const*)ref;
312 
313     s_length = s->s_length;
314     s_data   = s->s_data;
315 
316     if (s_data) {
317         AllocateAndCopyString (s->s_data, s_length);
318     }
319 }
320 
321 //=============================================================
operator =(_String const & s)322 void _String::operator = (_String const& s) {
323     if (&s != this) Duplicate (&s);
324 }
325 
326 //=============================================================
operator =(_String && rhs)327 void _String::operator = (_String && rhs) {
328     if (this != &rhs) {
329         if (s_data) {
330             free (s_data);
331         }
332         s_data = rhs.s_data;
333         s_length = rhs.s_length;
334         rhs.s_data = nil;
335     }
336 }
337 
338 
339 
340 /*
341  ==============================================================
342  Private helpers
343  ==============================================================
344  */
345 
NormalizeRange(long & from,long & to) const346 long  _String::NormalizeRange(long & from, long & to) const {
347 
348     if (s_length == 0UL) {
349         return 0L;
350     }
351 
352     if (from < 0L) {
353         from = 0L;
354     }
355 
356     if (to < 0L || to >= s_length ) {
357         to = s_length - 1UL;
358     }
359 
360     return to - from + 1L;
361 
362 }
363 
364 //=============================================================
365 
AllocateAndCopyString(const char * source_string,unsigned long length)366 void _String::AllocateAndCopyString (const char * source_string, unsigned long length) {
367     s_length = length;
368     s_data = (char*) MemAllocate (length+1UL);
369     //if (s_length) {
370         memcpy (s_data, source_string, length);
371     //}
372     s_data [length] = '\0';
373 }
374 
375 
376 /*
377 ==============================================================
378 Getters and setters
379 ==============================================================
380 */
381 
operator [](long index)382 char& _String::operator [] (long index) {
383     if (index < s_length && index >= 0L) {
384         return s_data[index];
385     }
386     HandleApplicationError (_String ("Internal error at ") & __PRETTY_FUNCTION__ & ": an invalid index requested");
387     return s_data[0];
388 }
389 
390 //=============================================================
391 
392 
operator ()(long index) const393 char _String::operator () (long index) const {
394   if (index >= 0L && index < s_length) {
395     return s_data[index];
396   }
397   if (index < 0L && -index <= s_length) {
398     return s_data[s_length + index];
399   }
400   return default_return;
401 }
402 
403 //=============================================================
404 
set_char(unsigned long index,char const data)405 void _String::set_char (unsigned long index, char const data) {
406     if (index < s_length) {
407         s_data[index] = data;
408     }
409 }
410 
411 //=============================================================
412 
get_str(void) const413 const char *_String::get_str(void) const { return s_data; }
414 
415 /*
416  ==============================================================
417  Type conversions
418  ==============================================================
419  */
420 
421 
operator const char*(void) const422 _String::operator const char *(void) const { return s_data; }
423 
424 //=============================================================
425 
to_float(void) const426 hyFloat _String::to_float (void) const{
427   if (s_length == 0UL) {
428     return 0.;
429   }
430   char *endP;
431   return strtod(s_data, &endP);
432 }
433 
434 //=============================================================
435 
to_long(void) const436 long _String::to_long (void) const {
437   if (s_length == 0UL) {
438     return 0L;
439   }
440   char * endP;
441   return strtol(s_data,&endP,10);
442 }
443 
444 //=============================================================
445 
toStr(unsigned long)446 BaseRef _String::toStr (unsigned long) {
447   AddAReference();
448   return this;
449 }
450 
451 //=============================================================
452 
FormatTimeString(long time_diff)453 const _String _String::FormatTimeString(long time_diff){
454 
455   long fields [3] = {time_diff / 3600L, time_diff / 60L % 60L, time_diff % 60L};
456 
457   _StringBuffer time_string;
458 
459   for (unsigned long l = 0; l < 3UL; l++) {
460     if (l) {
461       time_string << ':';
462     }
463     if (fields[l] < 10L) {
464       time_string << '0';
465     }
466     time_string << _String (fields[l]);
467   }
468 
469   return time_string;
470 }
471 
472 /*
473  ==============================================================
474  Comparisons
475  ==============================================================
476  */
477 
Compare(_String const & rhs) const478 hyComparisonType _String::Compare(_String const& rhs) const {
479 
480     if (s_length <= rhs.s_length) {
481         for (unsigned long i = 0UL; i < s_length; i++) {
482             int diff = s_data[i] - rhs.s_data[i];
483 
484             if (diff < 0) {
485                 return kCompareLess;
486             } else {
487                 if (diff > 0) {
488                     return kCompareGreater;
489                 }
490             }
491         }
492 
493         if (s_length == rhs.s_length) {
494             return kCompareEqual;
495         }
496         return kCompareLess;
497     } else {
498 
499         for (unsigned long i = 0UL; i < rhs.s_length; i++) {
500             int diff = s_data[i] - rhs.s_data[i];
501 
502             if (diff < 0) {
503                 return kCompareLess;
504             } else {
505                 if (diff > 0) {
506                     return kCompareGreater;
507                 }
508             }
509         }
510         return kCompareGreater;
511     }
512 
513     /*
514     unsigned long up_to = MIN (s_length, rhs.s_length);
515 
516     for (unsigned long i = 0UL; i < up_to; i++) {
517         if (s_data[i] < rhs.s_data[i]) {
518             return kCompareLess;
519         }
520         if (s_data[i] > rhs.s_data[i]) {
521             return kCompareGreater;
522         }
523     }
524 
525     if (s_length == rhs.s_length) {
526         return kCompareEqual;
527     }
528 
529     return s_length < rhs.s_length ? kCompareLess : kCompareGreater;*/
530 }
531 
532 //=============================================================
533 
CompareIgnoringCase(_String const & rhs) const534 hyComparisonType _String::CompareIgnoringCase(_String const& rhs) const {
535     unsigned long up_to = MIN (s_length, rhs.s_length);
536 
537     for (unsigned long i = 0UL; i < up_to; i++) {
538 
539         char llhs = tolower (s_data[i]), lrhs = tolower (rhs.s_data[i]);
540 
541 
542         if (llhs < lrhs) {
543             return kCompareLess;
544         }
545         if (llhs > lrhs) {
546             return kCompareGreater;
547         }
548     }
549 
550     if (s_length == rhs.s_length) {
551         return kCompareEqual;
552     }
553 
554     return s_length < rhs.s_length ? kCompareLess : kCompareGreater;
555 }
556 
557 //=============================================================
558 
operator ==(const _String & s) const559 bool _String::operator==(const _String& s) const { return Compare  (s) == kCompareEqual; }
operator >(const _String & s) const560 bool _String::operator>(const _String & s) const { return Compare  (s) == kCompareGreater; }
operator <=(const _String & s) const561 bool _String::operator<=(const _String & s) const { return Compare (s) != kCompareGreater; }
operator >=(const _String & s) const562 bool _String::operator>=(const _String & s) const { return Compare (s) != kCompareLess; }
operator !=(const _String & s) const563 bool _String::operator!=(const _String & s) const { return Compare (s) != kCompareEqual; }
operator <(const _String & s) const564 bool _String::operator<(const _String & s) const { return Compare  (s) == kCompareLess; }
565 
Equal(const _String & s) const566 bool _String::Equal(const _String& s) const { return Compare  (s) == kCompareEqual; };
EqualIgnoringCase(const _String & s) const567 bool _String::EqualIgnoringCase(const _String& s) const { return CompareIgnoringCase  (s) == kCompareEqual; };
Equal(const char c) const568 bool _String::Equal(const char c) const {
569     return s_length == 1UL && s_data[0] == c;
570 }
571 
572 //=============================================================
573 
574 
EqualWithWildChar(const _String & pattern,const char wildchar,unsigned long start_this,unsigned long start_pattern,_SimpleList * wildchar_matches) const575 bool _String::EqualWithWildChar(const _String& pattern, const char wildchar, unsigned long start_this, unsigned long start_pattern, _SimpleList * wildchar_matches) const {
576     // wildcards only matter in the second string
577 
578     if (pattern.s_length > start_pattern && wildchar != '\0') {
579         unsigned long   match_this_char = start_pattern;
580         // the position we are currently trying to match in the pattern
581 
582         bool            is_wildcard = pattern.s_data[match_this_char] == wildchar,
583         scanning_pattern = is_wildcard;
584 
585         unsigned long i = start_this;
586         // the position we are currently trying to match in *this
587         long last_matched_char = (long)start_this - 1L;
588         // the index of the last character in *this that was matched to something other than the wildcard
589 
590         while (i <= s_length) {
591             if (scanning_pattern) { // skip consecutive wildcards in "pattern"
592                 scanning_pattern = pattern.s_data[++match_this_char] == wildchar;
593             } else {
594                 if (s_data[i] == pattern.s_data[match_this_char]) {
595                     if (is_wildcard) {
596                         // could either match the next character or consume it into the wildcard
597                         if (wildchar_matches) {
598                             // record the current wildcard match
599                             // if this is the last (0) character, return true
600                             long rollback_checkpoint = wildchar_matches->countitems();
601                             if (last_matched_char + 1 < i) { // something get matched to the wildchard
602                                 *wildchar_matches << (last_matched_char+1) << (i-1);
603                             }
604                             if (i == s_length) {
605                                 return true;
606                             }
607                             if (EqualWithWildChar (pattern, wildchar, i, match_this_char, wildchar_matches)) {
608                                 // matching worked
609                                 return true;
610                             } else { // consume the character into the wildcard
611                                 i++;
612                                 for (long k = wildchar_matches->countitems() - rollback_checkpoint; k >= 0; k--) {
613                                     wildchar_matches->Pop();
614                                 }
615                                 continue;
616                             }
617                         } else {
618                             if (EqualWithWildChar (pattern, wildchar, i, match_this_char)) {
619                                 // matching worked
620                                 return true;
621                             } else { // consume the character into the wildcard
622                                 i++;
623                                 continue;
624                             }
625                         }
626                     } else {
627                         // try character match
628                         // note that the terminal '0' characters will always match, so
629                         // this is where we terminate
630                         if (wildchar_matches) {
631                             if (last_matched_char + 1 < i) { // something get matched to the wildchard
632                                 *wildchar_matches << (last_matched_char+1) << (i-1);
633                             }
634                             last_matched_char = i;
635                         }
636                         i++;
637                         match_this_char++;
638                         if (i > s_length || match_this_char > pattern.s_length) {
639                             break;
640                         }
641                         // TODO check to see if this will return true strings that match the pattern and
642                         // have some left-over stuff, like
643                         // "tree.node.a.b" might incorrectly match "tree.?.a"
644                         is_wildcard =  pattern.s_data[match_this_char] == wildchar;
645                         scanning_pattern = is_wildcard;
646                     }
647                 } else { // match wildcard
648                     if (!is_wildcard) {
649                         return false;
650                     }
651                     scanning_pattern = false;
652                     i++;
653                 }
654             }
655         }
656 
657         if (wildchar_matches) {
658             if (last_matched_char + 1 < i) { // something get matched to the wildchard
659                 *wildchar_matches << (last_matched_char+1) << (i-1);
660             }
661         }
662 
663         return match_this_char > pattern.s_length;
664     } else {
665         return s_length == start_this;
666     }
667 
668     return false;
669 }
670 
671 
672 /*
673  ==============================================================
674  Content-modification and extraction methods
675  ==============================================================
676  */
677 
678 
679 //=============================================================
680 
681 
682 //Append operator
/**
 * Concatenation: returns a new string holding this string followed by `rhs`.
 * When both operands are empty, kEmptyString is returned.
 */
_String _String::operator & (const _String& rhs) const {
    unsigned long joined_length = s_length + rhs.s_length;

    if (!joined_length) {
        return kEmptyString;
    }

    _String joined (joined_length);

    // left part
    if (s_length && s_data) {
        memcpy(joined.s_data, s_data, s_length);
    }

    // right part, placed immediately after the left one
    if (rhs.s_length && rhs.s_data) {
        memcpy(joined.s_data + s_length, rhs.s_data, rhs.s_length);
    }

    joined.s_data[joined.s_length] = '\0';
    return joined;
}
703 
704 //=============================================================
705 
Chop(long start,long end) const706 _String _String::Chop(long start, long end) const{
707 
708     long resulting_length = NormalizeRange(start,end);
709 
710     if (resulting_length > 0L) {
711         _String res((unsigned long)(s_length - resulting_length));
712         if (start > 0L) {
713             memcpy(res.s_data, s_data, start);
714         }
715         if (end + 1L < s_length) {
716             memcpy(res.s_data + start, s_data + end + 1L, s_length - end - 1L);
717         }
718 
719         return res;
720     }
721 
722     return *this;
723 
724 }
725 
726 //=============================================================
727 
Cut(long start,long end) const728 _String _String::Cut(long start, long end) const {
729     return _String (*this, start, end);
730 }
731 
732 //=============================================================
733 
Delete(long start,long end)734 void _String::Delete(long start, long end) {
735     long resulting_length = NormalizeRange(start,end);
736 
737     if (resulting_length > 0L) {
738         if (end < (long)s_length - 1UL) {
739             memmove(s_data + start, s_data + end + 1L, s_length - end - 1L);
740         }
741         s_length -= resulting_length;
742         s_data = (char*)MemReallocate(s_data, sizeof(char) * (s_length + 1UL));
743         s_data[s_length] = '\0';
744     }
745 }
746 
747 //=============================================================
748 
Flip(void)749 void _String::Flip(void) {
750     for (unsigned long i = 0UL; i < (s_length >> 1); i++) {
751         char c;
752         SWAP  (s_data[i], s_data[s_length - 1 - i], c);
753     }
754 }
755 
756 //=============================================================
757 
Reverse(void) const758 _String _String::Reverse(void) const {
759 
760     _String result (*this);
761     for (unsigned long s = 0UL, e = s_length - 1L;  s < s_length; s++, e--) {
762         result.s_data[s] = s_data[e];
763     }
764     return result;
765 }
766 
767 //=============================================================
768 
Insert(char c,long where)769 void _String::Insert(char c, long where) {
770     if (where < 0L || where >= s_length) {
771         where = s_length;
772     }
773 
774     s_data = (char*)MemReallocate(s_data, sizeof(char) * (s_length + 2UL));
775 
776     if (where < s_length) {
777         memmove(s_data + where + 1UL, s_data + where, s_length - where);
778     }
779 
780     s_data[where] = c;
781     s_data[++s_length] = '\0';
782 }
783 
784 //=============================================================
785 
Trim(long start,long end)786 void _String::Trim(long start, long end) {
787     long resulting_length = NormalizeRange(start, end);
788     /*if (s_length >= 5000 && start > 0) {
789         printf ("\nLong trim %d %d %d\n", s_length, start, end);
790     }*/
791 
792     if (resulting_length > 0L) {
793         if (start > 0L) {
794             memmove(s_data, s_data + start, resulting_length);
795         }
796         if (s_length != resulting_length) {
797             s_length = resulting_length;
798             s_data = (char*)MemReallocate(s_data, resulting_length + 1UL);
799             s_data[resulting_length] = '\0';
800         }
801     } else {
802         s_length = 0UL;
803         s_data = (char*)MemReallocate(s_data, 1UL);
804         s_data[0] = '\0';
805     }
806 }
807 
808 //=============================================================
809 
ChangeCase(hy_string_case conversion_type) const810 const _String    _String::ChangeCase (hy_string_case conversion_type) const {
811   _String result (s_length);
812 
813   auto conversion_function = conversion_type == kStringUpperCase ? toupper : tolower;
814 
815   for (unsigned long i = 0UL; i<s_length; i++) {
816     result.s_data [i] = conversion_function (s_data[i]);
817   }
818 
819   return result;
820 }
821 
822 //=============================================================
823 
ChangeCaseInPlace(hy_string_case conversion_type)824 void   _String::ChangeCaseInPlace (hy_string_case conversion_type) {
825 
826   auto conversion_function = conversion_type == kStringUpperCase ? toupper : tolower;
827 
828   for (unsigned long i = 0UL; i<s_length; i++) {
829     s_data[i] = conversion_function (s_data[i]);
830   }
831 
832 }
833 
834 //=============================================================
835 
Tokenize(const _String & splitter) const836 const _List _String::Tokenize(const _String& splitter) const {
837   _List tokenized;
838 
839   long cp = 0L, cpp;
840   while ((cpp = Find(splitter, cp)) != kNotFound) {
841     if (cpp > cp) {
842       tokenized < new _String(*this, cp, cpp - 1L);
843     } else {
844       tokenized < new _String;
845     }
846 
847     cp = cpp + splitter.s_length;
848   }
849 
850   tokenized < new _String(*this, cp, kStringEnd);
851   return tokenized;
852 }
853 
854   //=============================================================
855 
Tokenize(bool const splitter[256]) const856 const _List _String::Tokenize(bool const splitter[256]) const {
857   _List tokenized;
858 
859   long cp = 0L, cpp;
860   while ((cpp = Find(splitter, cp)) != kNotFound) {
861     if (cpp > cp) {
862       tokenized < new _String(*this, cp, cpp - 1L);
863     } else {
864       tokenized < new _String;
865     }
866 
867     cp = cpp + 1;
868   }
869 
870   tokenized < new _String(*this, cp);
871   return tokenized;
872 }
873 
874 //=============================================================
875 
Enquote(char quote_char) const876 const _String _String::Enquote (char quote_char) const {
877   return _StringBuffer (2UL + s_length) << quote_char  << *this << quote_char;
878 }
879 
880 //=============================================================
881 
Enquote(char open_char,char close_char) const882 const _String _String::Enquote (char open_char, char close_char) const {
883     return _StringBuffer (2UL + s_length) << open_char  << *this << close_char;
884 }
885 
886 //=============================================================
887 
KillSpaces(void) const888 const _String _String::KillSpaces(void) const {
889   _StringBuffer temp(s_length + 1UL);
890   for (unsigned long k = 0UL; k < s_length; k++) {
891     if (!isspace(s_data[k])) {
892       temp << s_data[k];
893     }
894   }
895   return temp;
896 }
897 
898 //=============================================================
899 
CompressSpaces(void) const900 const _String _String::CompressSpaces(void) const {
901 
902   _StringBuffer temp(s_length + 1UL);
903   bool skipping = false;
904 
905   for (unsigned long k = 0UL; k < s_length; k++) {
906     if (!isspace(s_data[k])) {
907       temp << s_data[k];
908       skipping = false;
909     } else {
910       if (!skipping) {
911         skipping = true;
912         temp << ' ';
913       }
914     }
915   }
916   return temp;
917 }
918 
919 
920 /*
921  ==============================================================
922  Search Functions
923  ==============================================================
924 */
925 
Find(const _String & pattern,long start,long end) const926 long _String::Find(const _String& pattern, long start, long end) const {
927 
928   if (pattern.s_length) {
929     long span = NormalizeRange(start, end);
930     if (span >= (long) pattern.s_length) {
931       const unsigned long upper_bound = end - pattern.s_length + 2L;
932       for (unsigned long i = start; i < upper_bound ; i++) {
933         unsigned long j = 0UL;
934         for (;j < pattern.s_length; j++) {
935           if (s_data[i + j] != pattern.s_data[j]) {
936             break;
937           }
938         }
939         if (j == pattern.s_length) {
940           return i;
941         }
942       }
943     }
944   }
945   return kNotFound;
946 }
947 
948 //=============================================================
949 
FindBackwards(const _String & pattern,long start,long end) const950 long _String::FindBackwards(const _String& pattern, long start, long end) const {
951 
952   if (pattern.s_length) {
953     long span = NormalizeRange(start, end);
954     if (span >= (long) pattern.s_length) {
955       const long upper_bound = end - pattern.s_length + 1L;
956       for (long i = upper_bound; i >= start; i--) {
957         unsigned long j = 0UL;
958         for (;j < pattern.s_length; j++) {
959           if (s_data[i + j] != pattern.s_data[j]) {
960             break;
961           }
962         }
963         if (j == pattern.s_length) {
964           return i;
965         }
966       }
967     }
968   }
969   return kNotFound;
970 }
971 
972 //=============================================================
973 
Find(const char p,long start,long end) const974 long _String::Find(const char p, long start, long end) const {
975   if (s_length) {
976     long span = NormalizeRange(start, end);
977     if (span > 0L) {
978       char sentinel = s_data[end+1];
979       s_data[end+1L] = p;
980       long index = start;
981       while (s_data[index] != p) {
982         index++;
983       }
984       s_data[end+1L] = sentinel;
985       return index <= end ? index : kNotFound;
986     }
987   }
988 
989   return kNotFound;
990 }
991 
992   //=============================================================
993 
Find(const bool lookup[256],long start,long end) const994 long _String::Find(const bool lookup[256], long start, long end) const {
995   if (s_length) {
996     long span = NormalizeRange(start, end);
997     if (span > 0L) {
998 
999       for (unsigned long index = start; index <= end; index ++ ) {
1000         if (lookup [s_data[index]]) {
1001           return index;
1002         }
1003       }
1004     }
1005   }
1006 
1007   return kNotFound;
1008 }
1009 
1010 //=============================================================
1011 
FindAnyCase(const bool lookup[256],long start,long end) const1012 long _String::FindAnyCase (const bool lookup[256], long start, long end) const {
1013   if (s_length) {
1014     long span = NormalizeRange(start, end);
1015     if (span > 0L) {
1016 
1017       for (unsigned long index = start; index <= end; index ++ ) {
1018         if (lookup [tolower(s_data[index])] || lookup [toupper (s_data[index])]) {
1019           return index;
1020         }
1021       }
1022     }
1023   }
1024 
1025   return kNotFound;
1026 }
1027 
1028 
1029 //=============================================================
FindAnyCase(const _String & pattern,long start,long end) const1030 long _String::FindAnyCase (const _String& pattern, long start, long end) const {
1031 
1032   if (pattern.s_length) {
1033     long span = NormalizeRange(start, end);
1034     if (span >= (long) pattern.s_length) {
1035       const unsigned long upper_bound = end - pattern.s_length + 2L;
1036       for (unsigned long i = start; i < upper_bound ; i++) {
1037         unsigned long j = 0UL;
1038         for (;j < pattern.s_length; j++) {
1039           if (toupper (s_data[i + j]) != toupper (pattern.s_data[j])) {
1040             break;
1041           }
1042         }
1043         if (j == pattern.s_length) {
1044           return i;
1045         }
1046       }
1047     }
1048   }
1049   return kNotFound;
1050 }
1051 //=============================================================
1052 
/**
 * Build a copy of this string in which occurrences of a pattern are
 * substituted by a replacement. Matches are found left to right and do
 * not overlap: scanning resumes right after each replaced occurrence.
 * @param pattern     the substring to replace; if it is empty or longer than
 *                    this string, an unmodified copy is returned
 * @param replace     the text substituted for each match
 * @param replace_all true to replace every match, false to replace only the first
 * @return the resulting string
 */
const _String _String::Replace(const _String& pattern, const _String& replace, bool replace_all) const {

  const unsigned long needle_length = pattern.s_length;
  if (needle_length == 0UL || s_length < needle_length) {
    return *this;
  }

  _StringBuffer rebuilt;
  const unsigned long last_anchor = s_length - needle_length;
  unsigned long cursor = 0UL;

  while (cursor <= last_anchor) {
    if (memcmp (s_data + cursor, pattern.s_data, needle_length) == 0) {
      rebuilt << replace;
      cursor += needle_length; // skip over the text that was just replaced
      if (!replace_all) {
        break;
      }
    } else {
      rebuilt << s_data[cursor];
      ++cursor;
    }
  }

  // whatever follows the last examined position is copied verbatim
  return rebuilt.AppendSubstring(*this, cursor, kNotFound);
}
1083 //=============================================================
1084 
FirstNonSpaceIndex(long start,long end,hy_string_search_direction direction) const1085 long _String::FirstNonSpaceIndex(long start, long end, hy_string_search_direction direction) const {
1086   return _FindFirstIndexCondtion(start, end, direction, [] (char c) -> bool {return !isspace (c);});
1087 }
1088 
FirstNonSpace(long start,long end,hy_string_search_direction direction) const1089 char _String::FirstNonSpace(long start, long end, hy_string_search_direction direction) const {
1090   long r = FirstNonSpaceIndex(start, end, direction);
1091   return r == kNotFound ? default_return : s_data[r];
1092 }
1093 
1094 //=============================================================
1095 
FirstSpaceIndex(long start,long end,hy_string_search_direction direction) const1096 long _String::FirstSpaceIndex(long start, long end, hy_string_search_direction direction) const {
1097   return _FindFirstIndexCondtion(start, end, direction, isspace);
1098 }
1099 
1100 //=============================================================
1101 
FirstNonSpaceFollowingSpace(long start,long end,hy_string_search_direction direction) const1102 long _String::FirstNonSpaceFollowingSpace(long start, long end, hy_string_search_direction direction) const {
1103   long first_space = FirstSpaceIndex (start, end, direction);
1104   if (first_space != kNotFound) {
1105     if (direction == kStringDirectionForward) {
1106       first_space = FirstNonSpaceIndex (first_space, end, direction);
1107     } else {
1108       first_space = FirstNonSpaceIndex (start, first_space, direction);
1109     }
1110   }
1111   return first_space;
1112 }
1113 
1114 //Begins with string
BeginsWith(_String const & pattern,bool case_sensitive,unsigned long startfrom) const1115 bool _String::BeginsWith (_String const& pattern, bool case_sensitive, unsigned long startfrom) const{
1116   if (s_length >= pattern.s_length + startfrom) {
1117     if (case_sensitive) {
1118       for (unsigned long idx = 0; idx < pattern.s_length; idx ++) {
1119         if (s_data[idx+startfrom] != pattern.s_data[idx]) {
1120           return false;
1121         }
1122       }
1123     } else {
1124        for (unsigned long idx = 0; idx < pattern.s_length; idx ++) {
1125         if (tolower(s_data[idx+startfrom]) != tolower(pattern.s_data[idx])) {
1126           return false;
1127         }
1128       }
1129     }
1130     return true;
1131   }
1132   return false;
1133 }
1134 
//Begins with a character that is flagged in the lookup table
BeginsWith(const bool pattern[256],bool case_sensitive,unsigned long startfrom) const1136 bool _String::BeginsWith (const bool pattern[256], bool case_sensitive, unsigned long startfrom) const{
1137   if (s_length >= 1UL + startfrom) {
1138     if (case_sensitive) {
1139       for (unsigned long idx = 0; idx < 256; idx ++) {
1140         if (pattern [s_data[startfrom]]) {
1141           return true;
1142         }
1143       }
1144     } else {
1145       for (unsigned long idx = 0; idx < 256; idx ++) {
1146         if (pattern [tolower(s_data[startfrom])] || pattern [toupper(s_data[startfrom])] ) {
1147           return true;
1148         }
1149       }
1150     }
1151   }
1152   return false;
1153 }
1154 
1155 
1156 //Ends with string
EndsWith(_String const & pattern,bool case_sensitive) const1157 bool _String::EndsWith (_String const& pattern, bool case_sensitive) const{
1158   if (s_length >= pattern.s_length) {
1159     unsigned long length_difference = s_length - pattern.s_length;
1160     return (case_sensitive ? Find (pattern, length_difference)
1161             : FindAnyCase (pattern, length_difference)) == length_difference;
1162   }
1163   return false;
1164 }
1165 
1166 
1167 
//Begins with string, and the string is not followed by an identifier character
BeginsWithAndIsNotAnIdent(_String const & pattern) const1169 bool _String::BeginsWithAndIsNotAnIdent (_String const& pattern) const {
1170 
1171   if (BeginsWith (pattern)) {
1172     if (s_length > pattern.s_length) {
1173       char next_char = char_at (pattern.s_length);
1174       if (isalnum(next_char) || next_char == '.' || next_char == '_' || next_char == '&') {
1175         // TODO SLKP 20170616: what is the use case for next_char == '&'?
1176         return false;
1177       }
1178     }
1179     return true;
1180   }
1181   return false;
1182 }
1183 
1184 /*
1185  ==============================================================
1186  Parser-related functions
 TODO: possibly deprecate once the move to the grammar is effected
1188  ==============================================================
1189 */
1190 
1191 
1192 
1193 //=============================================================
1194 
StripQuotes(char open_char,char close_char)1195 bool _String::StripQuotes(char open_char, char close_char) {
1196   if (s_length >= 2UL) {
1197     if (s_data [0] == open_char && s_data [s_length - 1UL] == close_char) {
1198       Trim (1, s_length - 2UL);
1199         return true;
1200     }
1201   }
1202     return false;
1203 }
1204 
1205 //=============================================================
1206 
StripQuotes(char const * open_chars,char const * close_chars)1207 bool _String::StripQuotes(char const* open_chars, char const * close_chars) {
1208   if (s_length >= 2UL) {
1209       int count = strlen (open_chars);
1210       for (int i = 0; i < count; i++) {
1211           if (s_data [0] == open_chars[i] && s_data [s_length - 1UL] == close_chars[i]) {
1212             Trim (1, s_length - 2UL);
1213               return true;
1214           }
1215       }
1216   }
1217   return false;
1218 }
1219 
1220 
1221 //=============================================================
1222 
IsValidIdentifier(int options) const1223 bool _String::IsValidIdentifier(int options) const {
1224   return s_length > 0UL && _IsValidIdentifierAux (options & fIDAllowCompound, options & fIDAllowFirstNumeric) == s_length - 1UL && hyReservedWords.FindObject (this) == kNotFound;
1225 }
1226 
1227 //=============================================================
1228 
ConvertToAnIdent(int options) const1229 const _String  _String::ConvertToAnIdent(int options) const {
1230   _StringBuffer converted;
1231 
1232   const char default_placeholder = '_';
1233 
1234   char       last_char = '\0';
1235   bool       allow_compounds = options & fIDAllowCompound,
1236              allow_first_numeric = options & fIDAllowFirstNumeric;
1237 
1238 
1239   unsigned long current_index = 0UL;
1240 
1241   bool          first     = true;
1242 
1243   for (; current_index < s_length; current_index ++) {
1244     char current_char =  s_data[current_index];
1245     if (first) {
1246       if ( hy_Valid_ID_Chars.is_valid_first (current_char) || allow_first_numeric && hy_Valid_ID_Chars.is_valid (current_char)) {
1247         converted << current_char;
1248       } else {
1249         if (last_char != default_placeholder) {
1250           converted << default_placeholder;
1251         }
1252       }
1253       first = false;
1254     } else {
1255       if ( hy_Valid_ID_Chars.is_valid(current_char)) {
1256         converted << current_char;
1257       } else {
1258           if (allow_compounds && current_char == '.') {
1259             first = true;
1260             converted << current_char;
1261           } else {
1262               if (last_char != default_placeholder) {
1263                 converted << default_placeholder;
1264               }
1265           }
1266       }
1267     }
1268     last_char = converted.char_at (converted.length () - 1UL);
1269   }
1270 
1271   return converted;
1272 }
1273 
1274 
1275 //=============================================================
1276 
_IsValidIdentifierAux(bool allow_compounds,bool allow_first_numeric,char wildcard) const1277 long _String::_IsValidIdentifierAux(bool allow_compounds, bool allow_first_numeric, char wildcard) const {
1278 
1279   unsigned long current_index = 0UL;
1280 
1281   bool          first     = true;
1282 
1283   for (; current_index < s_length; current_index ++) {
1284     char current_char =  s_data[current_index];
1285     if (first) {
1286       if ( ! (hy_Valid_ID_Chars.is_valid_first (current_char) || allow_first_numeric && hy_Valid_ID_Chars.is_valid (current_char))) {
1287         break;
1288       }
1289       first = false;
1290     } else {
1291       if ( ! hy_Valid_ID_Chars.is_valid(current_char)) {
1292         if (allow_compounds && current_char == '.') {
1293           first = true;
1294         } else {
1295           break;
1296         }
1297       }
1298     }
1299   }
1300 
1301   if (current_index) {
1302     return current_index - 1UL;
1303   }
1304 
1305   return kNotFound;
1306 }
1307 
1308 //=============================================================
1309 
IsALiteralArgument(bool strip_quotes)1310 bool    _String::IsALiteralArgument (bool strip_quotes) {
1311   if (s_length >= 2UL) {
1312     char quotes [2] = {'"','\''};
1313     for (char quote : quotes) {
1314       long from = 0L,
1315            to = ExtractEnclosedExpression (from,quote,quote, fExtractRespectEscape);
1316 
1317       if (from == 0L && to == s_length - 1L) {
1318         if (strip_quotes){
1319           Trim (1L, s_length-2L);
1320         }
1321         return true;
1322       }
1323     }
1324   }
1325   return false;
1326 }
1327 
1328 //=============================================================
1329 
1330 
/**
 * Classify this string as a variable reference and compute the name of
 * the object it refers to.
 *
 * Forms handled by the code below:
 *   - "*expr"  : local dereference
 *   - "^expr"  : global dereference
 *   - "ident&" : direct reference with a trailing '&' (the '&' is kept)
 *   - "ident"  : direct reference
 * where identifiers are validated with fIDAllowCompound | fIDAllowFirstNumeric.
 *
 * @param referenced_object [out] receives the resolved name; set to
 *        kEmptyString whenever kStringInvalidReference is returned
 * @param context optional prefix; when supplied, resolved names are
 *        generally written as "<context>.<name>" (see the branches below
 *        for the exceptions)
 * @return kStringDirectReference, kStringLocalDeference,
 *         kStringGlobalDeference or kStringInvalidReference
 */
hy_reference_type _String::ProcessVariableReferenceCases (_String& referenced_object, _String const * context) const {
 const static _String kDot (".");

  if (nonempty()) {

      char first_char    = char_at(0);
      // a trailing '&' marks the "ident&" form
      bool is_func_ref   = char_at(s_length-1) == '&';

      if (first_char == '*' || first_char == '^') {
        // a dereference cannot be combined with a trailing '&'
        if (is_func_ref) {
          referenced_object = kEmptyString;
          return kStringInvalidReference;
        }
        bool is_global_ref = first_char == '^';
        // everything after the leading '*' or '^'
        _StringBuffer   plain_name (length());
        plain_name.AppendSubstring (*this,1,-1);

        if (plain_name.IsValidIdentifier(fIDAllowCompound | fIDAllowFirstNumeric)) {
          // the remainder is an identifier: look up the string variable with
          // that name (prefixed by the context, when there is one)
          if (context) {
            plain_name.Clear();
            (plain_name << *context << '.').AppendSubstring (*this,1,-1);
          }
          _FString * dereferenced_value = (_FString*)FetchObjectFromVariableByType(&plain_name, STRING);
          // the stored string must itself classify as a direct reference
          // (this nested call passes no context argument)
          if (dereferenced_value && dereferenced_value->get_str().ProcessVariableReferenceCases (referenced_object) == kStringDirectReference) {
            // local dereferences are prefixed with the context; global ones are not
            if (!is_global_ref && context) {
              referenced_object = (_StringBuffer (context->length() + 1UL + referenced_object.length()) << *context << '.' << referenced_object);
            }
            return is_global_ref?kStringGlobalDeference:kStringLocalDeference;
          }
        } else {

          // the remainder is not an identifier: hand it to ProcessLiteralArgument
          // (with a _VariableContainer built from the context, if any) and
          // classify the string that comes back
          _String try_as_expression;
          if (context) {
            _VariableContainer ctxt (*context);
            try_as_expression = ProcessLiteralArgument (&plain_name, &ctxt);
          } else {
            try_as_expression = ProcessLiteralArgument (&plain_name, nil);
          }
          if (try_as_expression.ProcessVariableReferenceCases (referenced_object) == kStringDirectReference) {
            // NOTE(review): the prefixed name is rebuilt from try_as_expression,
            // not from the referenced_object computed by the nested call
            if (!is_global_ref && context) {
              //referenced_object = *context & '.' & try_as_expression;
              referenced_object = (_StringBuffer (context->length() + 1UL + try_as_expression.length()) << *context << '.' << try_as_expression);
            }

            return is_global_ref?kStringGlobalDeference:kStringLocalDeference;
          }
        }
        // a failed dereference falls through to the checks below
      }

      if (is_func_ref) {
        // "ident&": validate the part before the '&', then re-attach the '&'
        // after optionally prefixing with the context
        referenced_object = Cut (0, s_length-2UL);
        if (referenced_object.IsValidIdentifier(fIDAllowCompound | fIDAllowFirstNumeric)) {
          referenced_object = (context? (*context & '.' & referenced_object): (referenced_object)) & '&';
          return kStringDirectReference;
        }
      }
      else {
        if (IsValidIdentifier(fIDAllowCompound | fIDAllowFirstNumeric)) {
          if (context) {
            // do not prefix a name that already starts with "<context>."
            if (BeginsWith (*context) && BeginsWith(kDot, true, context->length())) {
                referenced_object = *this;
            } else {
                referenced_object = (_StringBuffer (context->length() + length() + 1) << *context << '.' << *this);
            }

            //_String cdot = *context & '.';
            //referenced_object = BeginsWith(cdot) ? *this : (cdot & *this);
          } else {
            referenced_object = *this;
          }
          return kStringDirectReference;
        }
      }
  }

  // empty string, or none of the recognized forms
  referenced_object = kEmptyString;
  return kStringInvalidReference;
}
1409 
1410 
1411 
1412 
1413 /*
1414  ==============================================================
1415  Regular Expression Methods
1416  ==============================================================
1417  */
1418 
GetRegExpError(int error)1419 const _String _String::GetRegExpError(int error) {
1420   char buffer[512];
1421   buffer[regerror(error, nil, buffer, 511)] = 0;
1422   return _String("Regular Expression error:") & _String (buffer).Enquote();
1423 }
1424 
1425 //=============================================================
1426 
FlushRegExp(regex_t * re)1427 void _String::FlushRegExp(regex_t* re) {
1428   regfree(re);
1429   delete re;
1430 }
1431 
1432 //=============================================================
1433 
PrepRegExp(const _String & pattern,int & error_code,bool case_sensitive,bool throw_errors)1434 regex_t* _String::PrepRegExp(const _String& pattern, int &error_code, bool case_sensitive, bool throw_errors) {
1435   regex_t *res = new regex_t;
1436 
1437   error_code = regcomp(res, pattern.get_str(),
1438                     REG_EXTENDED | (case_sensitive ? 0 : REG_ICASE));
1439 
1440   if (error_code) {
1441     FlushRegExp(res);
1442     if (throw_errors) {
1443       throw (GetRegExpError (error_code));
1444     }
1445     return nil;
1446   }
1447   return res;
1448 }
1449 
1450 //=============================================================
1451 
RegExpMatch(regex_t const * re,unsigned long start) const1452 const _SimpleList _String::RegExpMatch(regex_t const* re, unsigned long start ) const {
1453   _SimpleList matched_pairs;
1454 
1455   if (s_length && start < s_length) {
1456 
1457     regmatch_t static_matches [4];
1458     if (re->re_nsub <= 3) {
1459         int error_code = regexec(re, s_data + start, re->re_nsub + 1, static_matches, 0);
1460         if (error_code == 0) {
1461             for (long k = 0L; k <= re->re_nsub; k++) {
1462                 matched_pairs << static_matches[k].rm_so + start
1463                 << static_matches[k].rm_eo - 1 + start;
1464             }
1465         }
1466     } else {
1467         regmatch_t *matches = new regmatch_t[re->re_nsub + 1];
1468         int error_code = regexec(re, s_data + start, re->re_nsub + 1, matches, 0);
1469         if (error_code == 0) {
1470           for (long k = 0L; k <= re->re_nsub; k++) {
1471             matched_pairs << matches[k].rm_so + start
1472                           << matches[k].rm_eo - 1 + start;
1473           }
1474         }
1475         delete[] matches;
1476     }
1477   }
1478 
1479   return matched_pairs;
1480 }
1481 
1482 //=============================================================
1483 
1484 
RegExpAllMatches(regex_t const * re) const1485 const _SimpleList _String::RegExpAllMatches(regex_t const* re) const {
1486   _SimpleList matched_pairs;
1487 
1488   if (s_length) {
1489 
1490     regmatch_t *matches = new regmatch_t[re->re_nsub + 1];
1491     int error_code = regexec(re, s_data, re->re_nsub + 1, matches, 0);
1492     while (error_code == 0) {
1493       long offset = matched_pairs.countitems()
1494       ? matched_pairs.Element(-1) + 1
1495       : 0;
1496 
1497       matched_pairs << matches[0].rm_so + offset
1498                     << matches[0].rm_eo - 1 + offset;
1499 
1500       offset += matches[0].rm_eo;
1501       if (offset < s_length) {
1502         error_code = regexec(re, s_data + offset, re->re_nsub + 1, matches, 0);
1503       } else {
1504         break;
1505       }
1506     }
1507     delete[] matches;
1508   }
1509   return matched_pairs;
1510 }
1511 
1512 //=============================================================
1513 
_IntRegExpMatch(const _String & pattern,bool case_sensitive,bool handle_errors,bool match_all) const1514 const _SimpleList _String::_IntRegExpMatch (const _String & pattern,
1515                                            bool case_sensitive, bool handle_errors, bool match_all) const {
1516   if (s_length) {
1517     int err_code = 0;
1518     regex_t* regex = PrepRegExp(pattern, err_code, case_sensitive);
1519     if (regex) {
1520       _SimpleList hits = match_all ? RegExpAllMatches(regex) : RegExpMatch(regex);
1521       FlushRegExp(regex);
1522       return hits;
1523     } else if (handle_errors) {
1524       HandleApplicationError(GetRegExpError(err_code));
1525     }
1526   }
1527   return _SimpleList();
1528 }
1529 
1530 //=============================================================
1531 
RegExpMatch(const _String & pattern,bool case_sensitive,bool handle_errors) const1532 const _SimpleList _String::RegExpMatch (const _String & pattern,
1533                                         bool case_sensitive, bool handle_errors) const {
1534   return _IntRegExpMatch (pattern, case_sensitive, handle_errors, false);
1535 }
1536 
1537 //=============================================================
1538 
RegExpAllMatches(const _String & pattern,bool case_sensitive,bool handle_errors) const1539 const _SimpleList _String::RegExpAllMatches (const _String & pattern,
1540                                         bool case_sensitive, bool handle_errors) const {
1541   return _IntRegExpMatch (pattern, case_sensitive, handle_errors, true);
1542 }
1543 
1544 /*
1545 ==============================================================
1546 Methods
1547 ==============================================================
1548 */
1549 
1550 
1551 // Compute Adler-32 CRC for a string
1552 // Implementation shamelessly lifted from http://en.wikipedia.org/wiki/Adler-32
Adler32(void) const1553 long _String::Adler32(void) const {
1554 
1555   const static unsigned long  MOD_ADLER = 65521UL;
1556 
1557   unsigned long len = s_length,
1558   a = 1UL,
1559   b = 0UL,
1560   i = 0UL;
1561 
1562   while (len) {
1563     unsigned long tlen = len > 5550UL ? 5550UL : len;
1564     len -= tlen;
1565     do {
1566       a += s_data[i++];
1567       b += a;
1568     } while (--tlen);
1569     a = (a & 0xffff) + (a >> 16) * (65536UL - MOD_ADLER);
1570     b = (b & 0xffff) + (b >> 16) * (65536UL - MOD_ADLER);
1571   }
1572 
1573   if (a >= MOD_ADLER) {
1574     a -= MOD_ADLER;
1575   }
1576 
1577   b = (b & 0xffff) + (b >> 16) * (65536UL - MOD_ADLER);
1578 
1579   if (b >= MOD_ADLER) {
1580     b -= MOD_ADLER;
1581   }
1582 
1583   return b << 16 | a;
1584 }
1585 
1586 //=============================================================
1587 
1588 
/**
 * Generate a string of the requested length whose characters are drawn
 * using genrand_int32.
 * @param length   number of characters to generate
 * @param alphabet if non-null, characters are picked from this string (index =
 *                 random value modulo its length); if null, character codes
 *                 1..127 are used. With an empty alphabet no characters are set.
 * @return the generated string
 */
_String const _String::Random(const unsigned long length, const _String * alphabet) {
  _String random (length);

  const unsigned long alphabet_length = alphabet ? alphabet->s_length : 127UL;
  if (length == 0UL || alphabet_length == 0UL) {
    return random;
  }

  for (unsigned long position = 0UL; position < length; ++position) {
    const unsigned long pick = genrand_int32 () % alphabet_length;
    // without an alphabet, shift by one so that code 0 is never produced
    const char drawn = alphabet ? alphabet->char_at (pick) : (char)(1UL + pick);
    random.set_char (position, drawn);
  }

  return random;
}
1607 
1608 //=============================================================
1609 
LempelZivProductionHistory(_SimpleList * rec) const1610 unsigned long _String::LempelZivProductionHistory (_SimpleList* rec) const {
1611     if (rec) {
1612         rec->Clear();
1613     }
1614 
1615     if (empty()) {
1616         return 0UL;
1617     }
1618 
1619     if (rec) {
1620         (*rec) << 0;
1621     }
1622 
1623     unsigned long   current_position = 1UL,
1624                     production_history  = 1UL;
1625 
1626     while (current_position<s_length) {
1627 
1628         unsigned long max_extension = 0UL;
1629 
1630         for (unsigned long ip = 0; ip < current_position; ip++) {
1631             long sp = ip,
1632                  mp = current_position;
1633 
1634             while (mp < s_length && s_data [mp] == s_data[sp]) {
1635                 mp++;
1636                 sp++;
1637             }
1638 
1639             if (mp==s_length) {
1640                 max_extension = s_length-current_position;
1641                 break;
1642             } else {
1643                 if ((mp = mp - current_position + 1UL) > max_extension) {
1644                     max_extension = mp;
1645                 }
1646             }
1647         }
1648 
1649         current_position += max_extension;
1650 
1651         if (rec) {
1652             (*rec) << current_position - 1UL;
1653         } else {
1654             production_history ++;
1655         }
1656     }
1657 
1658     if (rec) {
1659         return rec->lLength;
1660     }
1661 
1662     return production_history;
1663 }
1664 
1665 
1666 
1667 
1668 
1669 
1670 
1671 
1672 
1673 
1674 
1675 
1676 
1677 
1678