1 /*
2
3 HyPhy - Hypothesis Testing Using Phylogenies.
4
5 Copyright (C) 1997-now
6 Core Developers:
7 Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8 Art FY Poon (apoon42@uwo.ca)
9 Steven Weaver (sweaver@temple.edu)
10
11 Module Developers:
12 Lance Hepler (nlhepler@gmail.com)
13 Martin Smith (martin.audacis@gmail.com)
14
15 Significant contributions from:
16 Spencer V Muse (muse@stat.ncsu.edu)
17 Simon DW Frost (sdf22@cam.ac.uk)
18
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
38 */
39
40 #include <string.h>
41 #include <stdio.h>
42 #include <ctype.h>
43 #include <time.h>
44
45 #include "global_things.h"
46 #include "hy_strings.h"
47 #include "batchlan.h"
48 #include "mersenne_twister.h"
49 #include "function_templates.h"
50 #include "hy_string_buffer.h"
51
52
53 _String compileDate = __DATE__;
54
55 using namespace hy_global;
56
/**
 * Byte-indexed table describing which characters may appear in an identifier.
 *   valid_chars[c] == 2 : c may appear anywhere, including the first position
 *                         ('a'..'z', 'A'..'Z' and '_')
 *   valid_chars[c] == 1 : c may appear, but not in the first position ('0'..'9')
 *   valid_chars[c] == 0 : c may not appear at all
 * A single instance, hy_Valid_ID_Chars, is defined together with the type.
 */
struct _hy_Valid_ID_Chars_Type {
    unsigned char valid_chars[256];

    /** @return true when c is allowed as the first character of an identifier */
    inline bool is_valid_first (unsigned char c) const {
        return valid_chars[c] == 2;
    }

    /** @return true when c is allowed somewhere in an identifier */
    inline bool is_valid (unsigned char c) const {
        return valid_chars[c] > 0;
    }

    /** Populate the table: everything is invalid except letters, digits and '_'. */
    _hy_Valid_ID_Chars_Type (void) {
        // tag every code in the inclusive range [first, last]
        auto tag_range = [this] (int first, int last, unsigned char tag) -> void {
            for (int code = first; code <= last; code++) {
                valid_chars[code] = tag;
            }
        };

        tag_range (0, 255, 0);
        tag_range ((unsigned char) 'a', (unsigned char) 'z', 2);
        tag_range ((unsigned char) 'A', (unsigned char) 'Z', 2);
        tag_range ((unsigned char) '0', (unsigned char) '9', 1);
        valid_chars[(unsigned char) '_'] = 2;
    }
} hy_Valid_ID_Chars;
85
86
87 /*
88 ==============================================================
89 Constructors/Destructors/Copiers
90 ==============================================================
91 */
92
_String(void)93 _String::_String (void) {
94 _String::Initialize();
95 }
96
97 //=============================================================
98
Initialize(bool)99 void _String::Initialize (bool) {
100 s_length = 0UL;
101 s_data = nil;
102 }
103
104 //=============================================================
105
Clear(void)106 void _String::Clear (void) {
107 s_length = 0UL;
108 if (s_data) {
109 free (s_data);
110 s_data = nil;
111 }
112 }
113
114 //=============================================================
115
_String(long const number)116 _String::_String(long const number) {
117 char s[64];
118 s_length = snprintf(s, sizeof(s), "%ld", number);
119 AllocateAndCopyString (s, s_length);
120 }
121
122 //=============================================================
123
_String(const unsigned long sL,char * buffer)124 _String::_String(const unsigned long sL, char *buffer) {
125 s_length = sL;
126 if (buffer) {
127 s_data = buffer;
128 AddAReference();
129 } else {
130 s_data = (char*) MemAllocate(sL + 1L, true);
131 }
132 s_data[sL] = (char)0;
133 }
134
135 //=============================================================
136
_String(const hyFloat val,const char * format)137 _String::_String(const hyFloat val, const char *format) {
138 char s_val[128];
139 s_length = snprintf(s_val, 128, format ? format : PRINTF_FORMAT_STRING, val);
140 AllocateAndCopyString (s_val, s_length);
141 }
142
143 //=============================================================
144
_String(const hyFloat val,unsigned char digits)145 _String::_String(const hyFloat val, unsigned char digits) {
146 char format_str[64];
147 if (digits > 0) {
148 snprintf(format_str, 64, "%%.%dg", MIN(digits, 20));
149 } else {
150 snprintf(format_str, 64, "%%g");
151 }
152 char s_val[128];
153 s_length = snprintf(s_val, 128, format_str, val);
154 AllocateAndCopyString (s_val, s_length);
155 }
156
157 //=============================================================
158
// Copy constructor: start from the empty state, then take a private copy of
// the source's characters via Duplicate.
_String::_String(const _String &s) {
    _String::Initialize ();
    _String::Duplicate (&s);
}
163
164 //=============================================================
165
_String(_String && s)166 _String::_String(_String &&s) {
167 s_length = s.s_length;
168 s_data = s.s_data;
169 s.Initialize();
170 }
171
172 //=============================================================
173
_String(_StringBuffer && s)174 _String::_String(_StringBuffer &&s) {
175 s_length = s.s_length;
176 s.TrimSpace();
177 s_data = s.s_data;
178 s._String::Initialize();
179 s.Initialize();
180 }
181
182
183 //=============================================================
184
_String(_String * s,bool dynamic)185 _String::_String(_String *s, bool dynamic) {
186 if (s->CanFreeMe ()) {
187 s_data = s->s_data;
188 s_length = s->s_length;
189 s->s_data = nil;
190 if (dynamic) {
191 DeleteObject (s);
192 }
193 } else {
194 AllocateAndCopyString (s->s_data, s->s_length);
195 if (dynamic) {
196 s->RemoveAReference();
197 }
198 }
199 }
200
201
202 //=============================================================
// Substring constructor: copy source[start..end] (inclusive) after the range
// has been adjusted by NormalizeRange. If the source is empty or the adjusted
// range contains no characters, the result is an allocated empty string.
_String::_String(const _String &source, long start, long end) {
    // NormalizeRange is only consulted for a non-empty source
    const long requested_range = source.s_length ? source.NormalizeRange(start, end) : 0L;

    if (requested_range > 0L) {
        AllocateAndCopyString (source.s_data + start, requested_range);
    } else {
        s_length  = 0UL;
        s_data    = (char *)MemAllocate(1UL);
        s_data[0] = '\0';
    }
}
220
221 //=============================================================
222
_String(const char * c_string)223 _String::_String(const char *c_string) {
224 AllocateAndCopyString (c_string, strlen(c_string));
225 }
226
227 //=============================================================
_String(const wchar_t * wc_string)228 _String::_String(const wchar_t *wc_string) {
229 unsigned long allocated = wcslen (wc_string);
230 s_length = 0UL;
231 s_data = (char *)MemAllocate(allocated + 1UL);
232 for (unsigned long cid = 0UL; cid < allocated; cid ++) {
233 int this_char = wctob (wc_string[cid]);
234 if (this_char != WEOF) {
235 s_data[s_length++] = (char) this_char;
236 }
237 }
238 if (s_length != allocated) {
239 s_data = (char *)MemReallocate((char *)s_data, (s_length+1) * sizeof(char));
240 }
241 s_data[s_length] = '\0';
242 }
243
244 //=============================================================
_String(const char c)245 _String::_String(const char c) {
246 s_length = 1UL;
247 s_data = (char *)MemAllocate(2UL);
248 s_data[0] = c;
249 s_data[1] = '\0';
250 }
251
252 //=============================================================
// Construct a string made of 'copies' back-to-back repetitions of 'str'.
// The result is an allocated empty string when either the count or the
// length of 'str' is zero.
_String::_String (const _String& str, unsigned long copies) {
    const unsigned long unit_length = str.s_length;

    s_length = copies * unit_length;
    s_data   = (char*)MemAllocate (s_length+1UL);

    if (s_length > 0UL) {
        char * write_position = s_data;
        for (unsigned long done = 0UL; done < copies; done++, write_position += unit_length) {
            memcpy (write_position, str.s_data, unit_length);
        }
    }
    s_data[s_length]='\0';
}
263
264 //=============================================================
_String(FILE * file,long read_this_many)265 _String::_String(FILE * file, long read_this_many) {
266 _String::Initialize ();
267 if (file) {
268 if (read_this_many < 0) {
269 fseek(file, 0, SEEK_END);
270 s_length = (unsigned long) ftell(file);
271 rewind(file);
272 } else {
273 s_length = read_this_many;
274 }
275 s_data = (char *)MemAllocate(s_length + 1UL);
276 unsigned long read_items = fread(s_data, 1, s_length, file);
277 if (read_items < s_length) {
278 s_data = (char*)MemReallocate(s_data,read_items+1);
279 s_length = read_items;
280 }
281 s_data[s_length] = '\0';
282 }
283 }
284
285 //=============================================================
~_String(void)286 _String::~_String(void) {
287 if (CanFreeMe()) {
288 if (s_data) {
289 free (s_data);
290 s_data = nil;
291 }
292 s_length = 0UL;
293 } else {
294 RemoveAReference();
295 }
296 }
297
298 //=============================================================
makeDynamic(void) const299 BaseRef _String::makeDynamic (void) const {
300 _String * r = new _String;
301 r->Duplicate(this);
302 return r;
303 }
304
305 //=============================================================
Duplicate(BaseRefConst ref)306 void _String::Duplicate (BaseRefConst ref) {
307 if (s_data) {
308 free (s_data);
309 }
310
311 _String const * s = (_String const*)ref;
312
313 s_length = s->s_length;
314 s_data = s->s_data;
315
316 if (s_data) {
317 AllocateAndCopyString (s->s_data, s_length);
318 }
319 }
320
321 //=============================================================
operator =(_String const & s)322 void _String::operator = (_String const& s) {
323 if (&s != this) Duplicate (&s);
324 }
325
326 //=============================================================
operator =(_String && rhs)327 void _String::operator = (_String && rhs) {
328 if (this != &rhs) {
329 if (s_data) {
330 free (s_data);
331 }
332 s_data = rhs.s_data;
333 s_length = rhs.s_length;
334 rhs.s_data = nil;
335 }
336 }
337
338
339
340 /*
341 ==============================================================
342 Private helpers
343 ==============================================================
344 */
345
NormalizeRange(long & from,long & to) const346 long _String::NormalizeRange(long & from, long & to) const {
347
348 if (s_length == 0UL) {
349 return 0L;
350 }
351
352 if (from < 0L) {
353 from = 0L;
354 }
355
356 if (to < 0L || to >= s_length ) {
357 to = s_length - 1UL;
358 }
359
360 return to - from + 1L;
361
362 }
363
364 //=============================================================
365
AllocateAndCopyString(const char * source_string,unsigned long length)366 void _String::AllocateAndCopyString (const char * source_string, unsigned long length) {
367 s_length = length;
368 s_data = (char*) MemAllocate (length+1UL);
369 //if (s_length) {
370 memcpy (s_data, source_string, length);
371 //}
372 s_data [length] = '\0';
373 }
374
375
376 /*
377 ==============================================================
378 Getters and setters
379 ==============================================================
380 */
381
operator [](long index)382 char& _String::operator [] (long index) {
383 if (index < s_length && index >= 0L) {
384 return s_data[index];
385 }
386 HandleApplicationError (_String ("Internal error at ") & __PRETTY_FUNCTION__ & ": an invalid index requested");
387 return s_data[0];
388 }
389
390 //=============================================================
391
392
operator ()(long index) const393 char _String::operator () (long index) const {
394 if (index >= 0L && index < s_length) {
395 return s_data[index];
396 }
397 if (index < 0L && -index <= s_length) {
398 return s_data[s_length + index];
399 }
400 return default_return;
401 }
402
403 //=============================================================
404
set_char(unsigned long index,char const data)405 void _String::set_char (unsigned long index, char const data) {
406 if (index < s_length) {
407 s_data[index] = data;
408 }
409 }
410
411 //=============================================================
412
get_str(void) const413 const char *_String::get_str(void) const { return s_data; }
414
415 /*
416 ==============================================================
417 Type conversions
418 ==============================================================
419 */
420
421
operator const char*(void) const422 _String::operator const char *(void) const { return s_data; }
423
424 //=============================================================
425
to_float(void) const426 hyFloat _String::to_float (void) const{
427 if (s_length == 0UL) {
428 return 0.;
429 }
430 char *endP;
431 return strtod(s_data, &endP);
432 }
433
434 //=============================================================
435
to_long(void) const436 long _String::to_long (void) const {
437 if (s_length == 0UL) {
438 return 0L;
439 }
440 char * endP;
441 return strtol(s_data,&endP,10);
442 }
443
444 //=============================================================
445
toStr(unsigned long)446 BaseRef _String::toStr (unsigned long) {
447 AddAReference();
448 return this;
449 }
450
451 //=============================================================
452
FormatTimeString(long time_diff)453 const _String _String::FormatTimeString(long time_diff){
454
455 long fields [3] = {time_diff / 3600L, time_diff / 60L % 60L, time_diff % 60L};
456
457 _StringBuffer time_string;
458
459 for (unsigned long l = 0; l < 3UL; l++) {
460 if (l) {
461 time_string << ':';
462 }
463 if (fields[l] < 10L) {
464 time_string << '0';
465 }
466 time_string << _String (fields[l]);
467 }
468
469 return time_string;
470 }
471
472 /*
473 ==============================================================
474 Comparisons
475 ==============================================================
476 */
477
Compare(_String const & rhs) const478 hyComparisonType _String::Compare(_String const& rhs) const {
479
480 if (s_length <= rhs.s_length) {
481 for (unsigned long i = 0UL; i < s_length; i++) {
482 int diff = s_data[i] - rhs.s_data[i];
483
484 if (diff < 0) {
485 return kCompareLess;
486 } else {
487 if (diff > 0) {
488 return kCompareGreater;
489 }
490 }
491 }
492
493 if (s_length == rhs.s_length) {
494 return kCompareEqual;
495 }
496 return kCompareLess;
497 } else {
498
499 for (unsigned long i = 0UL; i < rhs.s_length; i++) {
500 int diff = s_data[i] - rhs.s_data[i];
501
502 if (diff < 0) {
503 return kCompareLess;
504 } else {
505 if (diff > 0) {
506 return kCompareGreater;
507 }
508 }
509 }
510 return kCompareGreater;
511 }
512
513 /*
514 unsigned long up_to = MIN (s_length, rhs.s_length);
515
516 for (unsigned long i = 0UL; i < up_to; i++) {
517 if (s_data[i] < rhs.s_data[i]) {
518 return kCompareLess;
519 }
520 if (s_data[i] > rhs.s_data[i]) {
521 return kCompareGreater;
522 }
523 }
524
525 if (s_length == rhs.s_length) {
526 return kCompareEqual;
527 }
528
529 return s_length < rhs.s_length ? kCompareLess : kCompareGreater;*/
530 }
531
532 //=============================================================
533
CompareIgnoringCase(_String const & rhs) const534 hyComparisonType _String::CompareIgnoringCase(_String const& rhs) const {
535 unsigned long up_to = MIN (s_length, rhs.s_length);
536
537 for (unsigned long i = 0UL; i < up_to; i++) {
538
539 char llhs = tolower (s_data[i]), lrhs = tolower (rhs.s_data[i]);
540
541
542 if (llhs < lrhs) {
543 return kCompareLess;
544 }
545 if (llhs > lrhs) {
546 return kCompareGreater;
547 }
548 }
549
550 if (s_length == rhs.s_length) {
551 return kCompareEqual;
552 }
553
554 return s_length < rhs.s_length ? kCompareLess : kCompareGreater;
555 }
556
557 //=============================================================
558
operator ==(const _String & s) const559 bool _String::operator==(const _String& s) const { return Compare (s) == kCompareEqual; }
operator >(const _String & s) const560 bool _String::operator>(const _String & s) const { return Compare (s) == kCompareGreater; }
operator <=(const _String & s) const561 bool _String::operator<=(const _String & s) const { return Compare (s) != kCompareGreater; }
operator >=(const _String & s) const562 bool _String::operator>=(const _String & s) const { return Compare (s) != kCompareLess; }
operator !=(const _String & s) const563 bool _String::operator!=(const _String & s) const { return Compare (s) != kCompareEqual; }
operator <(const _String & s) const564 bool _String::operator<(const _String & s) const { return Compare (s) == kCompareLess; }
565
Equal(const _String & s) const566 bool _String::Equal(const _String& s) const { return Compare (s) == kCompareEqual; };
EqualIgnoringCase(const _String & s) const567 bool _String::EqualIgnoringCase(const _String& s) const { return CompareIgnoringCase (s) == kCompareEqual; };
Equal(const char c) const568 bool _String::Equal(const char c) const {
569 return s_length == 1UL && s_data[0] == c;
570 }
571
572 //=============================================================
573
574
// Match *this, starting at index start_this, against 'pattern', starting at
// index start_pattern, where 'wildchar' in the pattern acts as a wildcard that
// can absorb characters of *this. Wildcards are only recognised in 'pattern'.
//
//   pattern          : the pattern to match against
//   wildchar         : the wildcard character; '\0' disables matching altogether
//   start_this       : first index of *this taking part in the match
//   start_pattern    : first index of pattern taking part in the match
//   wildchar_matches : optional; when non-null, pairs of indices
//                      (first, last) into *this are appended for the ranges
//                      that were absorbed by wildcards
//
// When the pattern has no characters left at start_pattern, or wildchar is
// '\0', the result is simply whether *this is exhausted as well
// (s_length == start_this). Otherwise the function scans both strings,
// calling itself recursively to try the alternative "the current character
// ends the wildcard" before falling back to "the wildcard absorbs it".
// The scan runs up to and including index s_length, so the terminating '\0'
// of both strings takes part in the comparison.
bool _String::EqualWithWildChar(const _String& pattern, const char wildchar, unsigned long start_this, unsigned long start_pattern, _SimpleList * wildchar_matches) const {
    // wildcards only matter in the second string

    if (pattern.s_length > start_pattern && wildchar != '\0') {
        unsigned long match_this_char = start_pattern;
        // the position we are currently trying to match in the pattern

        bool is_wildcard = pattern.s_data[match_this_char] == wildchar,
        scanning_pattern = is_wildcard;

        unsigned long i = start_this;
        // the position we are currently trying to match in *this
        long last_matched_char = (long)start_this - 1L;
        // the index of the last character in *this that was matched to something other than the wildcard
        // (only updated when wildchar_matches is supplied)

        while (i <= s_length) {
            if (scanning_pattern) { // skip consecutive wildcards in "pattern"
                scanning_pattern = pattern.s_data[++match_this_char] == wildchar;
            } else {
                if (s_data[i] == pattern.s_data[match_this_char]) {
                    if (is_wildcard) {
                        // could either match the next character or consume it into the wildcard
                        if (wildchar_matches) {
                            // record the current wildcard match
                            // if this is the last (0) character, return true
                            long rollback_checkpoint = wildchar_matches->countitems();
                            if (last_matched_char + 1 < i) { // something got matched to the wildcard
                                *wildchar_matches << (last_matched_char+1) << (i-1);
                            }
                            if (i == s_length) {
                                return true;
                            }
                            if (EqualWithWildChar (pattern, wildchar, i, match_this_char, wildchar_matches)) {
                                // matching worked
                                return true;
                            } else { // consume the character into the wildcard
                                i++;
                                // undo what was recorded since the checkpoint
                                // NOTE(review): with "k >= 0" this loop runs one more time than the
                                // number of entries added since the checkpoint -- confirm this is intended
                                for (long k = wildchar_matches->countitems() - rollback_checkpoint; k >= 0; k--) {
                                    wildchar_matches->Pop();
                                }
                                continue;
                            }
                        } else {
                            if (EqualWithWildChar (pattern, wildchar, i, match_this_char)) {
                                // matching worked
                                return true;
                            } else { // consume the character into the wildcard
                                i++;
                                continue;
                            }
                        }
                    } else {
                        // try character match
                        // note that the terminal '0' characters will always match, so
                        // this is where we terminate
                        if (wildchar_matches) {
                            if (last_matched_char + 1 < i) { // something got matched to the wildcard
                                *wildchar_matches << (last_matched_char+1) << (i-1);
                            }
                            last_matched_char = i;
                        }
                        i++;
                        match_this_char++;
                        if (i > s_length || match_this_char > pattern.s_length) {
                            break;
                        }
                        // TODO check to see if this will return true strings that match the pattern and
                        // have some left-over stuff, like
                        // "tree.node.a.b" might incorrectly match "tree.?.a"
                        is_wildcard = pattern.s_data[match_this_char] == wildchar;
                        scanning_pattern = is_wildcard;
                    }
                } else { // match wildcard
                    // a mismatch is only acceptable while a wildcard is active
                    if (!is_wildcard) {
                        return false;
                    }
                    scanning_pattern = false;
                    i++;
                }
            }
        }

        if (wildchar_matches) {
            if (last_matched_char + 1 < i) { // something got matched to the wildcard
                *wildchar_matches << (last_matched_char+1) << (i-1);
            }
        }

        // success requires that the whole pattern, including its terminator, was consumed
        return match_this_char > pattern.s_length;
    } else {
        return s_length == start_this;
    }

    // not reachable: both branches above return
    return false;
}
670
671
672 /*
673 ==============================================================
674 Content-modification and extraction methods
675 ==============================================================
676 */
677
678
679 //=============================================================
680
681
682 //Append operator
// Concatenation: returns a new string holding the characters of *this
// followed by the characters of rhs. Neither operand is modified.
// When both operands are empty, kEmptyString is returned.
_String _String::operator & (const _String& rhs) const {
    unsigned long combined_length = s_length + rhs.s_length;
    if (combined_length == 0UL) {
        return kEmptyString;
    }

    _String res (combined_length);
    char * write_position = res.s_data;

    // an operand contributes only if it has both a length and a buffer
    if (s_length && s_data) {
        memcpy (write_position, s_data, s_length);
    }
    write_position += s_length;

    if (rhs.s_length && rhs.s_data) {
        memcpy (write_position, rhs.s_data, rhs.s_length);
    }

    res.s_data[res.s_length] = '\0';
    return res;
}
703
704 //=============================================================
705
Chop(long start,long end) const706 _String _String::Chop(long start, long end) const{
707
708 long resulting_length = NormalizeRange(start,end);
709
710 if (resulting_length > 0L) {
711 _String res((unsigned long)(s_length - resulting_length));
712 if (start > 0L) {
713 memcpy(res.s_data, s_data, start);
714 }
715 if (end + 1L < s_length) {
716 memcpy(res.s_data + start, s_data + end + 1L, s_length - end - 1L);
717 }
718
719 return res;
720 }
721
722 return *this;
723
724 }
725
726 //=============================================================
727
Cut(long start,long end) const728 _String _String::Cut(long start, long end) const {
729 return _String (*this, start, end);
730 }
731
732 //=============================================================
733
Delete(long start,long end)734 void _String::Delete(long start, long end) {
735 long resulting_length = NormalizeRange(start,end);
736
737 if (resulting_length > 0L) {
738 if (end < (long)s_length - 1UL) {
739 memmove(s_data + start, s_data + end + 1L, s_length - end - 1L);
740 }
741 s_length -= resulting_length;
742 s_data = (char*)MemReallocate(s_data, sizeof(char) * (s_length + 1UL));
743 s_data[s_length] = '\0';
744 }
745 }
746
747 //=============================================================
748
Flip(void)749 void _String::Flip(void) {
750 for (unsigned long i = 0UL; i < (s_length >> 1); i++) {
751 char c;
752 SWAP (s_data[i], s_data[s_length - 1 - i], c);
753 }
754 }
755
756 //=============================================================
757
Reverse(void) const758 _String _String::Reverse(void) const {
759
760 _String result (*this);
761 for (unsigned long s = 0UL, e = s_length - 1L; s < s_length; s++, e--) {
762 result.s_data[s] = s_data[e];
763 }
764 return result;
765 }
766
767 //=============================================================
768
Insert(char c,long where)769 void _String::Insert(char c, long where) {
770 if (where < 0L || where >= s_length) {
771 where = s_length;
772 }
773
774 s_data = (char*)MemReallocate(s_data, sizeof(char) * (s_length + 2UL));
775
776 if (where < s_length) {
777 memmove(s_data + where + 1UL, s_data + where, s_length - where);
778 }
779
780 s_data[where] = c;
781 s_data[++s_length] = '\0';
782 }
783
784 //=============================================================
785
Trim(long start,long end)786 void _String::Trim(long start, long end) {
787 long resulting_length = NormalizeRange(start, end);
788 /*if (s_length >= 5000 && start > 0) {
789 printf ("\nLong trim %d %d %d\n", s_length, start, end);
790 }*/
791
792 if (resulting_length > 0L) {
793 if (start > 0L) {
794 memmove(s_data, s_data + start, resulting_length);
795 }
796 if (s_length != resulting_length) {
797 s_length = resulting_length;
798 s_data = (char*)MemReallocate(s_data, resulting_length + 1UL);
799 s_data[resulting_length] = '\0';
800 }
801 } else {
802 s_length = 0UL;
803 s_data = (char*)MemReallocate(s_data, 1UL);
804 s_data[0] = '\0';
805 }
806 }
807
808 //=============================================================
809
ChangeCase(hy_string_case conversion_type) const810 const _String _String::ChangeCase (hy_string_case conversion_type) const {
811 _String result (s_length);
812
813 auto conversion_function = conversion_type == kStringUpperCase ? toupper : tolower;
814
815 for (unsigned long i = 0UL; i<s_length; i++) {
816 result.s_data [i] = conversion_function (s_data[i]);
817 }
818
819 return result;
820 }
821
822 //=============================================================
823
ChangeCaseInPlace(hy_string_case conversion_type)824 void _String::ChangeCaseInPlace (hy_string_case conversion_type) {
825
826 auto conversion_function = conversion_type == kStringUpperCase ? toupper : tolower;
827
828 for (unsigned long i = 0UL; i<s_length; i++) {
829 s_data[i] = conversion_function (s_data[i]);
830 }
831
832 }
833
834 //=============================================================
835
Tokenize(const _String & splitter) const836 const _List _String::Tokenize(const _String& splitter) const {
837 _List tokenized;
838
839 long cp = 0L, cpp;
840 while ((cpp = Find(splitter, cp)) != kNotFound) {
841 if (cpp > cp) {
842 tokenized < new _String(*this, cp, cpp - 1L);
843 } else {
844 tokenized < new _String;
845 }
846
847 cp = cpp + splitter.s_length;
848 }
849
850 tokenized < new _String(*this, cp, kStringEnd);
851 return tokenized;
852 }
853
854 //=============================================================
855
Tokenize(bool const splitter[256]) const856 const _List _String::Tokenize(bool const splitter[256]) const {
857 _List tokenized;
858
859 long cp = 0L, cpp;
860 while ((cpp = Find(splitter, cp)) != kNotFound) {
861 if (cpp > cp) {
862 tokenized < new _String(*this, cp, cpp - 1L);
863 } else {
864 tokenized < new _String;
865 }
866
867 cp = cpp + 1;
868 }
869
870 tokenized < new _String(*this, cp);
871 return tokenized;
872 }
873
874 //=============================================================
875
Enquote(char quote_char) const876 const _String _String::Enquote (char quote_char) const {
877 return _StringBuffer (2UL + s_length) << quote_char << *this << quote_char;
878 }
879
880 //=============================================================
881
Enquote(char open_char,char close_char) const882 const _String _String::Enquote (char open_char, char close_char) const {
883 return _StringBuffer (2UL + s_length) << open_char << *this << close_char;
884 }
885
886 //=============================================================
887
KillSpaces(void) const888 const _String _String::KillSpaces(void) const {
889 _StringBuffer temp(s_length + 1UL);
890 for (unsigned long k = 0UL; k < s_length; k++) {
891 if (!isspace(s_data[k])) {
892 temp << s_data[k];
893 }
894 }
895 return temp;
896 }
897
898 //=============================================================
899
CompressSpaces(void) const900 const _String _String::CompressSpaces(void) const {
901
902 _StringBuffer temp(s_length + 1UL);
903 bool skipping = false;
904
905 for (unsigned long k = 0UL; k < s_length; k++) {
906 if (!isspace(s_data[k])) {
907 temp << s_data[k];
908 skipping = false;
909 } else {
910 if (!skipping) {
911 skipping = true;
912 temp << ' ';
913 }
914 }
915 }
916 return temp;
917 }
918
919
920 /*
921 ==============================================================
922 Search Functions
923 ==============================================================
924 */
925
Find(const _String & pattern,long start,long end) const926 long _String::Find(const _String& pattern, long start, long end) const {
927
928 if (pattern.s_length) {
929 long span = NormalizeRange(start, end);
930 if (span >= (long) pattern.s_length) {
931 const unsigned long upper_bound = end - pattern.s_length + 2L;
932 for (unsigned long i = start; i < upper_bound ; i++) {
933 unsigned long j = 0UL;
934 for (;j < pattern.s_length; j++) {
935 if (s_data[i + j] != pattern.s_data[j]) {
936 break;
937 }
938 }
939 if (j == pattern.s_length) {
940 return i;
941 }
942 }
943 }
944 }
945 return kNotFound;
946 }
947
948 //=============================================================
949
FindBackwards(const _String & pattern,long start,long end) const950 long _String::FindBackwards(const _String& pattern, long start, long end) const {
951
952 if (pattern.s_length) {
953 long span = NormalizeRange(start, end);
954 if (span >= (long) pattern.s_length) {
955 const long upper_bound = end - pattern.s_length + 1L;
956 for (long i = upper_bound; i >= start; i--) {
957 unsigned long j = 0UL;
958 for (;j < pattern.s_length; j++) {
959 if (s_data[i + j] != pattern.s_data[j]) {
960 break;
961 }
962 }
963 if (j == pattern.s_length) {
964 return i;
965 }
966 }
967 }
968 }
969 return kNotFound;
970 }
971
972 //=============================================================
973
Find(const char p,long start,long end) const974 long _String::Find(const char p, long start, long end) const {
975 if (s_length) {
976 long span = NormalizeRange(start, end);
977 if (span > 0L) {
978 char sentinel = s_data[end+1];
979 s_data[end+1L] = p;
980 long index = start;
981 while (s_data[index] != p) {
982 index++;
983 }
984 s_data[end+1L] = sentinel;
985 return index <= end ? index : kNotFound;
986 }
987 }
988
989 return kNotFound;
990 }
991
992 //=============================================================
993
Find(const bool lookup[256],long start,long end) const994 long _String::Find(const bool lookup[256], long start, long end) const {
995 if (s_length) {
996 long span = NormalizeRange(start, end);
997 if (span > 0L) {
998
999 for (unsigned long index = start; index <= end; index ++ ) {
1000 if (lookup [s_data[index]]) {
1001 return index;
1002 }
1003 }
1004 }
1005 }
1006
1007 return kNotFound;
1008 }
1009
1010 //=============================================================
1011
FindAnyCase(const bool lookup[256],long start,long end) const1012 long _String::FindAnyCase (const bool lookup[256], long start, long end) const {
1013 if (s_length) {
1014 long span = NormalizeRange(start, end);
1015 if (span > 0L) {
1016
1017 for (unsigned long index = start; index <= end; index ++ ) {
1018 if (lookup [tolower(s_data[index])] || lookup [toupper (s_data[index])]) {
1019 return index;
1020 }
1021 }
1022 }
1023 }
1024
1025 return kNotFound;
1026 }
1027
1028
1029 //=============================================================
FindAnyCase(const _String & pattern,long start,long end) const1030 long _String::FindAnyCase (const _String& pattern, long start, long end) const {
1031
1032 if (pattern.s_length) {
1033 long span = NormalizeRange(start, end);
1034 if (span >= (long) pattern.s_length) {
1035 const unsigned long upper_bound = end - pattern.s_length + 2L;
1036 for (unsigned long i = start; i < upper_bound ; i++) {
1037 unsigned long j = 0UL;
1038 for (;j < pattern.s_length; j++) {
1039 if (toupper (s_data[i + j]) != toupper (pattern.s_data[j])) {
1040 break;
1041 }
1042 }
1043 if (j == pattern.s_length) {
1044 return i;
1045 }
1046 }
1047 }
1048 }
1049 return kNotFound;
1050 }
1051 //=============================================================
1052
// Return a copy of this string in which occurrences of 'pattern' are replaced
// by 'replace'.
//   replace_all == true  : every non-overlapping occurrence, scanning left to
//                          right, is replaced
//   replace_all == false : only the first occurrence is replaced
// An empty pattern, or one longer than this string, yields an unmodified copy.
const _String _String::Replace(const _String& pattern, const _String& replace, bool replace_all) const {
    const unsigned long pattern_length = pattern.s_length;

    if (pattern_length == 0UL || s_length < pattern_length) {
        return *this;
    }

    _StringBuffer replacement_buffer;

    // the last index at which the pattern could still begin
    const unsigned long last_anchor = s_length - pattern_length;
    unsigned long anchor_index = 0UL;

    while (anchor_index <= last_anchor) {
        unsigned long matched = 0UL;
        while (matched < pattern_length && s_data[anchor_index + matched] == pattern.s_data[matched]) {
            matched++;
        }

        if (matched < pattern_length) {
            // no occurrence starts here: copy one character and move on
            replacement_buffer << s_data[anchor_index];
            anchor_index++;
            continue;
        }

        // an occurrence starts here: emit the replacement and skip over it
        replacement_buffer << replace;
        anchor_index += pattern_length;
        if (replace_all == false) {
            break;
        }
    }

    // whatever was not examined is copied over unchanged
    return replacement_buffer.AppendSubstring(*this, anchor_index, kNotFound);
}
1083 //=============================================================
1084
FirstNonSpaceIndex(long start,long end,hy_string_search_direction direction) const1085 long _String::FirstNonSpaceIndex(long start, long end, hy_string_search_direction direction) const {
1086 return _FindFirstIndexCondtion(start, end, direction, [] (char c) -> bool {return !isspace (c);});
1087 }
1088
FirstNonSpace(long start,long end,hy_string_search_direction direction) const1089 char _String::FirstNonSpace(long start, long end, hy_string_search_direction direction) const {
1090 long r = FirstNonSpaceIndex(start, end, direction);
1091 return r == kNotFound ? default_return : s_data[r];
1092 }
1093
1094 //=============================================================
1095
FirstSpaceIndex(long start,long end,hy_string_search_direction direction) const1096 long _String::FirstSpaceIndex(long start, long end, hy_string_search_direction direction) const {
1097 return _FindFirstIndexCondtion(start, end, direction, isspace);
1098 }
1099
1100 //=============================================================
1101
FirstNonSpaceFollowingSpace(long start,long end,hy_string_search_direction direction) const1102 long _String::FirstNonSpaceFollowingSpace(long start, long end, hy_string_search_direction direction) const {
1103 long first_space = FirstSpaceIndex (start, end, direction);
1104 if (first_space != kNotFound) {
1105 if (direction == kStringDirectionForward) {
1106 first_space = FirstNonSpaceIndex (first_space, end, direction);
1107 } else {
1108 first_space = FirstNonSpaceIndex (start, first_space, direction);
1109 }
1110 }
1111 return first_space;
1112 }
1113
1114 //Begins with string
BeginsWith(_String const & pattern,bool case_sensitive,unsigned long startfrom) const1115 bool _String::BeginsWith (_String const& pattern, bool case_sensitive, unsigned long startfrom) const{
1116 if (s_length >= pattern.s_length + startfrom) {
1117 if (case_sensitive) {
1118 for (unsigned long idx = 0; idx < pattern.s_length; idx ++) {
1119 if (s_data[idx+startfrom] != pattern.s_data[idx]) {
1120 return false;
1121 }
1122 }
1123 } else {
1124 for (unsigned long idx = 0; idx < pattern.s_length; idx ++) {
1125 if (tolower(s_data[idx+startfrom]) != tolower(pattern.s_data[idx])) {
1126 return false;
1127 }
1128 }
1129 }
1130 return true;
1131 }
1132 return false;
1133 }
1134
1135 //Begins with string
BeginsWith(const bool pattern[256],bool case_sensitive,unsigned long startfrom) const1136 bool _String::BeginsWith (const bool pattern[256], bool case_sensitive, unsigned long startfrom) const{
1137 if (s_length >= 1UL + startfrom) {
1138 if (case_sensitive) {
1139 for (unsigned long idx = 0; idx < 256; idx ++) {
1140 if (pattern [s_data[startfrom]]) {
1141 return true;
1142 }
1143 }
1144 } else {
1145 for (unsigned long idx = 0; idx < 256; idx ++) {
1146 if (pattern [tolower(s_data[startfrom])] || pattern [toupper(s_data[startfrom])] ) {
1147 return true;
1148 }
1149 }
1150 }
1151 }
1152 return false;
1153 }
1154
1155
1156 //Ends with string
EndsWith(_String const & pattern,bool case_sensitive) const1157 bool _String::EndsWith (_String const& pattern, bool case_sensitive) const{
1158 if (s_length >= pattern.s_length) {
1159 unsigned long length_difference = s_length - pattern.s_length;
1160 return (case_sensitive ? Find (pattern, length_difference)
1161 : FindAnyCase (pattern, length_difference)) == length_difference;
1162 }
1163 return false;
1164 }
1165
1166
1167
1168 //Begins with string
BeginsWithAndIsNotAnIdent(_String const & pattern) const1169 bool _String::BeginsWithAndIsNotAnIdent (_String const& pattern) const {
1170
1171 if (BeginsWith (pattern)) {
1172 if (s_length > pattern.s_length) {
1173 char next_char = char_at (pattern.s_length);
1174 if (isalnum(next_char) || next_char == '.' || next_char == '_' || next_char == '&') {
1175 // TODO SLKP 20170616: what is the use case for next_char == '&'?
1176 return false;
1177 }
1178 }
1179 return true;
1180 }
1181 return false;
1182 }
1183
1184 /*
1185 ==============================================================
1186 Parser-related functions
TODO: possibly deprecate when the move to the grammar is effected
1188 ==============================================================
1189 */
1190
1191
1192
1193 //=============================================================
1194
StripQuotes(char open_char,char close_char)1195 bool _String::StripQuotes(char open_char, char close_char) {
1196 if (s_length >= 2UL) {
1197 if (s_data [0] == open_char && s_data [s_length - 1UL] == close_char) {
1198 Trim (1, s_length - 2UL);
1199 return true;
1200 }
1201 }
1202 return false;
1203 }
1204
1205 //=============================================================
1206
StripQuotes(char const * open_chars,char const * close_chars)1207 bool _String::StripQuotes(char const* open_chars, char const * close_chars) {
1208 if (s_length >= 2UL) {
1209 int count = strlen (open_chars);
1210 for (int i = 0; i < count; i++) {
1211 if (s_data [0] == open_chars[i] && s_data [s_length - 1UL] == close_chars[i]) {
1212 Trim (1, s_length - 2UL);
1213 return true;
1214 }
1215 }
1216 }
1217 return false;
1218 }
1219
1220
1221 //=============================================================
1222
IsValidIdentifier(int options) const1223 bool _String::IsValidIdentifier(int options) const {
1224 return s_length > 0UL && _IsValidIdentifierAux (options & fIDAllowCompound, options & fIDAllowFirstNumeric) == s_length - 1UL && hyReservedWords.FindObject (this) == kNotFound;
1225 }
1226
1227 //=============================================================
1228
ConvertToAnIdent(int options) const1229 const _String _String::ConvertToAnIdent(int options) const {
1230 _StringBuffer converted;
1231
1232 const char default_placeholder = '_';
1233
1234 char last_char = '\0';
1235 bool allow_compounds = options & fIDAllowCompound,
1236 allow_first_numeric = options & fIDAllowFirstNumeric;
1237
1238
1239 unsigned long current_index = 0UL;
1240
1241 bool first = true;
1242
1243 for (; current_index < s_length; current_index ++) {
1244 char current_char = s_data[current_index];
1245 if (first) {
1246 if ( hy_Valid_ID_Chars.is_valid_first (current_char) || allow_first_numeric && hy_Valid_ID_Chars.is_valid (current_char)) {
1247 converted << current_char;
1248 } else {
1249 if (last_char != default_placeholder) {
1250 converted << default_placeholder;
1251 }
1252 }
1253 first = false;
1254 } else {
1255 if ( hy_Valid_ID_Chars.is_valid(current_char)) {
1256 converted << current_char;
1257 } else {
1258 if (allow_compounds && current_char == '.') {
1259 first = true;
1260 converted << current_char;
1261 } else {
1262 if (last_char != default_placeholder) {
1263 converted << default_placeholder;
1264 }
1265 }
1266 }
1267 }
1268 last_char = converted.char_at (converted.length () - 1UL);
1269 }
1270
1271 return converted;
1272 }
1273
1274
1275 //=============================================================
1276
_IsValidIdentifierAux(bool allow_compounds,bool allow_first_numeric,char wildcard) const1277 long _String::_IsValidIdentifierAux(bool allow_compounds, bool allow_first_numeric, char wildcard) const {
1278
1279 unsigned long current_index = 0UL;
1280
1281 bool first = true;
1282
1283 for (; current_index < s_length; current_index ++) {
1284 char current_char = s_data[current_index];
1285 if (first) {
1286 if ( ! (hy_Valid_ID_Chars.is_valid_first (current_char) || allow_first_numeric && hy_Valid_ID_Chars.is_valid (current_char))) {
1287 break;
1288 }
1289 first = false;
1290 } else {
1291 if ( ! hy_Valid_ID_Chars.is_valid(current_char)) {
1292 if (allow_compounds && current_char == '.') {
1293 first = true;
1294 } else {
1295 break;
1296 }
1297 }
1298 }
1299 }
1300
1301 if (current_index) {
1302 return current_index - 1UL;
1303 }
1304
1305 return kNotFound;
1306 }
1307
1308 //=============================================================
1309
IsALiteralArgument(bool strip_quotes)1310 bool _String::IsALiteralArgument (bool strip_quotes) {
1311 if (s_length >= 2UL) {
1312 char quotes [2] = {'"','\''};
1313 for (char quote : quotes) {
1314 long from = 0L,
1315 to = ExtractEnclosedExpression (from,quote,quote, fExtractRespectEscape);
1316
1317 if (from == 0L && to == s_length - 1L) {
1318 if (strip_quotes){
1319 Trim (1L, s_length-2L);
1320 }
1321 return true;
1322 }
1323 }
1324 }
1325 return false;
1326 }
1327
1328 //=============================================================
1329
1330
/**
 * Classify this string as a variable reference and compute the name it
 * refers to.
 *
 * Forms recognized by the code below:
 *   - "name"        : a valid identifier; a direct reference
 *   - "name&"       : a valid identifier followed by '&'; a direct reference
 *                     whose resolved name keeps the trailing '&'
 *   - "*x" / "^x"   : a dereference ('*' is reported as local, '^' as
 *                     global); x is either an identifier naming a string
 *                     variable, or an expression handed to
 *                     ProcessLiteralArgument. The resulting text must itself
 *                     be a direct reference.
 *
 * @param referenced_object [out] receives the resolved name; set to
 *                          kEmptyString when the reference is invalid
 * @param context           optional namespace; when supplied, resolved names
 *                          are prefixed with "<context>." (except for '^'
 *                          dereferences, and for plain identifiers that
 *                          already start with that prefix)
 * @return kStringDirectReference, kStringLocalDeference,
 *         kStringGlobalDeference, or kStringInvalidReference
 */
hy_reference_type _String::ProcessVariableReferenceCases (_String& referenced_object, _String const * context) const {
    const static _String kDot (".");

    if (nonempty()) {

        char first_char    = char_at(0);
        bool is_func_ref   = char_at(s_length-1) == '&';

        if (first_char == '*' || first_char == '^') {
            // a dereference marker cannot be combined with a trailing '&'
            if (is_func_ref) {
                referenced_object = kEmptyString;
                return kStringInvalidReference;
            }
            bool is_global_ref = first_char == '^';
            // everything after the '*' / '^' marker
            _StringBuffer   plain_name (length());
            plain_name.AppendSubstring (*this,1,-1);

            if (plain_name.IsValidIdentifier(fIDAllowCompound | fIDAllowFirstNumeric)) {
                // the marker is followed by an identifier: look it up (inside
                // the context, if one is given) as a string-valued variable
                if (context) {
                    plain_name.Clear();
                    (plain_name << *context << '.').AppendSubstring (*this,1,-1);
                }
                _FString * dereferenced_value = (_FString*)FetchObjectFromVariableByType(&plain_name, STRING);
                // the stored string must itself resolve as a direct reference
                // (this nested call is made without passing a context)
                if (dereferenced_value && dereferenced_value->get_str().ProcessVariableReferenceCases (referenced_object) == kStringDirectReference) {
                    if (!is_global_ref && context) {
                        // local dereference: qualify the resolved name with the context
                        referenced_object = (_StringBuffer (context->length() + 1UL + referenced_object.length()) << *context << '.' << referenced_object);
                    }
                    return is_global_ref?kStringGlobalDeference:kStringLocalDeference;
                }
            } else {
                // the marker is followed by something that is not an
                // identifier: evaluate it with ProcessLiteralArgument and
                // treat the result as the referenced name

                _String try_as_expression;
                if (context) {
                    _VariableContainer ctxt (*context);
                    try_as_expression = ProcessLiteralArgument (&plain_name, &ctxt);
                } else {
                    try_as_expression = ProcessLiteralArgument (&plain_name, nil);
                }
                if (try_as_expression.ProcessVariableReferenceCases (referenced_object) == kStringDirectReference) {
                    if (!is_global_ref && context) {
                        // NOTE(review): this branch prefixes the evaluated text
                        // (try_as_expression), whereas the identifier branch
                        // above prefixes referenced_object -- confirm the
                        // difference is intended
                        //referenced_object = *context & '.' & try_as_expression;
                        referenced_object = (_StringBuffer (context->length() + 1UL + try_as_expression.length()) << *context << '.' << try_as_expression);
                    }

                    return is_global_ref?kStringGlobalDeference:kStringLocalDeference;
                }
            }
            // an unresolved dereference falls through to the checks below
        }

        if (is_func_ref) {
            // drop the trailing '&' and validate what remains
            referenced_object = Cut (0, s_length-2UL);
            if (referenced_object.IsValidIdentifier(fIDAllowCompound | fIDAllowFirstNumeric)) {
                // qualify with the context (if any) and put the '&' back
                referenced_object = (context? (*context & '.' & referenced_object): (referenced_object)) & '&';
                return kStringDirectReference;
            }
        }
        else {
            if (IsValidIdentifier(fIDAllowCompound | fIDAllowFirstNumeric)) {
                if (context) {
                    // do not prefix a name that already starts with "<context>."
                    if (BeginsWith (*context) && BeginsWith(kDot, true, context->length())) {
                        referenced_object = *this;
                    } else {
                        referenced_object = (_StringBuffer (context->length() + length() + 1) << *context << '.' << *this);
                    }

                    //_String cdot = *context & '.';
                    //referenced_object = BeginsWith(cdot) ? *this : (cdot & *this);
                } else {
                    referenced_object = *this;
                }
                return kStringDirectReference;
            }
        }
    }

    // empty string, or none of the recognized forms matched
    referenced_object = kEmptyString;
    return kStringInvalidReference;
}
1409
1410
1411
1412
1413 /*
1414 ==============================================================
1415 Regular Expression Methods
1416 ==============================================================
1417 */
1418
GetRegExpError(int error)1419 const _String _String::GetRegExpError(int error) {
1420 char buffer[512];
1421 buffer[regerror(error, nil, buffer, 511)] = 0;
1422 return _String("Regular Expression error:") & _String (buffer).Enquote();
1423 }
1424
1425 //=============================================================
1426
FlushRegExp(regex_t * re)1427 void _String::FlushRegExp(regex_t* re) {
1428 regfree(re);
1429 delete re;
1430 }
1431
1432 //=============================================================
1433
PrepRegExp(const _String & pattern,int & error_code,bool case_sensitive,bool throw_errors)1434 regex_t* _String::PrepRegExp(const _String& pattern, int &error_code, bool case_sensitive, bool throw_errors) {
1435 regex_t *res = new regex_t;
1436
1437 error_code = regcomp(res, pattern.get_str(),
1438 REG_EXTENDED | (case_sensitive ? 0 : REG_ICASE));
1439
1440 if (error_code) {
1441 FlushRegExp(res);
1442 if (throw_errors) {
1443 throw (GetRegExpError (error_code));
1444 }
1445 return nil;
1446 }
1447 return res;
1448 }
1449
1450 //=============================================================
1451
RegExpMatch(regex_t const * re,unsigned long start) const1452 const _SimpleList _String::RegExpMatch(regex_t const* re, unsigned long start ) const {
1453 _SimpleList matched_pairs;
1454
1455 if (s_length && start < s_length) {
1456
1457 regmatch_t static_matches [4];
1458 if (re->re_nsub <= 3) {
1459 int error_code = regexec(re, s_data + start, re->re_nsub + 1, static_matches, 0);
1460 if (error_code == 0) {
1461 for (long k = 0L; k <= re->re_nsub; k++) {
1462 matched_pairs << static_matches[k].rm_so + start
1463 << static_matches[k].rm_eo - 1 + start;
1464 }
1465 }
1466 } else {
1467 regmatch_t *matches = new regmatch_t[re->re_nsub + 1];
1468 int error_code = regexec(re, s_data + start, re->re_nsub + 1, matches, 0);
1469 if (error_code == 0) {
1470 for (long k = 0L; k <= re->re_nsub; k++) {
1471 matched_pairs << matches[k].rm_so + start
1472 << matches[k].rm_eo - 1 + start;
1473 }
1474 }
1475 delete[] matches;
1476 }
1477 }
1478
1479 return matched_pairs;
1480 }
1481
1482 //=============================================================
1483
1484
RegExpAllMatches(regex_t const * re) const1485 const _SimpleList _String::RegExpAllMatches(regex_t const* re) const {
1486 _SimpleList matched_pairs;
1487
1488 if (s_length) {
1489
1490 regmatch_t *matches = new regmatch_t[re->re_nsub + 1];
1491 int error_code = regexec(re, s_data, re->re_nsub + 1, matches, 0);
1492 while (error_code == 0) {
1493 long offset = matched_pairs.countitems()
1494 ? matched_pairs.Element(-1) + 1
1495 : 0;
1496
1497 matched_pairs << matches[0].rm_so + offset
1498 << matches[0].rm_eo - 1 + offset;
1499
1500 offset += matches[0].rm_eo;
1501 if (offset < s_length) {
1502 error_code = regexec(re, s_data + offset, re->re_nsub + 1, matches, 0);
1503 } else {
1504 break;
1505 }
1506 }
1507 delete[] matches;
1508 }
1509 return matched_pairs;
1510 }
1511
1512 //=============================================================
1513
_IntRegExpMatch(const _String & pattern,bool case_sensitive,bool handle_errors,bool match_all) const1514 const _SimpleList _String::_IntRegExpMatch (const _String & pattern,
1515 bool case_sensitive, bool handle_errors, bool match_all) const {
1516 if (s_length) {
1517 int err_code = 0;
1518 regex_t* regex = PrepRegExp(pattern, err_code, case_sensitive);
1519 if (regex) {
1520 _SimpleList hits = match_all ? RegExpAllMatches(regex) : RegExpMatch(regex);
1521 FlushRegExp(regex);
1522 return hits;
1523 } else if (handle_errors) {
1524 HandleApplicationError(GetRegExpError(err_code));
1525 }
1526 }
1527 return _SimpleList();
1528 }
1529
1530 //=============================================================
1531
RegExpMatch(const _String & pattern,bool case_sensitive,bool handle_errors) const1532 const _SimpleList _String::RegExpMatch (const _String & pattern,
1533 bool case_sensitive, bool handle_errors) const {
1534 return _IntRegExpMatch (pattern, case_sensitive, handle_errors, false);
1535 }
1536
1537 //=============================================================
1538
RegExpAllMatches(const _String & pattern,bool case_sensitive,bool handle_errors) const1539 const _SimpleList _String::RegExpAllMatches (const _String & pattern,
1540 bool case_sensitive, bool handle_errors) const {
1541 return _IntRegExpMatch (pattern, case_sensitive, handle_errors, true);
1542 }
1543
1544 /*
1545 ==============================================================
1546 Methods
1547 ==============================================================
1548 */
1549
1550
1551 // Compute Adler-32 CRC for a string
1552 // Implementation shamelessly lifted from http://en.wikipedia.org/wiki/Adler-32
Adler32(void) const1553 long _String::Adler32(void) const {
1554
1555 const static unsigned long MOD_ADLER = 65521UL;
1556
1557 unsigned long len = s_length,
1558 a = 1UL,
1559 b = 0UL,
1560 i = 0UL;
1561
1562 while (len) {
1563 unsigned long tlen = len > 5550UL ? 5550UL : len;
1564 len -= tlen;
1565 do {
1566 a += s_data[i++];
1567 b += a;
1568 } while (--tlen);
1569 a = (a & 0xffff) + (a >> 16) * (65536UL - MOD_ADLER);
1570 b = (b & 0xffff) + (b >> 16) * (65536UL - MOD_ADLER);
1571 }
1572
1573 if (a >= MOD_ADLER) {
1574 a -= MOD_ADLER;
1575 }
1576
1577 b = (b & 0xffff) + (b >> 16) * (65536UL - MOD_ADLER);
1578
1579 if (b >= MOD_ADLER) {
1580 b -= MOD_ADLER;
1581 }
1582
1583 return b << 16 | a;
1584 }
1585
1586 //=============================================================
1587
1588
/**
 * Generate a string of pseudo-random characters drawn with genrand_int32().
 *
 * @param length   number of characters to set
 * @param alphabet characters to draw from; when null, character codes 1..127
 *                 are used instead
 * @return the generated string; no characters are set when `length` is 0 or
 *         the supplied alphabet is empty
 */
_String const _String::Random(const unsigned long length, const _String * alphabet) {
    _String result (length);

    const unsigned long choices = alphabet ? alphabet->s_length : 127UL;

    if (length == 0UL || choices == 0UL) {
        return result;
    }

    for (unsigned long position = 0UL; position < length; position++) {
        const unsigned long pick   = genrand_int32 () % choices;
        // without an alphabet, shift by one so code 0 (NUL) is never produced
        const char          symbol = alphabet ? alphabet->char_at(pick) : (char)(1UL+pick);
        result.set_char (position, symbol);
    }

    return result;
}
1607
1608 //=============================================================
1609
LempelZivProductionHistory(_SimpleList * rec) const1610 unsigned long _String::LempelZivProductionHistory (_SimpleList* rec) const {
1611 if (rec) {
1612 rec->Clear();
1613 }
1614
1615 if (empty()) {
1616 return 0UL;
1617 }
1618
1619 if (rec) {
1620 (*rec) << 0;
1621 }
1622
1623 unsigned long current_position = 1UL,
1624 production_history = 1UL;
1625
1626 while (current_position<s_length) {
1627
1628 unsigned long max_extension = 0UL;
1629
1630 for (unsigned long ip = 0; ip < current_position; ip++) {
1631 long sp = ip,
1632 mp = current_position;
1633
1634 while (mp < s_length && s_data [mp] == s_data[sp]) {
1635 mp++;
1636 sp++;
1637 }
1638
1639 if (mp==s_length) {
1640 max_extension = s_length-current_position;
1641 break;
1642 } else {
1643 if ((mp = mp - current_position + 1UL) > max_extension) {
1644 max_extension = mp;
1645 }
1646 }
1647 }
1648
1649 current_position += max_extension;
1650
1651 if (rec) {
1652 (*rec) << current_position - 1UL;
1653 } else {
1654 production_history ++;
1655 }
1656 }
1657
1658 if (rec) {
1659 return rec->lLength;
1660 }
1661
1662 return production_history;
1663 }
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678