1 /* 2 3 HyPhy - Hypothesis Testing Using Phylogenies. 4 5 Copyright (C) 1997-now 6 Core Developers: 7 Sergei L Kosakovsky Pond (spond@ucsd.edu) 8 Art FY Poon (apoon42@uwo.ca) 9 Steven Weaver (sweaver@ucsd.edu) 10 11 Module Developers: 12 Lance Hepler (nlhepler@gmail.com) 13 Martin Smith (martin.audacis@gmail.com) 14 15 Significant contributions from: 16 Spencer V Muse (muse@stat.ncsu.edu) 17 Simon DW Frost (sdf22@cam.ac.uk) 18 19 Permission is hereby granted, free of charge, to any person obtaining a 20 copy of this software and associated documentation files (the 21 "Software"), to deal in the Software without restriction, including 22 without limitation the rights to use, copy, modify, merge, publish, 23 distribute, sublicense, and/or sell copies of the Software, and to 24 permit persons to whom the Software is furnished to do so, subject to 25 the following conditions: 26 27 The above copyright notice and this permission notice shall be included 28 in all copies or substantial portions of the Software. 29 30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

#ifndef _HY_STRINGS_
#define _HY_STRINGS_

#include "baseobj.h"
#include "hy_types.h"
#include "regex.h"
#include "wchar.h"

/*
 Option bits accepted by _String::ExtractEnclosedExpression through its
 `options` argument; that function tests each bit with `options & flag`.
   fExtractRespectQuote  : do not look for delimiters inside quoted parts
                           of the string
   fExtractRespectEscape : do not treat an escaped (\char) character as a
                           match
   fExtractOneLevelOnly  : NOTE(review): read into `one_level_only` by
                           ExtractEnclosedExpression; presumably restricts
                           the search to a single nesting level -- confirm
                           against the implementation
*/
#define fExtractRespectQuote 0x01
#define fExtractRespectEscape 0x02
#define fExtractOneLevelOnly 0x04

/*
 Option bits whose consumer is not declared in the visible part of this
 header.
 NOTE(review): judging by the names they configure identifier validation
 (allow a leading digit / allow compound identifiers) -- confirm at the
 point of use
*/
#define fIDAllowFirstNumeric 0x01
#define fIDAllowCompound 0x02

/*
 Default value of the upper range bound taken by the search functions
 (Find, FindBackwards, FindAnyCase, FirstNonSpace, ...); their documentation
 describes it as "-1 : end of string"
*/
#define kStringEnd (-1L)

/*
 Reference kinds.
 NOTE(review): not used by the visible part of this header; the enumerator
 spellings ("Deference") are part of the public interface and are kept as is
*/
enum hy_reference_type {
  kStringInvalidReference = 0x00,
  kStringDirectReference = 0x01,
  kStringLocalDeference = 0x02,
  kStringGlobalDeference = 0x03
};

/* Target case for _String::ChangeCase and _String::ChangeCaseInPlace */
enum hy_string_case { kStringUpperCase, kStringLowerCase };

/*
 Scan direction for _String::FirstNonSpace, FirstNonSpaceIndex,
 FirstSpaceIndex and FirstNonSpaceFollowingSpace
*/
enum hy_string_search_direction {
  kStringDirectionForward,
  kStringDirectionBackward
};

/* Forward declarations of classes that are defined elsewhere */
class _SimpleList;
class _List;
class _ExecutionList;
class _StringBuffer;

/*
 A string class derived from BaseObj; the characters are kept in s_data and
 their count in s_length
*/
class _String : public BaseObj {

protected:
  /* character storage; may be null (empty () checks for this, and get_str
     is documented as possibly returning null) */
  char *s_data;
  /* number of characters in the string, as reported by length () */
  unsigned long s_length;

  /** this value is returned for "failed"
      access operations that don't throw errors, e.g.
getChar */ 84 const static char default_return = '\0'; 85 86 public: 87 88 89 /* 90 ============================================================== 91 Constructors/Destructors/Copiers 92 ============================================================== 93 */ 94 95 /** 96 * The default constuctor 97 * which creates an empty string 98 99 * Revision history 100 - SLKP 20170517 porting from v3 branch 101 */ 102 _String(void); // v3; 103 104 /** 105 * Standard initalization to 0 length and empty data 106 * which creates an empty string 107 108 * Revision history 109 - SLKP 20170517 porting from v3 branch 110 */ 111 virtual void Initialize(bool = true); 112 113 /** 114 * Clear the string (delete allocated memory) 115 * which creates an empty string 116 117 * Revision history 118 - SLKP 20170612 iniital implementation 119 */ 120 virtual void Clear(void); 121 122 /** 123 * Construct a string representation of a long interger 124 * @param number: the number to convert to a string 125 126 * Revision history 127 - SLKP 20170517 reviewed while porting from v3 branch 128 */ 129 _String(long const number); 130 131 /** 132 * Construct a string long enough to hold the specified # of chars 133 * Contents will be initialized to 0 134 * @param lengths: the number of chars to store 135 * @param buffer: if specified, use an externally allocated buffer (to avoid memory calls) 136 137 * Revision history 138 - SLKP 20170517 reviewed while porting from v3 branch 139 - SLKP 2020921 added the option to use external buffers 140 */ 141 _String(const unsigned long sL, char *buffer = nil); 142 143 /** 144 * Construct a string representation of a hyFloat(double) to string, 145 * using a format string (default is to use PRINTF_FORMAT_STRING formatting) 146 * @param number : The floating number to convert to string 147 * @param format : The C-style format string to use for the conversion 148 149 * Revision history 150 - SLKP 20170517 reviewed while porting from v3 branch 151 */ 152 _String(const 
hyFloat number, const char *format = nil); 153 154 /** 155 * Construct a string representation of a hyFloat(double) to string, 156 * using with the required digits of precision ("%.[N]g") specified 157 * @param number : The floating number to convert to string 158 * @param unsigned char : The number of significant digits 159 160 * Revision history 161 - SLKP 20181009 initial implementation 162 */ 163 _String(const hyFloat number, unsigned char digits_of_precision); 164 165 /** 166 * A RHS copy constructor 167 * @param str : the string to copy from 168 169 * Revision history 170 - SLKP 20170517 reviewed while porting from v3 branch 171 */ 172 _String(const _String &str); 173 174 /** 175 * A RHS move constructor 176 * @param str : the string to copy from 177 178 * Revision history 179 - SLKP 20180920 initial implementation 180 */ 181 _String(_String && str); 182 183 /** 184 * A RHS move constructor for string buffer 185 * @param str : the string to copy from 186 187 * Revision history 188 - SLKP 20180920 initial implementation 189 */ 190 _String(_StringBuffer && str); 191 192 /** 193 * The purpose of this constructor is a "move" contents from a dynamically 194 * allocated string to a new string variable; it does so without allocating 195 * memory (this is a hack for C++ move semantics) 196 * After a call to this dynamic_string will be DELETED, so it CANNOT be used 197 * again 198 * @param dynamic_string: the source string to move data from 199 200 * Revision history 201 - SLKP 20170517 reviewed while porting from v3 branch 202 */ 203 _String(_String *dynamic_string, bool dynamic = true); 204 205 /** 206 * Copy a part of another string into this string 207 * 208 209 * @param str : The source string 210 * @param start : Start of the range to copy 211 * @param end : End of the range to copy 212 * @sa NormalizeRange for a discussion on ranges 213 214 * Revision history 215 - SLKP 20170517 reviewed while porting from v3 branch 216 */ 217 _String(const _String &str, long 
start, long end); 218 219 /** 220 * Create a string with the contents of a C-style (0-terminated) 221 * char array (they are copied) 222 223 * @param c_string : The source C char array 224 * Revision history 225 - SLKP 20170517 reviewed while porting from v3 branch 226 */ 227 _String(const char *c_string); // v3 228 229 /** 230 * Create a string with the contents of a C-style (0-terminated) 231 * wide-char array (they are copied); only single byte characters 232 * are copied 233 234 * @param wc_string : The source C wchar_t char array 235 * Revision history 236 - SLKP 20170517 reviewed while porting from v3 branch 237 */ 238 239 _String(const wchar_t *wc_string); 240 /** 241 * Create a string with the from a single charcater 242 * @param c : The source character 243 * Revision history 244 - SLKP 20170517 reviewed while porting from v3 branch 245 */ 246 _String(const char c); 247 248 /** 249 * Create a string with several consecutive copies of the source string 250 * @param str : the source string 251 * @param copies : the number of copies 252 * Revision history 253 - SLKP 20170517 reviewed while porting from v3 branch 254 */ 255 _String(const _String &str, unsigned long copies); 256 257 /** 258 * Create a string with the contents of an open file 259 * the file will be rewound and is assumed to be open for reading 260 261 * @param file : the source file handle 262 * @param read_this_many: if -1, then rewind the file and read all of its 263 contents, otherwise read 'read_this_many' characters from current position 264 * Revision history 265 - SLKP 20170517 reviewed while porting from v3 branch 266 - SLKP 20170623 added the option to read a specified number of chars 267 from the current position of an open file (to handle fscanf 268 specifically); also added a check that the # of chars read 269 was the same as the one requested. 
270 */ 271 _String(FILE *file, long read_this_many = -1L); 272 273 /** 274 * A desctructor which respects reference counts 275 * Revision history 276 - SLKP 20170517 reviewed while porting from v3 branch 277 */ 278 virtual ~_String(void); 279 280 /** 281 * Create a dynamically allocated (shallow) copy of this object 282 * @return a shallow copy of this object (for strings, shallow == deep copy) 283 284 * Revision history 285 - SLKP 20170517 reviewed while porting from v3 branch 286 */ 287 virtual BaseRef makeDynamic(void) const; 288 289 /** Create a shallow copy of the argument (assumed castable to _String*) 290 in this object; this will be cleared out prior to this operation 291 292 293 @param source: the string to duplicate 294 295 * Revision history 296 - SLKP 20170517 reviewed while porting from v3 branch 297 [CHANGE-NOTE SLKP, this behavior may not be consistently enforced in old 298 code] 299 300 */ 301 virtual void Duplicate(BaseRefConst source); 302 303 /** Create a shallow copy of the argument 304 305 @param rhs : the right hand side of the assignment 306 307 * Revision history 308 - SLKP 20170517 reviewed while porting from v3 branch 309 [CHANGE-NOTE SLKP, changed parameter type from _String to _String const&] 310 311 */ 312 void operator=(_String const &rhs); 313 314 void operator=(_String &&rhs); 315 316 /* 317 ============================================================== 318 Getters and setters 319 ============================================================== 320 */ 321 322 /** 323 * Retrieve a writable element at index x. 
324 * Internal error results if [] is called on an invalid index 325 326 * @param index : the index (0-based) of a character to retrieve 327 * @return : reference to the character at the specified index 328 * Revision history 329 - SLKP 20170517 reviewed while porting from v3 branch 330 [CHANGE-NOTE SLKP 20170517, used to ignore errored indices] 331 */ 332 virtual char &operator[](long index); 333 334 /** 335 * Retrieve a read-only element at index x. If the index is invalid, 336 return default_return (\0) 337 338 * @param index : the index (0-based) of a character to retrieve 339 if index < 0, return a character this far from the end; 340 e.g. -1 returns the last character (for non-empty strings) 341 -2 : the second to the last character (for strings with 2 or 342 more chars), etc 343 * @return : the character at the specified index or default_return 344 * @sa get_char 345 * Revision history 346 - SLKP 20170517 reviewed while porting from v3 branch 347 [CHANGE-NOTE SLKP 20170517, used to have unsigned long argument] 348 - SLKP 20170623 handling negative indices; SEMANTICS CHANGE 349 350 */ 351 char operator()(long index) const; 352 353 /** 354 * Retrieve a read-only element at index x. 355 * same as s(i), but with this function you don't have to write (*s)(i) for 356 pointers 357 358 * @param index : the index (0-based) of a character to retrieve 359 * @return : the character at the specified index or default_return 360 * @sa operator () 361 * Revision history 362 - SLKP 20170517 reviewed while porting from v3 branch 363 */ get_char(long index)364 virtual char get_char(long index) const { 365 if (index >= 0L && index < s_length) { 366 return s_data[index]; 367 } 368 return _String::default_return; 369 } 370 371 /** 372 * Retrieve a read-only element at index x. 
373 * WITHOUT ANY RANGE CHECKING 374 * @param index : the index (0-based) of a character to retrieve 375 * @return : the character at the specified index or default_return 376 * @sa operator () 377 * @sa get_char 378 * Revision history 379 - SLKP 20170616 initial implementation 380 */ char_at(unsigned long idx)381 inline char char_at(unsigned long idx) const { return s_data[idx]; } 382 383 /** The sole purpose of this function is to allow warning-free compilation of 384 calls like array [string.getUChar (i)], otherwise you'd get warnings about 385 atypical indexing types 386 387 * Revision history 388 - SLKP 20170517 reviewed while porting from v3 branch 389 */ get_uchar(long i)390 inline unsigned char get_uchar(long i) const { 391 return (unsigned char)s_data[i]; 392 } 393 394 /** Get the length of this string 395 @return the length of the string 396 * Revision history 397 - SLKP 20170517 reviewed while porting from v3 branch 398 */ 399 length(void)400 inline unsigned long length(void) const { return s_length; } 401 402 /** Check if the string is emtpy 403 * Revision history 404 - SLKP 20170615 initial implementation 405 */ 406 empty(void)407 inline bool empty(void) const { return s_length == 0UL || s_data == nil; } 408 409 /** Check if the string is non-emtpy 410 * Revision history 411 - SLKP 20170621 initial implementation 412 */ 413 nonempty(void)414 inline bool nonempty(void) const { return !empty(); } 415 416 /** Store the supplied character in a given index; functionally almost the 417 same as str[index] = date, but neater to write than (*str)[index] = data, and 418 this also ignores invalid indices 419 420 * Revision history 421 - SLKP 20170517 reviewed while porting from v3 branch 422 [CHANGE-NOTE SLKP 20170517, used to have 'long' argument] 423 */ 424 void set_char(unsigned long index, char const data); 425 426 /** Retrieve the read-only char * for the string contents 427 A convenience function to avoid writing (const char*) (*this) 428 429 @return string 
data (could be null!, no checks performed) 430 @sa operator char * 431 432 * Revision history 433 - SLKP 20170608 reviewed while porting from v3 branch 434 */ 435 const char *get_str(void) const; 436 437 /* 438 ============================================================== 439 Type conversions 440 ============================================================== 441 */ 442 443 /** Retrieve the read-only char * for the string contents 444 445 @return string data (could be null!, no checks performed) 446 @sa get_str 447 448 * Revision history 449 - SLKP 20170608 reviewed while porting from v3 branch 450 */ 451 operator const char *(void)const; 452 453 /** 454 * Converts a string of form "[\d\.]\+" into a floating point number 455 * via a call to strtod 456 * \n\n \b Example: "3.14" becomes 3.14 457 458 * Revision history 459 - SLKP 20170608 reviewed while porting from v3 branch 460 */ 461 462 hyFloat to_float(void) const; 463 464 /** 465 * Converts a string into an integer number 466 * via a call to strtol 467 * \n\n \b Example: "3.14" becomes 3 468 469 * Revision history 470 - SLKP 20170608 reviewed; was not in v3 branch 471 */ 472 473 long to_long(void) const; 474 475 /** 476 * Obtain a string representation of this string 477 * Add a reference counter and return 'this' 478 @return this string with an extra reference counter 479 * Revision history 480 - SLKP 20170608 reviewed while porting from v3 branch 481 */ 482 virtual BaseRef toStr(unsigned long = 0UL); 483 484 /** 485 * Turns seconds into a time string in the form "hh:mm:ss" 486 * \n\n \b Example: 487 * \code 488 * long time_diff = 459132; 489 * _String("").FormatTimeString(time_diff); 490 * \endcode 491 * @param time_diff Seconds of time 492 * @return duration string to "127:32:12" in the example. 
493 * Revision history 494 - SLKP 20170616; reviewed while porting from the v3 branch 495 */ 496 497 static const _String FormatTimeString(long const); 498 499 /* 500 ============================================================== 501 Comparisons 502 ============================================================== 503 */ 504 505 /** Perform a lexicographic comparison of two strings 506 @param rhs right hand side of the comparison 507 @returns less, equal, greater 508 * Revision history 509 - SLKP 20170517 reviewed while porting from v3 branch 510 [CHANGE-NOTE SLKP 20170517, 511 return type from char to hyComparisonType 512 argument from _String const* to _String const & ] 513 514 */ 515 hyComparisonType Compare(_String const &rhs) const; 516 517 /** Perform a lexicographic comparison of two strings ignoring case. 518 Same as casting both strings to lower case and running Compare 519 520 @param rhs right hand side of the comparison 521 522 @returns less, equal, greater 523 * Revision history 524 - SLKP 20170517 initial implementation 525 526 */ 527 hyComparisonType CompareIgnoringCase(_String const &rhs) const; 528 529 /** Obvious lexicographic comparisons, mostly making calls to Compare 530 * Revision history 531 - SLKP 20170517 reviewed while porting from v3 branch 532 */ 533 bool operator==(const _String &) const; 534 bool operator>(const _String &) const; 535 bool operator<(const _String &) const; 536 bool operator>=(const _String &) const; 537 bool operator<=(const _String &) const; 538 bool operator!=(const _String &) const; 539 bool Equal(const _String &) const; 540 bool EqualIgnoringCase(const _String &) const; 541 bool Equal(const char) const; 542 543 /** match this string to a shell style pattern where the wildchar specifies 544 "match zero or more of anything" 545 546 @param pattern : the pattern to match 547 @param wildchar : the charcter to treat as a wild char 548 @param start_this : start matching at this position in "this" 549 @param start_pattern : 
start matching at this position in *pattern* 550 @param wildchar_matches: if given, push indices of ranges that matched wildcards 551 552 @return did the string match the pattern 553 554 * Revision history 555 - SLKP 20170517 reviewed while porting from v3 branch 556 [CHANGE-NOTE SLKP 20170517 change pattern type to _String const& from _String 557 const *] 558 - SLKP 20181024 add the optional _SimpleList argument to store the index ranges 559 which matched the wildcards 560 561 */ 562 bool EqualWithWildChar(_String const &pattern, char const wildchar = '*', 563 unsigned long start_this = 0UL, 564 unsigned long start_pattern = 0UL, 565 _SimpleList * wildchar_matches = nil) const; 566 567 /* 568 ============================================================== 569 Content-modification and extraction methods 570 ============================================================== 571 */ 572 573 /** 574 * String concatenation operator, returns "thisrhs" 575 * \n\n \b Example: \code _String new_string = _String("A") & _String("B") 576 \endcode 577 * @param rhs : the suffix to concatenate to this 578 * @return "AB" 579 * @sa EscapeAndAppend() 580 581 * Revision history 582 - SLKP 20170519 reviewed while porting from v3 branch 583 */ 584 _String operator&(const _String &rhs) const; 585 586 /** 587 * Removes part of string that is between the two specified indices 588 * \n\n \b Example: \code _String new_string = _String("AAABBBCCC").Chop(3,5) 589 \endcode 590 * @param start The starting index to chop from 591 * @param end The ending index to chop from 592 * @return "AAACCC" 593 * @sa Cut() 594 * @sa Trim() 595 * Revision history 596 - SLKP 20170519 reviewed while porting from v3 branch 597 */ 598 _String Chop(long start, long end) const; 599 600 /** 601 * Cuts part of string that is between the two specified indices (0-bases, 602 inclusive) 603 * \n\n \b Example: \code _String new_string = _String("AAABBBCCC").Cut(3,5) 604 \endcode 605 * @param start The starting index to cut from 
606 * @param end The ending index to cut from 607 * @return "BBB" 608 * @sa Chop() 609 * @sa Trim() 610 * Revision history 611 - SLKP 20170519 reviewed while porting from v3 branch 612 */ 613 _String Cut(long, long) const; 614 615 /** 616 * Delete a range of chars from the string (0-based, inclusive indices) 617 * \n\n \b Example: \code _String("AAABBBCCC").Delete(3,5) \endcode 618 * @param start The starting index to delete from 619 * @param end The ending index to delete to 620 * @return Transforms String to "AAACCC" 621 * @sa Chop() 622 * Revision history 623 - SLKP 20170519 reviewed while porting from v3 branch 624 */ 625 void Delete(long, long); 626 627 /** 628 * 629 * In-place reversed string 630 * \n s[0]...s[sLength-1] => s[sLength-1]...s[0] 631 * \n\n \b Example: \code _String("ABC").Flip() \endcode 632 * @return nothing 633 * @sa Reverse 634 * Revision history 635 - SLKP 20170519 reviewed while porting from v3 branch 636 */ 637 void Flip(void); 638 639 /** 640 * 641 * Return a reversed string, leaving the original unchanged 642 * \n s[0]...s[sLength-1] => s[sLength-1]...s[0] 643 * \n\n \b Example: \code _String("ABC").Reverse() \endcode 644 * @return "CBA" 645 * Revision history 646 - SLKP 20170519 reviewed ; (was missing in v3) 647 */ 648 _String Reverse(void) const; 649 650 /** 651 * Insert a char at a given position 652 * \n\n \b Example: \code _String("AA").insert('C',0) \endcode 653 * @param c Character to insert 654 * @param where The position (0-based) to insert the character into, 655 values less than 0 append to the string 656 * @return "CAA" 657 * Revision history 658 - SLKP 20170519 reviewed while porting from v3 branch 659 */ 660 661 void Insert(char, long); 662 663 /** 664 * Trim the string in place to retain characters beween the two indices 665 (0-bases, inclusive) 666 * \n\n \b Example: \code _String("AAABBBCCC").Trim(3,5) \endcode 667 * @param start The starting index to cut from 668 * @param end The ending index to cut from 669 * @return 
Transforms string to "BBB" 670 * @sa Cut() 671 * @sa Chop() 672 * Revision history 673 - SLKP 20170519 reviewed while porting from v3 branch 674 [CHANGE-NOTE SLKP 20170519 remove the bool argument for memory handling] 675 */ 676 677 virtual void Trim(long, long); 678 679 /** 680 * Converts string to a particular case 681 @param conversion_type: which case ? kStringUpperCase or kStringLowerCase 682 683 * Revision history 684 -SLKP 20170614 reviewed while porting from v3 branch 685 686 [CHANGE-NOTE SLKP 20170614 consolidated LoCase and UpCase; 687 changed behavior from in-place to returning a modified string 688 ] 689 */ 690 const _String ChangeCase(hy_string_case conversion_type) const; 691 void ChangeCaseInPlace(hy_string_case conversion_type); 692 693 /** 694 * Returns a list from a string split by a substr 695 * \n\n \b Example: _String("hyphy, gattaca, protease").Tokenize(",") will 696 create a list {"hyphy","gattaca","protease"} 697 * @param splitter The substring to split the string by 698 * @return A point to a *_List that holds a list of the resultant strings. 699 Retrieve one by list->lData[i] 700 * Revision history 701 -SLKP 20170615 reviewed while porting from v3 branch; previous 702 impelementation would not handle empty string splitter; 703 ] 704 */ 705 const _List Tokenize(_String const &splitter) const; 706 707 /** 708 * Returns a list from a string split by a any of the valid chars 709 * @param splitter a look table of characters 710 * @return A point to a *_List that holds a list of the resultant strings. 
Retrieve one by list->lData[i] 711 * Revision history 712 -SLKP 20170912 initial impementation 713 714 */ 715 const _List Tokenize (const bool splitter[256]) const; 716 /** 717 * Decorates the string with quotes 718 719 * @param quote_char which character to use as a "quote" 720 * @return quote_char + *this + quote_char 721 * Revision history 722 -SLKP 20170616 reviewed while porting from v2.3 branch 723 - 724 */ 725 726 727 const _String Enquote(char quote_char = '\'') const; 728 729 /** 730 * Decorates the string with open/close chars 731 732 * @param quote_char which character to use as a "quote" 733 * @return open_char + *this + close_char 734 * Revision history 735 -SLKP 20170626 initial implementation 736 - 737 */ 738 const _String Enquote(char open_char, char close_char) const; 739 740 /** 741 * Returns a copy of the string with all spaces removed 742 * \n\n \b Example: \code _String(" h y p h y").KillSpaces \endcode 743 * @param result The string that will have stripped spaces. 744 * @sa CompressSpaces() 745 * @return The example would return "hyphy" 746 * Revision history 747 -SLKP 20170616 reviewed while porting from v3 branch; changed from in 748 place to return by value 749 */ 750 const _String KillSpaces(void) const; 751 752 /** 753 * Replaces all runs of white spaces with a single ' ' character 754 * \n\n \b Example: \code _String(" h y p h y").CompressSpaces() 755 \endcode 756 * @return Example would return the string to " h y p h y" 757 * @sa KillSpaces() 758 * Revision history 759 -SLKP 20170616 reviewed while porting from v3 branch; changed from in 760 place to return by value 761 */ 762 const _String CompressSpaces(void) const; 763 764 /* 765 ============================================================== 766 Search functions 767 ============================================================== 768 */ 769 770 /** 771 * Find first occurence of the string between "start" and "end" (inclusive) 772 * \n\n \b Example: \code _String 
("AABBCC").Find("B")\endcode 773 * @param pattern The substring to find 774 * @param start The 0-based index to start searching from 775 * @param end The 0-based index to search to (inclusive); -1 : end of string 776 * @return Returns the index of the first instance of the pattern, kNotFound 777 (<0) if not found. 2 in the example 778 @sa FindBackwards 779 * Revision history 780 - SLKP 20170608 reviewed while porting from v3 branch 781 */ 782 long Find(const _String &pattern, long start = 0L, 783 long end = kStringEnd) const; 784 785 /** 786 * Find first occurence of the string between "start" and "end" (inclusive) 787 * looking backwards (i.e. last occurrence reported) 788 * \n\n \b Example: \code _String ("AABBCC").Find("B")\endcode 789 * @param pattern The substring to find 790 * @param start The 0-based index to start searching from 791 * @param end The 0-based index to search to (inclusive); -1 : end of string 792 * @return Returns the index of the first instance of the pattern, kNotFound 793 (<0) if not found. 3 in the example 794 @sa Find 795 * Revision history 796 - SLKP 20170608 reviewed while porting from v3 branch 797 */ 798 799 long FindBackwards(const _String &pattern, long start = 0L, 800 long end = kStringEnd) const; 801 /** 802 * Find first occurence of the character between "start" and "end" (inclusive) 803 * Uses a sentinel linear search 804 * \n\n \b Example: \code _String ("AABBCC").Find('B')\endcode 805 * @param p The character to find 806 * @param start The 0-based index to start searching from 807 * @param end The 0-based index to search to (inclusive); -1 : end of string 808 * @return Returns the index of the first instance of the pattern, kNotFound 809 (<0) if not found. 
2 in the example 810 811 * Revision history 812 - SLKP 20170608 reviewed while porting from v3 branch 813 */ 814 long Find(const char p, long start = 0L, long to = kStringEnd) const; 815 816 /** 817 * Find first occurence of the any of the characters marked in the lookup buffer (0/1) between "start" and "end" (inclusive) 818 * Uses a sentinel linear search 819 * \n\n \b Example: \code _String ("AABBCC").Find('B')\endcode 820 * @param lookup The lookup table whioch marks which characters are value 821 * @param start The 0-based index to start searching from 822 * @param end The 0-based index to search to (inclusive); -1 : end of string 823 * @return Returns the index of the first instance of the pattern, kNotFound (<0) if not found. 2 in the example 824 825 * Revision history 826 - SLKP 20170912 introduced 827 */ 828 829 long Find (const bool lookup[256] , long start = 0L, long to = kStringEnd) const ; 830 long FindAnyCase (const bool lookup[256] , long start = 0L, long to = kStringEnd) const ; 831 /** 832 * Find first occurence of the string between "start" and "end" (inclusive) 833 * @see Find() for parameter explanation 834 * Revision history 835 - SLKP 20170612; reviewed and modifed to be the same as Find with case 836 normalization while porting from the v3 branch 837 */ 838 839 840 841 long FindAnyCase(_String const &pattern, long start = 0L, 842 long to = kStringEnd) const; 843 844 /** 845 * Replace string `pattern` with string `replace`, all occurences true/false 846 * \n\n \b Example: \code _String("AAABBBCCCBBB").Replace("BBB","ZZ",true) 847 \endcode 848 * @param pattern The substring to replace 849 * @param replace The substring to replace the value with 850 * @param flag If true, replace all. 
* @return "AAAZZCCCZZ"

   * Revision history
      - SLKP 20170614; reviewed while porting from the v3 branch
   */

  const _String Replace(const _String &pattern, const _String& replace,
                        bool replace_all) const;

  /**
   * Locate the first non-space character of the string and return the
   * character itself
   * \n\n \b Example: \code _String ("    hyphy").FirstNonSpace()\endcode
   * @param start Beginning of string search
   * @param end End of string search (defaults to kStringEnd)
   * @param direction Choose between kStringDirectionForward and
   kStringDirectionBackward
   * @return The char of the first non-space, in the example, 'h'.
   * @see FirstNonSpaceIndex()

   * Revision history
      - SLKP 20170614; reviewed while porting from the v3 branch
      [CHANGE-NOTE SLKP 20170614 changed to a call to _FindFirstIndexCondtion]

   */

  char FirstNonSpace(
      long start = 0, long end = kStringEnd,
      hy_string_search_direction direction = kStringDirectionForward) const;

  /**
   * Locate the first non-space character of the string and return its
   * index
   * \n\n \b Example: \code _String ("    hyphy").FirstNonSpaceIndex()\endcode
   * @param start Beginning of string search
   * @param end End of string search (defaults to kStringEnd)
   * @param direction Choose between kStringDirectionForward and
   kStringDirectionBackward
   * @return The index of the first non-space, in the example, 4.
   * @see FirstNonSpace()

   * Revision history
      - SLKP 20170614; reviewed while porting from the v3 branch

   */
  long FirstNonSpaceIndex(
      long start = 0, long end = kStringEnd,
      hy_string_search_direction direction = kStringDirectionForward) const;

  /**
   * Locate the first space character of the string
   * \n Returns index of first space character
   * \n\n \b Example: \code _String ("h yphy").FirstSpaceIndex()\endcode
   * @param start starting index
   * @param end ending index to search (defaults to kStringEnd)
   * @param direction Choose between kStringDirectionForward and
   kStringDirectionBackward
   * @return Returns the index of the first space. 1 in the example.
   * @sa FirstNonSpaceIndex()

   * Revision history
      - SLKP 20170614; reviewed while porting from the v3 branch
      [CHANGE-NOTE SLKP 20170614 changed to a call to _FindFirstIndexCondtion]
   */
  long FirstSpaceIndex(
      long start = 0, long end = kStringEnd,
      hy_string_search_direction direction = kStringDirectionForward) const;

  /**
   * Locate the first non-space character of the string following one or more
   spaces
   * \n Returns the index of that non-space character
   * \n\n \b Example: \code _String ("h yphy").FirstNonSpaceFollowingSpace()\endcode
   * @param start starting index
   * @param end ending index to search (defaults to kStringEnd)
   * @param direction Choose between kStringDirectionForward and
   kStringDirectionBackward
   * @return Returns the index of the first non-space character that comes
   after one or more spaces.
   NOTE(review): presumably 2 in the example; the previously documented
   value (1) was copied from FirstSpaceIndex -- confirm against the
   implementation
927 * @sa FirstSpaceIndex() 928 * Revision history 929 - SLKP 20170614; reviewed while porting from the v3 branch 930 [CHANGE-NOTE SLKP 20170614 seems that the search in reverse direction was not 931 implemented correctly] 932 */ 933 934 long FirstNonSpaceFollowingSpace( 935 long start = 0, long end = kStringEnd, 936 hy_string_search_direction direction = kStringDirectionForward) const; 937 938 /** 939 * Checks to see if String begins with substring 940 * \n\n \b Example: \code _String("hyphy").BeginsWith("h")\endcode 941 * @param pattern Substring 942 * @param case_sensitive If true, it will be case sensitive. Default is case 943 sensitive. 944 * @param from: start matching *this at this position 945 * @return true if string begins with substring. Example returns true 946 * @sa EndsWith() 947 * Revision history 948 - SLKP 20170615; reviewed while porting from the v3 branch, renamed to camel 949 case (not cheap) added the third argument to check for match from a given 950 position in this 951 */ 952 953 bool BeginsWith (_String const& pattern, bool case_sensitive = true, unsigned long from = 0UL) const; 954 bool BeginsWith (bool const lookup[256], bool case_sensitive = true, unsigned long from = 0UL) const; 955 956 /** 957 * Checks to see if String ends with substring 958 * \n\n \b Example: \code _String("hyphy").EndsWith("hy")\endcode 959 * @param pattern Substring 960 * @param case_sensitive If true, it will be case sensitive. Default is case 961 sensitive. 962 * @return true if string ends with substring. 
Example returns true 963 * @sa BeginsWith() 964 * Revision history 965 - SLKP 20170616; reviewed while porting from the v3 branch, renamed to 966 camel case (not cheap) 967 */ 968 bool EndsWith(_String const &pattern, bool case_sensitive = true) const; 969 970 /** 971 * Checks to see if String starts with substring and it can't be extended to 972 make a valid ident 973 * by checking the next character only 974 * \n\n \b Example: \code 975 _String("return;").BeginsWithAndIsNotAnIdent("return"); 976 _String("return_me").BeginsWithAndIsNotAnIdent("return")\endcode 977 * @param pattern the prefix pattern 978 * @return true if string starts with substring and can't be extended to an 979 identifier. Example 1 would return true, and example 2 would return false 980 * Revision history 981 - SLKP 20170616; reviewed while porting from the v2.3 branch, renamed to 982 camel case (not cheap) 983 * @sa BeginsWith() 984 */ 985 bool BeginsWithAndIsNotAnIdent(_String const &) const; 986 /* 987 ============================================================== 988 Parser-related functions 989 TODO: possibly deprecate when the move to the grammar is effected 990 ============================================================== 991 */ 992 993 /** 994 * Starting at index [argument 1], 995 * find a span that encloses an expression (nested) delimited by char[argument 996 2] 997 * and char[argument 3] (e.g. {}, ()), respecting quotes and 998 allowing 999 * escaped characters as selected by the options bitmask (argument 4) 1000 * \n SLKP 20090803 1001 * 1002 * @param from The search starts at this index; the position of the first opening delimiter found will be stored here 1003 * @param open The first character to look for. For example, an open bracket 1004 '[' or open parenthesis '(' 1005 Can also be any object that supports object == char checks 1006 * @param close The closing character to look for. 
For example, and open bracket 1007 ']' or open paranthesis ')' 1008 Can also be any object that supports char == object checks 1009 * @param options: a bitmask of options, if fExtractRespectQuote is mixed in 1010 then do not look within enquoted parts of the string if set if 1011 fExtractRespectEscape is mixed in do not consider \char as matches to char 1012 when searching 1013 * 1014 * @return Ending position is returned 1015 * kNotFound is returned if the starting character could not be found or the 1016 expression did not terminate before the end of the string 1017 * 1018 * Revision history 1019 - SLKP 20170614; reviewed while porting from the v2.3 branch; convered the 1020 two bool flags to a bit-mask so that the calls can be more explict 1021 - SLKP 20170615; included support for singly quoted literals 1022 - SLKP 20171211: added support for generic callbacks to check whether or not the final character has been found 1023 */ 1024 1025 //============================================================= 1026 1027 ExtractEnclosedExpression(long & from,DELIM open,DELIM close,int options)1028 template <class DELIM> long ExtractEnclosedExpression (long& from, DELIM open, DELIM close, int options) const { 1029 long current_position = from, 1030 current_level = 0L; 1031 1032 bool respect_quote = options & fExtractRespectQuote, 1033 respect_escape = options & fExtractRespectEscape, 1034 one_level_only = options & fExtractOneLevelOnly, 1035 do_escape = false; 1036 1037 char quote_state = '\0', 1038 this_char = get_char (current_position); 1039 1040 while (this_char) { 1041 bool check_quote = false; 1042 1043 if (do_escape) { 1044 do_escape = false; 1045 } else { 1046 // also need to handle cases when quotes are in the open / close set 1047 1048 if ((this_char == '"' || this_char == '\'') && respect_quote && !do_escape) { 1049 if (quote_state == '\0') { 1050 check_quote = true; 1051 quote_state = this_char; 1052 } else { 1053 if (this_char == quote_state) { 1054 check_quote 
= true; 1055 quote_state = '\0'; 1056 } 1057 } 1058 } 1059 if (open == this_char && (check_quote || quote_state == '\0')) { 1060 // handle the case when close and open are the same 1061 if (current_level == 1L && close == this_char && from < current_position) { 1062 return current_position; 1063 } 1064 if (current_level == 0L) { 1065 from = current_position; 1066 current_level++; 1067 } else { 1068 if (!one_level_only) { 1069 current_level++; 1070 } 1071 } 1072 1073 } else if (close == this_char && (check_quote || quote_state == '\0')) { 1074 current_level--; 1075 if (current_level == 0L && from < current_position) { 1076 return current_position; 1077 } 1078 if (current_level < 0L) { 1079 return kNotFound; 1080 } 1081 } else if (this_char == '\\' && respect_escape && quote_state != '\0' && !do_escape) { 1082 do_escape = true; 1083 } 1084 } 1085 1086 this_char = get_char (++current_position); 1087 1088 } 1089 1090 // check if \0 is a valid terminator 1091 1092 if (close == this_char) { 1093 if (current_level == 1L && from < current_position) { 1094 return current_position; 1095 } 1096 } 1097 1098 return kNotFound; 1099 } 1100 1101 /** 1102 * Starting at a 0-based index [argument 1], 1103 * find a span that terminates in one of the characters in [argument 2], while 1104 * respecting (), [], {}, "" and escapes 1105 * \n SLKP 20090805 1106 * @param start the index to start the search from 1107 * @param terminator The terminator to find 1108 * @return kNotFound is returned if the starting character could not be found 1109 or the expression did not terminate before the end of the string 1110 * @sa IsALiteralArgument() 1111 * Revision history 1112 - SLKP 20170615 reviewed while porting from the v2.3 branch; 1113 for the string; included support for singly quoted 1114 literals; cleaned up the logic, and fixed broken logic for terminator > 1 1115 char long 1116 1117 - SLKP 20180921 converted into a template to make it possible to search 1118 for multiple terminators 1119 */ 
1120 FindTerminator(long start,TERMINATOR const & terminator)1121 template <typename TERMINATOR> long FindTerminator(long start, TERMINATOR const &terminator) const{ 1122 1123 long current_position = start; 1124 1125 1126 long curly_depth = 0L, 1127 square_depth = 0L, 1128 paren_depth = 0L; 1129 1130 bool do_escape = false; 1131 char quote_state = '\0'; 1132 1133 while (current_position < s_length) { 1134 char this_char = s_data[current_position]; 1135 if (do_escape) { 1136 do_escape = false; 1137 } else { 1138 if ((this_char == '"' || this_char == '\'') && !do_escape) { 1139 if (quote_state == '\0') { 1140 quote_state = this_char; 1141 } else { 1142 if (this_char == quote_state) { 1143 quote_state = '\0'; 1144 } 1145 } 1146 } else { 1147 if (quote_state == '\0') { 1148 1149 switch (this_char) { 1150 case '(': 1151 paren_depth ++; 1152 current_position++; 1153 continue; 1154 case ')': 1155 if (paren_depth > 0L) { 1156 paren_depth --; 1157 current_position++; 1158 continue; 1159 } 1160 break; 1161 case '[': 1162 square_depth++; 1163 current_position++; 1164 continue; 1165 case ']': 1166 if (square_depth > 0L) { 1167 square_depth --; 1168 current_position++; 1169 continue; 1170 } 1171 break; 1172 case '{': 1173 curly_depth++; 1174 current_position++; 1175 continue; 1176 case '}': 1177 if (curly_depth > 0L) { 1178 curly_depth --; 1179 current_position++; 1180 continue; 1181 } 1182 break; 1183 } 1184 1185 if (curly_depth == 0L && square_depth == 0L && paren_depth == 0L) { 1186 if (BeginsWith (terminator, true, current_position)) { 1187 return current_position; 1188 } 1189 } 1190 } else { 1191 if (this_char == '\\' && quote_state != '\0' && !do_escape) { 1192 do_escape = true; 1193 } 1194 } 1195 } 1196 } 1197 current_position++; 1198 } 1199 1200 return kNotFound; 1201 } 1202 1203 /** 1204 * Strips quotes from around the string if present (in place) 1205 * \n\n \b Example: \code _String("\"hyphy\"").StripQuotes("")\endcode 1206 * @param open_char : the opening quote char 
1207 * @param close_char : the closing quote char 1208 * @return : true if the string was enquoted and the quotes had been stripped 1209 1210 * Revision history 1211 - SLKP 20170616 reviewed while porting from the v3 branch 1212 - SLKP 20170702 return TRUE if successfully stripped quotes 1213 1214 */ 1215 bool StripQuotes(char open_char = '"', char close_char = '"'); 1216 1217 /** 1218 * Strips quotes from around the string if present (in place) for multiple delimiters at once 1219 * \n\n \b Example: \code _String("\"hyphy\"").StripQuotes("\"'","\"'")\endcode 1220 * @param open_char : the opening quote chars (paired with close_char) 1221 * @param close_char : the closing quote chars (paired with open_char) 1222 * @return : true if the string was enquoted and the quotes had been stripped 1223 1224 * Revision history 1225 - SLKP 20200508 initial 1226 1227 */ 1228 bool StripQuotes(char const *, char const *); 1229 1230 /** 1231 * Checks if String is valid ident 1232 * \n A valid ident is any alphanumeric or '_' 1233 * \n\n \b Example: '$hyphy' is not legal. 'hy_phy' is legal. 1234 * @param options if fIDAllowCompound is set, treat 'x.y.z' as a valid 1235 identifier, if fIDAllowFirstNumeric is set, consider '2x' a valid identifier 1236 * @return presumably true when the whole string is a valid identifier under the given options (TODO confirm against the implementation) * @sa ConvertToAnIdent(); 1237 * Revision history 1238 - SLKP 20170616 reviewed while porting from the v3 branch 1239 changed the argument to bitmask, added 1240 fIDAllowFirstNumeric 1241 1242 */ 1243 bool IsValidIdentifier(int options = fIDAllowCompound) const; 1244 1245 /** 1246 * Converts a string to a valid ident 1247 * \n A valid ident is any alphanumeric or '_' 1248 * \n\n \b Example: \code _String("$hyphy").ConvertToAnIdent() \endcode 1249 * @note the former 'strict' argument (if strict, only alphabetic, no numerals) is no longer part of the signature; the declaration takes only the options bitmask. 
1250 * @param options if fIDAllowCompound is set, treat 'x.y.z' as a valid 1251 identifier, if fIDAllowFirstNumeric is set, consider '2x' a valid identifier 1252 * @sa IsValidIdentifier(); 1253 * @return the example would return "_hyphy" 1254 1255 * Revision history 1256 - SLKP 20170616 new implementation based on _IsValidIdentifierAux 1257 changed the argument to bitmask, added fIDAllowFirstNumeric 1258 changed from in-place modification to returning a modified 1259 string this function actually respects fIDAllowCompound now 1260 */ 1261 const _String ConvertToAnIdent(int options = fIDAllowCompound) const; 1262 1263 /** 1264 * If it is enclosed in quotes, then it is a literal argument 1265 * \n \n \b Example: "\"hyphy \"quote\"\"" is a literal argument; 1266 * @param strip_quotes if set to TRUE and the expression is a literal, trim 1267 the quotes 1268 * @return presumably true if the string is a quoted literal (TODO confirm against the implementation) * Revision history 1269 - SLKP 20170616 reviewed while porting from the v3 branch 1270 added support for single quotes in addition to double 1271 quotes 1272 */ 1273 1274 bool IsALiteralArgument(bool strip_quotes = false); 1275 1276 /** 1277 * Examine the string argument contained in this object, decide what it is, 1278 and process accordingly 1279 * \n\n \b Example: \code 'hyphy'.ProcessVariableReferenceCases (object) 1280 \endcode is a direct reference to object hyphy 1281 * \n\n \b Example: \code '\"hy\"+\"phy\"'.ProcessVariableReferenceCases 1282 (object) \endcode is a direct reference to object hyphy 1283 * \n\n \b Example: \code '*hyphy'.ProcessVariableReferenceCases (object) 1284 \endcode is a reference to the object whose name is stored in the string 1285 variable hyphy 1286 * \n\n \b Example: \code '**hyphy'.ProcessVariableReferenceCases (object) 1287 \endcode is a reference to the object whose name is stored in the string 1288 variable hyphy in the global context 1289 * @param referenced_object will store the handled variable ID 1290 * @param context is the namespace of the referenced object; could be nil 1291 
* @return one of HY_STRING_INVALID_REFERENCE HY_STRING_DIRECT_REFERENCE 1292 HY_STRING_LOCAL_DEREFERENCE HY_STRING_GLOBAL_DEREFERENCE 1293 * @see IsValidIdentifier() 1294 - SLKP 20170616 reviewed while porting from the v2.3 branch 1295 */ 1296 1297 hy_reference_type 1298 ProcessVariableReferenceCases(_String &referenced_object, 1299 _String const *context = nil) const; 1300 1301 /* 1302 ============================================================== 1303 METHODS 1304 ============================================================== 1305 */ 1306 1307 /** a by-character iterator 1308 1309 1310 @param cb : a void (char c, unsigned long index) callback argument 1311 @param start_at : start the iteration at this position in the string 1312 1313 - SLKP 20171008 introduced this function 1314 1315 */ 1316 1317 template <typename CALLBACK> void Each (CALLBACK cb, unsigned long start_at = 0) const { 1318 for (unsigned long i = start_at; i<s_length; i++) { 1319 cb ( s_data[i], i ); 1320 } 1321 } 1322 1323 /** a by-character matching iterator 1324 1325 1326 @param cb : a void (char c, unsigned long index) callback argument 1327 @param start_at : start the iteration at this position in the string 1328 1329 - SLKP 20171008 introduced this function 1330 1331 */ 1332 1333 template <typename CALLBACK> long Any (CALLBACK cb, unsigned long start_at = 0) const { 1334 for (unsigned long i = start_at; i<s_length; i++) { 1335 if (cb ( s_data[i], i )) return i; 1336 } 1337 return kNotFound; 1338 } 1339 1340 /** 1341 * Compute Adler-32 CRC for a string 1342 * \n\n \b Example: \code _String result = new _String ("Wikipedia"); \endcode 1343 * \n Implementation shamelessly lifted from 1344 http://en.wikipedia.org/wiki/Adler-32 1345 * @return the Adler32 checksum. 
300286872 returns in the Example 1346 1347 * Revision history 1348 - SLKP 20170614; reviewed while porting from the v3 branch 1349 */ 1350 long Adler32(void) const; 1351 1352 /** 1353 * Generate a random string over the given alphabet 1354 * @param len (>0) The desired length of the string 1355 * @param alphabet Which alphabet do the random characters come from; if nil, 1356 then this will be generated from 1-128 ASCII codes 1357 * @return the random string 1358 * Revision history 1359 - SLKP 20170616; reviewed while porting from the v2.3 branch 1360 */ 1361 static _String const Random(const unsigned long len, 1362 const _String *alphabet = nil); 1363 1364 /** 1365 * Computes Lempel-Ziv complexity of the string, i.e. roughly the size of the 1366 substring table 1367 * that would have been computed using the LZW algorithm 1368 * @param rec if provided, will store the indices of substrings mapped to 1369 unique codes 1370 * @return string complexity (less compressible == higher complexity) 1371 * \n Example: 1001111011000010 = 6 because the input could be reduced 1372 to ~6 codes 1373 * The contents of 'rec' would be 0,1,3,7,11,15, implying that the encoded 1374 substrings would be [0:0] = 1 [1:1] = 0 [2:3] = 01 [4:7] = 1110 [8:11] = 1100 1375 [12:15] = 0010 1376 * Revision history 1377 - SLKP 20170616; reviewed while porting from the v2.3 branch, not sure 1378 */ 1379 unsigned long LempelZivProductionHistory(_SimpleList *rec = nil) const; 1380 1381 /* 1382 ============================================================== 1383 Regular Expression Methods 1384 ============================================================== 1385 */ 1386 /** 1387 * Compile a regular expression represented by a _String object. 
1388 * @param pattern the regular expression to compile 1389 * @param error_code will receive compilation error codes if any 1390 * @param case_sensitive controls whether or not the RE is case sensitive 1391 * @param throw_errors if set, errors will result in thrown excptions (_String const type) 1392 * @return the resulting (opaque) RE datastructure, or NULL if 1393 compilation failed 1394 1395 * @sa FlushRegExp 1396 * @sa GetRegExpError 1397 * Revision history 1398 - SLKP 20170616; reviewed while porting from the v3 branch 1399 maded static member of the class, changed argument 1 to 1400 const & 1401 - SLKP 20180803; added the option for automatic error decoding 1402 */ 1403 static regex_t *PrepRegExp(_String const &pattern, int &error_code, 1404 bool case_sensitive, bool throw_errors = false); 1405 1406 /** 1407 * Free a reg_exp datastructure previously returned by PrepRegExp 1408 * @param re the (opaque) data structure for the regular expression 1409 * @sa PrepRegExp 1410 * @sa GetRegExpError 1411 * Revision history 1412 - SLKP 20170616; reviewed while porting from the v3 branch 1413 made a static member of the class 1414 */ 1415 static void FlushRegExp(regex_t *re); 1416 1417 /** 1418 * Convert internal regexp code into a string message 1419 * @param code error code 1420 * @return the string with the decoded error message 1421 * @sa PrepRegExp 1422 * @sa FlushRegExp 1423 * Revision history 1424 - SLKP 20170616; reviewed while porting from the v3 branch 1425 made a static member of the class 1426 */ 1427 static const _String GetRegExpError(int code); 1428 1429 /** 1430 * Search this string for the first match to a regular expression and its 1431 subexpressions; returns a list of hits (possibly empty) as pairs of ranges; for 1432 example "hyphy".RegExpMatch("([^y]+).") -> 0,1,0,0, meaning that the entire 1433 expression matches to [0:1] and the first subexpression matches to [0:0] 1434 * @param re the regular expression previously compiled by PrepRegExp 1435 * @param 
start start matching the string at this position 1436 * @return the coordinates of matches for the entire expression (first pair), 1437 and all subexpressions (left to right); empty if no match 1438 1439 * Revision history 1440 - SLKP 20170616; reviewed while porting from the v3 branch 1441 return by value vs writing to argument 1442 - SLKP 20170623; added the option to search from a given start 1443 position 1444 1445 * @sa RegExpAllMatches() 1446 */ 1447 1448 _SimpleList const RegExpMatch(regex_t const *re, 1449 unsigned long start = 0) const; 1450 1451 /** 1452 * Search this string for ALL matches to a regular expression (ignoring 1453 subexpressions); returns a list of hits (possibly empty) as pairs of ranges; 1454 for example "hyphy".RegExpAllMatches("([^y]+).") -> 0,1,2,4, meaning that [0:1] 1455 (hy) and [2:4] (phy) match the pattern 1456 * @param re the regular expression previously compiled by PrepRegExp 1457 * @return the coordinates of all matches for the entire expression left to 1458 right; empty if no match 1459 1460 * Revision history 1461 - SLKP 20170616; reviewed while porting from the v3 branch 1462 return by value vs writing to argument 1463 1464 * @sa RegExpMatch 1465 */ 1466 1467 _SimpleList const RegExpAllMatches(regex_t const *re) const; 1468 1469 /** 1470 Convenience wrappers for the RegExpMatch and RegExpAllMatches overloads that take regex_t 1471 arguments; here the pattern is given as a string and the regular expression is compiled and disposed of 1472 internally 1473 @param pattern the regular expression to match 1474 @param case_sensitive whether to compile the RE as case sensitive or not 1475 @param handle_errors if set, call application wide error handlers on 1476 errors, otherwise ignore errors and treat them as a missing match 1477 1478 * @sa RegExpMatch 1479 * @sa RegExpAllMatches 1480 * Revision history 1481 - SLKP 20170616; initial implementation 1482 * 1483 */ 1484 _SimpleList const RegExpMatch(_String const &pattern, bool case_sensitive, 1485 bool handle_errors) const; 1486 
_SimpleList const RegExpAllMatches(_String const &pattern, 1487 bool case_sensitive, 1488 bool handle_errors) const; 1489 /** given coordinates start and end, converts then to valid string indices 1490 if called on an empty string, returns 0 and does not change start and end 1491 if start < 0 it is reset to 0 1492 if end < 0 or >= string length it is reset to (string length) - 1 1493 1494 @param start: start of the range (0-based) 1495 @param end : end of the range 1496 @return : the length of the range 1497 1498 * Revision history 1499 - SLKP 20170517 porting from v3 branch 1500 */ 1501 long NormalizeRange(long &start, long &end) const; 1502 1503 1504 private: 1505 /** Find the length of the maximum prefix that forms a valid ID 1506 1507 @param allow_compounds : treat '.' as a valid identifier character (e.g. 1508 x.y.z) 1509 @param allow_first_numeric : allow idents that start with a digit (e.g. 2x) 1510 @param wildcard : treat this character as a valid identifier character (e.g. 1511 this1.?.x) 1512 1513 @return the 0-based index of the end of the valid ID prefix (-1 if the prefix 1514 is empty) 1515 1516 * Revision history 1517 - SLKP 20170616 reviewed while porting from the v3 branch 1518 */ 1519 1520 long _IsValidIdentifierAux(bool allow_compounds, bool allow_first_numeric, 1521 char wildcard = '\0') const; 1522 1523 /** Find the first character in a range that meets a particular condition 1524 1525 @param start : start of the range to search (0-based) 1526 @param end : end of the range to search (0-based) 1527 @direction : forwards or backwards search 1528 @comparison_function: a function that takes a single argument (char) and 1529 returns true if it "passes" 1530 1531 * Revision history 1532 - SLKP 20170614 factored out common functions for conditional index finding 1533 */ 1534 1535 template <class CF> _FindFirstIndexCondtion(long start,long end,hy_string_search_direction direction,CF comparison_function)1536 long _FindFirstIndexCondtion(long start, long 
end, 1537 hy_string_search_direction direction, 1538 CF comparison_function) const { 1539 long requested_range = NormalizeRange(start, end); 1540 1541 if (requested_range > 0L) { 1542 if (direction == kStringDirectionForward) { 1543 for (; start <= end; start++) { 1544 if (comparison_function(s_data[start])) { 1545 return start; 1546 } 1547 } 1548 } else { 1549 for (; end >= start; end--) { 1550 if (comparison_function(s_data[end])) { 1551 return end; 1552 } 1553 } 1554 } 1555 } 1556 1557 return kNotFound; 1558 } 1559 1560 1561 /** this is a utility function which allocates length+1 chars for s_data, 1562 copies the data from source_string, and sets the terminating 0 1563 1564 * Revision history 1565 - SLKP 20170517 factoring repeated functionality 1566 1567 */ 1568 inline void AllocateAndCopyString(const char *source_string, 1569 unsigned long length); 1570 1571 1572 /** Factored out core of RegExpMatch and RegExpAllMatches 1573 * Revision history 1574 - SLKP 20170616; initial implementation 1575 */ 1576 const _SimpleList _IntRegExpMatch(const _String &pattern, bool case_sensitive, 1577 bool handle_errors, bool match_all) const; 1578 }; 1579 1580 // _______________________________________________________________________ 1581 1582 void SetStatusBarValue(long, hyFloat, hyFloat); 1583 void SetStatusLine(_String); 1584 void SetStatusLine(_String, _String, _String, long l); 1585 void SetStatusLine(_String, _String, _String); 1586 void SetStatusLine(_String, _String, _String, long, char); 1587 1588 void SetStatusLineUser(_String const); 1589 1590 void StringToConsole(_String const &, void *extra = nil); 1591 void BufferToConsole(const char *, void *extra = nil); 1592 void NLToConsole(void *extra = nil); 1593 void ObjectToConsole(BaseRef, void *extra = nil); 1594 1595 _String *StringFromConsole(void); 1596 1597 #endif 1598