1 /*
2 
3 HyPhy - Hypothesis Testing Using Phylogenies.
4 
5 Copyright (C) 1997-now
6 Core Developers:
7   Sergei L Kosakovsky Pond (spond@ucsd.edu)
8   Art FY Poon    (apoon42@uwo.ca)
9   Steven Weaver (sweaver@ucsd.edu)
10 
11 Module Developers:
12         Lance Hepler (nlhepler@gmail.com)
13         Martin Smith (martin.audacis@gmail.com)
14 
15 Significant contributions from:
16   Spencer V Muse (muse@stat.ncsu.edu)
17   Simon DW Frost (sdf22@cam.ac.uk)
18 
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26 
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29 
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38 */
39 
40 #ifndef _HY_STRINGS_
41 #define _HY_STRINGS_
42 
43 #include "baseobj.h"
44 #include "hy_types.h"
45 #include "regex.h"
46 #include "wchar.h"
47 
/* fExtract* option values: each one occupies its own bit (1, 2, 4). */
#define fExtractRespectQuote  (1 << 0)
#define fExtractRespectEscape (1 << 1)
#define fExtractOneLevelOnly  (1 << 2)

/* fID* option values: each one occupies its own bit (1, 2). */
#define fIDAllowFirstNumeric (1 << 0)
#define fIDAllowCompound     (1 << 1)

/* Default value of the `end` / `to` range arguments of the search methods
   (documented there as "-1 : end of string"). */
#define kStringEnd (-1L)
56 
/** Reference-kind codes; the numeric values are pinned explicitly (0 through 3). */
enum hy_reference_type {
  kStringInvalidReference = 0,
  kStringDirectReference  = 1,
  kStringLocalDeference   = 2,
  kStringGlobalDeference  = 3
};
63 
/** Target letter case, taken by _String::ChangeCase and ChangeCaseInPlace. */
enum hy_string_case {
  kStringUpperCase = 0,
  kStringLowerCase = 1
};
65 
/** Scan direction taken by the FirstNonSpace / FirstSpaceIndex family of searches. */
enum hy_string_search_direction {
  kStringDirectionForward  = 0,
  kStringDirectionBackward = 1
};
70 
71 class _SimpleList;
72 class _List;
73 class _ExecutionList;
74 class _StringBuffer;
75 
76 class _String : public BaseObj {
77 
78 protected:
79     char          *s_data;
80     unsigned long s_length;
81 
    /** this value is returned for "failed"
     access operations that don't throw errors, e.g. get_char */
84     const static char default_return = '\0';
85 
86 public:
87 
88 
89   /*
90    ==============================================================
91    Constructors/Destructors/Copiers
92    ==============================================================
93    */
94 
95   /**
   * The default constructor
97    * which creates an empty string
98 
99    * Revision history
100    - SLKP 20170517 porting from v3 branch
101    */
102   _String(void); // v3;
103 
104   /**
   * Standard initialization to 0 length and empty data
106    * which creates an empty string
107 
108    * Revision history
109    - SLKP 20170517 porting from v3 branch
110    */
111   virtual void Initialize(bool = true);
112 
113   /**
114    * Clear the string (delete allocated memory)
115    * which creates an empty string
116 
117    * Revision history
   - SLKP 20170612 initial implementation
119    */
120   virtual void Clear(void);
121 
122   /**
   * Construct a string representation of a long integer
124    * @param number: the number to convert to a string
125 
126    * Revision history
127    - SLKP 20170517 reviewed while porting from v3 branch
128    */
129   _String(long const number);
130 
131   /**
132    * Construct a string long enough to hold the specified # of chars
133    * Contents will be initialized to 0
   * @param sL: the number of chars to store
135    * @param buffer: if specified, use an externally allocated buffer (to avoid memory calls)
136 
137    * Revision history
138       - SLKP 20170517 reviewed while porting from v3 branch
139       - SLKP 2020921 added the option to use external buffers
140    */
141   _String(const unsigned long sL, char *buffer = nil);
142 
143   /**
144    * Construct a string representation of a hyFloat(double) to string,
145    * using a format string (default is to use PRINTF_FORMAT_STRING formatting)
146    * @param number : The floating number to convert to string
147    * @param format : The C-style format string to use for the conversion
148 
149    * Revision history
150    - SLKP 20170517 reviewed while porting from v3 branch
151    */
152   _String(const hyFloat number, const char *format = nil);
153 
154     /**
155      * Construct a string representation of a hyFloat(double) to string,
     * using the specified number of digits of precision ("%.[N]g")
     * @param number : The floating number to convert to string
     * @param digits_of_precision : The number of significant digits
159 
160      * Revision history
161      - SLKP 20181009 initial implementation
162      */
163     _String(const hyFloat number, unsigned char digits_of_precision);
164 
165   /**
166    * A RHS copy constructor
167    * @param str : the string to copy from
168 
169    * Revision history
170    - SLKP 20170517 reviewed while porting from v3 branch
171    */
172   _String(const _String &str);
173 
174   /**
175    * A RHS move constructor
   * @param str : the string to move from
177 
178    * Revision history
179    - SLKP 20180920 initial implementation
180    */
181    _String(_String && str);
182 
183   /**
184    * A RHS move constructor for string buffer
   * @param str : the string buffer to move from
186 
187    * Revision history
188    - SLKP 20180920 initial implementation
189    */
190    _String(_StringBuffer && str);
191 
192   /**
193    * The purpose of this constructor is a "move" contents from a dynamically
194    * allocated string to a new string variable; it does so without allocating
195    * memory (this is a hack for C++ move semantics)
196    * After a call to this dynamic_string will be DELETED, so it CANNOT be used
197    * again
198    * @param dynamic_string: the source string to move data from
199 
200    * Revision history
201    - SLKP 20170517 reviewed while porting from v3 branch
202    */
203   _String(_String *dynamic_string, bool dynamic = true);
204 
205   /**
206    * Copy a part of another string into this string
207    *
208 
209    * @param str   : The source string
210    * @param start : Start of the range to copy
211    * @param end   : End of the range to copy
212    * @sa NormalizeRange for a discussion on ranges
213 
214    * Revision history
215    - SLKP 20170517 reviewed while porting from v3 branch
216    */
217   _String(const _String &str, long start, long end);
218 
219   /**
220    * Create a string with the contents of a C-style (0-terminated)
221    * char array (they are copied)
222 
223    * @param c_string   : The source C char array
224    * Revision history
225    - SLKP 20170517 reviewed while porting from v3 branch
226    */
227   _String(const char *c_string); // v3
228 
229   /**
230    * Create a string with the contents of a C-style (0-terminated)
231    * wide-char array (they are copied); only single byte characters
232    * are copied
233 
234    * @param wc_string   : The source C wchar_t char array
235    * Revision history
236    - SLKP 20170517 reviewed while porting from v3 branch
237    */
238 
239   _String(const wchar_t *wc_string);
240   /**
   * Create a string from a single character
242    * @param c   : The source character
243    * Revision history
244    - SLKP 20170517 reviewed while porting from v3 branch
245    */
246   _String(const char c);
247 
248   /**
249    * Create a string with several consecutive copies of the source string
250    * @param str    : the source string
251    * @param copies : the number of copies
252    * Revision history
253    - SLKP 20170517 reviewed while porting from v3 branch
254    */
255   _String(const _String &str, unsigned long copies);
256 
257   /**
258    * Create a string with the contents of an open file
259    * the file will be rewound and is assumed to be open for reading
260 
261    * @param file    : the source file handle
262    * @param read_this_many: if -1, then rewind the file and read all of its
263    contents, otherwise read 'read_this_many' characters from current position
264    * Revision history
265    - SLKP 20170517 reviewed while porting from v3 branch
266    - SLKP 20170623 added the option to read a specified number of chars
267                    from the current position of an open file (to handle fscanf
268                    specifically); also added a check that the # of chars read
269                    was the same as the one requested.
270    */
271   _String(FILE *file, long read_this_many = -1L);
272 
273   /**
   *  A destructor which respects reference counts
275    *  Revision history
276    - SLKP 20170517 reviewed while porting from v3 branch
277    */
278   virtual ~_String(void);
279 
280   /**
281    * Create a dynamically allocated (shallow) copy of this object
282    * @return a shallow copy of this object (for strings, shallow == deep copy)
283 
284    *  Revision history
285    - SLKP 20170517 reviewed while porting from v3 branch
286    */
287   virtual BaseRef makeDynamic(void) const;
288 
289   /** Create a shallow copy of the argument (assumed castable to _String*)
290    in this object; this will be cleared out prior to this operation
291 
292 
293    @param source: the string to duplicate
294 
295    *  Revision history
296    - SLKP 20170517 reviewed while porting from v3 branch
297    [CHANGE-NOTE SLKP, this behavior may not be consistently enforced in old
298    code]
299 
300    */
301   virtual void Duplicate(BaseRefConst source);
302 
303   /** Create a shallow copy of the argument
304 
305    @param rhs : the right hand side of the assignment
306 
307    *  Revision history
308    - SLKP 20170517 reviewed while porting from v3 branch
309    [CHANGE-NOTE SLKP, changed parameter type from _String to _String const&]
310 
311    */
312   void operator=(_String const &rhs);
313 
314   void operator=(_String &&rhs);
315 
316   /*
317    ==============================================================
318    Getters and setters
319    ==============================================================
320    */
321 
322   /**
323    * Retrieve a writable element at index x.
324    * Internal error results if [] is called on an invalid index
325 
326    * @param index : the index (0-based) of a character to retrieve
327    * @return      : reference to the character at the specified index
328    *  Revision history
329    - SLKP 20170517 reviewed while porting from v3 branch
330    [CHANGE-NOTE SLKP 20170517, used to ignore errored indices]
331    */
332   virtual char &operator[](long index);
333 
334   /**
335    * Retrieve a read-only element at index x. If the index is invalid,
336    return default_return (\0)
337 
338    * @param index : the index (0-based) of a character to retrieve
339                     if index < 0, return a character this far from the end;
340                     e.g. -1 returns  the last character (for non-empty strings)
341                     -2 : the second to the last character (for strings with 2 or
342    more chars), etc
343    * @return      : the character at the specified index or default_return
344    * @sa get_char
345    *  Revision history
346    - SLKP 20170517 reviewed while porting from v3 branch
347    [CHANGE-NOTE SLKP 20170517, used to have unsigned long argument]
348    - SLKP 20170623 handling negative indices; SEMANTICS CHANGE
349 
350    */
351   char operator()(long index) const;
352 
353   /**
354    * Retrieve a read-only element at index x.
355    * same as s(i), but with this function you don't have to write (*s)(i) for
356    pointers
357 
358    * @param index : the index (0-based) of a character to retrieve
359    * @return      : the character at the specified index or default_return
360    * @sa operator ()
361    *  Revision history
362    - SLKP 20170517 reviewed while porting from v3 branch
363    */
get_char(long index)364   virtual char get_char(long index) const {
365     if (index >= 0L && index < s_length) {
366       return s_data[index];
367     }
368     return _String::default_return;
369   }
370 
371   /**
372    * Retrieve a read-only element at index x.
373    * WITHOUT ANY RANGE CHECKING
374    * @param index : the index (0-based) of a character to retrieve
   * @return      : the character at the specified index (idx is NOT validated)
376    * @sa operator ()
377    * @sa get_char
378    *  Revision history
379    - SLKP 20170616 initial implementation
380    */
char_at(unsigned long idx)381   inline char char_at(unsigned long idx) const { return s_data[idx]; }
382 
383   /** The sole purpose of this function is to allow warning-free compilation of
   calls like array [string.get_uchar (i)], otherwise you'd get warnings about
385    atypical indexing types
386 
387    *  Revision history
388    - SLKP 20170517 reviewed while porting from v3 branch
389    */
get_uchar(long i)390   inline unsigned char get_uchar(long i) const {
391     return (unsigned char)s_data[i];
392   }
393 
394   /** Get the length of this string
395    @return the length of the string
396    *  Revision history
397       - SLKP 20170517 reviewed while porting from v3 branch
398    */
399 
length(void)400   inline unsigned long length(void) const { return s_length; }
401 
  /** Check if the string is empty
403    *  Revision history
404       - SLKP 20170615 initial implementation
405    */
406 
empty(void)407   inline bool empty(void) const { return s_length == 0UL || s_data == nil; }
408 
  /** Check if the string is non-empty
410    *  Revision history
411    - SLKP 20170621 initial implementation
412    */
413 
nonempty(void)414   inline bool nonempty(void) const { return !empty(); }
415 
416   /** Store the supplied character in a given index; functionally almost the
   same as str[index] = data, but neater to write than (*str)[index] = data, and
418    this also ignores invalid indices
419 
420    *  Revision history
421    - SLKP 20170517 reviewed while porting from v3 branch
422    [CHANGE-NOTE SLKP 20170517, used to have 'long' argument]
423    */
424   void set_char(unsigned long index, char const data);
425 
426   /** Retrieve the read-only char * for the string contents
427    A convenience function to avoid writing (const char*) (*this)
428 
429    @return string data (could be null!, no checks performed)
430    @sa operator char *
431 
432    *  Revision history
433    - SLKP 20170608 reviewed while porting from v3 branch
434    */
435   const char *get_str(void) const;
436 
437   /*
438    ==============================================================
439    Type conversions
440    ==============================================================
441    */
442 
443   /** Retrieve the read-only char * for the string contents
444 
445    @return string data (could be null!, no checks performed)
446    @sa get_str
447 
448    *  Revision history
449    - SLKP 20170608 reviewed while porting from v3 branch
450    */
451   operator const char *(void)const;
452 
453   /**
454    * Converts a string of form "[\d\.]\+" into a floating point number
455    * via a call to strtod
456    * \n\n \b Example: "3.14" becomes 3.14
457 
458    *  Revision history
459    - SLKP 20170608 reviewed while porting from v3 branch
460    */
461 
462   hyFloat to_float(void) const;
463 
464   /**
465    * Converts a string into an integer number
466    * via a call to strtol
467    * \n\n \b Example: "3.14" becomes 3
468 
469    *  Revision history
470    - SLKP 20170608 reviewed; was not in v3 branch
471    */
472 
473   long to_long(void) const;
474 
475   /**
476    * Obtain a string representation of this string
477    * Add a reference counter and return 'this'
478      @return this string with an extra reference counter
479    *  Revision history
480    - SLKP 20170608 reviewed while porting from v3 branch
481   */
482   virtual BaseRef toStr(unsigned long = 0UL);
483 
484   /**
485    * Turns seconds into a time string in the form "hh:mm:ss"
486    * \n\n \b Example:
487    * \code
488    * long time_diff = 459132;
489    * _String("").FormatTimeString(time_diff);
490    * \endcode
491    * @param time_diff Seconds of time
492    * @return duration string to "127:32:12" in the example.
493    *  Revision history
494    - SLKP 20170616; reviewed while porting from the v3 branch
495    */
496 
497   static const _String FormatTimeString(long const);
498 
499   /*
500    ==============================================================
501    Comparisons
502    ==============================================================
503   */
504 
505   /** Perform a lexicographic comparison of two strings
506    @param rhs right hand side of the comparison
507    @returns less, equal, greater
508    *  Revision history
509    - SLKP 20170517 reviewed while porting from v3 branch
510    [CHANGE-NOTE SLKP 20170517,
511    return type from char to hyComparisonType
512    argument from _String const* to _String const & ]
513 
514    */
515   hyComparisonType Compare(_String const &rhs) const;
516 
517   /** Perform a lexicographic comparison of two strings ignoring case.
518    Same as casting both strings to lower case and running Compare
519 
520    @param rhs right hand side of the comparison
521 
522    @returns less, equal, greater
523    *  Revision history
524    - SLKP 20170517 initial implementation
525 
526    */
527   hyComparisonType CompareIgnoringCase(_String const &rhs) const;
528 
529   /** Obvious lexicographic comparisons, mostly making calls to Compare
530    *  Revision history
531    - SLKP 20170517 reviewed while porting from v3 branch
532    */
533   bool operator==(const _String &) const;
534   bool operator>(const _String &) const;
535   bool operator<(const _String &) const;
536   bool operator>=(const _String &) const;
537   bool operator<=(const _String &) const;
538   bool operator!=(const _String &) const;
539   bool Equal(const _String &) const;
540   bool EqualIgnoringCase(const _String &) const;
541   bool Equal(const char) const;
542 
543   /** match this string to a shell style pattern where the wildchar specifies
544    "match zero or more of anything"
545 
546    @param pattern : the pattern to match
   @param wildchar : the character to treat as a wild char
548    @param start_this : start matching at this position in "this"
549    @param start_pattern : start matching at this position in *pattern*
550    @param wildchar_matches: if given, push indices of ranges that matched wildcards
551 
552    @return did the string match the pattern
553 
554    *  Revision history
555    - SLKP 20170517 reviewed while porting from v3 branch
556    [CHANGE-NOTE SLKP 20170517 change pattern type to _String const& from _String
557    const *]
558    - SLKP 20181024 add the optional _SimpleList argument to store the index ranges
559                    which matched the wildcards
560 
561    */
562   bool EqualWithWildChar(_String const &pattern, char const wildchar = '*',
563                          unsigned long start_this = 0UL,
564                          unsigned long start_pattern = 0UL,
565                          _SimpleList * wildchar_matches = nil) const;
566 
567   /*
568    ==============================================================
569    Content-modification and extraction methods
570    ==============================================================
571    */
572 
573   /**
574   * String concatenation operator, returns "thisrhs"
575   * \n\n \b Example: \code _String new_string = _String("A") & _String("B")
576   \endcode
577   * @param  rhs : the suffix to concatenate to this
578   * @return "AB"
579 * @sa EscapeAndAppend()
580 
581    *  Revision history
582       - SLKP 20170519 reviewed while porting from v3 branch
583   */
584   _String operator&(const _String &rhs) const;
585 
586   /**
587    * Removes part of string that is between the two specified indices
588    * \n\n \b Example: \code _String new_string = _String("AAABBBCCC").Chop(3,5)
589    \endcode
590    * @param start The starting index to chop from
591    * @param end The ending index to chop from
592    * @return "AAACCC"
593    * @sa Cut()
594    * @sa Trim()
595    *  Revision history
596       - SLKP 20170519 reviewed while porting from v3 branch
597    */
598    _String Chop(long start, long end) const;
599 
600   /**
   * Cuts part of string that is between the two specified indices (0-based,
602    inclusive)
603    * \n\n \b Example: \code _String new_string = _String("AAABBBCCC").Cut(3,5)
604    \endcode
605    * @param start The starting index to cut from
606    * @param end The ending index to cut from
607    * @return "BBB"
608    * @sa Chop()
609    * @sa Trim()
610    *  Revision history
611       - SLKP 20170519 reviewed while porting from v3 branch
612    */
613    _String Cut(long, long) const;
614 
615   /**
616    * Delete a range of chars from the string (0-based, inclusive indices)
617    * \n\n \b Example: \code _String("AAABBBCCC").Delete(3,5) \endcode
618    * @param start The starting index to delete from
619    * @param end   The ending index to delete to
620    * @return Transforms String to "AAACCC"
621    * @sa Chop()
622    *  Revision history
623       - SLKP 20170519 reviewed while porting from v3 branch
624    */
625   void Delete(long, long);
626 
627   /**
628    *
629    * In-place reversed string
630    * \n s[0]...s[sLength-1] => s[sLength-1]...s[0]
631    * \n\n \b Example: \code _String("ABC").Flip() \endcode
632    * @return nothing
633    * @sa Reverse
634    *  Revision history
635       - SLKP 20170519 reviewed while porting from v3 branch
636    */
637   void Flip(void);
638 
639   /**
640    *
641    * Return a reversed string, leaving the original unchanged
642    * \n s[0]...s[sLength-1] => s[sLength-1]...s[0]
643    * \n\n \b Example: \code _String("ABC").Reverse() \endcode
644    * @return "CBA"
645    *  Revision history
646     - SLKP 20170519 reviewed ; (was missing in v3)
647    */
648    _String Reverse(void) const;
649 
650   /**
651    * Insert a char at a given position
   * \n\n \b Example: \code _String("AA").Insert('C',0) \endcode
653    * @param c Character to insert
654    * @param where The position (0-based) to insert the character into,
655    values less than 0 append to the string
656    * @return "CAA"
657    *  Revision history
658      - SLKP 20170519 reviewed while porting from v3 branch
659   */
660 
661   void Insert(char, long);
662 
663   /**
   * Trim the string in place to retain characters between the two indices
   (0-based, inclusive)
666    * \n\n \b Example: \code _String("AAABBBCCC").Trim(3,5) \endcode
667    * @param start The starting index to cut from
668    * @param end  The ending index to cut from
669    * @return Transforms string to "BBB"
670    * @sa Cut()
671    * @sa Chop()
672    *  Revision history
673       - SLKP 20170519 reviewed while porting from v3 branch
674       [CHANGE-NOTE SLKP 20170519 remove the bool argument for memory handling]
675    */
676 
677   virtual void Trim(long, long);
678 
679   /**
680    * Converts string to a particular case
681    @param conversion_type: which case ? kStringUpperCase or kStringLowerCase
682 
683    *  Revision history
684       -SLKP 20170614 reviewed while porting from v3 branch
685 
686       [CHANGE-NOTE SLKP 20170614 consolidated LoCase and UpCase;
687        changed behavior from in-place to returning a modified string
688       ]
689    */
690   const _String ChangeCase(hy_string_case conversion_type) const;
691   void   ChangeCaseInPlace(hy_string_case conversion_type);
692 
693   /**
694    * Returns a list from a string split by a substr
695    * \n\n \b Example: _String("hyphy, gattaca, protease").Tokenize(",") will
696    create a list {"hyphy","gattaca","protease"}
697    * @param splitter The substring to split the string by
   * @return A _List (returned by value) that holds the resultant strings.
699    Retrieve one by list->lData[i]
700    *  Revision history
701     -SLKP 20170615 reviewed while porting from v3 branch; previous
   implementation would not handle empty string splitter;
703      ]
704    */
705   const _List Tokenize(_String const &splitter) const;
706 
707     /**
     * Returns a list from a string split by any of the valid chars
     * @param splitter a lookup table of characters
     * @return A _List (returned by value) that holds the resultant strings. Retrieve one by list->lData[i]
     *  Revision history
     -SLKP 20170912 initial implementation
713 
714      */
715     const _List  Tokenize (const bool splitter[256]) const;
716     /**
717    * Decorates the string with quotes
718 
719    * @param quote_char which character to use as a "quote"
720    * @return quote_char + *this + quote_char
721    *  Revision history
722       -SLKP 20170616 reviewed while porting from v2.3 branch
723       -
724    */
725 
726 
727   const _String Enquote(char quote_char = '\'') const;
728 
729   /**
730    * Decorates the string with open/close chars
731 
732    * @param quote_char which character to use as a "quote"
733    * @return open_char + *this + close_char
734    *  Revision history
735    -SLKP 20170626 initial implementation
736    -
737    */
738   const _String Enquote(char open_char, char close_char) const;
739 
740   /**
741    * Returns a copy of the string with all spaces removed
742    * \n\n \b Example: \code _String("   h  y p    h  y").KillSpaces \endcode
743    * @param result The string that will have stripped spaces.
744    * @sa CompressSpaces()
745    * @return The example would return "hyphy"
746    *  Revision history
747       -SLKP 20170616 reviewed while porting from v3 branch; changed from in
748    place to return by value
749    */
750   const _String KillSpaces(void) const;
751 
752   /**
753    * Replaces all runs of white spaces with a single ' ' character
754    * \n\n \b Example: \code _String("   h  y p    h  y").CompressSpaces()
755    \endcode
756    * @return Example would return the string to " h y p h y"
757    * @sa KillSpaces()
758    *  Revision history
759       -SLKP 20170616 reviewed while porting from v3 branch; changed from in
760    place to return by value
761    */
762   const _String CompressSpaces(void) const;
763 
764   /*
765   ==============================================================
766   Search functions
767   ==============================================================
768   */
769 
770   /**
   * Find first occurrence of the string between "start" and "end" (inclusive)
772    * \n\n \b Example: \code _String ("AABBCC").Find("B")\endcode
773    * @param pattern The substring to find
774    * @param start The 0-based index to start searching from
775    * @param end   The 0-based index to search to (inclusive); -1 : end of string
776    * @return Returns the index of the first instance of the pattern, kNotFound
777    (<0) if not found. 2 in the example
778      @sa FindBackwards
779    *  Revision history
780    - SLKP 20170608 reviewed while porting from v3 branch
781    */
782   long Find(const _String &pattern, long start = 0L,
783             long end = kStringEnd) const;
784 
785   /**
   * Find first occurrence of the string between "start" and "end" (inclusive)
   * looking backwards (i.e. last occurrence reported)
   * \n\n \b Example: \code _String ("AABBCC").FindBackwards("B")\endcode
   * @param pattern The substring to find
   * @param start The 0-based index to start searching from
   * @param end   The 0-based index to search to (inclusive); -1 : end of string
   * @return Returns the index of the last instance of the pattern, kNotFound
   (<0) if not found. 3 in the example
794    @sa Find
795    *  Revision history
796    - SLKP 20170608 reviewed while porting from v3 branch
797    */
798 
799   long FindBackwards(const _String &pattern, long start = 0L,
800                      long end = kStringEnd) const;
801   /**
  * Find first occurrence of the character between "start" and "to" (inclusive)
  * Uses a sentinel linear search
  * \n\n \b Example: \code _String ("AABBCC").Find('B')\endcode
  * @param p The character to find
  * @param start The 0-based index to start searching from
  * @param to    The 0-based index to search to (inclusive); -1 : end of string
808   * @return Returns the index of the first instance of the pattern, kNotFound
809   (<0) if not found. 2 in the example
810 
811   *  Revision history
812   - SLKP 20170608 reviewed while porting from v3 branch
813   */
814   long Find(const char p, long start = 0L, long to = kStringEnd) const;
815 
816 /**
 * Find first occurrence of any of the characters marked in the lookup buffer (0/1) between "start" and "to" (inclusive)
 * Uses a sentinel linear search
 * \n\n \b Example: \code _String ("AABBCC").Find('B')\endcode
 * @param lookup The lookup table which marks the characters to search for
 * @param start The 0-based index to start searching from
 * @param to    The 0-based index to search to (inclusive); -1 : end of string
823  * @return Returns the index of the first instance of the pattern, kNotFound (<0) if not found. 2 in the example
824 
825  *  Revision history
826  - SLKP 20170912 introduced
827  */
828 
829    long    Find (const bool lookup[256] , long start = 0L, long to = kStringEnd) const ;
830    long    FindAnyCase (const bool lookup[256] , long start = 0L, long to = kStringEnd) const ;
831 /**
  * Find first occurrence of the string between "start" and "to" (inclusive)
  * @see Find() for parameter explanation
  *  Revision history
   - SLKP 20170612; reviewed and modified to be the same as Find with case
836   normalization while porting from the v3 branch
837   */
838 
839 
840 
841   long FindAnyCase(_String const &pattern, long start = 0L,
842                    long to = kStringEnd) const;
843 
844   /**
   * Replace string `pattern` with string `replace`; `replace_all` selects
   * whether every occurrence or only the first one is replaced
   * \n\n \b Example: \code _String("AAABBBCCCBBB").Replace("BBB","ZZ",true)
   \endcode
   * @param pattern The substring to replace
   * @param replace The substring to replace the value with
   * @param replace_all If true, replace all.
851    * @return "AAAZZCCCZZ"
852 
853    *  Revision history
854      - SLKP 20170614; reviewed while porting from the v3 branch
855    */
856 
857   const _String Replace(const _String &pattern, const _String& replace,
858                         bool replace_all) const;
859 
860   /**
861    * Locate the first non-space character of the string
   * \n\n \b Example: \code _String ("    hyphy").FirstNonSpace()\endcode
   * @param start Beginning of string search
   * @param end End of string search
   * @param direction Choose between kStringDirectionForward and
   kStringDirectionBackward
867    * @return The char of the first non-space, in the example, 'h'.
868    * @see FirstNonSpaceIndex()
869 
870    *  Revision history
871    - SLKP 20170614; reviewed while porting from the v3 branch
872      [CHANGE-NOTE SLKP 20170614 changed to a call to _FindFirstIndexCondtion]
873 
874    */
875 
876   char FirstNonSpace(
877       long start = 0, long end = kStringEnd,
878       hy_string_search_direction direction = kStringDirectionForward) const;
879 
880   /**
881    * Locate the first non-space character of the string
882    * \n\n \b Example: \code _String ("    hyphy").FirstNonSpaceIndex()\endcode
883    * @param start Beginning of string search
884    * @param end End of string search
885    * @param direction Choose between kStringDirectionForward and
886    kStringDirectionBackwards
887    * @return The index of the first non-space, in the example, 4.
888    * @see FirstNonSpace()
889 
890    *  Revision history
891    - SLKP 20170614; reviewed while porting from the v3 branch
892 
893    */
894   long FirstNonSpaceIndex(
895       long start = 0, long end = kStringEnd,
896       hy_string_search_direction direction = kStringDirectionForward) const;
897 
898   /**
899    * Locate the first space character of the string
900    * \n Returns index of first space character
901    * \n\n \b Example: \code _String ("h yphy").FirstSpaceIndex()\endcode
902    * @param start starting index
903    * @param end ending index to search
904    * @param direction Choose between kStringDirectionForward and
905    kStringDirectionBackwards
906    * @return Returns the index of the first space. 1 in the example.
907    * @sa FirstNonSpaceIndex()
908 
909    *  Revision history
910     - SLKP 20170614; reviewed while porting from the v3 branch
911       [CHANGE-NOTE SLKP 20170614 changed to a call to _FindFirstIndexCondtion]
912    */
913   long FirstSpaceIndex(
914       long start = 0, long end = kStringEnd,
915       hy_string_search_direction direction = kStringDirectionForward) const;
916 
917   /**
918    * Locate the first non-space character of the string following one or more
919    spaces
920    * \n Returns the index of that non-space character
921    * \n\n \b Example: \code _String ("h yphy").FirstNonSpaceFollowingSpace()\endcode
922    * @param start starting index
923    * @param end ending index to search
924    * @param direction Choose between kStringDirectionForward and
925    kStringDirectionBackwards
926    * @return Returns the index of the first non-space character that follows a space. 2 in the example.
927    * @sa FirstSpaceIndex()
928    *  Revision history
929    - SLKP 20170614; reviewed while porting from the v3 branch
930    [CHANGE-NOTE SLKP 20170614 seems that the search in reverse direction was not
931    implemented correctly]
932   */
933 
934   long FirstNonSpaceFollowingSpace(
935       long start = 0, long end = kStringEnd,
936       hy_string_search_direction direction = kStringDirectionForward) const;
937 
938   /**
939    * Checks to see if String begins with substring
940    * \n\n \b Example: \code _String("hyphy").BeginsWith("h")\endcode
941    * @param pattern Substring
942    * @param case_sensitive If true, it will be case sensitive. Default is case
943    sensitive.
944    * @param from: start matching *this at this position
945    * @return true if string begins with substring. Example returns true
946    * @sa EndsWith()
947    *  Revision history
948    - SLKP 20170615; reviewed while porting from the v3 branch, renamed to camel
949    case (not cheap) added the third argument to check for match from a given
950    position in this
951   */
952 
953   bool BeginsWith (_String const& pattern, bool case_sensitive = true, unsigned long from = 0UL) const;
954   bool BeginsWith (bool const lookup[256], bool case_sensitive = true, unsigned long from = 0UL) const;
955 
956   /**
957    * Checks to see if String ends with substring
958    * \n\n \b Example: \code _String("hyphy").EndsWith("hy")\endcode
959    * @param pattern Substring
960    * @param case_sensitive If true, it will be case sensitive. Default is case
961    sensitive.
962    * @return true if string ends with substring. Example returns true
963    * @sa BeginsWith()
964    *  Revision history
965       - SLKP 20170616; reviewed while porting from the v3 branch, renamed to
966    camel case (not cheap)
967    */
968   bool EndsWith(_String const &pattern, bool case_sensitive = true) const;
969 
970   /**
971    * Checks to see if String starts with substring and it can't be extended to
972    make a valid ident
973    * by checking the next character only
974    * \n\n \b Example: \code
975    _String("return;").BeginsWithAndIsNotAnIdent("return");
976    _String("return_me").BeginsWithAndIsNotAnIdent("return")\endcode
977    * @param pattern the prefix pattern
978    * @return true if string starts with substring and can't be extended to an
979    identifier. Example 1 would return true, and example 2 would return false
980    *  Revision history
981       - SLKP 20170616; reviewed while porting from the v2.3 branch, renamed to
982    camel case (not cheap)
983    * @sa BeginsWith()
984    */
985   bool BeginsWithAndIsNotAnIdent(_String const &) const;
986   /*
987    ==============================================================
988    Parser-related functions
989    TODO: possible deprecate when the move to the grammar is effected
990    ==============================================================
991    */
992 
993   /**
994    * Starting at index [argument 1],
995    * find a span that encloses an expression (nested) delimited by char[argument
996    2]
997    * and char[argument 3] (e.g. {}, ()), optionally respecting quotes and
998    allowing
999    * escaped characters (both controlled by the bitmask in argument 4)
1000    * \n SLKP 20090803
1001    *
1002    * @param &from The starting position of the segment will be stored here
1003    * @param open The opening delimiter to look for. For example, an open bracket
1004    '[' or open parenthesis '('
1005      Can also be any object that supports object == char checks
1006    * @param close The closing delimiter to look for. For example, a close bracket
1007    ']' or close parenthesis ')'
1008      Can also be any object that supports object == char checks
1009    * @param options: a bitmask of options; if fExtractRespectQuote is mixed in,
1010    do not look for delimiters within quoted parts of the string; if
1011    fExtractRespectEscape is mixed in, do not consider \char (inside a quoted
1012    part) as a match to char when searching; if fExtractOneLevelOnly is mixed in, nested opening delimiters do not increase the nesting level
1013    *
1014    * @return Ending position is returned
1015    *   kNotFound is returned if the starting character could not be found or the
1016    expression did not terminate before the end of the string
1017    *
1018    *  Revision history
1019      - SLKP 20170614; reviewed while porting from the v2.3 branch; converted the
1020    two bool flags to a bit-mask so that the calls can be more explicit
1021      - SLKP 20170615; included support for singly quoted literals
1022      - SLKP 20171211: added support for generic callbacks to check whether or not the final character has been found
1023   */
1024 
1025     //=============================================================
1026 
1027 
ExtractEnclosedExpression(long & from,DELIM open,DELIM close,int options)1028   template <class DELIM> long ExtractEnclosedExpression (long& from, DELIM open, DELIM close, int options) const {
1029     long   current_position = from,
1030     current_level    = 0L;
1031 
1032     bool       respect_quote = options & fExtractRespectQuote,
1033                respect_escape = options & fExtractRespectEscape,
1034                one_level_only = options & fExtractOneLevelOnly,
1035                do_escape = false;
1036 
1037     char       quote_state = '\0',
1038                this_char = get_char (current_position);
1039 
1040     while (this_char) {
1041       bool       check_quote = false;
1042 
1043       if (do_escape) {
1044         do_escape = false;
1045       } else {
1046         // also need to handle cases when quotes are in the open / close set
1047 
1048         if ((this_char == '"' || this_char == '\'') && respect_quote && !do_escape) {
1049           if (quote_state == '\0') {
1050             check_quote = true;
1051             quote_state = this_char;
1052           } else {
1053             if (this_char == quote_state) {
1054               check_quote = true;
1055               quote_state = '\0';
1056             }
1057           }
1058         }
1059         if (open == this_char && (check_quote || quote_state == '\0')) {
1060             // handle the case when close and open are the same
1061           if (current_level == 1L && close == this_char && from < current_position) {
1062             return current_position;
1063           }
1064           if (current_level == 0L) {
1065             from = current_position;
1066             current_level++;
1067           } else {
1068             if (!one_level_only) {
1069               current_level++;
1070             }
1071           }
1072 
1073         } else if (close == this_char && (check_quote || quote_state == '\0')) {
1074           current_level--;
1075           if (current_level == 0L && from < current_position) {
1076             return current_position;
1077           }
1078           if (current_level < 0L) {
1079             return kNotFound;
1080           }
1081         } else if (this_char == '\\' && respect_escape && quote_state != '\0' && !do_escape) {
1082           do_escape = true;
1083         }
1084       }
1085 
1086       this_char = get_char (++current_position);
1087 
1088     }
1089 
1090     // check if \0 is a valid terminator
1091 
1092    if (close == this_char) {
1093        if (current_level == 1L && from < current_position) {
1094            return current_position;
1095        }
1096    }
1097 
1098     return kNotFound;
1099   }
1100 
1101   /**
1102    * Starting at a 0-based index [argument 1],
1103    * find a span that terminates in one of the characters in [argument 2], while
1104    * respecting (), [], {}, "" and escapes
1105    * \n SLKP 20090805
1106    * @param start the index to start the search from
1107    * @param terminator The terminator to find
1108    * @return kNotFound is returned if the starting character could not be found
1109    or the expression did not terminate before the end of the string
1110    * @sa IsALiteralArgument()
1111    *  Revision history
1112       - SLKP 20170615   reviewed while porting from the v2.3 branch;
1113                         for the string; included support for singly quoted
1114                         literals; cleaned up the logic, and fixed broken logic for terminator > 1
1115                         char long
1116 
1117       - SLKP 20180921   converted into a template to make it possible to search
1118                         for multiple terminators
1119    */
1120 
FindTerminator(long start,TERMINATOR const & terminator)1121   template <typename TERMINATOR> long FindTerminator(long start, TERMINATOR const &terminator) const{
1122 
1123     long    current_position  = start;
1124 
1125 
1126     long   curly_depth = 0L,
1127     square_depth = 0L,
1128     paren_depth = 0L;
1129 
1130     bool   do_escape = false;
1131     char   quote_state = '\0';
1132 
1133     while (current_position < s_length) {
1134       char this_char = s_data[current_position];
1135       if (do_escape) {
1136         do_escape = false;
1137       } else {
1138         if ((this_char == '"' || this_char == '\'') && !do_escape) {
1139           if (quote_state == '\0') {
1140             quote_state = this_char;
1141           } else {
1142             if (this_char == quote_state) {
1143               quote_state = '\0';
1144             }
1145           }
1146         } else {
1147           if (quote_state == '\0') {
1148 
1149             switch (this_char) {
1150               case '(':
1151                 paren_depth ++;
1152                 current_position++;
1153                 continue;
1154               case ')':
1155                 if (paren_depth > 0L) {
1156                   paren_depth --;
1157                   current_position++;
1158                   continue;
1159                 }
1160                 break;
1161               case '[':
1162                 square_depth++;
1163                 current_position++;
1164                 continue;
1165               case ']':
1166                 if (square_depth > 0L) {
1167                   square_depth --;
1168                   current_position++;
1169                   continue;
1170                 }
1171                 break;
1172               case '{':
1173                 curly_depth++;
1174                 current_position++;
1175                 continue;
1176               case '}':
1177                 if (curly_depth > 0L) {
1178                   curly_depth --;
1179                   current_position++;
1180                   continue;
1181                 }
1182                 break;
1183             }
1184 
1185             if (curly_depth == 0L && square_depth == 0L && paren_depth == 0L) {
1186               if (BeginsWith (terminator, true, current_position)) {
1187                 return current_position;
1188               }
1189             }
1190           } else {
1191             if (this_char == '\\' && quote_state != '\0' && !do_escape) {
1192               do_escape = true;
1193             }
1194           }
1195         }
1196       }
1197       current_position++;
1198     }
1199 
1200     return kNotFound;
1201   }
1202 
1203   /**
1204    * Strips quotes from around the string if present (in place)
1205    * \n\n \b Example: \code _String("\"hyphy\"").StripQuotes()\endcode
1206    * @param open_char : the opening quote char
1207    * @param close_char : the closing quote char
1208    * @return : true if the string was enquoted and the quotes had been stripped
1209 
1210    *  Revision history
1211       - SLKP 20170616   reviewed while porting from the v3 branch
1212       - SLKP 20170702   return TRUE if successfully stripped quotes
1213 
1214    */
1215   bool StripQuotes(char open_char = '"', char close_char = '"');
1216 
1217     /**
1218      * Strips quotes from around the string if present (in place) for multiple delimiters at once
1219      * \n\n \b Example: \code _String("\"hyphy\"").StripQuotes("\"'","\"'")\endcode
1220      * @param open_char : the opening quote chars (paired with close_char)
1221      * @param close_char : the closing quote char (paired with open char)
1222      * @return : true if the string was enquoted and the quotes had been stripped
1223 
1224      *  Revision history
1225         - SLKP 20200508  initial
1226 
1227      */
1228   bool StripQuotes(char const *, char const *);
1229 
1230   /**
1231    * Checks if String is valid ident
1232    * \n A valid ident is any alphanumeric or '_'
1233    * \n\n \b Example: '$hyphy' is not legal.  'hy_phy' is legal.
1234    * @param options if fIDAllowCompound is set, treat 'x.y.z' as a valid
1235    identifier, if fIDAllowFirstNumeric is set, consider '2x' a valid identifier
1236    * @sa ConvertToAnIdent();
1237    *  Revision history
1238       - SLKP 20170616   reviewed while porting from the v3 branch
1239                         changed the argument to bitmask, added
1240    fIDAllowFirstNumeric
1241 
1242    */
1243   bool IsValidIdentifier(int options = fIDAllowCompound) const;
1244 
1245   /**
1246    * Converts a string to a valid ident
1247    * \n A valid ident is any alphanumeric or '_'
1248    * \n\n \b Example: \code _String("$hyphy").ConvertToAnIdent() \endcode
1249    * @param strict If strict, only alphabetic, no numerals.
1250    * @param options if fIDAllowCompound is set, treat 'x.y.z' as a valid
1251    identifier, if fIDAllowFirstNumeric is set, consider '2x' a valid identifier
1252    * @sa IsValidIdentifier();
1253    * @return the example would return "_hyphy"
1254 
1255    *  Revision history
1256    - SLKP 20170616   new implementation based on _IsValidIdentifierAux
1257                      changed the argument to bitmask, added fIDAllowFirstNumeric
1258                      changed from in-place modification to returning a modified
1259    string this function actually respects fIDAllowCompound now
1260    */
1261   const _String ConvertToAnIdent(int options = fIDAllowCompound) const;
1262 
1263   /**
1264    * If it is enclosed in quotes, then it is a literal argument
1265    * \n \n \b Example: "\"hyphy \"quote\"\"" is a literal argument;
1266    * @param strip_quotes if set to TRUE and the expression is a literal, trim
1267    the quotes
1268    *  Revision history
1269    - SLKP 20170616   reviewed while porting from the v3 branch
1270                      added support for single quotes in addition to double
1271    quotes
1272    */
1273 
1274   bool IsALiteralArgument(bool strip_quotes = false);
1275 
1276   /**
1277    * Examine the string argument contained in this object, decide what it is,
1278    and process accordingly
1279    * \n\n \bExample: \code 'hyphy'.ProcessVariableReferenceCases (object)
1280    \endcode is a direct reference to object hyphy
1281    * \n\n \bExample: \code '\"hy\"+\"phy\"'.ProcessVariableReferenceCases
1282    (object) \endcode is a direct reference to object hyphy
1283    * \n\n \bExample: \code '*hyphy'.ProcessVariableReferenceCases (object)
1284    \endcode is a reference to the object whose name is stored in the string
1285    variable hyphy
1286    * \n\n \bExample: \code '**hyphy'.ProcessVariableReferenceCases (object)
1287    \endcode is a reference to the object whose name is stored in the string
1288    variable hyphy in the global context
1289    * @param referenced_object will store the handled variable ID
1290    * @param context is the namespace of the referenced object; could be nil
1291    * @return a hy_reference_type value: kStringInvalidReference, kStringDirectReference,
1292    or the value denoting a local or a global dereference
1293    * @see IsValidIdentifier()
1294    - SLKP 20170616   reviewed while porting from the v2.3 branch
1295    */
1296 
1297   hy_reference_type
1298   ProcessVariableReferenceCases(_String &referenced_object,
1299                                 _String const *context = nil) const;
1300 
1301   /*
1302   ==============================================================
1303   METHODS
1304   ==============================================================
1305   */
1306 
1307   /** a by-character iterator
1308 
1309 
1310    @param  cb : a void (char c, unsigned long index) callback argument
1311    @param  start_at : start the iteration at this position in the string
1312 
1313        - SLKP 20171008   introduced this function
1314 
1315    */
1316 
1317     template <typename CALLBACK> void Each (CALLBACK cb, unsigned long start_at = 0) const {
1318         for (unsigned long i = start_at; i<s_length; i++) {
1319             cb ( s_data[i], i );
1320         }
1321     }
1322 
1323 /** a by-character matching iterator
1324 
1325 
1326      @param  cb : a bool (char c, unsigned long index) predicate; the index of the first character for which it returns true is returned (kNotFound if there is none)
1327      @param  start_at : start the iteration at this position in the string
1328 
1329          - SLKP 20171008   introduced this function
1330 
1331      */
1332 
1333       template <typename CALLBACK> long Any (CALLBACK cb, unsigned long start_at = 0) const {
1334           for (unsigned long i = start_at; i<s_length; i++) {
1335               if (cb ( s_data[i], i )) return i;
1336           }
1337           return kNotFound;
1338       }
1339 
1340   /**
1341   * Compute the Adler-32 checksum of a string
1342   * \n\n \b Example: \code long checksum = _String ("Wikipedia").Adler32(); \endcode
1343   * \n Implementation shamelessly lifted from
1344   http://en.wikipedia.org/wiki/Adler-32
1345   * @return the Adler32 checksum; 300286872 in the example
1346 
1347    *  Revision history
1348    - SLKP 20170614; reviewed while porting from the v3 branch
1349   */
1350   long Adler32(void) const;
1351 
1352   /**
1353    * Generate a random string of a given length
1354    * @param len (>0) The desired length of the string
1355    * @param alphabet Which alphabet the random characters come from; if nil,
1356    then this will be generated from 1-128 ASCII codes
1357    * @return the random string
1358    *  Revision history
1359     - SLKP 20170616; reviewed while porting from the v2.3 branch
1360    */
1361   static _String const Random(const unsigned long len,
1362                               const _String *alphabet = nil);
1363 
1364   /**
1365    * Computes Lempel-Ziv complexity of the string, i.e. roughly the size of the
1366    substring table
1367    * that would have been computed using the LZW algorithm
1368    * @param rec if provided, will store the indices of substrings mapped to
1369    unique codes
1370    * @return string complexity (less compressible == higher complexity)
1371    * \n Example: 1001111011000010 = 6 because subset the input could be reduced
1372    to ~6 codes
1373    * The contents of 'rec' would be 0,1,3,7,11,15, implying that the encoded
1374    substrings would be [0:0] = 1 [1:1] = 0 [2:3] = 01 [4:7] = 1110 [8:11] = 1100
1375      [12:15] = 0010
1376     *  Revision history
1377     - SLKP 20170616; reviewed while porting from the v2.3 branch, not sure
1378   */
1379   unsigned long LempelZivProductionHistory(_SimpleList *rec = nil) const;
1380 
1381   /*
1382    ==============================================================
1383    Regular Expression Methods
1384    ==============================================================
1385    */
1386   /**
1387    * Compile a regular expression represented by a _String object.
1388    * @param pattern the regular expression to compile
1389    * @param error_code will receive compilation error codes if any
1390    * @param case_sensitive controls whether or not the RE is case sensitive
1391    * @param throw_errors if set, errors will result in thrown exceptions (_String const type)
1392    * @return the resulting (opaque) RE datastructure, or NULL if
1393              compilation failed
1394 
1395    * @sa FlushRegExp
1396    * @sa GetRegExpError
1397    *  Revision history
1398    - SLKP 20170616; reviewed while porting from the v3 branch
1399                     maded static member of the class, changed argument 1 to
1400                     const &
1401    - SLKP 20180803; added the option for automatic error decoding
1402    */
1403   static regex_t *PrepRegExp(_String const &pattern, int &error_code,
1404                              bool case_sensitive, bool throw_errors = false);
1405 
1406   /**
1407    * Free a reg_exp datastructure previously returned by PrepRegExp
1408    * @param re the (opaque) data structure for the regular expression
1409    *  Revision history
1410    * @sa PrepRegExp
1411    * @sa GetRegExpError
1412    - SLKP 20170616; reviewed while porting from the v3 branch
1413                     maded static member of the class
1414    */
1415   static void FlushRegExp(regex_t *re);
1416 
1417   /**
1418    * Convert internal regexp code into a string message
1419    * @param code error code
1420    * @return the string with the decoded error message
1421    * @sa PrepRegExp
1422    * @sa FlushRegExp
1423    *  Revision history
1424     - SLKP 20170616; reviewed while porting from the v3 branch
1425                      maded static member of the class
1426     */
1427   static const _String GetRegExpError(int code);
1428 
1429   /**
1430    * Search this string for the first match to regular expression and
1431    subexpressions return a list of hits (possibly empty) as pairs of ranges; for
1432    example "hyphy".RegExpMatch("([^y]+).") -> 0,1,0,0, meaning that the entire
1433    expression matches to [0:1] and the first subexpression matches to [0:0]
1434    * @param re the regular expression previously compiled by PrepRegExp
1435    * @param start start matching the string at this position
1436    * @return the coordinates of matches for the entire expression (first pair),
1437    and all subexpressions (left to right); empty if no match
1438 
1439    *  Revision history
1440           - SLKP 20170616; reviewed while porting from the v3 branch
1441                            return by value vs writing to argument
1442           - SLKP 20170623; added the option to search from a given start
1443    position
1444 
1445    * @sa RegExpAllMatches()
1446    */
1447 
1448   _SimpleList const RegExpMatch(regex_t const *re,
1449                                 unsigned long start = 0) const;
1450 
1451   /**
1452    * Search this string for ALL matches to a regular expression (ignoring
1453    subexpressions) and return a list of hits (possibly empty) as pairs of ranges;
1454    for example "hyphy".RegExpAllMatches("([^y]+).") -> 0,1,2,4, meaning that [0:1]
1455    (hy) and [2:4] (phy) match the pattern
1456    * @param re the regular expression previously compiled by PrepRegExp
1457    * @return the coordinates of all matches for the entire expression left to
1458    right; empty if no match
1459 
1460    *  Revision history
1461    - SLKP 20170616; reviewed while porting from the v3 branch
1462         return by value vs writing to argument
1463 
1464    * @sa RegExpMatch
1465     */
1466 
1467   _SimpleList const RegExpAllMatches(regex_t const *re) const;
1468 
1469   /**
1470      Convenience wrappers for the RegExpMatch and RegExpAllMatches overloads that take
1471     regex_t arguments; here the regular expression is compiled from `pattern` and
1472     disposed of internally
1473      @param pattern the regular expression to match
1474      @param case_sensitive whether to compile the RE as case sensitive or not
1475      @param handle_errors if set, call application wide error handlers on
1476     errors, otherwise ignore errors and treat them as a missing match
1477 
1478     * @sa RegExpMatch
1479     * @sa RegExpAllMatches
1480     * Revision history
1481        - SLKP 20170616;  initial implementation
1482     *
1483   */
1484   _SimpleList const RegExpMatch(_String const &pattern, bool case_sensitive,
1485                                 bool handle_errors) const;
1486   _SimpleList const RegExpAllMatches(_String const &pattern,
1487                                      bool case_sensitive,
1488                                      bool handle_errors) const;
1489     /** given coordinates start and end, converts them to valid string indices
1490      if called on an empty string, returns 0 and does not change start and end
1491      if start < 0 it is reset to 0
1492      if end < 0 or >= string length it is reset to (string length) - 1
1493 
1494      @param start: start of the range (0-based)
1495      @param end  : end of the range
1496      @return     : the length of the range
1497 
1498      * Revision history
1499      - SLKP 20170517 porting from v3 branch
1500      */
1501     long NormalizeRange(long &start, long &end) const;
1502 
1503 
1504 private:
1505   /** Find the length of the maximum prefix that forms a valid ID
1506 
1507    @param allow_compounds : treat '.' as a valid identifier character (e.g.
1508    x.y.z)
1509    @param allow_first_numeric : allow idents that start with a digit (e.g. 2x)
1510    @param wildcard : treat this character as a valid identifier character (e.g.
1511    this1.?.x)
1512 
1513    @return the 0-based index of the end of the valid ID prefix (-1 if the prefix
1514    is empty)
1515 
1516    * Revision history
1517    - SLKP 20170616 reviewed while porting from the v3 branch
1518    */
1519 
1520   long _IsValidIdentifierAux(bool allow_compounds, bool allow_first_numeric,
1521                              char wildcard = '\0') const;
1522 
1523   /** Find the first character in a range that meets a particular condition
1524 
1525      @param start : start of the range to search (0-based)
1526      @param end : end of the range to search (0-based)
1527      @param direction : forwards or backwards search
1528      @param comparison_function : a function that takes a single argument (char) and
1529     returns true if it "passes"
1530 
1531     * Revision history
1532     - SLKP 20170614 factored out common functions for conditional index finding
1533   */
1534 
1535   template <class CF>
_FindFirstIndexCondtion(long start,long end,hy_string_search_direction direction,CF comparison_function)1536   long _FindFirstIndexCondtion(long start, long end,
1537                                hy_string_search_direction direction,
1538                                CF comparison_function) const {
1539     long requested_range = NormalizeRange(start, end);
1540 
1541     if (requested_range > 0L) {
1542       if (direction == kStringDirectionForward) {
1543         for (; start <= end; start++) {
1544           if (comparison_function(s_data[start])) {
1545             return start;
1546           }
1547         }
1548       } else {
1549         for (; end >= start; end--) {
1550           if (comparison_function(s_data[end])) {
1551             return end;
1552           }
1553         }
1554       }
1555     }
1556 
1557     return kNotFound;
1558   }
1559 
1560 
1561   /** this is a utility function which allocates length+1 chars for s_data,
1562   copies the data from source_string, and sets the terminating 0
1563 
1564   * Revision history
1565   - SLKP 20170517 factoring repeated functionality
1566 
1567   */
1568   inline void AllocateAndCopyString(const char *source_string,
1569                                     unsigned long length);
1570 
1571 
1572   /** Factored out core of RegExpMatch and RegExpAllMatches
1573    * Revision history
1574     - SLKP 20170616; initial implementation
1575    */
1576   const _SimpleList _IntRegExpMatch(const _String &pattern, bool case_sensitive,
1577                                     bool handle_errors, bool match_all) const;
1578 };
1579 
1580 // _______________________________________________________________________
1581 
1582 void SetStatusBarValue(long, hyFloat, hyFloat);
1583 void SetStatusLine(_String);
1584 void SetStatusLine(_String, _String, _String, long l);
1585 void SetStatusLine(_String, _String, _String);
1586 void SetStatusLine(_String, _String, _String, long, char);
1587 
1588 void SetStatusLineUser(_String const);
1589 
1590 void StringToConsole(_String const &, void *extra = nil);
1591 void BufferToConsole(const char *, void *extra = nil);
1592 void NLToConsole(void *extra = nil);
1593 void ObjectToConsole(BaseRef, void *extra = nil);
1594 
1595 _String *StringFromConsole(void);
1596 
1597 #endif
1598