1/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
2   file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
3// Original Copyright notice:
4// Copyright (C) 1991 Texas Instruments Incorporated.
5//
6// Permission is granted to any individual or institution to use, copy, modify,
7// and distribute this software, provided that this complete copyright and
8// permission notice is maintained, intact, in all copies and supporting
9// documentation.
10//
11// Texas Instruments Incorporated provides this software "as is" without
12// express or implied warranty.
13//
14// Created: MNF 06/13/89  Initial Design and Implementation
15// Updated: LGO 08/09/89  Inherit from Generic
16// Updated: MBN 09/07/89  Added conditional exception handling
17// Updated: MBN 12/15/89  Sprinkled "const" qualifiers all over the place!
18// Updated: DLS 03/22/91  New lite version
19//
20
21#ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
22#define @KWSYS_NAMESPACE@_RegularExpression_hxx
23
24#include <@KWSYS_NAMESPACE@/Configure.h>
25#include <@KWSYS_NAMESPACE@/Configure.hxx>
26
27#include <string>
28
29/* Disable useless Borland warnings.  KWSys tries not to force things
30   on its includers, but there is no choice here.  */
31#if defined(__BORLANDC__)
32#pragma warn - 8027 /* function not inlined.  */
33#endif
34
35namespace @KWSYS_NAMESPACE@ {
36
37// Forward declaration
38class RegularExpression;
39
40/** \class RegularExpressionMatch
41 * \brief Stores the pattern matches of a RegularExpression
42 */
43class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch
44{
45public:
46  RegularExpressionMatch();
47
48  bool isValid() const;
49  void clear();
50
51  std::string::size_type start() const;
52  std::string::size_type end() const;
53  std::string::size_type start(int n) const;
54  std::string::size_type end(int n) const;
55  std::string match(int n) const;
56
57  enum
58  {
59    NSUBEXP = 10
60  };
61
62private:
63  friend class RegularExpression;
64  const char* startp[NSUBEXP];
65  const char* endp[NSUBEXP];
66  const char* searchstring;
67};
68
69/**
70 * \brief Creates an invalid match object
71 */
72inline RegularExpressionMatch::RegularExpressionMatch()
73{
74  startp[0] = 0;
75  endp[0] = 0;
76  searchstring = 0;
77}
78
79/**
80 * \brief Returns true if the match pointers are valid
81 */
82inline bool RegularExpressionMatch::isValid() const
83{
84  return (this->startp[0] != 0);
85}
86
87/**
88 * \brief Resets to the (invalid) construction state.
89 */
90inline void RegularExpressionMatch::clear()
91{
92  startp[0] = 0;
93  endp[0] = 0;
94  searchstring = 0;
95}
96
97/**
98 * \brief Returns the start index of the full match.
99 */
100inline std::string::size_type RegularExpressionMatch::start() const
101{
102  return static_cast<std::string::size_type>(this->startp[0] - searchstring);
103}
104
105/**
106 * \brief Returns the end index of the full match.
107 */
108inline std::string::size_type RegularExpressionMatch::end() const
109{
110  return static_cast<std::string::size_type>(this->endp[0] - searchstring);
111}
112
113/**
114 * \brief Returns the start index of nth submatch.
115 *        start(0) is the start of the full match.
116 */
117inline std::string::size_type RegularExpressionMatch::start(int n) const
118{
119  return static_cast<std::string::size_type>(this->startp[n] -
120                                             this->searchstring);
121}
122
123/**
124 * \brief Returns the end index of nth submatch.
125 *        end(0) is the end of the full match.
126 */
127inline std::string::size_type RegularExpressionMatch::end(int n) const
128{
129  return static_cast<std::string::size_type>(this->endp[n] -
130                                             this->searchstring);
131}
132
133/**
134 * \brief Returns the nth submatch as a string.
135 */
136inline std::string RegularExpressionMatch::match(int n) const
137{
138  if (this->startp[n] == 0) {
139    return std::string();
140  } else {
141    return std::string(this->startp[n], static_cast<std::string::size_type>(
142                                          this->endp[n] - this->startp[n]));
143  }
144}
145
146/** \class RegularExpression
147 * \brief Implements pattern matching with regular expressions.
148 *
149 * This is the header file for the regular expression class.  An object of
150 * this class contains a regular expression, in a special "compiled" format.
151 * This compiled format consists of several slots all kept as the objects
152 * private data.  The RegularExpression class provides a convenient way to
153 * represent regular expressions.  It makes it easy to search for the same
154 * regular expression in many different strings without having to compile a
155 * string to regular expression format more than necessary.
156 *
157 * This class implements pattern matching via regular expressions.
158 * A regular expression allows a programmer to specify  complex
159 * patterns  that  can  be searched for and matched against the
160 * character string of a string object. In its simplest form, a
161 * regular  expression  is  a  sequence  of  characters used to
162 * search for exact character matches. However, many times  the
163 * exact  sequence to be found is not known, or only a match at
 * the beginning or end of a string is desired. The RegularExpression
 * class implements regular expression pattern
166 * matching as is found and implemented in many  UNIX  commands
167 * and utilities.
168 *
169 * Example: The perl code
170 *
171 *    $filename =~ m"([a-z]+)\.cc";
172 *    print $1;
173 *
174 * Is written as follows in C++
175 *
176 *    RegularExpression re("([a-z]+)\\.cc");
177 *    re.find(filename);
178 *    cerr << re.match(1);
179 *
180 *
181 * The regular expression class provides a convenient mechanism
182 * for  specifying  and  manipulating  regular expressions. The
 * regular expression object allows specification of such patterns
 * by using the following regular expression metacharacters:
186 *
187 *  ^        Matches at beginning of a line
188 *
189 *  $        Matches at end of a line
190 *
191 * .         Matches any single character
192 *
193 * [ ]       Matches any character(s) inside the brackets
194 *
195 * [^ ]      Matches any character(s) not inside the brackets
196 *
197 *  -        Matches any character in range on either side of a dash
198 *
199 *  *        Matches preceding pattern zero or more times
200 *
201 *  +        Matches preceding pattern one or more times
202 *
203 *  ?        Matches preceding pattern zero or once only
204 *
205 * ()        Saves a matched expression and uses it in a later match
206 *
 * Note that more than one of these metacharacters can be used
 * in a single regular expression in order to create complex
 * search patterns. For example, the pattern [^ab1-9] matches
 * any single character that is not an "a", a "b", or a digit
 * one through nine.
213 *
 * There are four constructors for RegularExpression.  One just creates an
 * empty RegularExpression object.  Two create a RegularExpression object
 * and initialize it with a regular expression that is given in the form of
 * a char* or a std::string.  The last takes a reference to a
 * RegularExpression object as an argument and creates an object initialized
 * with the information from the given RegularExpression object.
220 *
221 * The  find  member function  finds   the  first  occurrence   of  the regular
222 * expression of that object in the string given to find as an argument.  Find
223 * returns a boolean, and  if true,  mutates  the private  data appropriately.
224 * Find sets pointers to the beginning and end of  the thing last  found, they
225 * are pointers into the actual string  that was searched.   The start and end
226 * member functions return indices  into the searched string that  correspond
227 * to the beginning   and  end pointers  respectively.   The    compile member
228 * function takes a char* and puts the  compiled version of the char* argument
 * into the object's private data fields.  The == and != operators only check
 * to see if the compiled regular expression is the same, and the deep_equal
 * function also checks to see if the start and end pointers are the same.
 * The is_valid function returns false if program is set to NULL (i.e. there
 * is no valid compiled expression).  The set_invalid function sets the
 * program to NULL (Warning: this deletes the compiled expression).  The
 * following examples may help clarify regular expression usage:
236 *
237 *   *  The regular expression  "^hello" matches  a "hello"  only at  the
238 *      beginning of a  line.  It would match "hello  there" but not "hi,
239 *      hello there".
240 *
241 *   *  The regular expression "long$" matches a  "long"  only at the end
242 *      of a line. It would match "so long\0", but not "long ago".
243 *
244 *   *  The regular expression "t..t..g"  will match anything that  has a
245 *      "t" then any two characters, another "t", any  two characters and
246 *      then a "g".   It will match  "testing", or "test again" but would
247 *      not match "toasting"
248 *
249 *   *  The regular  expression "[1-9ab]" matches any  number one through
250 *      nine, and the characters  "a" and  "b".  It would match "hello 1"
251 *      or "begin", but would not match "no-match".
252 *
253 *   *  The  regular expression "[^1-9ab]"  matches any character that is
254 *      not a number one  through nine, or  an "a" or "b".   It would NOT
255 *      match "hello 1" or "begin", but would match "no-match".
256 *
257 *   *  The regular expression "br* " matches  something that begins with
258 *      a "b", is followed by zero or more "r"s, and ends in a space.  It
259 *      would match "brrrrr ", and "b ", but would not match "brrh ".
260 *
261 *   *  The regular expression "br+ " matches something  that begins with
262 *      a "b", is followed by one or more "r"s, and ends in  a space.  It
263 *      would match "brrrrr ",  and  "br ", but would not  match "b  " or
264 *      "brrh ".
265 *
266 *   *  The regular expression "br? " matches  something that begins with
267 *      a "b", is followed by zero or one "r"s, and ends in  a space.  It
268 *      would  match  "br ", and "b  ", but would not match  "brrrr "  or
269 *      "brrh ".
270 *
271 *   *  The regular expression "(..p)b" matches  something ending with pb
272 *      and beginning with whatever the two characters before the first p
 *      encountered in the line were.  It would find  "repb" in "rep drepa
274 *      qrepb".  The regular expression "(..p)a"  would find "repa qrepb"
275 *      in "rep drepa qrepb"
276 *
277 *   *  The regular expression "d(..p)" matches something ending  with p,
278 *      beginning with d, and having  two characters  in between that are
 *      the same as the two characters before  the first p  encountered in
280 *      the line.  It would match "drepa qrepb" in "rep drepa qrepb".
281 *
282 * All methods of RegularExpression can be called simultaneously from
283 * different threads but only if each invocation uses an own instance of
284 * RegularExpression.
285 */
286class @KWSYS_NAMESPACE@_EXPORT RegularExpression
287{
288public:
289  /**
290   * Instantiate RegularExpression with program=NULL.
291   */
292  inline RegularExpression();
293
294  /**
295   * Instantiate RegularExpression with compiled char*.
296   */
297  inline RegularExpression(char const*);
298
299  /**
300   * Instantiate RegularExpression as a copy of another regular expression.
301   */
302  RegularExpression(RegularExpression const&);
303
304  /**
305   * Instantiate RegularExpression with compiled string.
306   */
307  inline RegularExpression(std::string const&);
308
309  /**
310   * Destructor.
311   */
312  inline ~RegularExpression();
313
314  /**
315   * Compile a regular expression into internal code
316   * for later pattern matching.
317   */
318  bool compile(char const*);
319
320  /**
321   * Compile a regular expression into internal code
322   * for later pattern matching.
323   */
324  inline bool compile(std::string const&);
325
326  /**
327   * Matches the regular expression to the given string.
328   * Returns true if found, and sets start and end indexes
329   * in the RegularExpressionMatch instance accordingly.
330   *
331   * This method is thread safe when called with different
332   * RegularExpressionMatch instances.
333   */
334  bool find(char const*, RegularExpressionMatch&) const;
335
336  /**
337   * Matches the regular expression to the given string.
338   * Returns true if found, and sets start and end indexes accordingly.
339   */
340  inline bool find(char const*);
341
342  /**
343   * Matches the regular expression to the given std string.
344   * Returns true if found, and sets start and end indexes accordingly.
345   */
346  inline bool find(std::string const&);
347
348  /**
349   * Match indices
350   */
351  inline RegularExpressionMatch const& regMatch() const;
352  inline std::string::size_type start() const;
353  inline std::string::size_type end() const;
354  inline std::string::size_type start(int n) const;
355  inline std::string::size_type end(int n) const;
356
357  /**
358   * Match strings
359   */
360  inline std::string match(int n) const;
361
362  /**
363   * Copy the given regular expression.
364   */
365  RegularExpression& operator=(const RegularExpression& rxp);
366
367  /**
368   * Returns true if two regular expressions have the same
369   * compiled program for pattern matching.
370   */
371  bool operator==(RegularExpression const&) const;
372
373  /**
374   * Returns true if two regular expressions have different
375   * compiled program for pattern matching.
376   */
377  inline bool operator!=(RegularExpression const&) const;
378
379  /**
380   * Returns true if have the same compiled regular expressions
381   * and the same start and end pointers.
382   */
383  bool deep_equal(RegularExpression const&) const;
384
385  /**
386   * True if the compiled regexp is valid.
387   */
388  inline bool is_valid() const;
389
390  /**
391   * Marks the regular expression as invalid.
392   */
393  inline void set_invalid();
394
395private:
396  RegularExpressionMatch regmatch;
397  char regstart;                  // Internal use only
398  char reganch;                   // Internal use only
399  const char* regmust;            // Internal use only
400  std::string::size_type regmlen; // Internal use only
401  char* program;
402  int progsize;
403};
404
405/**
406 * Create an empty regular expression.
407 */
408inline RegularExpression::RegularExpression()
409{
410  this->program = 0;
411}
412
413/**
414 * Creates a regular expression from string s, and
415 * compiles s.
416 */
417inline RegularExpression::RegularExpression(const char* s)
418{
419  this->program = 0;
420  if (s) {
421    this->compile(s);
422  }
423}
424
425/**
426 * Creates a regular expression from string s, and
427 * compiles s.
428 */
429inline RegularExpression::RegularExpression(const std::string& s)
430{
431  this->program = 0;
432  this->compile(s);
433}
434
435/**
436 * Destroys and frees space allocated for the regular expression.
437 */
438inline RegularExpression::~RegularExpression()
439{
440  //#ifndef _WIN32
441  delete[] this->program;
442  //#endif
443}
444
445/**
446 * Compile a regular expression into internal code
447 * for later pattern matching.
448 */
449inline bool RegularExpression::compile(std::string const& s)
450{
451  return this->compile(s.c_str());
452}
453
454/**
455 * Matches the regular expression to the given std string.
456 * Returns true if found, and sets start and end indexes accordingly.
457 */
458inline bool RegularExpression::find(const char* s)
459{
460  return this->find(s, this->regmatch);
461}
462
463/**
464 * Matches the regular expression to the given std string.
465 * Returns true if found, and sets start and end indexes accordingly.
466 */
467inline bool RegularExpression::find(std::string const& s)
468{
469  return this->find(s.c_str());
470}
471
472/**
473 * Returns the internal match object
474 */
475inline RegularExpressionMatch const& RegularExpression::regMatch() const
476{
477  return this->regmatch;
478}
479
480/**
481 * Returns the start index of the full match.
482 */
483inline std::string::size_type RegularExpression::start() const
484{
485  return regmatch.start();
486}
487
488/**
489 * Returns the end index of the full match.
490 */
491inline std::string::size_type RegularExpression::end() const
492{
493  return regmatch.end();
494}
495
496/**
497 * Return start index of nth submatch. start(0) is the start of the full match.
498 */
499inline std::string::size_type RegularExpression::start(int n) const
500{
501  return regmatch.start(n);
502}
503
504/**
505 * Return end index of nth submatch. end(0) is the end of the full match.
506 */
507inline std::string::size_type RegularExpression::end(int n) const
508{
509  return regmatch.end(n);
510}
511
512/**
513 * Return nth submatch as a string.
514 */
515inline std::string RegularExpression::match(int n) const
516{
517  return regmatch.match(n);
518}
519
520/**
521 * Returns true if two regular expressions have different
522 * compiled program for pattern matching.
523 */
524inline bool RegularExpression::operator!=(const RegularExpression& r) const
525{
526  return (!(*this == r));
527}
528
529/**
530 * Returns true if a valid regular expression is compiled
531 * and ready for pattern matching.
532 */
533inline bool RegularExpression::is_valid() const
534{
535  return (this->program != 0);
536}
537
538inline void RegularExpression::set_invalid()
539{
540  //#ifndef _WIN32
541  delete[] this->program;
542  //#endif
543  this->program = 0;
544}
545
546} // namespace @KWSYS_NAMESPACE@
547
548#endif
549