1/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying 2 file Copyright.txt or https://cmake.org/licensing#kwsys for details. */ 3// Original Copyright notice: 4// Copyright (C) 1991 Texas Instruments Incorporated. 5// 6// Permission is granted to any individual or institution to use, copy, modify, 7// and distribute this software, provided that this complete copyright and 8// permission notice is maintained, intact, in all copies and supporting 9// documentation. 10// 11// Texas Instruments Incorporated provides this software "as is" without 12// express or implied warranty. 13// 14// Created: MNF 06/13/89 Initial Design and Implementation 15// Updated: LGO 08/09/89 Inherit from Generic 16// Updated: MBN 09/07/89 Added conditional exception handling 17// Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place! 18// Updated: DLS 03/22/91 New lite version 19// 20 21#ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx 22#define @KWSYS_NAMESPACE@_RegularExpression_hxx 23 24#include <@KWSYS_NAMESPACE@/Configure.h> 25#include <@KWSYS_NAMESPACE@/Configure.hxx> 26 27#include <string> 28 29/* Disable useless Borland warnings. KWSys tries not to force things 30 on its includers, but there is no choice here. */ 31#if defined(__BORLANDC__) 32#pragma warn - 8027 /* function not inlined. */ 33#endif 34 35namespace @KWSYS_NAMESPACE@ { 36 37// Forward declaration 38class RegularExpression; 39 40/** \class RegularExpressionMatch 41 * \brief Stores the pattern matches of a RegularExpression 42 */ 43class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch 44{ 45public: 46 RegularExpressionMatch(); 47 48 bool isValid() const; 49 void clear(); 50 51 std::string::size_type start() const; 52 std::string::size_type end() const; 53 std::string::size_type start(int n) const; 54 std::string::size_type end(int n) const; 55 std::string match(int n) const; 56 57 enum 58 { 59 NSUBEXP = 10 60 }; 61 62private: 63 friend class RegularExpression; 64 const char* startp[NSUBEXP]; 65 const char* endp[NSUBEXP]; 66 const char* searchstring; 67}; 68 69/** 70 * \brief Creates an invalid match object 71 */ 72inline RegularExpressionMatch::RegularExpressionMatch() 73{ 74 startp[0] = 0; 75 endp[0] = 0; 76 searchstring = 0; 77} 78 79/** 80 * \brief Returns true if the match pointers are valid 81 */ 82inline bool RegularExpressionMatch::isValid() const 83{ 84 return (this->startp[0] != 0); 85} 86 87/** 88 * \brief Resets to the (invalid) construction state. 89 */ 90inline void RegularExpressionMatch::clear() 91{ 92 startp[0] = 0; 93 endp[0] = 0; 94 searchstring = 0; 95} 96 97/** 98 * \brief Returns the start index of the full match. 99 */ 100inline std::string::size_type RegularExpressionMatch::start() const 101{ 102 return static_cast<std::string::size_type>(this->startp[0] - searchstring); 103} 104 105/** 106 * \brief Returns the end index of the full match. 107 */ 108inline std::string::size_type RegularExpressionMatch::end() const 109{ 110 return static_cast<std::string::size_type>(this->endp[0] - searchstring); 111} 112 113/** 114 * \brief Returns the start index of nth submatch. 115 * start(0) is the start of the full match. 116 */ 117inline std::string::size_type RegularExpressionMatch::start(int n) const 118{ 119 return static_cast<std::string::size_type>(this->startp[n] - 120 this->searchstring); 121} 122 123/** 124 * \brief Returns the end index of nth submatch. 125 * end(0) is the end of the full match. 126 */ 127inline std::string::size_type RegularExpressionMatch::end(int n) const 128{ 129 return static_cast<std::string::size_type>(this->endp[n] - 130 this->searchstring); 131} 132 133/** 134 * \brief Returns the nth submatch as a string. 135 */ 136inline std::string RegularExpressionMatch::match(int n) const 137{ 138 if (this->startp[n] == 0) { 139 return std::string(); 140 } else { 141 return std::string(this->startp[n], static_cast<std::string::size_type>( 142 this->endp[n] - this->startp[n])); 143 } 144} 145 146/** \class RegularExpression 147 * \brief Implements pattern matching with regular expressions. 148 * 149 * This is the header file for the regular expression class. An object of 150 * this class contains a regular expression, in a special "compiled" format. 151 * This compiled format consists of several slots all kept as the objects 152 * private data. The RegularExpression class provides a convenient way to 153 * represent regular expressions. It makes it easy to search for the same 154 * regular expression in many different strings without having to compile a 155 * string to regular expression format more than necessary. 156 * 157 * This class implements pattern matching via regular expressions. 158 * A regular expression allows a programmer to specify complex 159 * patterns that can be searched for and matched against the 160 * character string of a string object. In its simplest form, a 161 * regular expression is a sequence of characters used to 162 * search for exact character matches. However, many times the 163 * exact sequence to be found is not known, or only a match at 164 * the beginning or end of a string is desired. The RegularExpression regu- 165 * lar expression class implements regular expression pattern 166 * matching as is found and implemented in many UNIX commands 167 * and utilities. 168 * 169 * Example: The perl code 170 * 171 * $filename =~ m"([a-z]+)\.cc"; 172 * print $1; 173 * 174 * Is written as follows in C++ 175 * 176 * RegularExpression re("([a-z]+)\\.cc"); 177 * re.find(filename); 178 * cerr << re.match(1); 179 * 180 * 181 * The regular expression class provides a convenient mechanism 182 * for specifying and manipulating regular expressions. The 183 * regular expression object allows specification of such pat- 184 * terns by using the following regular expression metacharac- 185 * ters: 186 * 187 * ^ Matches at beginning of a line 188 * 189 * $ Matches at end of a line 190 * 191 * . Matches any single character 192 * 193 * [ ] Matches any character(s) inside the brackets 194 * 195 * [^ ] Matches any character(s) not inside the brackets 196 * 197 * - Matches any character in range on either side of a dash 198 * 199 * * Matches preceding pattern zero or more times 200 * 201 * + Matches preceding pattern one or more times 202 * 203 * ? Matches preceding pattern zero or once only 204 * 205 * () Saves a matched expression and uses it in a later match 206 * 207 * Note that more than one of these metacharacters can be used 208 * in a single regular expression in order to create complex 209 * search patterns. For example, the pattern [^ab1-9] says to 210 * match any character sequence that does not begin with the 211 * characters "ab" followed by numbers in the series one 212 * through nine. 213 * 214 * There are three constructors for RegularExpression. One just creates an 215 * empty RegularExpression object. Another creates a RegularExpression 216 * object and initializes it with a regular expression that is given in the 217 * form of a char*. The third takes a reference to a RegularExpression 218 * object as an argument and creates an object initialized with the 219 * information from the given RegularExpression object. 220 * 221 * The find member function finds the first occurrence of the regular 222 * expression of that object in the string given to find as an argument. Find 223 * returns a boolean, and if true, mutates the private data appropriately. 224 * Find sets pointers to the beginning and end of the thing last found, they 225 * are pointers into the actual string that was searched. The start and end 226 * member functions return indices into the searched string that correspond 227 * to the beginning and end pointers respectively. The compile member 228 * function takes a char* and puts the compiled version of the char* argument 229 * into the object's private data fields. The == and != operators only check 230 * the to see if the compiled regular expression is the same, and the 231 * deep_equal functions also checks to see if the start and end pointers are 232 * the same. The is_valid function returns false if program is set to NULL, 233 * (i.e. there is no valid compiled exression). The set_invalid function sets 234 * the program to NULL (Warning: this deletes the compiled expression). The 235 * following examples may help clarify regular expression usage: 236 * 237 * * The regular expression "^hello" matches a "hello" only at the 238 * beginning of a line. It would match "hello there" but not "hi, 239 * hello there". 240 * 241 * * The regular expression "long$" matches a "long" only at the end 242 * of a line. It would match "so long\0", but not "long ago". 243 * 244 * * The regular expression "t..t..g" will match anything that has a 245 * "t" then any two characters, another "t", any two characters and 246 * then a "g". It will match "testing", or "test again" but would 247 * not match "toasting" 248 * 249 * * The regular expression "[1-9ab]" matches any number one through 250 * nine, and the characters "a" and "b". It would match "hello 1" 251 * or "begin", but would not match "no-match". 252 * 253 * * The regular expression "[^1-9ab]" matches any character that is 254 * not a number one through nine, or an "a" or "b". It would NOT 255 * match "hello 1" or "begin", but would match "no-match". 256 * 257 * * The regular expression "br* " matches something that begins with 258 * a "b", is followed by zero or more "r"s, and ends in a space. It 259 * would match "brrrrr ", and "b ", but would not match "brrh ". 260 * 261 * * The regular expression "br+ " matches something that begins with 262 * a "b", is followed by one or more "r"s, and ends in a space. It 263 * would match "brrrrr ", and "br ", but would not match "b " or 264 * "brrh ". 265 * 266 * * The regular expression "br? " matches something that begins with 267 * a "b", is followed by zero or one "r"s, and ends in a space. It 268 * would match "br ", and "b ", but would not match "brrrr " or 269 * "brrh ". 270 * 271 * * The regular expression "(..p)b" matches something ending with pb 272 * and beginning with whatever the two characters before the first p 273 * encounterd in the line were. It would find "repb" in "rep drepa 274 * qrepb". The regular expression "(..p)a" would find "repa qrepb" 275 * in "rep drepa qrepb" 276 * 277 * * The regular expression "d(..p)" matches something ending with p, 278 * beginning with d, and having two characters in between that are 279 * the same as the two characters before the first p encounterd in 280 * the line. It would match "drepa qrepb" in "rep drepa qrepb". 281 * 282 * All methods of RegularExpression can be called simultaneously from 283 * different threads but only if each invocation uses an own instance of 284 * RegularExpression. 285 */ 286class @KWSYS_NAMESPACE@_EXPORT RegularExpression 287{ 288public: 289 /** 290 * Instantiate RegularExpression with program=NULL. 291 */ 292 inline RegularExpression(); 293 294 /** 295 * Instantiate RegularExpression with compiled char*. 296 */ 297 inline RegularExpression(char const*); 298 299 /** 300 * Instantiate RegularExpression as a copy of another regular expression. 301 */ 302 RegularExpression(RegularExpression const&); 303 304 /** 305 * Instantiate RegularExpression with compiled string. 306 */ 307 inline RegularExpression(std::string const&); 308 309 /** 310 * Destructor. 311 */ 312 inline ~RegularExpression(); 313 314 /** 315 * Compile a regular expression into internal code 316 * for later pattern matching. 317 */ 318 bool compile(char const*); 319 320 /** 321 * Compile a regular expression into internal code 322 * for later pattern matching. 323 */ 324 inline bool compile(std::string const&); 325 326 /** 327 * Matches the regular expression to the given string. 328 * Returns true if found, and sets start and end indexes 329 * in the RegularExpressionMatch instance accordingly. 330 * 331 * This method is thread safe when called with different 332 * RegularExpressionMatch instances. 333 */ 334 bool find(char const*, RegularExpressionMatch&) const; 335 336 /** 337 * Matches the regular expression to the given string. 338 * Returns true if found, and sets start and end indexes accordingly. 339 */ 340 inline bool find(char const*); 341 342 /** 343 * Matches the regular expression to the given std string. 344 * Returns true if found, and sets start and end indexes accordingly. 345 */ 346 inline bool find(std::string const&); 347 348 /** 349 * Match indices 350 */ 351 inline RegularExpressionMatch const& regMatch() const; 352 inline std::string::size_type start() const; 353 inline std::string::size_type end() const; 354 inline std::string::size_type start(int n) const; 355 inline std::string::size_type end(int n) const; 356 357 /** 358 * Match strings 359 */ 360 inline std::string match(int n) const; 361 362 /** 363 * Copy the given regular expression. 364 */ 365 RegularExpression& operator=(const RegularExpression& rxp); 366 367 /** 368 * Returns true if two regular expressions have the same 369 * compiled program for pattern matching. 370 */ 371 bool operator==(RegularExpression const&) const; 372 373 /** 374 * Returns true if two regular expressions have different 375 * compiled program for pattern matching. 376 */ 377 inline bool operator!=(RegularExpression const&) const; 378 379 /** 380 * Returns true if have the same compiled regular expressions 381 * and the same start and end pointers. 382 */ 383 bool deep_equal(RegularExpression const&) const; 384 385 /** 386 * True if the compiled regexp is valid. 387 */ 388 inline bool is_valid() const; 389 390 /** 391 * Marks the regular expression as invalid. 392 */ 393 inline void set_invalid(); 394 395private: 396 RegularExpressionMatch regmatch; 397 char regstart; // Internal use only 398 char reganch; // Internal use only 399 const char* regmust; // Internal use only 400 std::string::size_type regmlen; // Internal use only 401 char* program; 402 int progsize; 403}; 404 405/** 406 * Create an empty regular expression. 407 */ 408inline RegularExpression::RegularExpression() 409{ 410 this->program = 0; 411} 412 413/** 414 * Creates a regular expression from string s, and 415 * compiles s. 416 */ 417inline RegularExpression::RegularExpression(const char* s) 418{ 419 this->program = 0; 420 if (s) { 421 this->compile(s); 422 } 423} 424 425/** 426 * Creates a regular expression from string s, and 427 * compiles s. 428 */ 429inline RegularExpression::RegularExpression(const std::string& s) 430{ 431 this->program = 0; 432 this->compile(s); 433} 434 435/** 436 * Destroys and frees space allocated for the regular expression. 437 */ 438inline RegularExpression::~RegularExpression() 439{ 440 //#ifndef _WIN32 441 delete[] this->program; 442 //#endif 443} 444 445/** 446 * Compile a regular expression into internal code 447 * for later pattern matching. 448 */ 449inline bool RegularExpression::compile(std::string const& s) 450{ 451 return this->compile(s.c_str()); 452} 453 454/** 455 * Matches the regular expression to the given std string. 456 * Returns true if found, and sets start and end indexes accordingly. 457 */ 458inline bool RegularExpression::find(const char* s) 459{ 460 return this->find(s, this->regmatch); 461} 462 463/** 464 * Matches the regular expression to the given std string. 465 * Returns true if found, and sets start and end indexes accordingly. 466 */ 467inline bool RegularExpression::find(std::string const& s) 468{ 469 return this->find(s.c_str()); 470} 471 472/** 473 * Returns the internal match object 474 */ 475inline RegularExpressionMatch const& RegularExpression::regMatch() const 476{ 477 return this->regmatch; 478} 479 480/** 481 * Returns the start index of the full match. 482 */ 483inline std::string::size_type RegularExpression::start() const 484{ 485 return regmatch.start(); 486} 487 488/** 489 * Returns the end index of the full match. 490 */ 491inline std::string::size_type RegularExpression::end() const 492{ 493 return regmatch.end(); 494} 495 496/** 497 * Return start index of nth submatch. start(0) is the start of the full match. 498 */ 499inline std::string::size_type RegularExpression::start(int n) const 500{ 501 return regmatch.start(n); 502} 503 504/** 505 * Return end index of nth submatch. end(0) is the end of the full match. 506 */ 507inline std::string::size_type RegularExpression::end(int n) const 508{ 509 return regmatch.end(n); 510} 511 512/** 513 * Return nth submatch as a string. 514 */ 515inline std::string RegularExpression::match(int n) const 516{ 517 return regmatch.match(n); 518} 519 520/** 521 * Returns true if two regular expressions have different 522 * compiled program for pattern matching. 523 */ 524inline bool RegularExpression::operator!=(const RegularExpression& r) const 525{ 526 return (!(*this == r)); 527} 528 529/** 530 * Returns true if a valid regular expression is compiled 531 * and ready for pattern matching. 532 */ 533inline bool RegularExpression::is_valid() const 534{ 535 return (this->program != 0); 536} 537 538inline void RegularExpression::set_invalid() 539{ 540 //#ifndef _WIN32 541 delete[] this->program; 542 //#endif 543 this->program = 0; 544} 545 546} // namespace @KWSYS_NAMESPACE@ 547 548#endif 549