1 //--------------------------------------------------------------------------------- 2 // 3 // Generated Header File. Do not edit by hand. 4 // This file contains the state table for the ICU Regular Expression Pattern Parser 5 // It is generated by the Perl script "regexcst.pl" from 6 // the rule parser state definitions file "regexcst.txt". 7 // 8 // Copyright (C) 2002-2016 International Business Machines Corporation 9 // and others. All rights reserved. 10 // 11 //--------------------------------------------------------------------------------- 12 #ifndef RBBIRPT_H 13 #define RBBIRPT_H 14 15 #include "unicode/utypes.h" 16 17 U_NAMESPACE_BEGIN 18 // 19 // Character classes for regex pattern scanning. 20 // 21 static const uint8_t kRuleSet_ascii_letter = 128; 22 static const uint8_t kRuleSet_digit_char = 129; 23 static const uint8_t kRuleSet_rule_char = 130; 24 25 26 enum Regex_PatternParseAction { 27 doSetBackslash_V, 28 doSetBackslash_h, 29 doBeginNamedBackRef, 30 doSetMatchMode, 31 doEnterQuoteMode, 32 doOpenCaptureParen, 33 doContinueNamedCapture, 34 doSetBackslash_d, 35 doBeginMatchMode, 36 doBackslashX, 37 doSetPosixProp, 38 doIntervalError, 39 doSetLiteralEscaped, 40 doSetBackslash_s, 41 doNOP, 42 doBackslashv, 43 doOpenLookBehind, 44 doPatStart, 45 doPossessiveInterval, 46 doOpenAtomicParen, 47 doOpenLookAheadNeg, 48 doBackslashd, 49 doBackslashZ, 50 doIntervalUpperDigit, 51 doBadNamedCapture, 52 doSetDifference2, 53 doSetAddAmp, 54 doSetNamedChar, 55 doNamedChar, 56 doSetBackslash_H, 57 doBackslashb, 58 doBackslashz, 59 doSetBeginDifference1, 60 doOpenLookAhead, 61 doMatchModeParen, 62 doBackslashV, 63 doIntevalLowerDigit, 64 doCaret, 65 doSetEnd, 66 doSetNegate, 67 doBackslashS, 68 doOrOperator, 69 doBackslashB, 70 doBackslashw, 71 doBackslashR, 72 doRuleError, 73 doDotAny, 74 doMatchMode, 75 doSetBackslash_W, 76 doNGPlus, 77 doSetBackslash_D, 78 doPossessiveOpt, 79 doSetNamedRange, 80 doConditionalExpr, 81 doBackslashs, 82 doPossessiveStar, 83 doPlus, 84 doBadOpenParenType, 85 doCloseParen, 86 doNGInterval, 87 doSetProp, 88 doBackRef, 89 doSetBeginUnion, 90 doEscapeError, 91 doOpt, 92 doSetBeginIntersection1, 93 doPossessivePlus, 94 doBackslashD, 95 doOpenLookBehindNeg, 96 doSetBegin, 97 doSetIntersection2, 98 doCompleteNamedBackRef, 99 doSetRange, 100 doDollar, 101 doBackslashH, 102 doExit, 103 doNGOpt, 104 doOpenNonCaptureParen, 105 doBackslashA, 106 doSetBackslash_v, 107 doBackslashh, 108 doBadModeFlag, 109 doSetNoCloseError, 110 doIntervalSame, 111 doSetAddDash, 112 doBackslashW, 113 doPerlInline, 114 doSetOpError, 115 doSetLiteral, 116 doPatFinish, 117 doBeginNamedCapture, 118 doEscapedLiteralChar, 119 doLiteralChar, 120 doSuppressComments, 121 doMismatchedParenErr, 122 doNGStar, 123 doSetFinish, 124 doInterval, 125 doBackslashG, 126 doStar, 127 doSetBackslash_w, 128 doSetBackslash_S, 129 doProperty, 130 doContinueNamedBackRef, 131 doIntervalInit, 132 rbbiLastAction}; 133 134 //------------------------------------------------------------------------------- 135 // 136 // RegexTableEl represents the structure of a row in the transition table 137 // for the pattern parser state machine. 138 //------------------------------------------------------------------------------- 139 struct RegexTableEl { 140 Regex_PatternParseAction fAction; 141 uint8_t fCharClass; // 0-127: an individual ASCII character 142 // 128-255: character class index 143 uint8_t fNextState; // 0-250: normal next-state numbers 144 // 255: pop next-state from stack. 145 uint8_t fPushState; 146 UBool fNextChar; 147 }; 148 149 static const struct RegexTableEl gRuleParseStateTable[] = { 150 {doNOP, 0, 0, 0, TRUE} 151 , {doPatStart, 255, 2,0, FALSE} // 1 start 152 , {doLiteralChar, 254, 14,0, TRUE} // 2 term 153 , {doLiteralChar, 130, 14,0, TRUE} // 3 154 , {doSetBegin, 91 /* [ */, 123, 205, TRUE} // 4 155 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5 156 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6 157 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7 158 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8 159 , {doNOP, 92 /* \ */, 89,0, TRUE} // 9 160 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10 161 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11 162 , {doPatFinish, 253, 2,0, FALSE} // 12 163 , {doRuleError, 255, 206,0, FALSE} // 13 164 , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant 165 , {doNOP, 43 /* + */, 71,0, TRUE} // 15 166 , {doNOP, 63 /* ? */, 74,0, TRUE} // 16 167 , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17 168 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18 169 , {doNOP, 255, 20,0, FALSE} // 19 170 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont 171 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21 172 , {doNOP, 255, 2,0, FALSE} // 22 173 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant 174 , {doNOP, 255, 27,0, FALSE} // 24 175 , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2 176 , {doNOP, 255, 29,0, FALSE} // 26 177 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren 178 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28 179 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended 180 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30 181 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31 182 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32 183 , {doNOP, 60 /* < */, 46,0, TRUE} // 33 184 , {doNOP, 35 /* # */, 50, 2, TRUE} // 34 185 , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35 186 , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36 187 , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37 188 , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38 189 , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39 190 , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40 191 , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41 192 , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42 193 , {doConditionalExpr, 40 /* ( */, 206,0, TRUE} // 43 194 , {doPerlInline, 123 /* { */, 206,0, TRUE} // 44 195 , {doBadOpenParenType, 255, 206,0, FALSE} // 45 196 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind 197 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47 198 , {doBeginNamedCapture, 128, 64,0, FALSE} // 48 199 , {doBadOpenParenType, 255, 206,0, FALSE} // 49 200 , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment 201 , {doMismatchedParenErr, 253, 206,0, FALSE} // 51 202 , {doNOP, 255, 50,0, TRUE} // 52 203 , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag 204 , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54 205 , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55 206 , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56 207 , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57 208 , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58 209 , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59 210 , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60 211 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61 212 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62 213 , {doBadModeFlag, 255, 206,0, FALSE} // 63 214 , {doContinueNamedCapture, 128, 64,0, TRUE} // 64 named-capture 215 , {doContinueNamedCapture, 129, 64,0, TRUE} // 65 216 , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66 217 , {doBadNamedCapture, 255, 206,0, FALSE} // 67 218 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star 219 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69 220 , {doStar, 255, 20,0, FALSE} // 70 221 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus 222 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72 223 , {doPlus, 255, 20,0, FALSE} // 73 224 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt 225 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75 226 , {doOpt, 255, 20,0, FALSE} // 76 227 , {doNOP, 129, 79,0, FALSE} // 77 interval-open 228 , {doIntervalError, 255, 206,0, FALSE} // 78 229 , {doIntevalLowerDigit, 129, 79,0, TRUE} // 79 interval-lower 230 , {doNOP, 44 /* , */, 83,0, TRUE} // 80 231 , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81 232 , {doIntervalError, 255, 206,0, FALSE} // 82 233 , {doIntervalUpperDigit, 129, 83,0, TRUE} // 83 interval-upper 234 , {doNOP, 125 /* } */, 86,0, TRUE} // 84 235 , {doIntervalError, 255, 206,0, FALSE} // 85 236 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type 237 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87 238 , {doInterval, 255, 20,0, FALSE} // 88 239 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash 240 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90 241 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91 242 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92 243 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93 244 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94 245 , {doBackslashh, 104 /* h */, 14,0, TRUE} // 95 246 , {doBackslashH, 72 /* H */, 14,0, TRUE} // 96 247 , {doNOP, 107 /* k */, 115,0, TRUE} // 97 248 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 98 249 , {doProperty, 112 /* p */, 14,0, FALSE} // 99 250 , {doProperty, 80 /* P */, 14,0, FALSE} // 100 251 , {doBackslashR, 82 /* R */, 14,0, TRUE} // 101 252 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 102 253 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 103 254 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 104 255 , {doBackslashv, 118 /* v */, 14,0, TRUE} // 105 256 , {doBackslashV, 86 /* V */, 14,0, TRUE} // 106 257 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 107 258 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 108 259 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 109 260 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 110 261 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 111 262 , {doBackRef, 129, 14,0, TRUE} // 112 263 , {doEscapeError, 253, 206,0, FALSE} // 113 264 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 114 265 , {doBeginNamedBackRef, 60 /* < */, 117,0, TRUE} // 115 named-backref 266 , {doBadNamedCapture, 255, 206,0, FALSE} // 116 267 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 117 named-backref-2 268 , {doBadNamedCapture, 255, 206,0, FALSE} // 118 269 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 119 named-backref-3 270 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 120 271 , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 121 272 , {doBadNamedCapture, 255, 206,0, FALSE} // 122 273 , {doSetNegate, 94 /* ^ */, 126,0, TRUE} // 123 set-open 274 , {doSetPosixProp, 58 /* : */, 128,0, FALSE} // 124 275 , {doNOP, 255, 126,0, FALSE} // 125 276 , {doSetLiteral, 93 /* ] */, 141,0, TRUE} // 126 set-open2 277 , {doNOP, 255, 131,0, FALSE} // 127 278 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 128 set-posix 279 , {doNOP, 58 /* : */, 131,0, FALSE} // 129 280 , {doRuleError, 255, 206,0, FALSE} // 130 281 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 131 set-start 282 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 132 283 , {doNOP, 92 /* \ */, 191,0, TRUE} // 133 284 , {doNOP, 45 /* - */, 137,0, TRUE} // 134 285 , {doNOP, 38 /* & */, 139,0, TRUE} // 135 286 , {doSetLiteral, 255, 141,0, TRUE} // 136 287 , {doRuleError, 45 /* - */, 206,0, FALSE} // 137 set-start-dash 288 , {doSetAddDash, 255, 141,0, FALSE} // 138 289 , {doRuleError, 38 /* & */, 206,0, FALSE} // 139 set-start-amp 290 , {doSetAddAmp, 255, 141,0, FALSE} // 140 291 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 141 set-after-lit 292 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 142 293 , {doNOP, 45 /* - */, 178,0, TRUE} // 143 294 , {doNOP, 38 /* & */, 169,0, TRUE} // 144 295 , {doNOP, 92 /* \ */, 191,0, TRUE} // 145 296 , {doSetNoCloseError, 253, 206,0, FALSE} // 146 297 , {doSetLiteral, 255, 141,0, TRUE} // 147 298 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 148 set-after-set 299 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 149 300 , {doNOP, 45 /* - */, 171,0, TRUE} // 150 301 , {doNOP, 38 /* & */, 166,0, TRUE} // 151 302 , {doNOP, 92 /* \ */, 191,0, TRUE} // 152 303 , {doSetNoCloseError, 253, 206,0, FALSE} // 153 304 , {doSetLiteral, 255, 141,0, TRUE} // 154 305 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 155 set-after-range 306 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 156 307 , {doNOP, 45 /* - */, 174,0, TRUE} // 157 308 , {doNOP, 38 /* & */, 176,0, TRUE} // 158 309 , {doNOP, 92 /* \ */, 191,0, TRUE} // 159 310 , {doSetNoCloseError, 253, 206,0, FALSE} // 160 311 , {doSetLiteral, 255, 141,0, TRUE} // 161 312 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 162 set-after-op 313 , {doSetOpError, 93 /* ] */, 206,0, FALSE} // 163 314 , {doNOP, 92 /* \ */, 191,0, TRUE} // 164 315 , {doSetLiteral, 255, 141,0, TRUE} // 165 316 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE} // 166 set-set-amp 317 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 167 318 , {doSetAddAmp, 255, 141,0, FALSE} // 168 319 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 169 set-lit-amp 320 , {doSetAddAmp, 255, 141,0, FALSE} // 170 321 , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE} // 171 set-set-dash 322 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 172 323 , {doSetAddDash, 255, 141,0, FALSE} // 173 324 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 174 set-range-dash 325 , {doSetAddDash, 255, 141,0, FALSE} // 175 326 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 176 set-range-amp 327 , {doSetAddAmp, 255, 141,0, FALSE} // 177 328 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 178 set-lit-dash 329 , {doSetAddDash, 91 /* [ */, 141,0, FALSE} // 179 330 , {doSetAddDash, 93 /* ] */, 141,0, FALSE} // 180 331 , {doNOP, 92 /* \ */, 183,0, TRUE} // 181 332 , {doSetRange, 255, 155,0, TRUE} // 182 333 , {doSetOpError, 115 /* s */, 206,0, FALSE} // 183 set-lit-dash-escape 334 , {doSetOpError, 83 /* S */, 206,0, FALSE} // 184 335 , {doSetOpError, 119 /* w */, 206,0, FALSE} // 185 336 , {doSetOpError, 87 /* W */, 206,0, FALSE} // 186 337 , {doSetOpError, 100 /* d */, 206,0, FALSE} // 187 338 , {doSetOpError, 68 /* D */, 206,0, FALSE} // 188 339 , {doSetNamedRange, 78 /* N */, 155,0, FALSE} // 189 340 , {doSetRange, 255, 155,0, TRUE} // 190 341 , {doSetProp, 112 /* p */, 148,0, FALSE} // 191 set-escape 342 , {doSetProp, 80 /* P */, 148,0, FALSE} // 192 343 , {doSetNamedChar, 78 /* N */, 141,0, FALSE} // 193 344 , {doSetBackslash_s, 115 /* s */, 155,0, TRUE} // 194 345 , {doSetBackslash_S, 83 /* S */, 155,0, TRUE} // 195 346 , {doSetBackslash_w, 119 /* w */, 155,0, TRUE} // 196 347 , {doSetBackslash_W, 87 /* W */, 155,0, TRUE} // 197 348 , {doSetBackslash_d, 100 /* d */, 155,0, TRUE} // 198 349 , {doSetBackslash_D, 68 /* D */, 155,0, TRUE} // 199 350 , {doSetBackslash_h, 104 /* h */, 155,0, TRUE} // 200 351 , {doSetBackslash_H, 72 /* H */, 155,0, TRUE} // 201 352 , {doSetBackslash_v, 118 /* v */, 155,0, TRUE} // 202 353 , {doSetBackslash_V, 86 /* V */, 155,0, TRUE} // 203 354 , {doSetLiteralEscaped, 255, 141,0, TRUE} // 204 355 , {doSetFinish, 255, 14,0, FALSE} // 205 set-finish 356 , {doExit, 255, 206,0, TRUE} // 206 errorDeath 357 }; 358 static const char * const RegexStateNames[] = { 0, 359 "start", 360 "term", 361 0, 362 0, 363 0, 364 0, 365 0, 366 0, 367 0, 368 0, 369 0, 370 0, 371 0, 372 "expr-quant", 373 0, 374 0, 375 0, 376 0, 377 0, 378 "expr-cont", 379 0, 380 0, 381 "open-paren-quant", 382 0, 383 "open-paren-quant2", 384 0, 385 "open-paren", 386 0, 387 "open-paren-extended", 388 0, 389 0, 390 0, 391 0, 392 0, 393 0, 394 0, 395 0, 396 0, 397 0, 398 0, 399 0, 400 0, 401 0, 402 0, 403 0, 404 "open-paren-lookbehind", 405 0, 406 0, 407 0, 408 "paren-comment", 409 0, 410 0, 411 "paren-flag", 412 0, 413 0, 414 0, 415 0, 416 0, 417 0, 418 0, 419 0, 420 0, 421 0, 422 "named-capture", 423 0, 424 0, 425 0, 426 "quant-star", 427 0, 428 0, 429 "quant-plus", 430 0, 431 0, 432 "quant-opt", 433 0, 434 0, 435 "interval-open", 436 0, 437 "interval-lower", 438 0, 439 0, 440 0, 441 "interval-upper", 442 0, 443 0, 444 "interval-type", 445 0, 446 0, 447 "backslash", 448 0, 449 0, 450 0, 451 0, 452 0, 453 0, 454 0, 455 0, 456 0, 457 0, 458 0, 459 0, 460 0, 461 0, 462 0, 463 0, 464 0, 465 0, 466 0, 467 0, 468 0, 469 0, 470 0, 471 0, 472 0, 473 "named-backref", 474 0, 475 "named-backref-2", 476 0, 477 "named-backref-3", 478 0, 479 0, 480 0, 481 "set-open", 482 0, 483 0, 484 "set-open2", 485 0, 486 "set-posix", 487 0, 488 0, 489 "set-start", 490 0, 491 0, 492 0, 493 0, 494 0, 495 "set-start-dash", 496 0, 497 "set-start-amp", 498 0, 499 "set-after-lit", 500 0, 501 0, 502 0, 503 0, 504 0, 505 0, 506 "set-after-set", 507 0, 508 0, 509 0, 510 0, 511 0, 512 0, 513 "set-after-range", 514 0, 515 0, 516 0, 517 0, 518 0, 519 0, 520 "set-after-op", 521 0, 522 0, 523 0, 524 "set-set-amp", 525 0, 526 0, 527 "set-lit-amp", 528 0, 529 "set-set-dash", 530 0, 531 0, 532 "set-range-dash", 533 0, 534 "set-range-amp", 535 0, 536 "set-lit-dash", 537 0, 538 0, 539 0, 540 0, 541 "set-lit-dash-escape", 542 0, 543 0, 544 0, 545 0, 546 0, 547 0, 548 0, 549 "set-escape", 550 0, 551 0, 552 0, 553 0, 554 0, 555 0, 556 0, 557 0, 558 0, 559 0, 560 0, 561 0, 562 0, 563 "set-finish", 564 "errorDeath", 565 0}; 566 567 U_NAMESPACE_END 568 #endif 569