1 /*
2  * Regexp engine instructions
3  *
4  * This file is included multiple times from src/regexp.c
5  */
6 
7 /* DEF_RE_INSN(name, operand-type)
8 
9    Operand-type can be:
10 
11    none        - no operand
12    octet       - one octet (character)
13    string      - one byte length + bytestring
14    cset        - code set number
15    group       - 1-byte group number
16    offset2     - 2-byte offset
17    offset1_2   - 1-byte offset & 2-byte offset
18    offset2_2   - 2-byte offset & 2-byte offset
19 */
20 
21 /* The 'RL' suffix indicates that the instruction moves the current position
22    pointer right to left.  Such instructions are used within lookbehind assertions. */
23 
24 /* Match one byte.  Followed by the 1 byte to match. */
25 DEF_RE_INSN(MATCH1, OP_octet)
26 DEF_RE_INSN(MATCH1_RL, OP_octet)
27 
28 /* Match a byte string.  Followed by a 1-byte length, then the bytes to match. */
29 DEF_RE_INSN(MATCH, OP_string)
30 DEF_RE_INSN(MATCH_RL, OP_string)
31 
32 /* Case-insensitive match of one byte.  Followed by the 1 byte to match. */
33 DEF_RE_INSN(MATCH1_CI, OP_octet)
34 DEF_RE_INSN(MATCH1_CI_RL, OP_octet)
35 
36 /* Case-insensitive match of a byte string.  Followed by a 1-byte length, then the bytes to match. */
37 DEF_RE_INSN(MATCH_CI, OP_string)
38 DEF_RE_INSN(MATCH_CI_RL, OP_string)
39 
40 /* Match any one char.  Takes no operand. */
41 DEF_RE_INSN(ANY, OP_none)
42 DEF_RE_INSN(ANY_RL, OP_none)
43 
44 /* Followed by offset (2 bytes).  Try matching the following sequence,
45    and if it fails, jump to offset.  This handles backtracking. */
46 DEF_RE_INSN(TRY, OP_offset2)
47 
48 /* Followed by charset #.  Match any one char that is in the charset. */
49 DEF_RE_INSN(SET, OP_cset)
50 DEF_RE_INSN(SET_RL, OP_cset)
51 
52 /* Followed by charset #.  Match any char NOT in the charset (negated SET).  */
53 DEF_RE_INSN(NSET, OP_cset)
54 DEF_RE_INSN(NSET_RL, OP_cset)
55 
56 /* Followed by charset #.  Match any char in the charset.  It is guaranteed
57    that the charset holds only chars in the range 0-127.  */
58 DEF_RE_INSN(SET1, OP_cset)
59 DEF_RE_INSN(SET1_RL, OP_cset)
60 
61 /* Followed by charset #.  Match any char NOT in the charset (negated SET1).
62    It is guaranteed that the charset holds only chars in the range 0-127.  */
63 DEF_RE_INSN(NSET1, OP_cset)
64 DEF_RE_INSN(NSET1_RL, OP_cset)
65 
66 /* Followed by offset (2 bytes).  Jump to the bytecode at that offset. */
67 DEF_RE_INSN(JUMP, OP_offset2)
68 
69 /* Fail and success.  Neither takes an operand. */
70 DEF_RE_INSN(FAIL, OP_none)
71 DEF_RE_INSN(SUCCESS, OP_none)
72 
73 /* Followed by a 1-byte group number.  Start the group. */
74 DEF_RE_INSN(BEGIN, OP_group)
75 DEF_RE_INSN(BEGIN_RL, OP_group)
76 
77 /* Followed by a 1-byte group number.  End the group. */
78 DEF_RE_INSN(END, OP_group)
79 DEF_RE_INSN(END_RL, OP_group)
80 
81 /* Beginning-of-string (BOS) and end-of-string (EOS) assertions */
82 DEF_RE_INSN(BOS, OP_none)
83 DEF_RE_INSN(EOS, OP_none)
84 
85 /* Beginning-of-line (BOL) and end-of-line (EOL) assertions */
86 DEF_RE_INSN(BOL, OP_none)
87 DEF_RE_INSN(EOL, OP_none)
88 
89 /* Beginning-of-word (BOW) and end-of-word (EOW) boundary assertions */
90 DEF_RE_INSN(BOW, OP_none)
91 DEF_RE_INSN(EOW, OP_none)
92 
93 /* Word boundary assertion (RE_BOW + RE_EOW) */
94 DEF_RE_INSN(WB, OP_none)
95 
96 /* Negative word boundary assertion.  Takes no operand. */
97 DEF_RE_INSN(NWB, OP_none)
98 
99 /* Beginning (BOG) and end (EOG) of grapheme cluster.  No operand. */
100 DEF_RE_INSN(BOG, OP_none)
101 DEF_RE_INSN(EOG, OP_none)
102 
103 /* Backreference.  Followed by a 1-byte group #.  (_CI: presumably case insensitive, as in MATCH_CI -- confirm.) */
104 DEF_RE_INSN(BACKREF, OP_group)
105 DEF_RE_INSN(BACKREF_RL, OP_group)
106 DEF_RE_INSN(BACKREF_CI, OP_group)
107 DEF_RE_INSN(BACKREF_CI_RL, OP_group)
108 
109 /* Conditional pattern.  Going by the operand types, CPAT takes a 1-byte and a 2-byte operand, CPATA two 2-byte operands (TODO confirm). */
110 DEF_RE_INSN(CPAT, OP_offset1_2)
111 DEF_RE_INSN(CPATA, OP_offset2_2)
112 
113 /* Standalone pattern.  Followed by a 2-byte offset. */
114 DEF_RE_INSN(ONCE, OP_offset2)
115 
116 /* Positive (ASSERT) and negative (NASSERT) lookahead assertions.  Each is followed by a 2-byte offset. */
117 DEF_RE_INSN(ASSERT, OP_offset2)
118 DEF_RE_INSN(NASSERT, OP_offset2)
119 
120 /* The following instructions are not necessary to implement the basic
121    engine, but are used in the optimized code.
122    The *R instructions (and their *R_RL counterparts) consume all input that
123    matches, without backtracking.  */
124 
125 /* 1-byte set match repeat (repeating form of SET1).  Followed by charset #. */
126 DEF_RE_INSN(SET1R, OP_cset)
127 DEF_RE_INSN(SET1R_RL, OP_cset)
128 
129 /* 1-byte negative set match repeat (repeating form of NSET1).  Followed by charset #. */
130 DEF_RE_INSN(NSET1R, OP_cset)
131 DEF_RE_INSN(NSET1R_RL, OP_cset)
132 
133 /* Set match repeat (repeating form of SET).  Followed by charset #. */
134 DEF_RE_INSN(SETR, OP_cset)
135 DEF_RE_INSN(SETR_RL, OP_cset)
136 
137 /* Negative set match repeat (repeating form of NSET).  Followed by charset #. */
138 DEF_RE_INSN(NSETR, OP_cset)
139 DEF_RE_INSN(NSETR_RL, OP_cset)
140 
141 /* 1-byte exact match repeat (repeating form of MATCH1).  Followed by the byte to match. */
142 DEF_RE_INSN(MATCH1R, OP_octet)
143 
144 /* Multiple-byte exact match repeat (repeating form of MATCH).  Followed by a 1-byte length, then the bytes to match. */
145 DEF_RE_INSN(MATCHR, OP_string)
146 
147 /* Any-char match repeat (repeating form of ANY).  Takes no operand. */
148 DEF_RE_INSN(ANYR, OP_none)
149