/* xref: /dragonfly/contrib/grep/lib/dfa.h (revision e89cf083) */
/* dfa.h - declarations for GNU deterministic regexp compiler
   Copyright (C) 1988, 1998, 2007, 2009-2020 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc.,
   51 Franklin Street - Fifth Floor, Boston, MA  02110-1301, USA */

/* Written June, 1988 by Mike Haertel */

#include <regex.h>   /* reg_syntax_t, used by dfasyntax.  */
#include <stdbool.h> /* bool, used by struct dfamust and several prototypes.  */
#include <stddef.h>  /* ptrdiff_t, used for lengths and counts.  */

/* Declared here only as an incomplete type, which is all the prototypes
   in this header need.  See localeinfo.h.  */
struct localeinfo;

/* FLEXIBLE_ARRAY_MEMBER is not defined anywhere in this header; it is
   expected from the includer's configuration (e.g. Autoconf's
   AC_C_FLEXIBLE_ARRAY_MEMBER).  When nothing has defined it, fall back
   to the C99 spelling of a flexible array member, i.e., an empty bound,
   so that the structure below still compiles.  */
#ifndef FLEXIBLE_ARRAY_MEMBER
# define FLEXIBLE_ARRAY_MEMBER
#endif

/* Element of a list of strings, at least one of which is known to
   appear in any R.E. matching the DFA.  */
struct dfamust
{
  /* NOTE(review): presumably true when MUST is the whole match rather
     than just a required substring -- confirm against dfa.c.  */
  bool exact;

  /* NOTE(review): presumably true when MUST has to appear at the start
     (BEGLINE) or at the end (ENDLINE) of a line -- confirm against
     dfa.c.  */
  bool begline;
  bool endline;

  /* The string itself, stored inline after the flags, so the object
     must be allocated with room for it.  */
  char must[FLEXIBLE_ARRAY_MEMBER];
};

/* The dfa structure.  Only this incomplete declaration is published, so
   the structure is completely opaque to users of this header.  */
struct dfa;

/* Needed when Gnulib is not used.  The prototypes in this header are
   annotated with both _GL_ATTRIBUTE_MALLOC and _GL_ATTRIBUTE_PURE, so
   each of them needs an empty fallback; otherwise the annotated
   declarations do not parse.  */
#ifndef _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_MALLOC
#endif
#ifndef _GL_ATTRIBUTE_PURE
# define _GL_ATTRIBUTE_PURE
#endif

/* Entry points. */

47 /* Allocate a struct dfa.  The struct dfa is completely opaque.
48    It should be initialized via dfasyntax or dfacopysyntax before other use.
49    The returned pointer should be passed directly to free() after
50    calling dfafree() on it. */
51 extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
52 
/* DFA options that can be ORed together, for dfasyntax's 4th arg.
   Each option occupies its own bit.  */
enum
  {
    /* ^ and $ match only the start and end of data, and do not match
       end-of-line within data.  This is always false for grep, but
       possibly true for other apps.  */
    DFA_ANCHOR = 1 << 0,

    /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
    DFA_EOL_NUL = 1 << 1
  };

/* Initialize or reinitialize a DFA.  The arguments are:
   D       - the DFA to operate on.
   LINFO   - information about the current locale.
   BITS    - syntax bits described in regex.h.
   DFAOPTS - additional DFA options: the DFA_* flags declared in this
             header, ORed together.  */
extern void dfasyntax (struct dfa *d, struct localeinfo const *linfo,
                       reg_syntax_t bits, int dfaopts);

/* Initialize or reinitialize the DFA TO from FROM, which must be an
   already-initialized DFA.  */
extern void dfacopysyntax (struct dfa *to, struct dfa const *from);

/* Parse the string S, of length LEN, into the struct dfa D.  */
extern void dfaparse (char const *s, ptrdiff_t len, struct dfa *d);

/* Allocate and return a struct dfamust from D, a struct dfa that was
   initialized by dfaparse and not yet given to dfacomp.
   NOTE(review): the result is presumably released with dfamustfree,
   and a null return presumably means no such string is known --
   confirm both against dfa.c.  */
extern struct dfamust *dfamust (struct dfa const *d);

/* Free the storage held by the components of the struct dfamust DM.
   NOTE(review): struct dfamust as declared in this header has no
   pointer members, so this presumably releases DM itself -- confirm
   against dfa.c.  */
extern void dfamustfree (struct dfamust *dm);

/* Compile the string S, of length LEN, into the struct dfa D.
   SEARCHFLAG says whether to build a searching or an exact matcher.
   A null S means D has already been initialized by dfaparse; in that
   case LEN is ignored.  */
extern void dfacomp (char const *s, ptrdiff_t len, struct dfa *d,
                     bool searchflag);

/* Search through a buffer looking for a match to the struct dfa D.
   Find the first occurrence of a string matching the regexp in the
   buffer, and the shortest possible version thereof.

   BEGIN points to the beginning of the buffer, and END points to the
   first byte after its end.  Note however that a sentinel byte (usually
   newline) is stored in *END, so the actual buffer must be one byte
   longer, and END is therefore not a pointer to const.

   When ALLOW_NL is true, newlines may appear in the matching string.
   If COUNT is non-NULL, *COUNT is incremented once for each newline
   processed.  If BACKREF is non-NULL, *BACKREF is set to indicate
   whether a back-reference was encountered; the caller can use this to
   decide whether to fall back on a backtracking matcher.

   Return a pointer to the first character after the match, or NULL if
   none is found.  */
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
                      bool allow_nl, ptrdiff_t *count, bool *backref);

107 /* Return a superset for D.  The superset matches everything that D
108    matches, along with some other strings (though the latter should be
109    rare, for efficiency reasons).  Return a null pointer if no useful
110    superset is available.  */
111 extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
112 
113 /* The DFA is likely to be fast.  */
114 extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
115 
/* Free the storage held by the components of the struct dfa D.  As
   described for dfaalloc, the caller still passes D itself to free()
   after this call.  */
extern void dfafree (struct dfa *d);

/* Error handling. */

/* dfawarn() is called by the regexp routines whenever a regex is compiled
   that likely doesn't do what the user wanted.  Its single argument,
   MESG, is a NUL-terminated string describing the situation.  This
   header only declares dfawarn; the user must supply the definition.  */
extern void dfawarn (const char *mesg);

/* dfaerror() is called by the regexp routines whenever an error occurs.
   Its single argument, MESG, is a NUL-terminated string describing the
   error.  This header only declares dfaerror; the user must supply the
   definition, and because the function is declared _Noreturn that
   definition must not return to its caller.  */
extern _Noreturn void dfaerror (const char *mesg);