1 //
2 // Copyright (C) 1991 Texas Instruments Incorporated.
3 //
4 // Permission is granted to any individual or institution to use, copy, modify,
5 // and distribute this software, provided that this complete copyright and
6 // permission notice is maintained, intact, in all copies and supporting
7 // documentation.
8 //
9 // Texas Instruments Incorporated provides this software "as is" without
10 // express or implied warranty.
11 //
12 // .NAME CoolRegExp - Pattern matching with regular expressions.
13 // .LIBRARY cool
14 // .HEADER String Classes
15 // .INCLUDE cool/RegExp.h
16 // .FILE cool/RegExp.C
17 // .FILE cool/RegExp.h
18 // .EXAMPLE examples/ex_RegExp.C
19 //
20 //
21 // .SECTION Description
22 // A regular expression allows a programmer to specify complex
23 // patterns that can be searched for and matched against the
24 // character string of a string object. In its simplest form, a
25 // regular expression is a sequence of characters used to
26 // search for exact character matches. However, many times the
27 // exact sequence to be found is not known, or only a match at
28 // the beginning or end of a string is desired. The cool regu-
29 // lar expression class implements regular expression pattern
30 // matching as is found and implemented in many UNIX commands
31 // and utilities.
32 //
33 // The regular expression class provides a convenient mechanism
34 // for specifying and manipulating regular expressions. The
35 // regular expression object allows specification of such pat-
36 // terns by using the following regular expression metacharac-
37 // ters:
38 //
39 // ^ Matches at beginning of a line
40 //
41 // $ Matches at end of a line
42 //
43 // . Matches any single character
44 //
45 // [ ] Matches any character(s) inside the brackets
46 //
47 // [^ ] Matches any character(s) not inside the brackets
48 //
49 // - Matches any character in range on either side of a dash
50 //
51 // * Matches preceding pattern zero or more times
52 //
53 // + Matches preceding pattern one or more times
54 //
55 // ? Matches preceding pattern zero or once only
56 //
57 // () Saves a matched expression and uses it in a later match
58 //
// Note that more than one of these metacharacters can be used
// in a single regular expression in order to create complex
// search patterns. For example, the pattern [^ab1-9] matches
// any single character that is not "a", "b", or a digit in the
// range one through nine.
65
66
67 #ifndef ossimRegExph
68 #define ossimRegExph
69
70 #include <ossim/base/ossimString.h>
71
72 const int NSUBEXP = 10;
73
74
75 class OSSIMDLLEXPORT ossimRegExp {
76 public:
77 inline ossimRegExp (); // ossimRegExp with program=NULL
78 inline ossimRegExp (const char*); // ossimRegExp with compiled char*
79 ossimRegExp (const ossimRegExp&); // Copy constructor
80 inline ~ossimRegExp(); // Destructor
81
82 void compile (const char*); // Compiles char* --> regexp
83 bool find (const char*); // true if regexp in char* arg
84 inline ossim_uint32 start() const; // Index to start of first find
85 inline ossim_uint32 end() const; // Index to end of first find
86
87 bool operator== (const ossimRegExp&) const; // Equality operator
88 inline bool operator!= (const ossimRegExp&) const; // Inequality operator
89 bool deep_equal (const ossimRegExp&) const; // Same regexp and state?
90
91 inline bool is_valid() const; // true if compiled regexp
92 inline void set_invalid(); // Invalidates regexp
93
94 // awf added
95 ossim_uint32 start(ossim_uint32 n) const;
96 ossim_uint32 end(ossim_uint32 n) const;
97 std::string match(ossim_uint32 n) const;
98
99 private:
100 const char* startp[NSUBEXP];
101 const char* endp[NSUBEXP];
102 char regstart; // Internal use only
103 char reganch; // Internal use only
104 const char* regmust; // Internal use only
105 ossim_uint32 regmlen; // Internal use only
106 char* program;
107 ossim_uint32 progsize;
108 const char* searchstring;
109
110 // work variables
111 mutable const char* regparse;
112 mutable int regnpar; // () count.
113 mutable char regdummy;
114 mutable char* regcode; // Code-emit pointer; ®dummy = don't.
115 mutable long regsize; // Code size.
116 const char* reginput; // String-input pointer.
117 const char* regbol; // Beginning of input, for ^ check.
118 const char* *regstartp; // Pointer to startp array.
119 const char* *regendp; // Ditto for endp.
120
121 char* reg (int, int*);
122 char* regbranch (int*);
123 char* regpiece (int*);
124 char* regatom (int*);
125 char* regnode (char);
126 const char* regnext (const char*);
127 char* regnext (char*);
128 void regc (unsigned char);
129 void reginsert (char, char*);
130 void regtail (char*, const char*);
131 void regoptail (char*, const char*);
132 int regtry (const char*, const char* *,
133 const char* *, const char*);
134 int regmatch (const char*);
135 int regrepeat (const char*);
136 #ifdef STRCSPN
137 int strcspn ();
138 #endif
139
140 };
141
142 // ossimRegExp -- Creates an empty regular expression.
143
ossimRegExp()144 inline ossimRegExp::ossimRegExp ():
145 regstart(0), // Internal use only
146 reganch(0), // Internal use only
147 regmust(0), // Internal use only
148 regmlen(0), // Internal use only
149 program(0),
150 progsize(0),
151 searchstring(0),
152
153 // work variables
154 regparse(0),
155 regnpar(0), // () count.
156 regdummy(0),
157 regcode(0), // Code-emit pointer; ®dummy = don't.
158 regsize(0), // Code size.
159 reginput(0), // String-input pointer.
160 regbol(0), // Beginning of input, for ^ check.
161 regstartp(0), // Pointer to startp array.
162 regendp(0) // Ditto for endp.
163 {
164 }
165
166
167 // ossimRegExp -- Creates a regular expression from string s, and
168 // compiles s.
169
170
ossimRegExp(const char * s)171 inline ossimRegExp::ossimRegExp (const char* s) :
172 regstart(0), // Internal use only
173 reganch(0), // Internal use only
174 regmust(0), // Internal use only
175 regmlen(0), // Internal use only
176 program(0),
177 progsize(0),
178 searchstring(0),
179
180 // work variables
181 regparse(0),
182 regnpar(0), // () count.
183 regdummy(0),
184 regcode(0), // Code-emit pointer; ®dummy = don't.
185 regsize(0), // Code size.
186 reginput(0), // String-input pointer.
187 regbol(0), // Beginning of input, for ^ check.
188 regstartp(0), // Pointer to startp array.
189 regendp(0) // Ditto for endp.
190 {
191 this->program = NULL;
192 compile(s);
193 }
194
195 // ~ossimRegExp -- Frees space allocated for regular expression.
196
~ossimRegExp()197 inline ossimRegExp::~ossimRegExp () {
198 //#ifndef WIN32
199 if(program)
200 {
201 delete [] this->program;
202 this->program = 0;
203 }
204 //#endif
205 }
206
207 // Start --
208
start()209 inline ossim_uint32 ossimRegExp::start () const {
210 return(this->startp[0] - searchstring);
211 }
212
213
214 // End -- Returns the start/end index of the last item found.
215
216
end()217 inline ossim_uint32 ossimRegExp::end () const {
218 return(this->endp[0] - searchstring);
219 }
220
221
222 // operator!= //
223
224 inline bool ossimRegExp::operator!= (const ossimRegExp& r) const {
225 return(!(*this == r));
226 }
227
228
229 // is_valid -- Returns true if a valid regular expression is compiled
230 // and ready for pattern matching.
231
is_valid()232 inline bool ossimRegExp::is_valid () const {
233 return (this->program != NULL);
234 }
235
236
237 // set_invalid -- Invalidates regular expression.
238
set_invalid()239 inline void ossimRegExp::set_invalid () {
240 //#ifndef WIN32
241 delete [] this->program;
242 //#endif
243 this->program = NULL;
244 }
245
246 // -- Return start index of nth submatch. start(0) is the start of the full match.
start(ossim_uint32 n)247 inline ossim_uint32 ossimRegExp::start(ossim_uint32 n) const
248 {
249 return this->startp[n] - searchstring;
250 }
251
252 // -- Return end index of nth submatch. end(0) is the end of the full match.
end(ossim_uint32 n)253 inline ossim_uint32 ossimRegExp::end(ossim_uint32 n) const
254 {
255 return this->endp[n] - searchstring;
256 }
257
258 // -- Return nth submatch as a string.
match(ossim_uint32 n)259 inline std::string ossimRegExp::match(ossim_uint32 n) const
260 {
261 return std::string(this->startp[n], this->endp[n] - this->startp[n]);
262 }
263
#endif // ossimRegExph
265