1 //
2 // Copyright (C) 1991 Texas Instruments Incorporated.
3 //
4 // Permission is granted to any individual or institution to use, copy, modify,
5 // and distribute this software, provided that this complete copyright and
6 // permission notice is maintained, intact, in all copies and supporting
7 // documentation.
8 //
9 // Texas Instruments Incorporated provides this software "as is" without
10 // express or implied warranty.
11 //
12 // .NAME CoolRegExp - Pattern matching with regular expressions.
13 // .LIBRARY cool
14 // .HEADER String Classes
15 // .INCLUDE cool/RegExp.h
16 // .FILE cool/RegExp.C
17 // .FILE cool/RegExp.h
18 // .EXAMPLE examples/ex_RegExp.C
19 //
20 //
21 // .SECTION Description
22 //  A regular expression allows a programmer to specify  complex
23 //  patterns  that  can  be searched for and matched against the
24 //  character string of a string object. In its simplest form, a
25 //  regular  expression  is  a  sequence  of  characters used to
26 //  search for exact character matches. However, many times  the
27 //  exact  sequence to be found is not known, or only a match at
28 //  the beginning or end of a string is desired. The cool  regu-
29 //  lar  expression  class implements regular expression pattern
30 //  matching as is found and implemented in many  UNIX  commands
31 //  and utilities.
32 //
33 //  The regular expression class provides a convenient mechanism
34 //  for  specifying  and  manipulating  regular expressions. The
35 //  regular expression object allows specification of such  pat-
36 //  terns  by using the following regular expression metacharac-
37 //  ters:
38 //
39 //   ^        Matches at beginning of a line
40 //
41 //   $        Matches at end of a line
42 //
43 //  .         Matches any single character
44 //
45 //  [ ]       Matches any character(s) inside the brackets
46 //
47 //  [^ ]      Matches any character(s) not inside the brackets
48 //
49 //   -        Matches any character in range on either side of a dash
50 //
51 //   *        Matches preceding pattern zero or more times
52 //
53 //   +        Matches preceding pattern one or more times
54 //
55 //   ?        Matches preceding pattern zero or once only
56 //
57 //  ()        Saves a matched expression and uses it in a  later match
58 //
//  Note that more than one of these metacharacters can be used
//  in a single regular expression in order to create complex
//  search patterns. For example, the pattern [^ab1-9] matches
//  any single character that is not 'a', not 'b', and not a
//  digit in the range one through nine.
65 
66 
67 #ifndef ossimRegExph
68 #define ossimRegExph
69 
70 #include <ossim/base/ossimString.h>
71 
72 const int NSUBEXP = 10;
73 
74 
75 class OSSIMDLLEXPORT ossimRegExp {
76 public:
77   inline ossimRegExp ();			// ossimRegExp with program=NULL
78   inline ossimRegExp (const char*);	// ossimRegExp with compiled char*
79   ossimRegExp (const ossimRegExp&);	// Copy constructor
80   inline ~ossimRegExp();			// Destructor
81 
82   void compile (const char*);		// Compiles char* --> regexp
83   bool find (const char*);		// true if regexp in char* arg
84   inline ossim_uint32 start() const;	// Index to start of first find
85   inline ossim_uint32 end() const;	// Index to end of first find
86 
87   bool operator== (const ossimRegExp&) const;	// Equality operator
88   inline bool operator!= (const ossimRegExp&) const; // Inequality operator
89   bool deep_equal (const ossimRegExp&) const;	// Same regexp and state?
90 
91   inline bool is_valid() const;		// true if compiled regexp
92   inline void set_invalid();		// Invalidates regexp
93 
94   // awf added
95   ossim_uint32 start(ossim_uint32 n) const;
96   ossim_uint32 end(ossim_uint32 n) const;
97   std::string match(ossim_uint32 n) const;
98 
99 private:
100   const char* startp[NSUBEXP];
101   const char* endp[NSUBEXP];
102   char  regstart;			// Internal use only
103   char  reganch;			// Internal use only
104   const char* regmust;			// Internal use only
105   ossim_uint32 regmlen;			// Internal use only
106   char* program;
107   ossim_uint32 progsize;
108   const char* searchstring;
109 
110  // work variables
111   mutable const char* regparse;
112   mutable int   regnpar;	// () count.
113   mutable char  regdummy;
114   mutable char* regcode;	// Code-emit pointer; &regdummy = don't.
115   mutable long  regsize;	// Code size.
116   const char*  reginput;	// String-input pointer.
117   const char*  regbol;	// Beginning of input, for ^ check.
118   const char* *regstartp;	// Pointer to startp array.
119   const char* *regendp;	// Ditto for endp.
120 
121   char* reg (int, int*);
122   char* regbranch (int*);
123   char* regpiece (int*);
124   char* regatom (int*);
125   char* regnode (char);
126   const char* regnext (const char*);
127   char* regnext (char*);
128   void        regc (unsigned char);
129   void        reginsert (char, char*);
130   void        regtail (char*, const char*);
131   void        regoptail (char*, const char*);
132   int regtry (const char*, const char* *,
133               const char* *, const char*);
134   int regmatch (const char*);
135   int regrepeat (const char*);
136 #ifdef STRCSPN
137   int strcspn ();
138 #endif
139 
140 };
141 
142 // ossimRegExp -- Creates an empty regular expression.
143 
ossimRegExp()144 inline ossimRegExp::ossimRegExp ():
145   regstart(0),     // Internal use only
146   reganch(0),      // Internal use only
147   regmust(0),      // Internal use only
148   regmlen(0),     // Internal use only
149   program(0),
150   progsize(0),
151   searchstring(0),
152 
153  // work variables
154   regparse(0),
155   regnpar(0),  // () count.
156   regdummy(0),
157   regcode(0),  // Code-emit pointer; &regdummy = don't.
158   regsize(0),  // Code size.
159   reginput(0),  // String-input pointer.
160   regbol(0),  // Beginning of input, for ^ check.
161   regstartp(0), // Pointer to startp array.
162   regendp(0) // Ditto for endp.
163 {
164 }
165 
166 
167 // ossimRegExp -- Creates a regular expression from string s, and
168 // compiles s.
169 
170 
ossimRegExp(const char * s)171 inline ossimRegExp::ossimRegExp (const char* s) :
172   regstart(0),     // Internal use only
173   reganch(0),      // Internal use only
174   regmust(0),      // Internal use only
175   regmlen(0),     // Internal use only
176   program(0),
177   progsize(0),
178   searchstring(0),
179 
180  // work variables
181   regparse(0),
182   regnpar(0),  // () count.
183   regdummy(0),
184   regcode(0),  // Code-emit pointer; &regdummy = don't.
185   regsize(0),  // Code size.
186   reginput(0),  // String-input pointer.
187   regbol(0),  // Beginning of input, for ^ check.
188   regstartp(0), // Pointer to startp array.
189   regendp(0) // Ditto for endp.
190 {
191   this->program = NULL;
192   compile(s);
193 }
194 
195 // ~ossimRegExp -- Frees space allocated for regular expression.
196 
~ossimRegExp()197 inline ossimRegExp::~ossimRegExp () {
198 //#ifndef WIN32
199    if(program)
200    {
201       delete [] this->program;
202       this->program = 0;
203    }
204 //#endif
205 }
206 
207 // Start --
208 
start()209 inline ossim_uint32 ossimRegExp::start () const {
210   return(this->startp[0] - searchstring);
211 }
212 
213 
214 // End -- Returns the start/end index of the last item found.
215 
216 
end()217 inline ossim_uint32 ossimRegExp::end () const {
218   return(this->endp[0] - searchstring);
219 }
220 
221 
222 // operator!= //
223 
224 inline bool ossimRegExp::operator!= (const ossimRegExp& r) const {
225   return(!(*this == r));
226 }
227 
228 
229 // is_valid -- Returns true if a valid regular expression is compiled
230 // and ready for pattern matching.
231 
is_valid()232 inline bool ossimRegExp::is_valid () const {
233   return (this->program != NULL);
234 }
235 
236 
237 // set_invalid -- Invalidates regular expression.
238 
set_invalid()239 inline void ossimRegExp::set_invalid () {
240 //#ifndef WIN32
241   delete [] this->program;
242 //#endif
243   this->program = NULL;
244 }
245 
246 // -- Return start index of nth submatch. start(0) is the start of the full match.
start(ossim_uint32 n)247 inline ossim_uint32 ossimRegExp::start(ossim_uint32 n) const
248 {
249   return this->startp[n] - searchstring;
250 }
251 
252 // -- Return end index of nth submatch. end(0) is the end of the full match.
end(ossim_uint32 n)253 inline ossim_uint32 ossimRegExp::end(ossim_uint32 n) const
254 {
255   return this->endp[n] - searchstring;
256 }
257 
258 // -- Return nth submatch as a string.
match(ossim_uint32 n)259 inline std::string ossimRegExp::match(ossim_uint32 n) const
260 {
261 	return std::string(this->startp[n], this->endp[n] - this->startp[n]);
262 }
263 
264 #endif // CoolRegExph
265