1"====================================================================== 2| 3| Regular expressions 4| 5| 6 ======================================================================" 7 8 9"====================================================================== 10| 11| Copyright 1999 Free Software Foundation, Inc. 12| Written by Paolo Bonzini. 13| 14| This file is part of GNU Smalltalk. 15| 16| GNU Smalltalk is free software; you can redistribute it and/or modify it 17| under the terms of the GNU General Public License as published by the Free 18| Software Foundation; either version 2, or (at your option) any later version. 19| 20| GNU Smalltalk is distributed in the hope that it will be useful, but WITHOUT 21| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 22| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 23| details. 24| 25| You should have received a copy of the GNU General Public License along with 26| GNU Smalltalk; see the file COPYING. If not, write to the Free Software 27| Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 28| 29 ======================================================================" 30 31Object subclass: #RegularExpression 32 instanceVariableNames: 'selectors params noDollar string' 33 classVariableNames: '' 34 poolDictionaries: '' 35 category: 'Examples-Useful'! 36 37!RegularExpression class methodsFor: 'instance creation'! 38 39new 40 self shouldNotImplement 41! 42 43fromString: aString 44 ^super new parseString: aString 45! ! 46 47!RegularExpression class methodsFor: 'parsing'! 48 49match: aString to: regExp 50 ^(self fromString: regExp) match: aString 51! ! 52 53!RegularExpression methodsFor: 'parsing'! 54 55match: aString 56 57 ^self matchStream: (ReadStream on: aString) from: 1 58! 59 60matchStream: aStream 61 62 ^self matchStream: aStream from: 1 63! ! 64 65!RegularExpression methodsFor: 'private'! 66 67parseString: aString 68 "Private - Convert a regular expression to its internal representation" 69 70 | regexp endPos | 71 72 params := OrderedCollection new. 73 selectors := OrderedCollection new. 74 75 "Zero-length aString is a special case" 76 aString size = 0 77 ifTrue: [ 78 noDollar := true. 79 self addDotAsterisk. 80 ^self 81 ]. 82 83 regexp := ReadStream on: aString. 84 noDollar := aString last ~= $$. 85 86 (regexp peekFor: $^) 87 ifFalse: [ self addDotAsterisk ] 88 89 endPos := noDollar 90 ifTrue: [ aString size ] 91 ifFalse: [ aString size - 1 ]. 92 93 [ regexp position > endPos ] whileFalse: [ self parseAtom: regexp ] 94! 95 96addDotAsterisk 97 98 "Add an implicit .* sequence" 99 params addLast: nil. 100 selectors addLast: #wild0any:index: 101! 102 103parseAtom: regexp 104 "Private - Parse an 'atom' of the regular expression. Add to selectors the 105 selector to be called to match it, and add to params the first parameter that 106 will be passed to this selector" 107 108 | next | 109 (next := regexp next) = $\ 110 ifTrue: [ 111 params addLast: regexp next. 112 ^selectors addLast: #char:index: 113 ]. 114 115 (next = $+) & selectors notEmpty 116 ifTrue: [ 117 ^selectors 118 at: selectors size 119 put: ('wild1', selectors last) asSymbol 120 ]. 121 122 (next = $*) & selectors notEmpty 123 ifTrue: [ 124 ^selectors 125 at: selectors size 126 put: ('wild0', selectors last) asSymbol 127 ]. 128 129 next = $. 130 ifTrue: [ 131 params addLast: nil. 132 ^selectors addLast: #any:index: 133 ]. 134 135 next = $[ 136 ifTrue: [ 137 (regexp peekFor: $^) 138 ifTrue: [ selectors addLast: #notRange:index: ] 139 ifFalse: [ selectors addLast: #range:index: ]. 140 141 params addLast: (self parseRange: regexp). 142 ^selectors last 143 ]. 144 145 next = ${ 146 ifTrue: [ 147 params addLast: (self parseOptional: regexp). 148 ^selectors addLast: #optional:index: 149 ]. 150 151 params addLast: next. 152 ^selectors addLast: #char:index: 153! 154 155parseRange: regexp 156 157 "Private - Parse a 'range atom', that is an atom that can match to seve- 158 ral characters." 159 160 | next answerStream | 161 answerStream := WriteStream on: (String new: 8). "Number out of a hat" 162 163 [ (next := regexp next) = $] ] whileFalse: [ 164 answerStream nextPut: next. 165 regexp atEnd ifTrue: [ self errorBadRegexp ]. 166 167 (regexp peekFor: $-) ifTrue: [ 168 regexp atEnd ifTrue: [ self errorBadRegexp ]. 169 170 next asciiValue + 1 to: regexp next asciiValue do: [:i | 171 answerStream nextPut: i asCharacter 172 ] 173 ] 174 ]. 175 ^answerStream contents! 176 177parseOptional: regexp 178 179 "Private - Parse an 'optional atom', that is an atom that can match to 180 several regular expressions." 181 182 | pos next result | 183 pos := regexp position. 184 result := OrderedCollection new. 185 [ 186 (next := regexp next) = $\ 187 ifTrue: [regexp next] 188 ifFalse: [ 189 next = $| ifTrue: [ 190 result addLast: (self class fromString: 191 '^', (regexp copyFrom: pos to: regexp position - 2)). 192 pos := regexp position 193 ]. 194 next = $} ifTrue: [ 195 result addLast: (self class fromString: 196 '^', (regexp copyFrom: pos to: regexp position - 2)). 197 ^result 198 ] 199 ]. 200 regexp atEnd 201 ] whileFalse: [ ]. 202 203 "If we come here, we have found no } : bad regular expression" 204 self errorBadRegexp 205! 206 207errorBadRegexp 208 209 "Seems like we had some problems parsing the regular expression" 210 self error: 'Bad regular expression' 211! 212 213char: aCharacter index: dummy 214 215 "Private - Check if the next character matchs to aCharacter" 216 217 ^string atEnd 218 ifTrue: [ false ] 219 ifFalse: [ aCharacter = string next ] 220! 221 222any: dummy index: dummy2 223 224 "Private - If we aren't at the end of the stream, skip a character, else 225 answer false" 226 227 ^string atEnd 228 ifTrue: [ false ] 229 ifFalse: [ string next. true ] 230! 231 232range: aString index: dummy 233 234 "Private - Check if the next character is included in aString" 235 236 ^string atEnd 237 ifTrue: [ false ] 238 ifFalse: [ aString includes: string next ] 239! 240 241notRange: aString index: dummy 242 243 "Private - Check if the next character is not included in aString" 244 245 ^string atEnd 246 ifTrue: [ false] 247 ifFalse: [ (aString includes: string next) not ] 248! 249 250optional: listOfRegexp index: dummy 251 252 "Private - Check if the next characters match to any of the RegularExpression 253 objects in listOfRegexp" 254 255 | pos | 256 257 string atEnd ifTrue: [^false]. 258 259 pos := string position. 260 listOfRegexp do: [ :re | 261 (re matchStream: string from: 1) ifTrue: [^true]. 262 string position: pos. 263 ]. 264 ^false 265! 266 267wild0any: atLeast1 index: index 268 269 "Private - Match a .* sequence" 270 271 ^self matchWild: #any:index: with: nil following: 272 [ self matchStream: string from: index + 1 ] 273! 274 275wild1any: atLeast1 index: index 276 277 "Private - Match a .+ sequence" 278 279 (self any: nil index: index) ifFalse: [^false]. 280 281 ^self matchWild: #any:index: with: nil following: 282 [ self matchStream: string from: index + 1 ] 283! 284 285wild0range: aString index: index 286 287 "Private - Match a [...]* sequence" 288 289 ^self matchWild: #range:index: with: aString following: 290 [ self matchStream: string from: index + 1 ] 291! 292 293wild1range: aString index: index 294 295 "Private - Match a [...]+ sequence" 296 297 (self range: aString index: index) ifFalse: [^false]. 298 ^self matchWild: #range:index: with: aString following: 299 [ self matchStream: string from: index + 1 ] 300! 301 302wild0notRange: aString index: index 303 304 "Private - Match a [...]* sequence" 305 306 ^self matchWild: #notRange:index: with: aString following: 307 [ self matchStream: string from: index + 1 ] 308! 309 310wild1notRange: aString index: index 311 312 "Private - Match a [...]+ sequence" 313 314 (self notRange: aString index: index) ifFalse: [^false]. 315 ^self matchWild: #notRange:index: with: aString following: 316 [ self matchStream: string from: index + 1 ] 317! 318 319wild0char: aCharacter index: index 320 321 "Private - Match a x* sequence" 322 323 ^self matchWild: #char:index: with: aCharacter following: 324 [ self matchStream: string from: index + 1 ] 325! 326 327wild1char: aCharacter index: index 328 329 "Private - Match a x+ sequence" 330 331 (self char: aCharacter index: index) ifFalse: [^false]. 332 ^self matchWild: #char:index: with: aCharacter following: 333 [ self matchStream: string from: index + 1 ] 334! 335 336matchWild: aSymbol with: arg following: aBlock 337 338 "Private - Helper function for * sequences (+ sequences are parsed by 339 checking for a match and then treating them as * sequences: for example, 340 x+ becomes xx*). Try to match as many characters as possible and then 341 look if the remaining part of the string matches the rest of the regular 342 expression (to do so, aBlock is evaluated): if yes, answer nil; if no, 343 try again with one character less. For example, matching [ABC]*AC to the 344 string BAC works in this way: 345 - try with the longest run of As, Bs or Cs (BAC). The rest of the 346 string (that is, nothing) doesn't match the regular expression AC, so... 347 - ...try with BA. The rest of the string (that is, C) doesn't match 348 the regular expression AC, so... 349 - ...try with B. The rest of the string (that is, AC) does match the 350 regular expression AC, so we answer nil" 351 352 | first last | 353 first := string position. 354 last := self findLastWild: aSymbol with: arg. 355 356 last to: first by: -1 do: [ :i | 357 (aBlock value == false) 358 ifFalse: [ ^nil ]. 359 360 i > 1 ifTrue: [self position: i - 1]. 361 ]. 362 ^false 363! 364 365findLastWild: aSymbol with: arg 366 "Send aSymbol with arg and nil as its parameter until it answers false and 367 answer the position of the last character for which aSymbol answered true" 368 369 [ string atEnd ifTrue: [ ^string position ]. 370 self perform: aSymbol with: arg with: nil ] whileTrue: [ ]. 371 372 string skip: -1. 373 ^string position 374! 375 376matchStream: aStream from: firstIndex 377 378 "Private - Match all the atoms from the firstIndex-th to the string on 379 which aStream is streaming. Answer true or false" 380 381 | result oldString | 382 383 oldString := string. 384 self string: aStream. 385 386 firstIndex to: self numberOfAtoms do: [ :i | 387 " result = true ---> go on; result = false ---> answer false; 388 result = nil ---> answer true" 389 result := self matchAtom: i. 390 result == true ifFalse: [ string := oldString. ^result isNil ]. 391 ]. 392 result := self checkIfAtEnd. 393 string := oldString. 394 ^result 395! 396 397string: aStream 398 399 "Private - Tell the other methods which string is being parsed" 400 401 string := aStream 402! 403 404checkIfAtEnd 405 406 "Private - Answer true if there is no $ or if we're at the end of the 407 parsed string" 408 409 ^noDollar or: [string atEnd] 410! 411 412numberOfAtoms 413 414 "Private - Answer the number of atoms in the receiver" 415 416 ^selectors size 417! 418 419matchAtom: index 420 421 "Private - Try to match an atom to string" 422 423 | result | 424 425 "index print. $ print. 426 (selectors at: index) print. $ print. 427 (params at: index) print. $ print. 428 string peek print. $ printNl." 429 430 ^self perform: (selectors at: index) with: (params at: index) with: index 431! ! 432