/*
    TextPattern.m

    Implementation of operations on text patterns for the
    ProjectManager application.

    Copyright (C) 2005  Saso Kiselkov

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

#import "TextPattern.h"

#import <Foundation/NSBundle.h>
#import <Foundation/NSScanner.h>
#import <Foundation/NSException.h>

#include <stdlib.h>
#include <string.h>

/**
 * Frees a single pattern item. A character-class item also owns its
 * character buffer, which is freed along with it.
 */
static void
FreeTextPatternItem (TextPatternItem *item)
{
  if (item->type == MultipleCharactersTextPatternItem)
    {
      free (item->data.multiChar.characters);
    }

  free (item);
}

/**
 * Parses one pattern item (plus its optional trailing cardinality)
 * out of `string', starting at `*index'.
 *
 * Recognized items:
 *   [...]   character class (a backslash escapes the next character)
 *   .       any character
 *   < >     beginning / ending of word
 *   ^ $     beginning / ending of line
 *   \x      the literal character x
 *   x       any other character stands for itself
 *
 * Recognized cardinalities: {n}, {n,m}, '*' (0 .. 0x7fffffff) and
 * '?' (0 .. 1). Without one, the item must occur exactly once.
 *
 * The caller must guarantee that `*index' is a valid index in `string'.
 *
 * @return A newly allocated item, with `*index' advanced past the
 *   consumed characters, or NULL if a parse error occurred (the error
 *   is logged and `*index' is left untouched).
 */
static TextPatternItem *
ParseTextPatternItem (NSString *string, unsigned int *index)
{
  unsigned int i = *index, n = [string length];
  TextPatternItem * newItem;
  unichar c;

  newItem = (TextPatternItem *) calloc(1, sizeof (TextPatternItem));

  c = [string characterAtIndex: i];
  i++;
  switch (c)
    {
    case '[':
      {
        unichar * buf = NULL;
        unsigned int nChars = 0;
        // Tracks whether the closing ']' was really seen. Comparing
        // `i' against `n' is not enough, because a class which ends
        // the pattern (e.g. "[abc]") also leaves i == n.
        BOOL closed = NO;

        for (; i < n; i++)
          {
            unichar ch = [string characterAtIndex: i];

            // handle escapes
            if (ch == '\\')
              {
                if (i + 1 >= n)
                  {
                    NSLog(_(@"Text pattern item parse error in text "
                      @"pattern \"%@\" at index %i: unexpected end of "
                      @"pattern. Escape sequence expected."), string, i);

                    free (buf);
                    free (newItem);

                    return NULL;
                  }

                i++;
                ch = [string characterAtIndex: i];
              }
            else if (ch == ']')
              {
                i++;
                closed = YES;
                break;
              }

            nChars++;
            buf = (unichar *) realloc(buf, sizeof (unichar) * nChars);
            buf[nChars - 1] = ch;
          }

        if (!closed)
          {
            NSLog(_(@"Text pattern item parse error in text pattern "
              @"\"%@\" at index %i: unexpected end of character class."),
              string, i);

            free (buf);
            free (newItem);

            return NULL;
          }

        newItem->type = MultipleCharactersTextPatternItem;
        newItem->data.multiChar.nCharacters = nChars;
        newItem->data.multiChar.characters = buf;
      }
      break;
    case '.':
      newItem->type = AnyCharacterTextPatternItem;
      break;
    case '<':
      newItem->type = BeginningOfWordTextPatternItem;
      break;
    case '>':
      newItem->type = EndingOfWordTextPatternItem;
      break;
    case '^':
      newItem->type = BeginningOfLineTextPatternItem;
      break;
    case '$':
      newItem->type = EndingOfLineTextPatternItem;
      break;
    case '\\':
      if (i >= n)
        {
          NSLog(_(@"Text pattern item parse error in text pattern "
            @"\"%@\" at index %i: unexpected end of pattern. Escape "
            @"sequence expected."), string, i);

          free (newItem);
          return NULL;
        }
      c = [string characterAtIndex: i];
      i++;
      // deliberate fall-through: the escaped character is a literal

    default:
      newItem->type = SingleCharacterTextPatternItem;
      newItem->data.singleChar = c;
      break;
    }

  // is there trailing cardinality indication?
  if (i < n)
    {
      c = [string characterAtIndex: i];
      i++;

      switch (c)
        {
        case '{':
          {
            NSScanner * scanner;
            int value;

            if (newItem->type != SingleCharacterTextPatternItem &&
                newItem->type != MultipleCharactersTextPatternItem &&
                newItem->type != AnyCharacterTextPatternItem)
              {
                NSLog(_(@"Text pattern item parse error in text pattern "
                  @"\"%@\" at index %i: no cardinality indication in "
                  @"'<', '>', '^' or '$' allowed."), string, i);

                FreeTextPatternItem(newItem);

                return NULL;
              }

            scanner = [NSScanner scannerWithString: string];

            [scanner setScanLocation: i];
            if (![scanner scanInt: &value])
              {
                // -scanLocation is wider than int on 64-bit targets,
                // so it is narrowed explicitly to match the %i.
                NSLog(_(@"Text pattern item parse error in text pattern "
                  @"\"%@\" at index %i: integer expected."), string,
                  (int) [scanner scanLocation]);

                FreeTextPatternItem(newItem);

                return NULL;
              }
            newItem->minCount = newItem->maxCount = value;
            i = [scanner scanLocation];
            // Only a pattern that really ended is an error here. A
            // '}' sitting on the very last index (as in "a{3}") is
            // fine and is consumed below.
            if (i >= n)
              {
                NSLog(_(@"Text pattern item parse error in text pattern "
                  @"\"%@\": unexpected end of pattern, '}' or ',' "
                  @"expected."), string);

                FreeTextPatternItem(newItem);

                return NULL;
              }
            c = [string characterAtIndex: i];
            if (c == ',')
              {
                [scanner setScanLocation: i + 1];
                if (![scanner scanInt: &value])
                  {
                    NSLog(_(@"Text pattern item parser error in text "
                      @"pattern \"%@\" at index %i: integer expected."),
                      string, (int) [scanner scanLocation]);

                    FreeTextPatternItem(newItem);

                    return NULL;
                  }
                newItem->maxCount = value;
                i = [scanner scanLocation];
              }
            if (i >= n)
              {
                NSLog(_(@"Text pattern item parse error in text pattern "
                  @"\"%@\": unexpected end of pattern, '}' expected."),
                  string);

                FreeTextPatternItem(newItem);

                return NULL;
              }
            c = [string characterAtIndex: i];
            i++;
            if (c != '}')
              {
                NSLog(_(@"Text pattern item parse error in text pattern "
                  @"\"%@\" at index %i: '}' expected."), string, i);

                FreeTextPatternItem(newItem);

                return NULL;
              }
          }
          break;
        case '*':
          newItem->minCount = 0;
          newItem->maxCount = 0x7fffffff;
          break;
        case '?':
          newItem->minCount = 0;
          newItem->maxCount = 1;
          break;
        default:
          // no cardinality indication - the next character is part of
          // the next text pattern item, so put it back
          i--;
          newItem->minCount = newItem->maxCount = 1;
          break;
        }
    }
  else
    {
      newItem->minCount = newItem->maxCount = 1;
    }

  *index = i;

  return newItem;
}

#if 0
// not used
static void
DescribeTextPatternItem(TextPatternItem *item)
{
  switch (item->type)
    {
    case SingleCharacterTextPatternItem:
      NSLog(@"  type: single char, value: '%c', min: %i, max: %i",
        item->data.singleChar,
        item->minCount,
        item->maxCount);
      break;
    case MultipleCharactersTextPatternItem:
      NSLog(@"  type: multi char, value: '%@', min: %i, max: %i",
        [NSString stringWithCharacters: item->data.multiChar.characters
                                length: item->data.multiChar.nCharacters],
        item->minCount, item->maxCount);
      break;
    case BeginningOfWordTextPatternItem:
      NSLog(@"  type: beginning of word");
      break;
    case EndingOfWordTextPatternItem:
      NSLog(@"  type: ending of word");
      break;
    case AnyCharacterTextPatternItem:
      NSLog(@"  type: any character, min: %i, max: %i",
        item->minCount, item->maxCount);
      break;
    case BeginningOfLineTextPatternItem:
      NSLog(@"  type: beginning of line");
      break;
    case EndingOfLineTextPatternItem:
      NSLog(@"  type: ending of line");
      break;
    }
}
#endif

/**
 * Compiles `string' into a newly allocated text pattern by parsing
 * pattern items until the string is exhausted. The pattern retains
 * `string'.
 *
 * @return The compiled pattern (dispose of it with FreeTextPattern),
 *   or NULL if any item failed to parse.
 */
TextPattern *
CompileTextPattern (NSString *string)
{
  TextPattern * pattern;
  unsigned int i, n;

  pattern = (TextPattern *) calloc(1, sizeof(TextPattern));

  ASSIGN(pattern->string, string);

  // the parser advances `i' itself, hence no increment clause
  for (i = 0, n = [string length]; i < n;)
    {
      TextPatternItem * item;

      item = ParseTextPatternItem(string, &i);
      if (item == NULL)
        {
          FreeTextPattern (pattern);

          return NULL;
        }

      // enlarge the pattern buffer
      pattern->nItems++;
      pattern->items = (TextPatternItem **) realloc(pattern->items,
        pattern->nItems * sizeof(TextPatternItem *));
      pattern->items[pattern->nItems - 1] = item;
    }

  return pattern;
}

/**
 * Frees a pattern together with all of its items and releases the
 * pattern string. Passing NULL is a no-op.
 */
void
FreeTextPattern (TextPattern *pattern)
{
  unsigned int i;

  if (pattern == NULL)
    {
      return;
    }

  for (i = 0; i < pattern->nItems; i++)
    {
      FreeTextPatternItem(pattern->items[i]);
    }

  free(pattern->items);

  TEST_RELEASE(pattern->string);

  free(pattern);
}

/**
 * Returns YES if `c' is one of the first `n' characters in `charClass'
 * (linear search), and NO otherwise.
 */
static inline BOOL
IsMemberOfCharacterClass(unichar c, unichar *charClass, unsigned int n)
{
  unsigned int i;

  for (i = 0; i < n; i++)
    {
      if (charClass[i] == c)
        {
          return YES;
        }
    }

  return NO;
}

/**
 * Returns YES if the passed character argument is an alphanumeric
 * character, and NO if it isn't. Only the ASCII ranges a-z, A-Z and
 * 0-9 are considered alphanumeric.
 */
static inline BOOL
my_isalnum (unichar c)
{
  if ((c >= 'a' && c <= 'z') ||
      (c >= 'A' && c <= 'Z') ||
      (c >= '0' && c <= '9'))
    {
      return YES;
    }
  else
    {
      return NO;
    }
}

/**
 * Tests whether `item' matches in `string' at position `*offset'.
 *
 * Character and character-class items consume up to `maxCount'
 * matching characters and succeed if at least `minCount' were found.
 * An any-character item consumes exactly `minCount' characters. The
 * word and line anchors consume nothing.
 *
 * @return YES on a match, with `*offset' advanced past the consumed
 *   characters; NO otherwise, with `*offset' untouched.
 */
static inline BOOL
CheckTextPatternItemPresence(TextPatternItem *item,
                             unichar *string,
                             unsigned int stringLength,
                             unsigned int *offset)
{
  switch (item->type)
    {
    case SingleCharacterTextPatternItem:
      {
        unsigned int i;
        unsigned int n;

        // read characters while they are equal to our letter
        for (n = 0, i = *offset;
             i < stringLength && n < item->maxCount;
             i++, n++)
          {
            if (string[i] != item->data.singleChar)
              {
                break;
              }
          }

        if (n >= item->minCount)
          {
            *offset = i;
            return YES;
          }
        else
          {
            return NO;
          }
      }
    case MultipleCharactersTextPatternItem:
      {
        unsigned int i;
        unsigned int n;

        // read characters while they belong to our character class
        for (n = 0, i = *offset;
             i < stringLength && n < item->maxCount;
             i++, n++)
          {
            if (!IsMemberOfCharacterClass(string[i],
              item->data.multiChar.characters,
              item->data.multiChar.nCharacters))
              {
                break;
              }
          }

        if (n >= item->minCount)
          {
            *offset = i;
            return YES;
          }
        else
          {
            return NO;
          }
      }
    case AnyCharacterTextPatternItem:
      {
        unsigned int i, n;

        // only the minimum count is consumed; `maxCount' is not
        // consulted for this item type
        for (i = *offset, n = 0; n < item->minCount; i++, n++)
          {
            if (i >= stringLength)
              {
                return NO;
              }
          }

        *offset = i;
        return YES;
      }
    case BeginningOfWordTextPatternItem:
      {
        unsigned int i = *offset;

        if (i >= stringLength)
          {
            return NO;
          }

        // a word may begin at the start of the string or after any
        // non-alphanumeric character
        if (i > 0)
          {
            if (my_isalnum(string[i - 1]))
              {
                return NO;
              }
            else
              {
                return YES;
              }
          }
        else
          {
            return YES;
          }
      }
    case EndingOfWordTextPatternItem:
      {
        unsigned int i = *offset;

        if (i >= stringLength)
          {
            return YES;
          }

        if (!my_isalnum(string[i]))
          {
            return YES;
          }
        else
          {
            return NO;
          }
      }
    case BeginningOfLineTextPatternItem:
      {
        unsigned int i = *offset;

        if (i > 0)
          {
            return (string[i - 1] == '\n' || string[i - 1] == '\r');
          }
        else
          {
            return YES;
          }
      }
    case EndingOfLineTextPatternItem:
      {
        unsigned int i = *offset;

        // `i' is the first character which has not been consumed yet,
        // so a line ends here if that very character is a line break
        // (or if there are no characters left at all).
        if (i < stringLength)
          {
            return (string[i] == '\n' || string[i] == '\r');
          }
        else
          {
            return YES;
          }
      }
    }

/*  [NSException raise: NSInternalInconsistencyException
              format: _(@"Unknown text pattern item type %i encountered."),
    item->type];*/

  return NO;
}

/**
 * Tests whether `pattern' matches in `string' starting at `index', by
 * matching each of the pattern's items in sequence.
 *
 * @return The number of characters the match spans, or 0 if the
 *   pattern doesn't match. A successful match which consumes no
 *   characters also yields 0.
 */
unsigned int
CheckTextPatternPresenceInString(TextPattern *pattern,
                                 unichar *string,
                                 unsigned int stringLength,
                                 unsigned int index)
{
  unsigned int i, off;

  off = index;

  for (i = 0; i < pattern->nItems; i++)
    {
      if (!CheckTextPatternItemPresence(pattern->items[i],
                                        string,
                                        stringLength,
                                        &off))
        {
          break;
        }
    }

  if (i == pattern->nItems)
    {
      return off - index;
    }
  else
    {
      return 0;
    }
}

/**
 * Returns the characters with which a match of `pattern' may begin,
 * judging by the first item in the pattern which consumes characters
 * (word and line anchors are skipped).
 *
 * @return A malloc'ed, 0-terminated buffer of characters, the special
 *   value (unichar *) -1 if that item is an any-character item, or
 *   NULL if the pattern contains no character-consuming item. Callers
 *   must check for -1 and NULL before reading or freeing the result.
 */
// NOTE(review): an item's minCount is not taken into account, so for
// a pattern such as "a*b" only 'a' is reported - confirm whether the
// callers rely on this.
unichar *PermissibleCharactersAtPatternBeginning(TextPattern *pattern)
{
  unsigned int i;

  for (i = 0; i < pattern->nItems; i++)
    {
      switch(pattern->items[i]->type)
        {
        case SingleCharacterTextPatternItem:
          {
            unichar * buf;

            buf = malloc(2 * sizeof(unichar));
            buf[0] = pattern->items[i]->data.singleChar;
            buf[1] = 0;

            return buf;
          }
        case MultipleCharactersTextPatternItem:
          {
            unichar * buf;
            unsigned int nChars = pattern->items[i]->data.multiChar.nCharacters;

            // Copy exactly the stored characters and terminate the
            // copy separately: the source buffer holds no terminator
            // and is NULL for an empty class.
            buf = malloc((nChars + 1) * sizeof(unichar));
            if (nChars > 0)
              {
                memcpy(buf, pattern->items[i]->data.multiChar.characters,
                  nChars * sizeof(unichar));
              }
            buf[nChars] = 0;

            return buf;
          }
        case AnyCharacterTextPatternItem:
          return (unichar *) -1;

        default: break;
        }
    }

  return NULL;
}