1/*
2    TextPattern.m
3
4    Implementation of operations on text patterns for the
5    ProjectManager application.
6
7    Copyright (C) 2005  Saso Kiselkov
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22*/
23
24#import "TextPattern.h"
25
26#import <Foundation/NSBundle.h>
27#import <Foundation/NSScanner.h>
28#import <Foundation/NSException.h>
29
/**
 * Releases one pattern item. An item of the multiple-characters kind
 * additionally holds a separately allocated character buffer, which is
 * released before the item itself.
 */
static void
FreeTextPatternItem (TextPatternItem *item)
{
  switch (item->type)
    {
    case MultipleCharactersTextPatternItem:
      // only this kind of item carries a heap buffer of its own
      free (item->data.multiChar.characters);
      break;
    default:
      break;
    }

  free (item);
}
40
/**
 * Parses a single text pattern item out of `string', starting at `*index'.
 *
 * An item is one of:
 *   "[...]"    a character class (a backslash escapes the next character),
 *   "."        any character,
 *   "<" ">"    beginning / ending of a word,
 *   "^" "$"    beginning / ending of a line,
 *   "\x"       the literal character x,
 *   anything else: that literal character.
 *
 * The item may be followed by a cardinality indication: "{min}",
 * "{min,max}", "*" (zero or more) or "?" (zero or one). Without one the
 * item's minimum and maximum counts are both set to 1.
 *
 * On success a newly allocated item is returned (release it with
 * FreeTextPatternItem) and `*index' is advanced to the first character
 * following the item. On a parse error the problem is logged and NULL is
 * returned; NULL is also returned when memory cannot be allocated. In both
 * failure cases `*index' is left unchanged.
 *
 * The caller must pass an `*index' that is a valid index into `string'.
 */
static TextPatternItem *
ParseTextPatternItem (NSString *string, unsigned int *index)
{
  unsigned int i = *index, n = [string length];
  TextPatternItem * newItem;
  unichar c;

  newItem = (TextPatternItem *) calloc(1, sizeof (TextPatternItem));
  if (newItem == NULL)
    {
      return NULL;
    }

  c = [string characterAtIndex: i];
  i++;
  switch (c)
    {
      case '[':
        {
          unichar * buf = NULL;
          unsigned int nChars = 0;
          // A ']' that happens to be the last character of the pattern
          // leaves i == n, so the position alone cannot tell a properly
          // closed class from an unterminated one.
          BOOL closed = NO;

          for (; i < n; i++)
            {
              unichar member = [string characterAtIndex: i];
              unichar * grown;

              // handle escapes
              if (member == '\\')
                {
                  if (i + 1 >= n)
                    {
                      NSLog(_(@"Text pattern item parse error in text "
                        @"pattern \"%@\" at index %i: unexpected end of "
                        @"pattern. Escape sequence expected."), string, i);

                      free (buf);
                      free (newItem);

                      return NULL;
                    }

                  i++;
                  member = [string characterAtIndex: i];
                }
              else if (member == ']')
                {
                  i++;
                  closed = YES;
                  break;
                }

              // grow through a temporary so that the characters collected
              // so far can still be released if the allocation fails
              grown = (unichar *) realloc(buf,
                sizeof (unichar) * (nChars + 1));
              if (grown == NULL)
                {
                  free (buf);
                  free (newItem);

                  return NULL;
                }
              buf = grown;
              buf[nChars] = member;
              nChars++;
            }

          if (!closed)
            {
              NSLog(_(@"Text pattern item parse error in text pattern "
                @"\"%@\" at index %i: unexpected end of character class."),
                string, i);

              free (buf);
              free (newItem);

              return NULL;
            }

          newItem->type = MultipleCharactersTextPatternItem;
          newItem->data.multiChar.nCharacters = nChars;
          newItem->data.multiChar.characters = buf;
        }
        break;
      case '.':
        newItem->type = AnyCharacterTextPatternItem;
        break;
      case '<':
        newItem->type = BeginningOfWordTextPatternItem;
        break;
      case '>':
        newItem->type = EndingOfWordTextPatternItem;
        break;
      case '^':
        newItem->type = BeginningOfLineTextPatternItem;
        break;
      case '$':
        newItem->type = EndingOfLineTextPatternItem;
        break;
      case '\\':
        if (i >= n)
          {
            NSLog(_(@"Text pattern item parse error in text pattern "
              @"\"%@\" at index %i: unexpected end of pattern. Escape "
              @"sequence expected."), string, i);

            free (newItem);
            return NULL;
          }
        c = [string characterAtIndex: i];
        i++;
        // fall through: the escaped character is taken literally

      default:
        newItem->type = SingleCharacterTextPatternItem;
        newItem->data.singleChar = c;
        break;
    }

  // is there trailing cardinality indication?
  if (i < n)
    {
      c = [string characterAtIndex: i];
      i++;

      switch (c)
        {
          case '{':
            {
              NSScanner * scanner;
              int value;

              if (newItem->type != SingleCharacterTextPatternItem &&
                  newItem->type != MultipleCharactersTextPatternItem &&
                  newItem->type != AnyCharacterTextPatternItem)
                {
                  NSLog(_(@"Text pattern item parse error in text pattern "
                    @"\"%@\" at index %i: no cardinality indication in "
                    @"'<', '>', '^' or '$' allowed."), string, i);

                  FreeTextPatternItem(newItem);

                  return NULL;
                }

              scanner = [NSScanner scannerWithString: string];

              [scanner setScanLocation: i];
              if (![scanner scanInt: &value])
                {
                  NSLog(_(@"Text pattern item parse error in text pattern "
                    @"\"%@\" at index %i: integer expected."), string,
                    (int) [scanner scanLocation]);

                  FreeTextPatternItem(newItem);

                  return NULL;
                }
              newItem->minCount = newItem->maxCount = value;
              i = (unsigned int) [scanner scanLocation];
              // Only running off the end is an error here: the character
              // at i may be the closing '}' in the pattern's last position.
              if (i >= n)
                {
                  NSLog(_(@"Text pattern item parse error in text pattern "
                    @"\"%@\": unexpected end of pattern, '}' or ',' "
                    @"expected."), string);

                  FreeTextPatternItem(newItem);

                  return NULL;
                }
              c = [string characterAtIndex: i];
              if (c == ',')
                {
                  [scanner setScanLocation: i + 1];
                  if (![scanner scanInt: &value])
                    {
                      NSLog(_(@"Text pattern item parser error in text "
                        @"pattern \"%@\" at index %i: integer expected."),
                        string, (int) [scanner scanLocation]);

                      FreeTextPatternItem(newItem);

                      return NULL;
                    }
                  newItem->maxCount = value;
                  i = (unsigned int) [scanner scanLocation];
                }
              if (i >= n)
                {
                  NSLog(_(@"Text pattern item parse error in text pattern "
                    @"\"%@\": unexpected end of pattern, '}' expected."),
                    string);

                  FreeTextPatternItem(newItem);

                  return NULL;
                }
              c = [string characterAtIndex: i];
              i++;
              if (c != '}')
                {
                  NSLog(_(@"Text pattern item parse error in text pattern "
                    @"\"%@\" at index %i: '}' expected."), string, i);

                  FreeTextPatternItem(newItem);

                  return NULL;
                }
            }
            break;
          case '*':
            newItem->minCount = 0;
            newItem->maxCount = 0x7fffffff;
            break;
          case '?':
            newItem->minCount = 0;
            newItem->maxCount = 1;
            break;
          // no cardinality indication - the character just read is part
          // of the next text pattern item, so un-read it
          default:
            i--;
            newItem->minCount = newItem->maxCount = 1;
            break;
        }
    }
  else
    {
      newItem->minCount = newItem->maxCount = 1;
    }

  *index = i;

  return newItem;
}
259
260#if 0
261// not used
/*
 * Debugging aid: logs a one-line description of `item' - its type and,
 * for the character-consuming kinds, its value and min/max counts.
 */
static void
DescribeTextPatternItem(TextPatternItem *item)
{
  if (item->type == SingleCharacterTextPatternItem)
    {
      NSLog(@"  type: single char, value: '%c', min: %i, max: %i",
        item->data.singleChar, item->minCount, item->maxCount);
    }
  else if (item->type == MultipleCharactersTextPatternItem)
    {
      NSString * members;

      members = [NSString
        stringWithCharacters: item->data.multiChar.characters
                      length: item->data.multiChar.nCharacters];
      NSLog(@"  type: multi char, value: '%@', min: %i, max: %i",
        members, item->minCount, item->maxCount);
    }
  else if (item->type == BeginningOfWordTextPatternItem)
    {
      NSLog(@"  type: beginning of word");
    }
  else if (item->type == EndingOfWordTextPatternItem)
    {
      NSLog(@"  type: ending of word");
    }
  else if (item->type == AnyCharacterTextPatternItem)
    {
      NSLog(@"  type: any character, min: %i, max: %i",
        item->minCount, item->maxCount);
    }
  else if (item->type == BeginningOfLineTextPatternItem)
    {
      NSLog(@"  type: beginning of line");
    }
  else if (item->type == EndingOfLineTextPatternItem)
    {
      NSLog(@"  type: ending of line");
    }
}
297#endif
298
/**
 * Compiles `string' into a text pattern by parsing it item by item.
 *
 * The returned pattern retains `string' and owns every parsed item;
 * release it with FreeTextPattern. Returns NULL if any item fails to
 * parse (the parser logs the reason) or if memory cannot be allocated.
 * An empty string compiles into a pattern with no items.
 */
TextPattern *
CompileTextPattern (NSString *string)
{
  TextPattern * pattern;
  unsigned int i, n;

  pattern = (TextPattern *) calloc(1, sizeof(TextPattern));
  if (pattern == NULL)
    {
      return NULL;
    }

  ASSIGN(pattern->string, string);

  // the parser advances i past each item it consumes
  for (i = 0, n = [string length]; i < n;)
    {
      TextPatternItem * item;
      TextPatternItem ** grown;

      item = ParseTextPatternItem(string, &i);
      if (item == NULL)
        {
          FreeTextPattern (pattern);

          return NULL;
        }

      // Enlarge the pattern buffer through a temporary and bump nItems
      // only afterwards, so that on failure the pattern still describes
      // exactly the items it owns and can be released normally.
      grown = (TextPatternItem **) realloc(pattern->items,
        (pattern->nItems + 1) * sizeof(TextPatternItem *));
      if (grown == NULL)
        {
          FreeTextPatternItem (item);
          FreeTextPattern (pattern);

          return NULL;
        }

      pattern->items = grown;
      pattern->items[pattern->nItems] = item;
      pattern->nItems++;
    }

  return pattern;
}
330
/**
 * Releases a pattern: every item it holds, the item array, the retained
 * pattern string and finally the pattern structure itself.
 *
 * Passing NULL is allowed and does nothing, mirroring free(), so that the
 * NULL returned by a failed CompileTextPattern can be handed over safely.
 */
void
FreeTextPattern (TextPattern *pattern)
{
  unsigned int i;

  if (pattern == NULL)
    {
      return;
    }

  for (i = 0; i < pattern->nItems; i++)
    {
      FreeTextPatternItem(pattern->items[i]);
    }

  free(pattern->items);

  TEST_RELEASE(pattern->string);

  free(pattern);
}
347
/**
 * Tells whether `c' occurs among the first `n' characters of `charClass'.
 * Returns YES on the first occurrence found, NO if there is none (which
 * includes the case of `n' being zero).
 */
static inline BOOL
IsMemberOfCharacterClass(unichar c, unichar *charClass, unsigned int n)
{
  // walk the class front to back, counting down the remaining entries
  while (n > 0)
    {
      if (*charClass == c)
        {
          return YES;
        }

      charClass++;
      n--;
    }

  return NO;
}
363
/**
 * Tells whether `c' is an ASCII letter ('a'-'z', 'A'-'Z') or an ASCII
 * digit ('0'-'9'). Returns YES if it is, NO for any other character.
 */
static inline BOOL
my_isalnum (unichar c)
{
  const BOOL isLowercase = (c >= 'a' && c <= 'z');
  const BOOL isUppercase = (c >= 'A' && c <= 'Z');
  const BOOL isDigit = (c >= '0' && c <= '9');

  return (isLowercase || isUppercase || isDigit) ? YES : NO;
}
382
/**
 * Checks whether `item' matches in `string' (of `stringLength' characters)
 * at position `*offset'.
 *
 * Character-consuming items advance `*offset' past what they matched:
 *  - a single character / character class item consumes as many matching
 *    characters as are available, up to the item's maxCount, and succeeds
 *    if at least minCount were consumed;
 *  - an "any character" item consumes exactly minCount characters and
 *    fails if the string is too short for that.
 *
 * Word and line boundary items only inspect the characters around
 * `*offset' and never move it.
 *
 * Returns YES on a match. On NO, `*offset' is left unchanged. An item of
 * an unknown type never matches.
 */
static inline BOOL
CheckTextPatternItemPresence(TextPatternItem *item,
                             unichar *string,
                             unsigned int stringLength,
                             unsigned int *offset)
{
  switch (item->type)
    {
    case SingleCharacterTextPatternItem:
      {
        unsigned int i;
        unsigned int n;

         // read characters while they are equal to our letter
        for (n = 0, i = *offset;
             i < stringLength && n < item->maxCount;
             i++, n++)
          {
            if (string[i] != item->data.singleChar)
              {
                break;
              }
          }

        if (n >= item->minCount)
          {
            *offset = i;
            return YES;
          }

        return NO;
      }
    case MultipleCharactersTextPatternItem:
      {
        unsigned int i;
        unsigned int n;

         // read characters while they belong to our character class
        for (n = 0, i = *offset;
             i < stringLength && n < item->maxCount;
             i++, n++)
          {
            if (!IsMemberOfCharacterClass(string[i],
                                          item->data.multiChar.characters,
                                          item->data.multiChar.nCharacters))
              {
                break;
              }
          }

        if (n >= item->minCount)
          {
            *offset = i;
            return YES;
          }

        return NO;
      }
    case AnyCharacterTextPatternItem:
      {
        unsigned int i, n;

         // only the minimum count is consumed; maxCount is not consulted
        for (i = *offset, n = 0; n < item->minCount; i++, n++)
          {
            if (i >= stringLength)
              {
                return NO;
              }
          }

        *offset = i;
        return YES;
      }
    case BeginningOfWordTextPatternItem:
      {
        unsigned int i = *offset;

         // no word can begin past the end of the string
        if (i >= stringLength)
          {
            return NO;
          }

         // the start of the string always counts as a word beginning
        if (i == 0)
          {
            return YES;
          }

        return my_isalnum(string[i - 1]) ? NO : YES;
      }
    case EndingOfWordTextPatternItem:
      {
        unsigned int i = *offset;

         // the end of the string always counts as a word ending
        if (i >= stringLength)
          {
            return YES;
          }

        return my_isalnum(string[i]) ? NO : YES;
      }
    case BeginningOfLineTextPatternItem:
      {
        unsigned int i = *offset;

         // the start of the string always counts as a line beginning
        if (i == 0)
          {
            return YES;
          }

        return (string[i - 1] == '\n' || string[i - 1] == '\r');
      }
    case EndingOfLineTextPatternItem:
      {
        unsigned int i = *offset;

         // A line ends at `i' when the character AT `i' (the first one
         // not yet consumed) is a line terminator, or when nothing is
         // left. Looking at i + 1 instead would report the end of line
         // one character too early.
        if (i >= stringLength)
          {
            return YES;
          }

        return (string[i] == '\n' || string[i] == '\r');
      }
    }

  // Unknown item type: treated as "no match". Raising an
  // NSInternalInconsistencyException here was considered but is
  // intentionally left disabled.
  return NO;
}
543
/**
 * Tries to match `pattern' against `string' (of `stringLength' characters)
 * starting exactly at `index', checking the pattern's items one after
 * another.
 *
 * Returns the number of characters covered by the match, or 0 if some
 * item did not match. Note that a successful match which consumes no
 * characters also yields 0.
 */
unsigned int
CheckTextPatternPresenceInString(TextPattern *pattern,
                                 unichar *string,
                                 unsigned int stringLength,
                                 unsigned int index)
{
  unsigned int position = index;
  unsigned int itemIndex = 0;

  while (itemIndex < pattern->nItems)
    {
      BOOL matched;

      matched = CheckTextPatternItemPresence(pattern->items[itemIndex],
                                             string,
                                             stringLength,
                                             &position);
      if (!matched)
        {
          // one failing item fails the whole pattern
          return 0;
        }

      itemIndex++;
    }

  return position - index;
}
574
/**
 * Determines which characters a match of `pattern' may start with, based
 * on the first character-consuming item in the pattern (word and line
 * boundary items in front of it are skipped).
 *
 * Returns:
 *  - a newly malloc'ed, zero-terminated array holding the single
 *    character or the members of the character class of that item
 *    (NOTE(review): presumably the caller is expected to free() it -
 *    confirm against the callers);
 *  - (unichar *) -1 if that item matches any character;
 *  - NULL if the pattern contains no character-consuming item, or if
 *    memory could not be allocated.
 */
unichar *PermissibleCharactersAtPatternBeginning(TextPattern *pattern)
{
  unsigned int i;

  for (i = 0; i < pattern->nItems; i++)
    {
      TextPatternItem * item = pattern->items[i];

      switch(item->type)
        {
        case SingleCharacterTextPatternItem:
          {
            unichar * buf;

            buf = malloc(2 * sizeof(unichar));
            if (buf != NULL)
              {
                buf[0] = item->data.singleChar;
                buf[1] = 0;
              }

            return buf;
          }
        case MultipleCharactersTextPatternItem:
          {
            unichar * buf;
            unsigned int nChars = item->data.multiChar.nCharacters;

            // one extra slot for the terminating zero
            buf = malloc((nChars + 1) * sizeof(unichar));
            if (buf != NULL)
              {
                // Copy only the nChars members the class actually holds;
                // the source buffer has no terminator of its own and is
                // NULL for an empty class.
                if (nChars > 0)
                  {
                    memcpy(buf, item->data.multiChar.characters,
                      nChars * sizeof(unichar));
                  }
                buf[nChars] = 0;
              }

            return buf;
          }
        case AnyCharacterTextPatternItem:
          return (unichar *) -1;

        default: break;
        }
    }

  return NULL;
}
614