1 /***************************************************************
2 
3    bwb_str.c       String-Management Routines
4                         for Bywater BASIC Interpreter
5 
6                         Copyright (c) 1993, Ted A. Campbell
7                         Bywater Software
8 
9                         email: tcamp@delphi.com
10 
11         Copyright and Permissions Information:
12 
13         All U.S. and international rights are claimed by the author,
14         Ted A. Campbell.
15 
16    This software is released under the terms of the GNU General
17    Public License (GPL), which is distributed with this software
18    in the file "COPYING".  The GPL specifies the terms under
19    which users may copy and use the software in this distribution.
20 
21    A separate license is available for commercial distribution,
22    for information on which you should contact the author.
23 
24 ***************************************************************/
25 
26 /*---------------------------------------------------------------*/
27 /* NOTE: Modifications marked "JBV" were made by Jon B. Volkoff, */
28 /* 11/1995 (eidetics@cerf.net).                                  */
29 /*                                                               */
30 /* Those additionally marked with "DD" were at the suggestion of */
31 /* Dale DePriest (daled@cadence.com).                            */
32 /*                                                               */
33 /* Version 3.00 by Howard Wulf, AF5NE                            */
34 /*                                                               */
35 /* Version 3.10 by Howard Wulf, AF5NE                            */
36 /*                                                               */
37 /* Version 3.20 by Howard Wulf, AF5NE                            */
38 /*                                                               */
39 /*---------------------------------------------------------------*/
40 
41 
42 
43 /***************************************************************
44 
45 BASIC allows embedded NUL (0) characters.  C str*() does not.
46 
47 ALL the StringType code should use mem*() and ->length.
48 ALL the StringType code should prevent string overflow.
49 
50 ***************************************************************/
51 
52 
53 
54 
55 #include "bwbasic.h"
56 
57 
/*
 * Forward declarations of the file-local helpers that IsLike() calls:
 *   CharListToSet() fills the charList[] table from pattern[start..stop];
 *   IndexOf() returns the index of 'find' in 'buffer' at or after 'start',
 *   or -1 when it is not present.
 */
58 static int CharListToSet (char *pattern, int start, int stop);
59 static int IndexOf (char *buffer, char find, int start);
60 
61 
62 /***************************************************************
63 
64         FUNCTION:       str_btob()
65 
66         DESCRIPTION:    This C function assigns a bwBASIC string
67          structure to another bwBASIC string
68          structure.
69 
70 ***************************************************************/
71 
72 int
str_btob(StringType * d,StringType * s)73 str_btob (StringType * d, StringType * s)
74 {
75 
76   assert (d != NULL);
77   assert (s != NULL);
78   /*  assert( s->length >= 0 ); */
79   assert( My != NULL );
80 
81   if (s->length > MAXLEN)
82   {
83     WARN_STRING_TOO_LONG;        /* str_btob */
84     s->length = MAXLEN;
85   }
86 
87   /* get memory for new buffer */
88   if (d->sbuffer != NULL)
89   {
90     /* free the destination's buffer */
91     if (d->sbuffer == My->MaxLenBuffer)
92     {
93       /*
94       ** this occurs when setting the return value of a multi-line string user function
95       **
96       ** FUNCTION INKEY$
97       **   DIM A$
98       **   LINE INPUT A$
99       **   LET INKEY$ = LEFT$( A$, 1 )
100       ** END FUNCTION
101       **
102       */
103     }
104     else if (d->sbuffer == My->ConsoleOutput)
105     {
106       /* this should never occur, but let's make sure we don't crash if it does */
107       WARN_INTERNAL_ERROR;
108       /* continue processing */
109     }
110     else if (d->sbuffer == My->ConsoleInput)
111     {
112       /* this should never occur, but let's make sure we don't crash if it does */
113       WARN_INTERNAL_ERROR;
114       /* continue processing */
115     }
116     else
117     {
118       free (d->sbuffer);
119       d->sbuffer = NULL;
120     }
121     d->sbuffer = NULL;
122     d->length = 0;
123   }
124   if (d->sbuffer == NULL)
125   {
126     /* allocate the destination's buffer */
127     d->length = 0;
128     if ((d->sbuffer =
129          (char *) calloc (s->length + 1 /* NulChar */ ,
130                           sizeof (char))) == NULL)
131     {
132       WARN_OUT_OF_MEMORY;
133       return FALSE;
134     }
135   }
136   /* write the b string to the b string */
137   assert( d->sbuffer != NULL );
138   if( s->length > 0 )
139   {
140     assert( s->sbuffer != NULL );
141     bwb_memcpy (d->sbuffer, s->sbuffer, s->length);
142   }
143   d->length = s->length;
144   d->sbuffer[d->length] = NulChar;
145   return TRUE;
146 }
147 
148 
149 /***************************************************************
150 
151         FUNCTION:       str_cmp()
152 
153    DESCRIPTION:    This C function performs the equivalent
154          of the C strcmp() function, using BASIC
155          strings.
156 
157 ***************************************************************/
158 
159 int
str_cmp(StringType * a,StringType * b)160 str_cmp (StringType * a, StringType * b)
161 {
162 
163   assert (a != NULL);
164   assert (b != NULL);
165   assert( My != NULL );
166   assert( My->CurrentVersion != NULL );
167 
168   if (a->length > MAXLEN)
169   {
170     WARN_STRING_TOO_LONG;        /* str_cmp */
171     a->length = MAXLEN;
172   }
173   if (b->length > MAXLEN)
174   {
175     WARN_STRING_TOO_LONG;        /* str_cmp */
176     b->length = MAXLEN;
177   }
178   if (a->sbuffer == NULL)
179   {
180     if (b->sbuffer == NULL)
181     {
182       return 0;
183     }
184     if (b->length == 0)
185     {
186       return 0;
187     }
188     return 1;
189   }
190   assert( a->sbuffer != NULL );
191   a->sbuffer[a->length] = NulChar;
192 
193   if (b->sbuffer == NULL)
194   {
195     if (a->sbuffer == NULL)
196     {
197       return 0;
198     }
199     if (a->length == 0)
200     {
201       return 0;
202     }
203     return -1;
204   }
205   assert( b->sbuffer != NULL );
206   b->sbuffer[b->length] = NulChar;
207 
208   if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
209   {
210     /* case insensitive */
211     return bwb_stricmp (a->sbuffer, b->sbuffer);        /* NOTE: embedded NUL characters terminate comparison */
212   }
213   else
214   {
215     /* case sensitive */
216     return bwb_strcmp (a->sbuffer, b->sbuffer);        /* NOTE: embedded NUL characters terminate comparison */
217   }
218 }
219 
220 /***************************************************************
221 
222                      MATCH
223 
224 ***************************************************************/
225 
226 int
str_match(char * A,int A_Length,char * B,int B_Length,int I_Start)227 str_match (char *A, int A_Length, char *B, int B_Length, int I_Start)
228 {
229   /*
230      SYNTAX: j% = MATCH( a$, b$, i% )
231 
232      MATCH returns the position of the first occurrence of a$ in b$
233      starting with the character position given by the third parameter.
234      A zero is returned if no MATCH is found.
235 
236      The following pattern-matching features are available:
237      # matches any digit (0-9).
238      ! matches any upper-or lower-case letter.
239      ? matches any character.
240      \ serves as an escape character indicating the following character does not have special meaning.
241 
242      For example, a ? signifies any character is a MATCH unless preceded by a \.
243      a$ and b$ must be strings.
244      If either of these arguments are numeric, an error occurs.
245      If i% is real, it is converted to an integer.
246      If i% is a string, an error occurs.
247      If i% is negative or zero, a run-time error occurs.
248      When i% is greater than the length of b$, zero is returned.
249      If b$ is a null string, a 0 is returned.
250      If b$ is not null, but a$ is null, a 1 is returned.
251 
252      Examples:
253      MATCH( "is", "Now is the", 1) returns 5
254      MATCH( "##", "October 8, 1876", 1) returns 12
255      MATCH( "a?", "character", 4 ) returns 5
256      MATCH( "\#", "123#45", 1) returns 4
257      MATCH( "ABCD", "ABC", 1 ) returns 0
258      MATCH( "\#1\\\?", "1#1\?2#", 1 ) returns 2
259    */
260 
261 
262   int a;                        /* current position in A$ */
263   int b;                        /* current position in B$ */
264 
265   assert (A != NULL);
266   assert (B != NULL);
267 
268 
269   if (I_Start <= 0)
270   {
271     return 0;
272   }
273   if (I_Start > B_Length)
274   {
275     return 0;
276   }
277   if (B_Length <= 0)
278   {
279     return 0;
280   }
281   if (A_Length <= 0)
282   {
283     return 1;
284   }
285   I_Start--;                        /* BASIC to C */
286   for (b = I_Start; b < B_Length; b++)
287   {
288     int n;                        /* number of characters in A$ matched with B$ */
289 
290     n = 0;
291     for (a = 0; a < A_Length; a++)
292     {
293       int bn;
294       bn = b + n;
295       if (A[a] == '#' && bwb_isdigit (B[bn]))
296       {
297         n++;
298       }
299       else if (A[a] == '!' && bwb_isalpha (B[bn]))
300       {
301         n++;
302       }
303       else if (A[a] == '?')
304       {
305         n++;
306       }
307       else if (a < (A_Length - 1) && A[a] == '\\' && A[a + 1] == B[bn])
308       {
309         n++;
310         a++;
311       }
312       else if (A[a] == B[bn])
313       {
314         n++;
315       }
316       else
317       {
318         break;
319       }
320     }
321     if (a == A_Length)
322     {
323       b++;                        /* C to BASIC */
324       return b;
325     }
326   }
327   return 0;
328 }
329 
330 
331 
332 /***************************************************************
333 
334         FUNCTION:       str_like()
335 
336    DESCRIPTION:    This C function performs the equivalent
337          of the BASIC LIKE operator, using BASIC
338          strings.
339 
340 ***************************************************************/
341 
342 /*
343 inspired by http://www.blackbeltcoder.com/Articles/net/implementing-vbs-like-operator-in-c
344 */
345 
346 /*
347 KNOWN ISSUES:
348 To match the character '[', use "[[]".
349 To match the character '?', use "[?]".
350 To match the character '*', use "[*]".
351 Does not match "" with "[]" or "[!]".
352 */
353 
/*
 * Marker values stored in charList[]:
 *   CHAR_SET  the byte is a member of the current character list
 *   CHAR_CLR  the byte is not a member
 * IsLike() also stores one of these two values in its IsExclude flag
 * (CHAR_SET for "[!...]", CHAR_CLR for "[...]") and compares a
 * charList[] entry against it directly.
 */
354 #define CHAR_SET '*'
355 #define CHAR_CLR ' '
356 
/*
 * Membership table for the character list most recently processed by
 * CharListToSet(), which resets every entry to CHAR_CLR on each call.
 * Entries are indexed by (0x00FF & character).
 * This is a single file-scope table, so each call overwrites the
 * result of the previous one.
 */
357 static char charList[256];
358 
359 static int
IndexOf(char * buffer,char find,int start)360 IndexOf (char *buffer, char find, int start)
361 {
362   int buffer_count;
363   int buffer_length;
364 
365   assert (buffer != NULL);
366   assert( My != NULL );
367   assert( My->CurrentVersion != NULL );
368 
369   buffer_length = bwb_strlen (buffer);
370 
371   if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
372   {
373     find = bwb_toupper (find);
374   }
375 
376   for (buffer_count = start; buffer_count < buffer_length; buffer_count++)
377   {
378     char theChar;
379 
380     theChar = buffer[buffer_count];
381 
382     if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
383     {
384       theChar = bwb_toupper (theChar);
385     }
386 
387 
388     if (theChar == find)
389     {
390       /* FOUND */
391       return buffer_count;
392     }
393 
394   }
395   /* NOT FOUND */
396   return -1;
397 }
398 
399 
400 static int
CharListToSet(char * pattern,int start,int stop)401 CharListToSet (char *pattern, int start, int stop)
402 {
403   /*
404      Converts a string of characters to a HashSet of characters. If the string
405      contains character ranges, such as A-Z, all characters in the range are
406      also added to the returned set of characters.
407    */
408   int pattern_Count;
409 
410   assert (pattern != NULL);
411 
412   bwb_memset (charList, CHAR_CLR, 256);
413 
414   if (start > stop)
415   {
416     /* ERROR */
417     return FALSE;
418   }
419   /* Leading '-' */
420   while (pattern[start] == '-')
421   {
422     /* Match character '-' */
423 
424     charList[0x00FF & pattern[start]] = CHAR_SET;
425     start++;
426     if (start > stop)
427     {
428       /* DONE */
429       return TRUE;
430     }
431   }
432   /* Trailing '-' */
433   while (pattern[stop] == '-')
434   {
435     /* Match character '-' */
436     charList[0x00FF & pattern[stop]] = CHAR_SET;
437     stop--;
438     if (start > stop)
439     {
440       /* DONE */
441       return TRUE;
442     }
443   }
444 
445 
446   for (pattern_Count = start; pattern_Count <= stop; pattern_Count++)
447   {
448     if (pattern[pattern_Count] == '-')
449     {
450       /* Character range */
451 
452       char startChar;
453       char endChar;
454       char theChar;
455 
456       if (pattern_Count > start)
457       {
458         startChar = pattern[pattern_Count - 1];
459         if (startChar == '-')
460         {
461           /* ERROR */
462           return FALSE;
463         }
464         if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
465         {
466           startChar = bwb_toupper (startChar);
467         }
468       }
469       else
470       {
471         /* ERROR */
472         return FALSE;
473       }
474       if (pattern_Count < stop)
475       {
476         endChar = pattern[pattern_Count + 1];
477         if (endChar == '-')
478         {
479           /* ERROR */
480           return FALSE;
481         }
482         if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
483         {
484           endChar = bwb_toupper (endChar);
485         }
486         if (endChar < startChar)
487         {
488           /* ERROR */
489           return FALSE;
490         }
491       }
492       else
493       {
494         /* ERROR */
495         return FALSE;
496       }
497       /*
498          Although the startChar has already been set,
499          and the endChar will be set on the next loop,
500          we go ahead and set them here too.
501          Not the most efficient, but easy to understand,
502          and we do not have to do anything special
503          for edge cases such as [A-A] and [A-B].
504        */
505       for (theChar = startChar; theChar <= endChar; theChar++)
506       {
507         charList[0x00FF & theChar] = CHAR_SET;
508       }
509     }
510     else
511     {
512       /* Single character */
513       char theChar;
514 
515       theChar = pattern[pattern_Count];
516       if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
517       {
518         theChar = bwb_toupper (theChar);
519       }
520       charList[0x00FF & theChar] = CHAR_SET;
521     }
522   }
523 
524   return TRUE;
525 }
526 
527 
528 
529 int
IsLike(char * buffer,int * buffer_count,int buffer_Length,char * pattern,int * pattern_count,int pattern_Length)530 IsLike (char *buffer, int *buffer_count, int buffer_Length,
531         char *pattern, int *pattern_count, int pattern_Length)
532 {
533 
534   /* Implement's VB's Like operator logic. */
535 
536   /*
537      if matched then
538      buffer_count is updated
539      pattern_count is updated
540      returns TRUE
541      else
542      returns FALSE
543      end if
544    */
545 
546   int bc;
547   int pc;
548 
549   assert (buffer != NULL);
550   assert (buffer_count != NULL);
551   assert (pattern != NULL);
552   assert (pattern_count != NULL);
553   assert( My != NULL );
554   assert( My->CurrentVersion != NULL );
555 
556   bc = *buffer_count;
557   pc = *pattern_count;
558 
559 
560   /* Loop through pattern string */
561   while (pc < pattern_Length)
562   {
563 
564     /* Get next pattern character */
565     if (pattern[pc] == '[')
566     {
567       /* Character list */
568       /* [] and [!] are special */
569       char IsExclude;
570 
571       IsExclude = CHAR_CLR;
572       pc++;
573       /* pc is first character after '[' */
574       if (pattern[pc] == '!')
575       {
576         pc++;
577         IsExclude = CHAR_SET;
578       }
579       /* pc is first character after '[' */
580       if (pattern[pc] == ']')
581       {
582         /* [] and [!] are special */
583         /* pc is first character after '[' and is a ']' */
584         pc++;
585         if (IsExclude == CHAR_CLR)
586         {
587           /* [] */
588           /* matches "" */
589         }
590         else
591         {
592           /* [!] */
593           /* same as '?' */
594           if (bc >= buffer_Length)
595           {
596             /* we have completed the buffer without completing the pattern */
597             return FALSE;
598           }
599           bc++;
600         }
601       }
602       else
603       {
604         /* Build character list */
605         /* pc is first character after '[' and is not a ']' */
606         int stop_count;
607 
608         stop_count = IndexOf (pattern, ']', pc);
609         /* stop_count is the character ']' */
610         if (stop_count < 0)
611         {
612           /* NOT FOUND */
613           return FALSE;
614         }
615         /* pc is first character after '['  */
616         /* stop_count is the character ']' */
617         CharListToSet (pattern, pc, stop_count - 1);
618         pc = stop_count + 1;
619         /* pc is first character after ']' */
620         if (bc >= buffer_Length)
621         {
622           /* we have completed the buffer without completing the pattern */
623           return FALSE;
624         }
625         if (charList[0x00FF & buffer[bc]] == IsExclude)
626         {
627           /* not matched */
628           return FALSE;
629         }
630         bc++;
631       }
632     }
633     else if (pattern[pc] == '?' /* LIKE char */ )
634     {
635       /* Matches a single character */
636       pc++;
637       if (bc >= buffer_Length)
638       {
639         /* Check for end of string */
640         /* we have completed the buffer without completing the pattern */
641         return FALSE;
642       }
643       bc++;
644     }
645     else if (pattern[pc] == '#' /* LIKE digit */ )
646     {
647       /* Matches a single digit */
648       pc++;
649       if (bc >= buffer_Length)
650       {
651         /* Check for end of string */
652         /* we have completed the buffer without completing the pattern */
653         return FALSE;
654       }
655       if (bwb_isdigit (buffer[bc]))
656       {
657         bc++;
658       }
659       else
660       {
661         /* not matched */
662         return FALSE;
663       }
664     }
665     else if (pattern[pc] == '*' /* LIKE chars */ )
666     {
667       /* Zero or more characters */
668       while (pattern[pc] == '*' /* LIKE chars */ )
669       {
670         pc++;
671       }
672       if (pc == pattern_Length)
673       {
674         /* Matches all remaining characters */
675         bc = buffer_Length;
676         pc = pattern_Length;
677         break;
678       }
679       else
680       {
681         int p;
682         int b;
683         int IsMatched;
684 
685         p = pc;
686         b = bc;
687         IsMatched = FALSE;
688         while (b <= buffer_Length && IsMatched == FALSE)
689         {
690           int last_b;
691 
692           last_b = b;
693           IsMatched =
694             IsLike (buffer, &b, buffer_Length, pattern, &p, pattern_Length);
695           if (IsMatched == FALSE)
696           {
697             /* not matched, try again begining at next buffer position */
698             p = pc;
699             b = last_b + 1;
700           }
701         }
702         if (IsMatched == FALSE)
703         {
704           /* not matched */
705           return FALSE;
706         }
707         pc = p;
708         bc = b;
709       }
710     }
711     else
712     {
713       char pattChar;
714       char buffChar;
715 
716       pattChar = pattern[pc];
717       if (bc >= buffer_Length)
718       {
719         /* Check for end of string */
720         /* we have completed the buffer without completing the pattern */
721         return FALSE;
722       }
723       buffChar = buffer[bc];
724 
725       if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT)
726       {
727         pattChar = bwb_toupper (pattChar);
728         buffChar = bwb_toupper (buffChar);
729       }
730 
731       if (pattChar == buffChar)
732       {
733         /* matched specified character */
734         pc++;
735         bc++;
736       }
737       else
738       {
739         /* not matched */
740         return FALSE;
741       }
742     }
743   }
744   /* Return true if all characters matched */
745   if (pc < pattern_Length)
746   {
747     /* not matched */
748     return FALSE;
749   }
750   if (bc < buffer_Length)
751   {
752     /* not matched */
753     return FALSE;
754   }
755   /* all characters matched */
756   *buffer_count = bc;
757   *pattern_count = pc;
758   return TRUE;
759 }
760 
761 
762 /* EOF */
763