1 /****************************************************************************
2 **
3 **  This file is part of GAP, a system for computational discrete algebra.
4 **
5 **  Copyright of GAP belongs to its developers, whose names are too numerous
6 **  to list here. Please refer to the COPYRIGHT file for details.
7 **
8 **  SPDX-License-Identifier: GPL-2.0-or-later
9 **
10 **  This file contains the functions which mainly deal with strings.
11 **
12 **  A *string* is a  list that  has no  holes, and  whose  elements  are  all
13 **  characters.  For the full definition of strings see chapter  "Strings" in
14 **  the {\GAP} manual.  Read also "More about Strings" about the  string flag
15 **  and the compact representation of strings.
16 **
17 **  A list  that  is  known to  be a  string is  represented by a bag of type
18 **  'T_STRING', which has the following format:
19 **
20 **      +--------+----+----+- - - -+----+----+
21 **      |length  |1st |2nd |       |last|null|
22 **      |as UInt |char|char|       |char|char|
23 **      +--------+----+----+- - - -+----+----+
24 **
25 **  Each entry is a  single character (of C type 'unsigned char').   The last
26 **  entry  in  the  bag is the  null  character  ('\0'),  which terminates  C
27 **  strings.  We add this null character although the length is stored in the
28 **  object. This allows to use C routines with  strings  directly  with  null
29 **  character free strings (e.g., filenames).
30 **
31 **  Note that a list represented by a bag of type 'T_PLIST' might still be a
32 **  string. It is just that the kernel does not know this.
33 **
34 **  This package consists of three parts.
35 **
36 **  The first part consists of the functions 'NEW_STRING', 'CHARS_STRING' (or
37 **  'CSTR_STRING'),  'GET_LEN_STRING', 'SET_LEN_STRING', and more. These and
38 **  the functions below use the detailed knowledge about the representation
39 **  of strings.
40 **
41 **  The second part  consists  of  the  functions  'LenString',  'ElmString',
42 **  'ElmsStrings', 'AssString',  'AsssString', PlainString',
43 **  and 'IsPossString'.  They are the functions required by the generic lists
44 **  package.  Using these functions the other  parts of the {\GAP} kernel can
45 **  access and  modify strings  without actually  being aware  that they  are
46 **  dealing with a string.
47 **
48 **  The third part consists  of the functions 'PrintString', which is  called
49 **  by 'FuncPrint', and 'IsString', which test whether an arbitrary list is a
50 **  string, and if so converts it into the above format.
51 */
52 
53 #include "stringobj.h"
54 
55 #include "ariths.h"
56 #include "bool.h"
57 #include "error.h"
58 #include "gaputils.h"
59 #include "io.h"
60 #include "lists.h"
61 #include "modules.h"
62 #include "opers.h"
63 #include "plist.h"
64 #include "range.h"
65 #include "saveload.h"
66 
67 #ifdef HPCGAP
68 #include "hpc/guards.h"
69 #endif
70 
71 
72 /****************************************************************************
73 **
74 *F * * * * * * * * * * * * * * character functions  * * * * * * * * * * * * *
75 */
76 
77 /****************************************************************************
78 **
79 *V  ObjsChar[<chr>] . . . . . . . . . . . . . . . . table of character values
80 **
81 **  'ObjsChar' contains all the character values.  That way we do not need to
82 **  allocate new bags for new characters.
83 */
84 Obj ObjsChar [256];
85 
86 
87 /****************************************************************************
88 **
89 *F  TypeChar( <chr> ) . . . . . . . . . . . . . . . type of a character value
90 **
91 **  'TypeChar' returns the type of the character <chr>.
92 **
93 **  'TypeChar' is the function in 'TypeObjFuncs' for character values.
94 */
95 static Obj TYPE_CHAR;
96 
TypeChar(Obj chr)97 static Obj TypeChar(Obj chr)
98 {
99     return TYPE_CHAR;
100 }
101 
102 
103 /****************************************************************************
104 **
105 *F  EqChar( <charL>, <charR> )  . . . . . . . . . . .  compare two characters
106 **
107 **  'EqChar'  returns 'true'  if the two  characters <charL>  and <charR> are
108 **  equal, and 'false' otherwise.
109 */
EqChar(Obj charL,Obj charR)110 static Int EqChar(Obj charL, Obj charR)
111 {
112     return CHAR_VALUE(charL) == CHAR_VALUE(charR);
113 }
114 
115 
116 /****************************************************************************
117 **
118 *F  LtChar( <charL>, <charR> )  . . . . . . . . . . .  compare two characters
119 **
120 **  'LtChar' returns  'true' if the    character <charL>  is less than    the
121 **  character <charR>, and 'false' otherwise.
122 */
LtChar(Obj charL,Obj charR)123 static Int LtChar(Obj charL, Obj charR)
124 {
125     return CHAR_VALUE(charL) < CHAR_VALUE(charR);
126 }
127 
128 
129 /****************************************************************************
130 **
131 *F  PrintChar( <chr> )  . . . . . . . . . . . . . . . . . . print a character
132 **
133 **  'PrChar' prints the character <chr>.
134 */
PrintChar(Obj val)135 static void PrintChar(Obj val)
136 {
137     UChar               chr;
138 
139     chr = CHAR_VALUE(val);
140     if      ( chr == '\n'  )  Pr("'\\n'",0L,0L);
141     else if ( chr == '\t'  )  Pr("'\\t'",0L,0L);
142     else if ( chr == '\r'  )  Pr("'\\r'",0L,0L);
143     else if ( chr == '\b'  )  Pr("'\\b'",0L,0L);
144     else if ( chr == '\01' )  Pr("'\\>'",0L,0L);
145     else if ( chr == '\02' )  Pr("'\\<'",0L,0L);
146     else if ( chr == '\03' )  Pr("'\\c'",0L,0L);
147     else if ( chr == '\''  )  Pr("'\\''",0L,0L);
148     else if ( chr == '\\'  )  Pr("'\\\\'",0L,0L);
149     /* print every non-printable on non-ASCII character in three digit
150      * notation  */
151     /*   old version (changed by FL)
152     else if ( chr == '\0'  )  Pr("'\\0'",0L,0L);
153     else if ( chr <  8     )  Pr("'\\0%d'",(Int)(chr&7),0L);
154     else if ( chr <  32    )  Pr("'\\0%d%d'",(Int)(chr/8),(Int)(chr&7));*/
155     else if ( chr < 32 || chr > 126 ) {
156         Pr("'\\%d%d", (Int)((chr & 192) >> 6), (Int)((chr & 56) >> 3));
157         Pr("%d'", (Int)(chr&7), 0L);
158     }
159     else                      Pr("'%c'",(Int)chr,0L);
160 }
161 
162 
163 /****************************************************************************
164 **
165 *F  SaveChar( <char> )  . . . . . . . . . . . . . . . . . .  save a character
166 **
167 */
SaveChar(Obj c)168 static void SaveChar(Obj c)
169 {
170     SaveUInt1( CHAR_VALUE(c));
171 }
172 
173 
174 /****************************************************************************
175 **
176 *F  LoadChar( <char> )  . . . . . . . . . . . . . . . . . .  load a character
177 **
178 */
LoadChar(Obj c)179 static void LoadChar(Obj c)
180 {
181     SET_CHAR_VALUE(c, LoadUInt1());
182 }
183 
184 
185 
186 /****************************************************************************
187 **
188 *F * * * * * * * * * * * * * * GAP level functions  * * * * * * * * * * * * *
189 */
190 
191 
192 /****************************************************************************
193 **
194 *F  FuncEmptyString( <self>, <len> ) . . . . . . . .  empty string with space
195 **
196 **  Returns an empty string, with space for <len> characters preallocated.
197 **
198 */
FuncEmptyString(Obj self,Obj len)199 static Obj FuncEmptyString(Obj self, Obj len)
200 {
201     Obj                 new;
202     RequireNonnegativeSmallInt("EmptyString", len);
203     new = NEW_STRING(INT_INTOBJ(len));
204     SET_LEN_STRING(new, 0);
205     return new;
206 }
207 
208 /****************************************************************************
209 **
210 *F  FuncShrinkAllocationString( <self>, <str> ) . . give back unneeded memory
211 **
212 **  Shrinks the bag of <str> to minimal possible size (possibly converts to
213 **  compact representation).
214 **
215 */
FuncShrinkAllocationString(Obj self,Obj str)216 static Obj FuncShrinkAllocationString(Obj self, Obj str)
217 {
218     RequireStringRep("ShrinkAllocationString", str);
219     SHRINK_STRING(str);
220     return (Obj)0;
221 }
222 
223 /****************************************************************************
224 **
225 *F  FuncCHAR_INT( <self>, <int> ) . . . . . . . . . . . . . . char by integer
226 */
FuncCHAR_INT(Obj self,Obj val)227 static Obj FuncCHAR_INT(Obj self, Obj val)
228 {
229     Int             chr;
230 
231     /* get and check the integer value                                     */
232     chr = GetSmallInt("CHAR_INT", val);
233     if ( 255 < chr || chr < 0 ) {
234         ErrorMayQuit("<val> must be an integer between 0 and 255", 0, 0);
235     }
236 
237     /* return the character                                                */
238     return ObjsChar[chr];
239 }
240 
241 
242 /****************************************************************************
243 **
244 *F  FuncINT_CHAR( <self>, <char> )  . . . . . . . . . . . . . integer by char
245 */
FuncINT_CHAR(Obj self,Obj val)246 static Obj FuncINT_CHAR(Obj self, Obj val)
247 {
248     /* get and check the character                                         */
249     if (TNUM_OBJ(val) != T_CHAR) {
250         RequireArgument("INT_CHAR", val, "must be a character");
251     }
252 
253     /* return the character                                                */
254     return INTOBJ_INT(CHAR_VALUE(val));
255 }
256 
257 /****************************************************************************
258 **
259 *F  FuncCHAR_SINT( <self>, <int> ) . . . . . . . . . . char by signed integer
260 */
FuncCHAR_SINT(Obj self,Obj val)261 static Obj FuncCHAR_SINT(Obj self, Obj val)
262 {
263     Int chr;
264 
265     /* get and check the integer value                                     */
266     chr = GetSmallInt("CHAR_SINT", val);
267     if (127 < chr || chr < -128) {
268         ErrorMayQuit("<val> must be an integer between -128 and 127", 0, 0);
269     }
270 
271     /* return the character                                                */
272     return ObjsChar[CHAR_SINT(chr)];
273 }
274 
275 
276 /****************************************************************************
277 **
278 *F  FuncSINT_CHAR( <self>, <char> ) . . . . . . . . .  signed integer by char
279 */
FuncSINT_CHAR(Obj self,Obj val)280 static Obj FuncSINT_CHAR(Obj self, Obj val)
281 {
282     /* get and check the character                                         */
283     if (TNUM_OBJ(val) != T_CHAR) {
284         RequireArgument("SINT_CHAR", val, "must be a character");
285     }
286 
287     /* return the character                                                */
288     return INTOBJ_INT(SINT_CHAR(CHAR_VALUE(val)));
289 }
290 
291 /****************************************************************************
292 **
293 *F  FuncSINTLIST_STRING( <self>, <string> ) signed integer list by string
294 */
FuncINTLIST_STRING(Obj self,Obj val,Obj sign)295 static Obj FuncINTLIST_STRING(Obj self, Obj val, Obj sign)
296 {
297   UInt l,i;
298   Obj n, *addr;
299   const UInt1 *p;
300 
301   /* test whether val is a string, convert to compact rep if necessary */
302   RequireStringRep("INTLIST_STRING", val);
303 
304   l=GET_LEN_STRING(val);
305   n=NEW_PLIST(T_PLIST,l);
306   SET_LEN_PLIST(n,l);
307   p=CONST_CHARS_STRING(val);
308   addr=ADDR_OBJ(n);
309   /* signed or unsigned ? */
310   if (sign == INTOBJ_INT(1L)) {
311     for (i=1; i<=l; i++) {
312       addr[i] = INTOBJ_INT(p[i-1]);
313     }
314   }
315   else {
316     for (i=1; i<=l; i++) {
317       addr[i] = INTOBJ_INT(SINT_CHAR(p[i-1]));
318     }
319   }
320 
321   CHANGED_BAG(n);
322   return n;
323 }
324 
FuncSINTLIST_STRING(Obj self,Obj val)325 static Obj FuncSINTLIST_STRING(Obj self, Obj val)
326 {
327   return FuncINTLIST_STRING ( self, val, INTOBJ_INT(-1L) );
328 }
329 
330 /****************************************************************************
331 **
332 *F  FuncSTRING_SINTLIST( <self>, <string> ) string by signed integer list
333 */
FuncSTRING_SINTLIST(Obj self,Obj val)334 static Obj FuncSTRING_SINTLIST(Obj self, Obj val)
335 {
336   UInt l,i;
337   Int low, inc;
338   Obj n;
339   UInt1 *p;
340 
341   /* there should be a test here, but how do I check cheaply for list of
342    * integers ? */
343 
344   /* general code */
345   if (!IS_RANGE(val) && !IS_PLIST(val)) {
346   again:
347       RequireArgument("STRING_SINTLIST", val,
348                       "must be a plain list of small integers or a range");
349   }
350   if (! IS_RANGE(val) ) {
351     l=LEN_PLIST(val);
352     n=NEW_STRING(l);
353     p=CHARS_STRING(n);
354     for (i=1;i<=l;i++) {
355       Obj x = ELM_PLIST(val,i);
356       if (!IS_INTOBJ(x))
357         goto again;
358       *p++=CHAR_SINT(INT_INTOBJ(x));
359     }
360   }
361   else {
362     l=GET_LEN_RANGE(val);
363     low=GET_LOW_RANGE(val);
364     inc=GET_INC_RANGE(val);
365     n=NEW_STRING(l);
366     p=CHARS_STRING(n);
367     for (i=1;i<=l;i++) {
368       *p++=CHAR_SINT(low);
369       low=low+inc;
370     }
371 
372   }
373 
374   return n;
375 }
376 
377 /****************************************************************************
378 **
379 *F  FuncREVNEG_STRING( <self>, <string> ) string by signed integer list
380 */
FuncREVNEG_STRING(Obj self,Obj val)381 static Obj FuncREVNEG_STRING(Obj self, Obj val)
382 {
383   UInt l,i,j;
384   Obj n;
385   const UInt1 *p;
386   UInt1 *q;
387 
388   /* test whether val is a string, convert to compact rep if necessary */
389   RequireStringRep("REVNEG_STRING", val);
390 
391   l=GET_LEN_STRING(val);
392   n=NEW_STRING(l);
393   p=CONST_CHARS_STRING(val);
394   q=CHARS_STRING(n);
395   j=l-1;
396   for (i=1;i<=l;i++) {
397     *q++=-p[j];
398     j--;
399   }
400 
401   return n;
402 }
403 
404 /****************************************************************************
405 **
406 *F * * * * * * * * * * * * * * * string functions * * * * * * * * * * * * * *
407 */
408 
409 /****************************************************************************
410 **
411 *F  NEW_STRING( <len> )  . . . returns new string with length <len>, first
412 **  character and "first behind last" set to zero
413 **
414 */
NEW_STRING(Int len)415 Obj NEW_STRING(Int len)
416 {
417     GAP_ASSERT(len >= 0);
418     if (len > INT_INTOBJ_MAX) {
419         ErrorQuit("NEW_STRING: length must be a small integer", 0, 0);
420     }
421     Obj res = NewBag(T_STRING, SIZEBAG_STRINGLEN(len));
422     SET_LEN_STRING(res, len);
423     return res;
424 }
425 
426 /****************************************************************************
427 **
428 *F  GrowString(<list>,<len>) . . . . . .  make sure a string is large enough
429 **
430 **  returns the new length, but doesn't set SET_LEN_STRING.
431 */
GrowString(Obj list,UInt need)432 Int             GrowString (
433     Obj                 list,
434     UInt                need )
435 {
436     UInt                len;            /* new physical length             */
437     UInt                good;           /* good new physical length        */
438 
439     if (need > INT_INTOBJ_MAX)
440         ErrorMayQuit("GrowString: string length too large", 0, 0);
441 
442     /* find out how large the data area  should become                     */
443     good = 5 * (GET_LEN_STRING(list)+3) / 4 + 1;
444     if (good > INT_INTOBJ_MAX)
445         good = INT_INTOBJ_MAX;
446 
447     /* but maybe we need more                                              */
448     if ( need < good ) { len = good; }
449     else               { len = need; }
450 
451     /* resize the bag                                                      */
452     ResizeBag( list, SIZEBAG_STRINGLEN(len) );
453 
454     /* return the new maximal length                                       */
455     return (Int) len;
456 }
457 
458 /****************************************************************************
459 **
460 *F  TypeString(<list>)  . . . . . . . . . . . . . . . . . .  type of a string
461 **
462 **  'TypeString' returns the type of the string <list>.
463 **
464 **  'TypeString' is the function in 'TypeObjFuncs' for strings.
465 */
466 static Obj TYPES_STRING;
467 
468 
TypeString(Obj list)469 static Obj TypeString(Obj list)
470 {
471     return ELM_PLIST(TYPES_STRING, TNUM_OBJ(list) - T_STRING + 1);
472 }
473 
474 
475 
476 /****************************************************************************
477 **
478 *F * * * * * * * * * * * * * * copy functions * * * * * * * * * * * * * * * *
479 */
480 
481 #if !defined(USE_THREADSAFE_COPYING)
482 
483 /****************************************************************************
484 **
485 *F  CopyString( <list>, <mut> ) . . . . . . . . . . . . . . . . copy a string
486 **
487 **  'CopyString' returns a structural (deep) copy of the string <list>, i.e.,
488 **  a recursive copy that preserves the structure.
489 **
490 **  If <list> has not  yet  been copied, it makes   a copy, leaves  a forward
491 **  pointer to the copy in  the first entry of  the string, where the size of
492 **  the string usually resides,  and copies  all the  entries.  If the string
493 **  has already been copied, it returns the value of the forwarding pointer.
494 **
495 **  'CopyString' is the function in 'CopyObjFuncs' for strings.
496 */
CopyString(Obj list,Int mut)497 static Obj CopyString(Obj list, Int mut)
498 {
499     Obj                 copy;           /* handle of the copy, result      */
500 
501     // immutable input is handled by COPY_OBJ
502     GAP_ASSERT(IS_MUTABLE_OBJ(list));
503 
504     /* make object for  copy                                               */
505     copy = NewBag(TNUM_OBJ(list), SIZE_OBJ(list));
506     if (!mut)
507         MakeImmutableNoRecurse(copy);
508     ADDR_OBJ(copy)[0] = CONST_ADDR_OBJ(list)[0];
509 
510     /* leave a forwarding pointer                                          */
511     PrepareCopy(list, copy);
512 
513     /* copy the subvalues                                                  */
514     memcpy(ADDR_OBJ(copy)+1, CONST_ADDR_OBJ(list)+1,
515            SIZE_OBJ(list)-sizeof(Obj) );
516 
517     /* return the copy                                                     */
518     return copy;
519 }
520 
521 #endif //!defined(USE_THREADSAFE_COPYING)
522 
523 
524 /****************************************************************************
525 **
526 *F * * * * * * * * * * * * * * list functions * * * * * * * * * * * * * * * *
527 */
528 
529 /****************************************************************************
530 **
531 *F  PrintString(<list>) . . . . . . . . . . . . . . . . . . .  print a string
532 **
533 **  'PrintString' prints the string with the handle <list>.
534 **
535 **  No linebreaks are  allowed, if one must be inserted  anyhow, it must
536 **  be escaped by a backslash '\', which is done in 'Pr'.
537 **
538 **  The buffer 'PrStrBuf' is used to protect 'Pr' against garbage collections
539 **  caused by printing to string streams, which might move the body of list.
540 **
541 **  The output uses octal number notation for non-ascii or non-printable
542 **  characters. The function can be used to print *any* string in a way
543 **  which can be read in by GAP afterwards.
544 */
PrintString(Obj list)545 void PrintString(Obj list)
546 {
547     char  PrStrBuf[10007]; /* 7 for a \c\123 at the end */
548     UInt  scanout, n;
549     UInt1 c;
550     UInt  len = GET_LEN_STRING(list);
551     UInt  off = 0;
552     Pr("\"", 0L, 0L);
553     while (off < len) {
554         scanout = 0;
555         do {
556             c = CONST_CHARS_STRING(list)[off++];
557             switch (c) {
558             case '\\':
559                 PrStrBuf[scanout++] = '\\';
560                 PrStrBuf[scanout++] = '\\';
561                 break;
562             case '\"':
563                 PrStrBuf[scanout++] = '\\';
564                 PrStrBuf[scanout++] = '\"';
565                 break;
566             case '\n':
567                 PrStrBuf[scanout++] = '\\';
568                 PrStrBuf[scanout++] = 'n';
569                 break;
570             case '\t':
571                 PrStrBuf[scanout++] = '\\';
572                 PrStrBuf[scanout++] = 't';
573                 break;
574             case '\r':
575                 PrStrBuf[scanout++] = '\\';
576                 PrStrBuf[scanout++] = 'r';
577                 break;
578             case '\b':
579                 PrStrBuf[scanout++] = '\\';
580                 PrStrBuf[scanout++] = 'b';
581                 break;
582             case '\01':
583                 PrStrBuf[scanout++] = '\\';
584                 PrStrBuf[scanout++] = '>';
585                 break;
586             case '\02':
587                 PrStrBuf[scanout++] = '\\';
588                 PrStrBuf[scanout++] = '<';
589                 break;
590             case '\03':
591                 PrStrBuf[scanout++] = '\\';
592                 PrStrBuf[scanout++] = 'c';
593                 break;
594             default:
595                 if (c < 32 || c > 126) {
596                     PrStrBuf[scanout++] = '\\';
597                     n = c / 64;
598                     c = c - n * 64;
599                     PrStrBuf[scanout++] = n + '0';
600                     n = c / 8;
601                     c = c - n * 8;
602                     PrStrBuf[scanout++] = n + '0';
603                     PrStrBuf[scanout++] = c + '0';
604                 }
605                 else
606                     PrStrBuf[scanout++] = c;
607             }
608         } while (off < len && scanout < 10000);
609         PrStrBuf[scanout++] = '\0';
610         Pr("%s", (Int)PrStrBuf, 0L);
611     }
612     Pr("\"", 0L, 0L);
613 }
614 
615 
616 /****************************************************************************
617 **
618 *F  PrintString1(<list>)  . . . . . . . . . . . .  print a string for 'Print'
619 **
620 **  'PrintString1' prints the string  constant  in  the  format  used  by the
621 **  'Print' and 'PrintTo' function.
622 */
PrintString1(Obj list)623 void PrintString1 (
624     Obj                 list )
625 {
626   Pr("%g", (Int)list, 0L);
627 }
628 
629 
630 /****************************************************************************
631 **
632 *F  EqString(<listL>,<listR>) . . . . . . . .  test whether strings are equal
633 **
634 **  'EqString'  returns  'true' if the  two  strings <listL>  and <listR> are
635 **  equal and 'false' otherwise.
636 */
EqString(Obj listL,Obj listR)637 static Int EqString(Obj listL, Obj listR)
638 {
639   UInt lL, lR;
640   const UInt1 *pL, *pR;
641   lL = GET_LEN_STRING(listL);
642   lR = GET_LEN_STRING(listR);
643   if (lR != lL) return 0;
644   pL = CONST_CHARS_STRING(listL);
645   pR = CONST_CHARS_STRING(listR);
646   return memcmp(pL, pR, lL) == 0;
647 }
648 
649 
650 /****************************************************************************
651 **
652 *F  LtString(<listL>,<listR>) .  test whether one string is less than another
653 **
654 **  'LtString' returns 'true' if  the string <listL> is  less than the string
655 **  <listR> and 'false' otherwise.
656 */
LtString(Obj listL,Obj listR)657 static Int LtString(Obj listL, Obj listR)
658 {
659   UInt lL, lR;
660   const UInt1 *pL, *pR;
661   lL = GET_LEN_STRING(listL);
662   lR = GET_LEN_STRING(listR);
663   pL = CONST_CHARS_STRING(listL);
664   pR = CONST_CHARS_STRING(listR);
665 
666   Int res;
667   if (lL <= lR) {
668     res = memcmp(pL, pR, lL);
669     if (res == 0)
670       return lL < lR;
671   }
672   else {
673     res = memcmp(pL, pR, lR);
674     if (res == 0)
675       return 0;
676   }
677   return res < 0;
678 }
679 
680 
681 /****************************************************************************
682 **
683 *F  LenString(<list>) . . . . . . . . . . . . . . . . . .  length of a string
684 **
685 **  'LenString' returns the length of the string <list> as a C integer.
686 **
687 **  'LenString' is the function in 'LenListFuncs' for strings.
688 */
LenString(Obj list)689 static Int LenString(Obj list)
690 {
691     return GET_LEN_STRING( list );
692 }
693 
694 
695 /****************************************************************************
696 **
697 *F  IsbString(<list>,<pos>) . . . . . . . . . test for an element of a string
698 **
699 **  'IsbString' returns 1 if the string <list> contains
700 **  a character at the position <pos> and 0 otherwise.
701 **  It can rely on <pos> being a positive integer.
702 **
703 **  'IsbString'  is the function in 'IsbListFuncs'  for strings.
704 */
IsbString(Obj list,Int pos)705 static Int IsbString(Obj list, Int pos)
706 {
707     /* since strings are dense, this must only test for the length         */
708     return (pos <= GET_LEN_STRING(list));
709 }
710 
711 
712 /****************************************************************************
713 **
714 *F  GET_ELM_STRING( <list>, <pos> ) . . . . . . select an element of a string
715 **
716 **  'GET_ELM_STRING'  returns the  <pos>-th  element  of  the string  <list>.
717 **  <pos> must be  a positive integer  less than  or  equal to  the length of
718 **  <list>.
719 */
GET_ELM_STRING(Obj list,Int pos)720 static inline Obj GET_ELM_STRING(Obj list, Int pos)
721 {
722     GAP_ASSERT(IS_STRING_REP(list));
723     GAP_ASSERT(pos > 0);
724     GAP_ASSERT((UInt) pos <= GET_LEN_STRING(list));
725     UChar c = CONST_CHARS_STRING(list)[pos - 1];
726     return ObjsChar[c];
727 }
728 
729 
730 /****************************************************************************
731 **
732 *F  SET_ELM_STRING( <list>, <pos>, <val> ) . . . . set a character of a string
733 **
734 **  'SET_ELM_STRING'  sets the  <pos>-th  character  of  the string  <list>.
735 **  <val> must be a character and <list> stay a string after the assignment.
736 */
SET_ELM_STRING(Obj list,Int pos,Obj val)737 static inline void SET_ELM_STRING(Obj list, Int pos, Obj val)
738 {
739     GAP_ASSERT(IS_STRING_REP(list));
740     GAP_ASSERT(pos > 0);
741     GAP_ASSERT((UInt) pos <= GET_LEN_STRING(list));
742     GAP_ASSERT(TNUM_OBJ(val) == T_CHAR);
743     UChar * ptr = CHARS_STRING(list) + (pos - 1);
744     *ptr = CHAR_VALUE(val);
745 }
746 
747 
748 /****************************************************************************
749 **
750 *F  Elm0String(<list>,<pos>)  . . . . . . . . . select an element of a string
751 *F  Elm0vString(<list>,<pos>) . . . . . . . . . select an element of a string
752 **
753 **  'Elm0String' returns the element at the position <pos> of the string
754 **  <list>, or returns 0 if <list> has no assigned object at <pos>.
755 **  It can rely on <pos> being a positive integer.
756 **
757 **  'Elm0vString' does the same thing as 'Elm0String', but it can
758 **  also rely on <pos> not being larger than the length of <list>.
759 **
760 **  'Elm0String'  is the function on 'Elm0ListFuncs'  for strings.
761 **  'Elm0vString' is the function in 'Elm0vListFuncs' for strings.
762 */
Elm0String(Obj list,Int pos)763 static Obj Elm0String(Obj list, Int pos)
764 {
765     if ( pos <= GET_LEN_STRING( list ) ) {
766         return GET_ELM_STRING( list, pos );
767     }
768     else {
769         return 0;
770     }
771 }
772 
Elm0vString(Obj list,Int pos)773 static Obj Elm0vString(Obj list, Int pos)
774 {
775     return GET_ELM_STRING( list, pos );
776 }
777 
778 
779 /****************************************************************************
780 **
781 *F  ElmString(<list>,<pos>) . . . . . . . . . . select an element of a string
782 *F  ElmvString(<list>,<pos>)  . . . . . . . . . select an element of a string
783 **
784 **  'ElmString' returns the element at the position <pos> of the string
785 **  <list>, or signals an error if <list> has no assigned object at <pos>.
786 **  It can rely on <pos> being a positive integer.
787 **
788 **  'ElmvString' does the same thing as 'ElmString', but it can
789 **  also rely on <pos> not being larger than the length of <list>.
790 **
791 **  'ElmwString' does the same thing as 'ElmString', but it can
792 **  also rely on <list> having an assigned object at <pos>.
793 **
794 **  'ElmString'  is the function in 'ElmListFuncs'  for strings.
795 **  'ElmfString' is the function in 'ElmfListFuncs' for strings.
796 **  'ElmwString' is the function in 'ElmwListFuncs' for strings.
797 */
ElmString(Obj list,Int pos)798 static Obj ElmString(Obj list, Int pos)
799 {
800     /* check the position                                                  */
801     if ( GET_LEN_STRING( list ) < pos ) {
802         ErrorMayQuit("List Element: <list>[%d] must have an assigned value",
803                      (Int)pos, 0);
804     }
805 
806     /* return the selected element                                         */
807     return GET_ELM_STRING( list, pos );
808 }
809 
810 #define ElmvString      Elm0vString
811 
812 #define ElmwString      Elm0vString
813 
814 
815 /****************************************************************************
816 **
817 *F  ElmsString(<list>,<poss>) . . . . . . . .  select a sublist from a string
818 **
819 **  'ElmsString' returns a new list containing the  elements at the positions
820 **  given   in  the  list   <poss> from   the  string   <list>.   It  is  the
821 **  responsibility of the called to ensure that  <poss> is dense and contains
822 **  only positive integers.  An error is signalled if an element of <poss> is
823 **  larger than the length of <list>.
824 **
825 **  'ElmsString' is the function in 'ElmsListFuncs' for strings.
826 */
ElmsString(Obj list,Obj poss)827 static Obj ElmsString(Obj list, Obj poss)
828 {
829     Obj                 elms;         /* selected sublist, result        */
830     Int                 lenList;        /* length of <list>                */
831     Char                elm;            /* one element from <list>         */
832     Int                 lenPoss;        /* length of <positions>           */
833     Int                 pos;            /* <position> as integer           */
834     Int                 inc;            /* increment in a range            */
835     Int                 i;              /* loop variable                   */
836 
837     /* general code                                                        */
838     if ( ! IS_RANGE(poss) ) {
839 
840         /* get the length of <list>                                        */
841         lenList = GET_LEN_STRING( list );
842 
843         /* get the length of <positions>                                   */
844         lenPoss = LEN_LIST( poss );
845 
846         /* make the result list                                            */
847         elms = NEW_STRING( lenPoss );
848 
849         /* loop over the entries of <positions> and select                 */
850         for ( i = 1; i <= lenPoss; i++ ) {
851 
852             /* get <position>                                              */
853             Obj p = ELMW_LIST(poss, i);
854             if (!IS_INTOBJ(p)) {
855                 ErrorMayQuit("List Elements: position is too large for "
856                              "this type of list",
857                              0, 0);
858             }
859             pos = INT_INTOBJ(p);
860 
861             /* select the element                                          */
862             if ( lenList < pos ) {
863                 ErrorMayQuit(
864                     "List Elements: <list>[%d] must have an assigned value",
865                     (Int)pos, 0);
866             }
867 
868             /* select the element                                          */
869             elm = CONST_CHARS_STRING(list)[pos-1];
870 
871             /* assign the element into <elms>                              */
872             CHARS_STRING(elms)[i-1] = elm;
873 
874         }
875 
876     }
877 
878     /* special code for ranges                                             */
879     else {
880 
881         /* get the length of <list>                                        */
882         lenList = GET_LEN_STRING( list );
883 
884         /* get the length of <positions>, the first elements, and the inc. */
885         lenPoss = GET_LEN_RANGE( poss );
886         pos = GET_LOW_RANGE( poss );
887         inc = GET_INC_RANGE( poss );
888 
889         /* check that no <position> is larger than 'LEN_LIST(<list>)'      */
890         if ( lenList < pos ) {
891             ErrorMayQuit(
892                 "List Elements: <list>[%d] must have an assigned value",
893                 (Int)pos, 0);
894         }
895         if ( lenList < pos + (lenPoss-1) * inc ) {
896             ErrorMayQuit(
897                 "List Elements: <list>[%d] must have an assigned value",
898                 (Int)(pos + (lenPoss - 1) * inc), 0);
899         }
900 
901         /* make the result list                                            */
902         elms = NEW_STRING( lenPoss );
903 
904         /* loop over the entries of <positions> and select                 */
905         const UInt1 * p = CONST_CHARS_STRING(list);
906         UInt1 * pn = CHARS_STRING(elms);
907         for ( i = 1; i <= lenPoss; i++, pos += inc ) {
908             pn[i - 1] = p[pos - 1];
909         }
910 
911     }
912 
913     /* return the result                                                   */
914     return elms;
915 }
916 
917 
918 /****************************************************************************
919 **
920 *F  AssString(<list>,<pos>,<val>) . . . . . . . . . . . .  assign to a string
921 **
922 **  'AssString' assigns the value <val> to the  string <list> at the position
923 **  <pos>.   It is the responsibility  of the caller to  ensure that <pos> is
924 **  positive, and that <val> is not 0.
925 **
926 **  'AssString' is the function in 'AssListFuncs' for strings.
927 **
928 **  'AssString' keeps <list> in string representation if possible.
929 **
930 */
AssString(Obj list,Int pos,Obj val)931 static void AssString(Obj list, Int pos, Obj val)
932 {
933   UInt len = GET_LEN_STRING(list);
934 
935   if (TNUM_OBJ(val) != T_CHAR || pos > len+1) {
936     /* convert the range into a plain list                                 */
937     PLAIN_LIST(list);
938     CLEAR_FILTS_LIST(list);
939 
940     /* resize the list if necessary                                        */
941     if ( len < pos ) {
942       GROW_PLIST( list, pos );
943       SET_LEN_PLIST( list, pos );
944     }
945 
946     /* now perform the assignment and return the assigned value            */
947     SET_ELM_PLIST( list, pos, val );
948     CHANGED_BAG( list );
949   }
950   else {
951     CLEAR_FILTS_LIST(list);
952 
953     /* resize the list if necessary                                        */
954     if ( len < pos ) {
955       GROW_STRING( list, pos );
956       SET_LEN_STRING( list, pos );
957       CHARS_STRING(list)[pos] = (UInt1)0;
958     }
959 
960     /* now perform the assignment and return the assigned value            */
961     SET_ELM_STRING( list, pos, val );
962   }
963 }
964 
965 
966 /****************************************************************************
967 **
968 *F  AsssString(<list>,<poss>,<vals>)  . . assign several elements to a string
969 **
970 **  'AsssString' assignes the  values from the  list <vals> at the  positions
971 **  given in the list <poss> to the string  <list>.  It is the responsibility
972 **  of the caller to ensure that  <poss> is dense  and contains only positive
973 **  integers, that <poss> and <vals> have the same length, and that <vals> is
974 **  dense.
975 **
976 **  'AsssString' is the function in 'AsssListFuncs' for strings.
977 **
978 **  'AsssString' simply delegates to AssString. Note that the ordering of
979 **  <poss> can be important if <list> should stay in string representation.
980 **
981 */
AsssString(Obj list,Obj poss,Obj vals)982 static void AsssString(Obj list, Obj poss, Obj vals)
983 {
984   Int i, len = LEN_LIST(poss);
985   for (i = 1; i <= len; i++) {
986     ASS_LIST(list, INT_INTOBJ(ELM_LIST(poss, i)), ELM_LIST(vals, i));
987   }
988 }
989 
990 
991 /****************************************************************************
992 **
993 *F  IsSSortString(<list>) . . . . . . . strictly sorted list test for strings
994 **
995 **  'IsSSortString'  returns 1 if the string  <list> is strictly sorted and 0
996 **  otherwise.
997 **
998 **  'IsSSortString' is the function in 'IsSSortListFuncs' for strings.
999 */
IsSSortString(Obj list)1000 static Int IsSSortString(Obj list)
1001 {
1002     Int                 len;
1003     Int                 i;
1004     const UInt1 *       ptr;
1005 
1006     /* test whether the string is strictly sorted                          */
1007     len = GET_LEN_STRING( list );
1008     ptr = CONST_CHARS_STRING(list);
1009     for ( i = 1; i < len; i++ ) {
1010         if ( ! (ptr[i-1] < ptr[i]) )
1011             break;
1012     }
1013 
1014     /* retype according to the outcome                                     */
1015     SET_FILT_LIST( list, (len <= i) ? FN_IS_SSORT : FN_IS_NSORT );
1016     return (len <= i);
1017 }
1018 
1019 
1020 /****************************************************************************
1021 **
1022 *F  IsPossString(<list>)  . . . . .  positions list test function for strings
1023 **
1024 **  'IsPossString' is the function in 'IsPossListFuncs' for strings.
1025 */
IsPossString(Obj list)1026 static Int IsPossString(Obj list)
1027 {
1028     return GET_LEN_STRING( list ) == 0;
1029 }
1030 
1031 
1032 /****************************************************************************
1033 **
1034 *F  PosString(<list>,<val>,<pos>) . . . .  position of an element in a string
1035 **
1036 **  'PosString' returns the position of the  value <val> in the string <list>
1037 **  after the first position <start> as a C integer.   0 is returned if <val>
1038 **  is not in the list.
1039 **
1040 **  'PosString' is the function in 'PosListFuncs' for strings.
1041 */
PosString(Obj list,Obj val,Obj start)1042 static Obj PosString(Obj list, Obj val, Obj start)
1043 {
1044     Int                 lenList;        /* length of <list>                */
1045     Int                 i;              /* loop variable                   */
1046     UInt1               valc;        /* C characters                    */
1047     const UInt1         *p;             /* pointer to chars of <list>      */
1048     UInt                istart;
1049 
1050     /* if the starting position is too big to be a small int
1051        then there can't be anything to find */
1052     if (!IS_INTOBJ(start))
1053       return Fail;
1054 
1055     istart = INT_INTOBJ(start);
1056 
1057     /* get the length of <list>                                            */
1058     lenList = GET_LEN_STRING( list );
1059 
1060     /* a string contains only characters */
1061     if (TNUM_OBJ(val) != T_CHAR) return Fail;
1062 
1063     /* val as C character   */
1064     valc = CHAR_VALUE(val);
1065 
1066     /* search entries in <list>                                     */
1067     p = CONST_CHARS_STRING(list);
1068     for ( i = istart; i < lenList && p[i] != valc; i++ );
1069 
1070     /* return the position (0 if <val> was not found)                      */
1071     return (lenList <= i ? Fail : INTOBJ_INT(i+1));
1072 }
1073 
1074 
1075 /****************************************************************************
1076 **
1077 *F  PlainString(<list>) . . . . . . . . . .  convert a string to a plain list
1078 **
1079 **  'PlainString' converts the string <list> to a plain list.  Not much work.
1080 **
1081 **  'PlainString' is the function in 'PlainListFuncs' for strings.
1082 */
PlainString(Obj list)1083 static void PlainString(Obj list)
1084 {
1085     Int                 lenList;        /* logical length of the string    */
1086     Obj                 tmp;            /* handle of the list              */
1087     Int                 i;              /* loop variable                   */
1088 
1089     /* find the length and allocate a temporary copy                       */
1090     lenList = GET_LEN_STRING( list );
1091     tmp = NEW_PLIST_WITH_MUTABILITY(IS_MUTABLE_OBJ(list), T_PLIST, lenList);
1092     SET_LEN_PLIST( tmp, lenList );
1093 
1094     /* copy the characters                                                 */
1095     for ( i = 1; i <= lenList; i++ ) {
1096         SET_ELM_PLIST( tmp, i, GET_ELM_STRING( list, i ) );
1097     }
1098 
1099     /* change size and type of the string and copy back                    */
1100     ResizeBag( list, SIZE_OBJ(tmp) );
1101     RetypeBag( list, TNUM_OBJ(tmp) );
1102 
1103     memcpy(ADDR_OBJ(list), CONST_ADDR_OBJ(tmp), SIZE_OBJ(tmp));
1104     CHANGED_BAG(list);
1105 }
1106 
1107 
1108 /****************************************************************************
1109 **
1110 *F  IS_STRING( <obj> )  . . . . . . . . . . . . test if an object is a string
1111 **
1112 **  'IS_STRING' returns 1  if the object <obj>  is a string  and 0 otherwise.
1113 **  It does not change the representation of <obj>.
1114 */
1115 Int (*IsStringFuncs [LAST_REAL_TNUM+1]) ( Obj obj );
1116 
1117 static Obj IsStringFilt;
1118 
IsStringList(Obj list)1119 static Int IsStringList(Obj list)
1120 {
1121     Int                 lenList;
1122     Obj                 elm;
1123     Int                 i;
1124 
1125     lenList = LEN_LIST( list );
1126     for ( i = 1; i <= lenList; i++ ) {
1127         elm = ELMV0_LIST( list, i );
1128         if ( elm == 0 )
1129             break;
1130 #ifdef HPCGAP
1131         if ( !CheckReadAccess(elm) )
1132             break;
1133 #endif
1134         if ( TNUM_OBJ( elm ) != T_CHAR )
1135             break;
1136     }
1137 
1138     return (lenList < i);
1139 }
1140 
IsStringListHom(Obj list)1141 static Int IsStringListHom(Obj list)
1142 {
1143     return (TNUM_OBJ( ELM_LIST(list,1) ) == T_CHAR);
1144 }
1145 
IsStringObject(Obj obj)1146 static Int IsStringObject(Obj obj)
1147 {
1148     return (DoFilter( IsStringFilt, obj ) != False);
1149 }
1150 
1151 
1152 /****************************************************************************
1153 **
1154 *F  CopyToStringRep( <string> ) . . .  copy a string to string representation
1155 **
1156 **  'CopyToStringRep' copies the string <string> to a new string in string
1157 **  representation.
1158 */
CopyToStringRep(Obj string)1159 Obj CopyToStringRep(
1160     Obj                 string )
1161 {
1162     Int                 lenString;      /* length of the string            */
1163     Obj                 elm;            /* one element of the string       */
1164     Obj                 copy;           /* temporary string                */
1165     Int                 i;              /* loop variable                   */
1166 
1167     lenString = LEN_LIST(string);
1168     copy = NEW_STRING(lenString);
1169 
1170     if ( IS_STRING_REP(string) ) {
1171         memcpy(CHARS_STRING(copy), CONST_CHARS_STRING(string),
1172             GET_LEN_STRING(string));
1173         /* XXX no error checks? */
1174     } else {
1175         /* copy the string to the string representation                     */
1176         for ( i = 1; i <= lenString; i++ ) {
1177             elm = ELMW_LIST( string, i );
1178             CHARS_STRING(copy)[i-1] = CHAR_VALUE(elm);
1179         }
1180         CHARS_STRING(copy)[lenString] = '\0';
1181     }
1182     return copy;
1183 }
1184 
1185 
1186 /****************************************************************************
1187 **
1188 *F  ImmutableString( <string> ) . . . copy to immutable string in string rep.
1189 **
1190 **  'ImmutableString' returns an immutable string in string representation
1191 **  equal to <string>. This may return <string> if it already satisfies these
1192 **  criteria.
1193 */
ImmutableString(Obj string)1194 Obj ImmutableString(Obj string)
1195 {
1196     if (!IS_STRING_REP(string) || IS_MUTABLE_OBJ(string)) {
1197         string = CopyToStringRep(string);
1198         MakeImmutableNoRecurse(string);
1199     }
1200     return string;
1201 }
1202 
1203 
1204 /****************************************************************************
1205 **
1206 *F  ConvString( <string> ) . . . .  convert a string to string representation
1207 **
1208 **  'ConvString' converts the string <string> to string representation.
1209 */
ConvString(Obj string)1210 void ConvString (
1211     Obj                 string )
1212 {
1213     Int                 lenString;      /* length of the string            */
1214     Obj                 elm;            /* one element of the string       */
1215     Obj                 tmp;            /* temporary string                */
1216     Int                 i;              /* loop variable                   */
1217 
1218     /* do nothing if the string is already in the string representation    */
1219     if ( IS_STRING_REP(string) )
1220     {
1221         return;
1222     }
1223 
1224 
1225     lenString = LEN_LIST(string);
1226     tmp = NEW_STRING(lenString);
1227 
1228     /* copy the string to the string representation                     */
1229     for ( i = 1; i <= lenString; i++ ) {
1230         elm = ELMW_LIST( string, i );
1231         CHARS_STRING(tmp)[i-1] = CHAR_VALUE(elm);
1232     }
1233     CHARS_STRING(tmp)[lenString] = '\0';
1234 
1235     /* copy back to string  */
1236     RetypeBagSM( string, T_STRING );
1237     ResizeBag( string, SIZEBAG_STRINGLEN(lenString) );
1238     /* copy data area from tmp */
1239     memcpy(ADDR_OBJ(string), CONST_ADDR_OBJ(tmp), SIZE_OBJ(tmp));
1240 }
1241 
1242 
1243 
1244 /****************************************************************************
1245 **
1246 *F  IsStringConv( <obj> ) . . . . . test if an object is a string and convert
1247 **
1248 **  'IsStringConv'   returns 1  if   the object <obj>  is   a  string,  and 0
1249 **  otherwise.   If <obj> is a  string it  changes  its representation to the
1250 **  string representation.
1251 */
IsStringConv(Obj obj)1252 Int IsStringConv (
1253     Obj                 obj )
1254 {
1255     Int                 res;
1256 
1257     /* test whether the object is a string                                 */
1258     res = IS_STRING( obj );
1259 
1260     /* if so, convert it to the string representation                      */
1261     if ( res ) {
1262         ConvString( obj );
1263     }
1264 
1265     /* return the result                                                   */
1266     return res;
1267 }
1268 
1269 
1270 /****************************************************************************
1271 **
1272 *F * * * * * * * * * * * * * * GAP level functions  * * * * * * * * * * * * *
1273 */
1274 
1275 /****************************************************************************
1276 **
1277 *F  FiltIS_STRING( <self>, <obj> )  . . . . . . . . .  test value is a string
1278 */
FiltIS_STRING(Obj self,Obj obj)1279 static Obj FiltIS_STRING(Obj self, Obj obj)
1280 {
1281     return (IS_STRING( obj ) ? True : False);
1282 }
1283 
1284 
1285 /****************************************************************************
1286 **
1287 *F  FuncIS_STRING_CONV( <self>, <obj> ) . . . . . . . . . . check and convert
1288 */
FuncIS_STRING_CONV(Obj self,Obj obj)1289 static Obj FuncIS_STRING_CONV(Obj self, Obj obj)
1290 {
1291     /* return 'true' if <obj> is a string and 'false' otherwise            */
1292     return (IsStringConv(obj) ? True : False);
1293 }
1294 
1295 
1296 /****************************************************************************
1297 **
1298 *F  FuncCONV_STRING( <self>, <string> ) . . . . . . . . convert to string rep
1299 */
FuncCONV_STRING(Obj self,Obj string)1300 static Obj FuncCONV_STRING(Obj self, Obj string)
1301 {
1302     /* check whether <string> is a string                                  */
1303     if (!IS_STRING(string)) {
1304         RequireArgument("ConvString", string, "must be a string");
1305     }
1306 
1307     /* convert to the string representation                                */
1308     ConvString( string );
1309 
1310     /* return nothing                                                      */
1311     return 0;
1312 }
1313 
1314 
1315 /****************************************************************************
1316 **
1317 *F  FiltIS_STRING_REP( <self>, <obj> )  . . . . test if value is a string rep
1318 */
1319 static Obj IsStringRepFilt;
1320 
FiltIS_STRING_REP(Obj self,Obj obj)1321 static Obj FiltIS_STRING_REP(Obj self, Obj obj)
1322 {
1323     return (IS_STRING_REP( obj ) ? True : False);
1324 }
1325 
1326 /****************************************************************************
1327 **
1328 *F  FuncCOPY_TO_STRING_REP( <self>, <obj> ) . copy a string into string rep
1329 */
FuncCOPY_TO_STRING_REP(Obj self,Obj string)1330 static Obj FuncCOPY_TO_STRING_REP(Obj self, Obj string)
1331 {
1332     /* check whether <string> is a string                                 */
1333     if (!IS_STRING(string)) {
1334         RequireArgument("CopyToStringRep", string, "must be a string");
1335     }
1336     return CopyToStringRep(string);
1337 }
1338 
1339 /****************************************************************************
1340 **
1341 *F  FuncPOSITION_SUBSTRING( <self>,  <string>, <substr>, <off> ) .  position of
1342 **  substring
1343 **
1344 **  <str> and <substr> must be strings  and <off> an integer. The position
1345 **  of  first  character of substring   in string,  search  starting  from
1346 **  <off>+1, is  returned if such  a substring exists. Otherwise `fail' is
1347 **  returned.
1348 */
FuncPOSITION_SUBSTRING(Obj self,Obj string,Obj substr,Obj off)1349 static Obj FuncPOSITION_SUBSTRING(Obj self, Obj string, Obj substr, Obj off)
1350 {
1351   Int    ipos, i, j, lens, lenss, max;
1352   const UInt1  *s, *ss;
1353 
1354   /* check whether <string> is a string                                  */
1355   RequireStringRep("POSITION_SUBSTRING", string);
1356 
1357   /* check whether <substr> is a string                        */
1358   RequireStringRep("POSITION_SUBSTRING", substr);
1359 
1360   /* check wether <off> is a non-negative integer  */
1361   RequireNonnegativeSmallInt("POSITION_SUBSTRING", off);
1362   ipos = INT_INTOBJ(off);
1363 
1364   /* special case for the empty string */
1365   lenss = GET_LEN_STRING(substr);
1366   if ( lenss == 0 ) {
1367     return INTOBJ_INT(ipos + 1);
1368   }
1369 
1370   lens = GET_LEN_STRING(string);
1371   max = lens - lenss + 1;
1372   s = CONST_CHARS_STRING(string);
1373   ss = CONST_CHARS_STRING(substr);
1374 
1375   const UInt1 c = ss[0];
1376   for (i = ipos; i < max; i++) {
1377     if (c == s[i]) {
1378       for (j = 1; j < lenss; j++) {
1379         if (! (s[i+j] == ss[j]))
1380           break;
1381       }
1382       if (j == lenss)
1383         return INTOBJ_INT(i+1);
1384     }
1385   }
1386   return Fail;
1387 }
1388 
1389 /****************************************************************************
1390 **
1391 *F  FuncNormalizeWhitespace( <self>, <string> ) . . . . . normalize white
1392 **  space in place
1393 **
1394 **  Whitespace  characters are  " \r\t\n".  Leading and  trailing whitespace  in
1395 **  string  is  removed. Intermediate  sequences  of  whitespace characters  are
1396 **  substituted by a single space.
1397 **
1398 */
FuncNormalizeWhitespace(Obj self,Obj string)1399 static Obj FuncNormalizeWhitespace(Obj self, Obj string)
1400 {
1401   UInt1  *s, c;
1402   Int i, j, len, white;
1403 
1404   /* check whether <string> is a string                                  */
1405   RequireStringRep("NormalizeWhitespace", string);
1406 
1407   len = GET_LEN_STRING(string);
1408   s = CHARS_STRING(string);
1409   i = -1;
1410   white = 1;
1411   for (j = 0; j < len; j++) {
1412     c = s[j];
1413     if (c == ' ' || c == '\n' || c == '\t' || c == '\r') {
1414       if (! white) {
1415         i++;
1416         s[i] = ' ';
1417         white = 1;
1418       }
1419     }
1420     else {
1421       i++;
1422       s[i] = c;
1423       white = 0;
1424     }
1425   }
1426   if (white && i > -1)
1427     i--;
1428   s[i+1] = '\0';
1429   SET_LEN_STRING(string, i+1);
1430 
1431   /* to make it useful as C-string */
1432   CHARS_STRING(string)[i+1] = (UInt1)0;
1433 
1434   return (Obj)0;
1435 }
1436 
1437 
1438 /****************************************************************************
1439 **
1440 *F  FuncREMOVE_CHARACTERS( <self>, <string>, <rem> ) . . . . . delete characters
1441 **  from <rem> in <string> in place
1442 **
1443 */
1444 
FuncREMOVE_CHARACTERS(Obj self,Obj string,Obj rem)1445 static Obj FuncREMOVE_CHARACTERS(Obj self, Obj string, Obj rem)
1446 {
1447   UInt1  *s;
1448   Int i, j, len;
1449   UInt1 REMCHARLIST[256] = {0};
1450 
1451   /* check whether <string> is a string                                  */
1452   RequireStringRep("RemoveCharacters", string);
1453 
1454   /* check whether <rem> is a string                                  */
1455   RequireStringRep("RemoveCharacters", rem);
1456 
1457   /* set REMCHARLIST by setting positions of characters in rem to 1 */
1458   len = GET_LEN_STRING(rem);
1459   s = CHARS_STRING(rem);
1460   for(i=0; i<len; i++) REMCHARLIST[s[i]] = 1;
1461 
1462   /* now change string in place */
1463   len = GET_LEN_STRING(string);
1464   s = CHARS_STRING(string);
1465   i = -1;
1466   for (j = 0; j < len; j++) {
1467     if (REMCHARLIST[s[j]] == 0) {
1468       i++;
1469       s[i] = s[j];
1470     }
1471   }
1472   i++;
1473   s[i] = '\0';
1474   SET_LEN_STRING(string, i);
1475   SHRINK_STRING(string);
1476 
1477   return (Obj)0;
1478 }
1479 
1480 
1481 /****************************************************************************
1482 **
1483 *F  FuncTranslateString( <self>, <string>, <trans> ) . . . translate characters
1484 **  in <string> in place, <string>[i] = <trans>[<string>[i]]
1485 **
1486 */
FuncTranslateString(Obj self,Obj string,Obj trans)1487 static Obj FuncTranslateString(Obj self, Obj string, Obj trans)
1488 {
1489   Int j, len;
1490 
1491   /* check whether <string> is a string                                  */
1492   RequireStringRep("TranslateString", string);
1493 
1494   // check whether <trans> is a string of length at least 256
1495   RequireStringRep("TranslateString", trans);
1496   if ( GET_LEN_STRING( trans ) < 256 ) {
1497       ErrorMayQuit("TranslateString: <trans> must have length >= 256",
1498                    0, 0 );
1499   }
1500 
1501   /* now change string in place */
1502   len = GET_LEN_STRING(string);
1503   UInt1 *s = CHARS_STRING(string);
1504   const UInt1 *t = CONST_CHARS_STRING(trans);
1505   for (j = 0; j < len; j++) {
1506     s[j] = t[s[j]];
1507   }
1508 
1509   return (Obj)0;
1510 }
1511 
1512 
1513 /****************************************************************************
1514 **
1515 *F  FuncSplitStringInternal( <self>, <string>, <seps>, <wspace> ) . . . . split string
1516 **  at characters in <seps> and <wspace>
1517 **
1518 **  The difference of <seps> and <wspace> is that characters in <wspace> don't
1519 **  separate empty strings.
1520 */
FuncSplitStringInternal(Obj self,Obj string,Obj seps,Obj wspace)1521 static Obj FuncSplitStringInternal(Obj self, Obj string, Obj seps, Obj wspace)
1522 {
1523   const UInt1  *s;
1524   Int i, a, z, l, pos, len;
1525   Obj res, part;
1526   UInt1 SPLITSTRINGSEPS[256] = { 0 };
1527   UInt1 SPLITSTRINGWSPACE[256] = { 0 };
1528 
1529   /* check whether <string> is a string                                  */
1530   RequireStringRep("SplitString", string);
1531 
1532   /* check whether <seps> is a string                                  */
1533   RequireStringRep("SplitString", seps);
1534 
1535   /* check whether <wspace> is a string                                  */
1536   RequireStringRep("SplitString", wspace);
1537 
1538   /* set SPLITSTRINGSEPS by setting positions of characters in rem to 1 */
1539   len = GET_LEN_STRING(seps);
1540   s = CONST_CHARS_STRING(seps);
1541   for(i=0; i<len; i++) SPLITSTRINGSEPS[s[i]] = 1;
1542 
1543   /* set SPLITSTRINGWSPACE by setting positions of characters in rem to 1 */
1544   len = GET_LEN_STRING(wspace);
1545   s = CONST_CHARS_STRING(wspace);
1546   for(i=0; i<len; i++) SPLITSTRINGWSPACE[s[i]] = 1;
1547 
1548   /* create the result (list of strings) */
1549   res = NEW_PLIST(T_PLIST, 2);
1550   pos = 0;
1551 
1552   /* now do the splitting */
1553   len = GET_LEN_STRING(string);
1554   s = CONST_CHARS_STRING(string);
1555   for (a=0, z=0; z<len; z++) {
1556     // Whenever we encounter a separator or a white space, the substring
1557     // starting after the last separator/white space is cut out.  The
1558     // only difference between white spaces and separators is that white
1559     // spaces don't separate empty strings.
1560     if (SPLITSTRINGWSPACE[s[z]] == 1) {
1561       if (a<z) {
1562         l = z-a;
1563         part = NEW_STRING(l);
1564         // update s in case there was a garbage collection
1565         s = CONST_CHARS_STRING(string);
1566         COPY_CHARS(part, s + a, l);
1567         CHARS_STRING(part)[l] = 0;
1568         pos++;
1569         AssPlist(res, pos, part);
1570         s = CONST_CHARS_STRING(string);
1571         a = z+1;
1572       }
1573       else {
1574         a = z+1;
1575       }
1576     }
1577     else {
1578       if (SPLITSTRINGSEPS[s[z]] == 1) {
1579         l = z-a;
1580         part = NEW_STRING(l);
1581         // update s in case there was a garbage collection
1582         s = CONST_CHARS_STRING(string);
1583         COPY_CHARS(part, s + a, l);
1584         CHARS_STRING(part)[l] = 0;
1585         pos++;
1586         AssPlist(res, pos, part);
1587         s = CONST_CHARS_STRING(string);
1588         a = z+1;
1589       }
1590     }
1591   }
1592 
1593   // Pick up a substring at the end of the string.  Note that a trailing
1594   // separator does not produce an empty string.
1595   if (a<z) {
1596     /* copy until last position which is z-1 */
1597     l = z-a;
1598     part = NEW_STRING(l);
1599     s = CONST_CHARS_STRING(string);
1600     COPY_CHARS(part, s + a, l);
1601     CHARS_STRING(part)[l] = 0;
1602     pos++;
1603     AssPlist(res, pos, part);
1604   }
1605 
1606   return res;
1607 }
1608 
1609 #ifdef HPCGAP
1610 
1611 /****************************************************************************
1612 **
1613 *F FuncFIND_ALL_IN_STRING( <self>, <string>, <chars> )
1614 **
1615 ** Kernel function to return a list of all occurrences of a set of characters
1616 ** within a string.
1617 */
1618 
FuncFIND_ALL_IN_STRING(Obj self,Obj string,Obj chars)1619 static Obj FuncFIND_ALL_IN_STRING(Obj self, Obj string, Obj chars)
1620 {
1621   Obj result;
1622   UInt i, len, matches;
1623   unsigned char table[1<<(8*sizeof(char))];
1624   const UInt1 *s;
1625   if (!IsStringConv(string) || !IsStringConv(chars))
1626     ErrorQuit("FIND_ALL_IN_STRING: Requires two string arguments", 0L, 0L);
1627   memset(table, 0, sizeof(table));
1628   len = GET_LEN_STRING(chars);
1629   s = CONST_CHARS_STRING(chars);
1630   for (i=0; i<len; i++)
1631     table[s[i]] = 1;
1632   len = GET_LEN_STRING(string);
1633   s = CONST_CHARS_STRING(string);
1634   matches = 0;
1635   for (i = 0; i < len; i++)
1636     if (table[s[i]])
1637       matches++;
1638   result = NEW_PLIST(T_PLIST_DENSE, matches);
1639   SET_LEN_PLIST(result, matches);
1640   matches = 1;
1641   for (i = 0; i < len; i++)
1642     if (table[s[i]]) {
1643       SET_ELM_PLIST(result, matches, INTOBJ_INT(i+1));
1644       matches++;
1645     }
1646   return result;
1647 }
1648 
1649 /****************************************************************************
1650 **
1651 *F FuncNORMALIZE_NEWLINES( <self>, <string> )
1652 **
1653 ** Kernel function to replace all occurrences of CR or CRLF within a
1654 ** string with LF characters. This function modifies its argument and
1655 ** returns it also as its result.
1656 */
1657 
FuncNORMALIZE_NEWLINES(Obj self,Obj string)1658 static Obj FuncNORMALIZE_NEWLINES(Obj self, Obj string)
1659 {
1660   UInt i, j, len;
1661   Char *s;
1662   if (!IsStringConv(string) || !REGION(string))
1663     ErrorQuit("NORMALIZE_NEWLINES: Requires a mutable string argument", 0L, 0L);
1664   len = GET_LEN_STRING(string);
1665   s = CSTR_STRING(string);
1666   for (i = j = 0; i < len; i++) {
1667     if (s[i] == '\r') {
1668       s[j++] = '\n';
1669       if (i + 1 < len && s[i+1] == '\n')
1670         i++;
1671     } else {
1672       s[j++] = s[i];
1673     }
1674   }
1675   SET_LEN_STRING(string, j);
1676   return string;
1677 }
1678 
1679 #endif
1680 
1681 /****************************************************************************
1682 **
1683 *F FuncSMALLINT_STR( <self>, <string> )
1684 **
1685 ** Kernel function to extract parse small integers from strings. Needed before
1686 ** we can conveniently have Int working for things like parsing command line
1687 ** options
1688 */
1689 
FuncSMALLINT_STR(Obj self,Obj str)1690 static Obj FuncSMALLINT_STR(Obj self, Obj str)
1691 {
1692   const Char *string = CONST_CSTR_STRING(str);
1693   Int x = 0;
1694   Int sign = 1;
1695   while (isspace((unsigned int)*string))
1696     string++;
1697   if (*string == '-') {
1698     sign = -1;
1699     string++;
1700   } else if (*string == '+') {
1701     string++;
1702   }
1703   while (IsDigit(*string)) {
1704     x *= 10;
1705     x += (*string - '0');
1706     string++;
1707   }
1708   return INTOBJ_INT(sign*x);
1709 }
1710 
1711 
1712 /****************************************************************************
1713 **
1714 *F  UnbString( <string>, <pos> ) . . . . . .  unbind an element from a string
1715 **
1716 **  This is to avoid unpacking of the string to a plain list when <pos> is
1717 **  larger or equal to the length of <string>.
1718 */
UnbString(Obj string,Int pos)1719 static void UnbString(Obj string, Int pos)
1720 {
1721     GAP_ASSERT(IS_MUTABLE_OBJ(string));
1722     const Int len = GET_LEN_STRING(string);
1723     if (len == pos) {
1724         // maybe the string becomes sorted
1725         CLEAR_FILTS_LIST(string);
1726         CHARS_STRING(string)[pos - 1] = (UInt1)0;
1727         SET_LEN_STRING(string, len - 1);
1728     }
1729     else if (pos < len) {
1730         PLAIN_LIST(string);
1731         UNB_LIST(string, pos);
1732     }
1733 }
1734 
1735 
1736 /****************************************************************************
1737 **
1738 *F * * * * * * * * * * * * * initialize module * * * * * * * * * * * * * * * */
1739 
1740 /****************************************************************************
1741 **
1742 *V  BagNames  . . . . . . . . . . . . . . . . . . . . . . . list of bag names
1743 */
1744 static StructBagNames BagNames[] = {
1745   { T_CHAR,                           "character"                      },
1746   { T_STRING,                         "list (string)"                  },
1747   { T_STRING              +IMMUTABLE, "list (string,imm)"              },
1748   { T_STRING_SSORT,                   "list (string,ssort)"            },
1749   { T_STRING_SSORT        +IMMUTABLE, "list (string,ssort,imm)"        },
1750   { T_STRING_NSORT,                   "list (string,nsort)"            },
1751   { T_STRING_NSORT        +IMMUTABLE, "list (string,nsort,imm)"        },
1752   { -1,                               ""                               }
1753 };
1754 
1755 
1756 /****************************************************************************
1757 **
1758 *V  ClearFiltsTab . . . . . . . . . . . . . . . . . . . .  clear filter tnums
1759 */
1760 static Int ClearFiltsTab [] = {
1761     T_STRING,                 T_STRING,
1762     T_STRING_NSORT,           T_STRING,
1763     T_STRING_SSORT,           T_STRING,
1764     -1,                       -1
1765 };
1766 
1767 
1768 /****************************************************************************
1769 **
1770 *V  HasFiltTab  . . . . . . . . . . . . . . . . . . . . .  tester filter tnum
1771 */
1772 static Int HasFiltTab [] = {
1773 
1774     // string
1775     T_STRING,                  FN_IS_DENSE,   1,
1776     T_STRING,                  FN_IS_NDENSE,  0,
1777     T_STRING,                  FN_IS_HOMOG,   1,
1778     T_STRING,                  FN_IS_NHOMOG,  0,
1779     T_STRING,                  FN_IS_TABLE,   0,
1780     T_STRING,                  FN_IS_RECT,    0,
1781     T_STRING,                  FN_IS_SSORT,   0,
1782     T_STRING,                  FN_IS_NSORT,   0,
1783 
1784     // ssort string
1785     T_STRING_SSORT,            FN_IS_DENSE,   1,
1786     T_STRING_SSORT,            FN_IS_NDENSE,  0,
1787     T_STRING_SSORT,            FN_IS_HOMOG,   1,
1788     T_STRING_SSORT,            FN_IS_NHOMOG,  0,
1789     T_STRING_SSORT,            FN_IS_TABLE,   0,
1790     T_STRING_SSORT,            FN_IS_RECT,    0,
1791     T_STRING_SSORT,            FN_IS_SSORT,   1,
1792     T_STRING_SSORT,            FN_IS_NSORT,   0,
1793 
1794     // nsort string
1795     T_STRING_NSORT,            FN_IS_DENSE,   1,
1796     T_STRING_NSORT,            FN_IS_NDENSE,  0,
1797     T_STRING_NSORT,            FN_IS_HOMOG,   1,
1798     T_STRING_NSORT,            FN_IS_NHOMOG,  0,
1799     T_STRING_NSORT,            FN_IS_TABLE,   0,
1800     T_STRING_NSORT,            FN_IS_RECT,    0,
1801     T_STRING_NSORT,            FN_IS_SSORT,   0,
1802     T_STRING_NSORT,            FN_IS_NSORT,   1,
1803 
1804     -1,                        -1,            -1
1805 };
1806 
1807 
1808 /****************************************************************************
1809 **
1810 *V  SetFiltTab  . . . . . . . . . . . . . . . . . . . . .  setter filter tnum
1811 */
1812 static Int SetFiltTab [] = {
1813 
1814     // string
1815     T_STRING,                  FN_IS_DENSE,   T_STRING,
1816     T_STRING,                  FN_IS_NDENSE,  -1,
1817     T_STRING,                  FN_IS_HOMOG,   T_STRING,
1818     T_STRING,                  FN_IS_NHOMOG,  -1,
1819     T_STRING,                  FN_IS_TABLE,   -1,
1820     T_STRING,                  FN_IS_RECT,    -1,
1821     T_STRING,                  FN_IS_SSORT,   T_STRING_SSORT,
1822     T_STRING,                  FN_IS_NSORT,   T_STRING_NSORT,
1823 
1824     // ssort string
1825     T_STRING_SSORT,            FN_IS_DENSE,   T_STRING_SSORT,
1826     T_STRING_SSORT,            FN_IS_NDENSE,  -1,
1827     T_STRING_SSORT,            FN_IS_HOMOG,   T_STRING_SSORT,
1828     T_STRING_SSORT,            FN_IS_NHOMOG,  -1,
1829     T_STRING_SSORT,            FN_IS_TABLE,   -1,
1830     T_STRING_SSORT,            FN_IS_RECT,    -1,
1831     T_STRING_SSORT,            FN_IS_SSORT,   T_STRING_SSORT,
1832     T_STRING_SSORT,            FN_IS_NSORT,   -1,
1833 
1834     // nsort string
1835     T_STRING_NSORT,            FN_IS_DENSE,   T_STRING_NSORT,
1836     T_STRING_NSORT,            FN_IS_NDENSE,  -1,
1837     T_STRING_NSORT,            FN_IS_HOMOG,   T_STRING_NSORT,
1838     T_STRING_NSORT,            FN_IS_NHOMOG,  -1,
1839     T_STRING_NSORT,            FN_IS_TABLE,   -1,
1840     T_STRING_NSORT,            FN_IS_RECT,    -1,
1841     T_STRING_NSORT,            FN_IS_SSORT,   -1,
1842     T_STRING_NSORT,            FN_IS_NSORT,   T_STRING_NSORT,
1843 
1844     -1,                        -1,            -1
1845 
1846 };
1847 
1848 
1849 /****************************************************************************
1850 **
1851 *V  ResetFiltTab  . . . . . . . . . . . . . . . . . . .  unsetter filter tnum
1852 */
1853 static Int ResetFiltTab [] = {
1854 
1855     // string
1856     T_STRING,                  FN_IS_DENSE,   T_STRING,
1857     T_STRING,                  FN_IS_NDENSE,  T_STRING,
1858     T_STRING,                  FN_IS_HOMOG,   T_STRING,
1859     T_STRING,                  FN_IS_NHOMOG,  T_STRING,
1860     T_STRING,                  FN_IS_TABLE,   T_STRING,
1861     T_STRING,                  FN_IS_RECT,    T_STRING,
1862     T_STRING,                  FN_IS_SSORT,   T_STRING,
1863     T_STRING,                  FN_IS_NSORT,   T_STRING,
1864 
1865     // ssort string
1866     T_STRING_SSORT,            FN_IS_DENSE,   T_STRING_SSORT,
1867     T_STRING_SSORT,            FN_IS_NDENSE,  T_STRING_SSORT,
1868     T_STRING_SSORT,            FN_IS_HOMOG,   T_STRING_SSORT,
1869     T_STRING_SSORT,            FN_IS_NHOMOG,  T_STRING_SSORT,
1870     T_STRING_SSORT,            FN_IS_TABLE,   T_STRING_SSORT,
1871     T_STRING_SSORT,            FN_IS_RECT,    T_STRING_SSORT,
1872     T_STRING_SSORT,            FN_IS_SSORT,   T_STRING,
1873     T_STRING_SSORT,            FN_IS_NSORT,   T_STRING_SSORT,
1874 
1875     // nsort string
1876     T_STRING_NSORT,            FN_IS_DENSE,   T_STRING_NSORT,
1877     T_STRING_NSORT,            FN_IS_NDENSE,  T_STRING_NSORT,
1878     T_STRING_NSORT,            FN_IS_HOMOG,   T_STRING_NSORT,
1879     T_STRING_NSORT,            FN_IS_NHOMOG,  T_STRING_NSORT,
1880     T_STRING_NSORT,            FN_IS_TABLE,   T_STRING_NSORT,
1881     T_STRING_NSORT,            FN_IS_RECT,    T_STRING_NSORT,
1882     T_STRING_NSORT,            FN_IS_SSORT,   T_STRING_NSORT,
1883     T_STRING_NSORT,            FN_IS_NSORT,   T_STRING,
1884 
1885     -1,                        -1,            -1
1886 
1887 };
1888 
1889 
1890 /****************************************************************************
1891 **
1892 *V  GVarFilts . . . . . . . . . . . . . . . . . . . list of filters to export
1893 */
1894 static StructGVarFilt GVarFilts [] = {
1895 
1896     GVAR_FILT(IS_STRING, "obj", &IsStringFilt),
1897     GVAR_FILT(IS_STRING_REP, "obj", &IsStringRepFilt),
1898     { 0, 0, 0, 0, 0 }
1899 
1900 };
1901 
1902 
1903 /****************************************************************************
1904 **
1905 *V  GVarFuncs . . . . . . . . . . . . . . . . . . list of functions to export
1906 */
1907 static StructGVarFunc GVarFuncs [] = {
1908 
1909     GVAR_FUNC(IS_STRING_CONV, 1, "string"),
1910     GVAR_FUNC(CONV_STRING, 1, "string"),
1911     GVAR_FUNC(COPY_TO_STRING_REP, 1, "string"),
1912     GVAR_FUNC(CHAR_INT, 1, "integer"),
1913     GVAR_FUNC(INT_CHAR, 1, "char"),
1914     GVAR_FUNC(CHAR_SINT, 1, "integer"),
1915     GVAR_FUNC(SINT_CHAR, 1, "char"),
1916     GVAR_FUNC(STRING_SINTLIST, 1, "list"),
1917     GVAR_FUNC(INTLIST_STRING, 2, "string, sign"),
1918     GVAR_FUNC(SINTLIST_STRING, 1, "string"),
1919     GVAR_FUNC(EmptyString, 1, "len"),
1920     GVAR_FUNC(ShrinkAllocationString, 1, "str"),
1921     GVAR_FUNC(REVNEG_STRING, 1, "string"),
1922     GVAR_FUNC(POSITION_SUBSTRING, 3, "string, substr, off"),
1923 #ifdef HPCGAP
1924     GVAR_FUNC(FIND_ALL_IN_STRING, 2, "string, characters"),
1925     GVAR_FUNC(NORMALIZE_NEWLINES, 1, "string"),
1926 #endif
1927     GVAR_FUNC(NormalizeWhitespace, 1, "string"),
1928     GVAR_FUNC(REMOVE_CHARACTERS, 2, "string, rem"),
1929     GVAR_FUNC(TranslateString, 2, "string, trans"),
1930     GVAR_FUNC(SplitStringInternal, 3, "string, seps, wspace"),
1931     GVAR_FUNC(SMALLINT_STR, 1, "string"),
1932     { 0, 0, 0, 0, 0 }
1933 
1934 };
1935 
1936 
1937 /****************************************************************************
1938 **
1939 *F  InitKernel( <module> )  . . . . . . . . initialise kernel data structures
1940 */
1941 static Char CharCookie[256][21];
1942 
InitKernel(StructInitInfo * module)1943 static Int InitKernel (
1944     StructInitInfo *    module )
1945 {
1946     UInt                t1;
1947     UInt                t2;
1948     Int                 i, j;
1949     const Char *        cookie_base = "src/stringobj.c:Char";
1950 
1951     /* GASMAN marking functions and GASMAN names                           */
1952     InitBagNamesFromTable( BagNames );
1953 
1954     InitMarkFuncBags( T_CHAR , MarkNoSubBags );
1955     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
1956         InitMarkFuncBags( t1                     , MarkNoSubBags );
1957         InitMarkFuncBags( t1          +IMMUTABLE , MarkNoSubBags );
1958     }
1959 
1960 #ifdef HPCGAP
1961     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
1962       MakeBagTypePublic( t1 + IMMUTABLE );
1963     }
1964     MakeBagTypePublic(T_CHAR);
1965 #endif
1966 
1967     /* make all the character constants once and for all                   */
1968     for ( i = 0; i < 256; i++ ) {
1969         for (j = 0; j < 17; j++ ) {
1970             CharCookie[i][j] = cookie_base[j];
1971         }
1972         CharCookie[i][j++] = '0' + i/100;
1973         CharCookie[i][j++] = '0' + (i % 100)/10;
1974         CharCookie[i][j++] = '0' + i % 10;
1975         CharCookie[i][j++] = '\0';
1976         InitGlobalBag( &ObjsChar[i], &(CharCookie[i][0]) );
1977     }
1978 
1979     /* install the type method                                             */
1980     ImportGVarFromLibrary( "TYPE_CHAR", &TYPE_CHAR );
1981     TypeObjFuncs[ T_CHAR ] = TypeChar;
1982 
1983     /* install the type method                                             */
1984     ImportGVarFromLibrary( "TYPES_STRING", &TYPES_STRING );
1985     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
1986         TypeObjFuncs[ t1            ] = TypeString;
1987         TypeObjFuncs[ t1 +IMMUTABLE ] = TypeString;
1988     }
1989 
1990     /* init filters and functions                                          */
1991     InitHdlrFiltsFromTable( GVarFilts );
1992     InitHdlrFuncsFromTable( GVarFuncs );
1993 
1994     /* initialise list tables                                              */
1995     InitClearFiltsTNumsFromTable   ( ClearFiltsTab );
1996     InitHasFiltListTNumsFromTable  ( HasFiltTab    );
1997     InitSetFiltListTNumsFromTable  ( SetFiltTab    );
1998     InitResetFiltListTNumsFromTable( ResetFiltTab  );
1999 
2000     /* Install the saving function                                         */
2001     SaveObjFuncs[ T_CHAR ] = SaveChar;
2002     LoadObjFuncs[ T_CHAR ] = LoadChar;
2003 
2004     /* install the character functions                                     */
2005     PrintObjFuncs[ T_CHAR ] = PrintChar;
2006     EqFuncs[ T_CHAR ][ T_CHAR ] = EqChar;
2007     LtFuncs[ T_CHAR ][ T_CHAR ] = LtChar;
2008 
2009     /* install the saving method                                             */
2010     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2011         SaveObjFuncs[ t1            ] = SaveString;
2012         SaveObjFuncs[ t1 +IMMUTABLE ] = SaveString;
2013         LoadObjFuncs[ t1            ] = LoadString;
2014         LoadObjFuncs[ t1 +IMMUTABLE ] = LoadString;
2015     }
2016 
2017 #if !defined(USE_THREADSAFE_COPYING)
2018     /* install the copy method                                             */
2019     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2020         CopyObjFuncs [ t1                     ] = CopyString;
2021         CopyObjFuncs [ t1          +IMMUTABLE ] = CopyString;
2022         CleanObjFuncs[ t1                     ] = 0;
2023         CleanObjFuncs[ t1          +IMMUTABLE ] = 0;
2024     }
2025 #endif
2026 
2027     /* install the print method                                            */
2028     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2029         PrintObjFuncs[ t1            ] = PrintString;
2030         PrintObjFuncs[ t1 +IMMUTABLE ] = PrintString;
2031     }
2032 
2033     /* install the comparison methods                                      */
2034     for ( t1 = T_STRING; t1 <= T_STRING_SSORT+IMMUTABLE; t1++ ) {
2035         for ( t2 = T_STRING; t2 <= T_STRING_SSORT+IMMUTABLE; t2++ ) {
2036             EqFuncs[ t1 ][ t2 ] = EqString;
2037             LtFuncs[ t1 ][ t2 ] = LtString;
2038         }
2039     }
2040 
2041     /* install the list methods                                            */
2042     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2043         LenListFuncs    [ t1            ] = LenString;
2044         LenListFuncs    [ t1 +IMMUTABLE ] = LenString;
2045         IsbListFuncs    [ t1            ] = IsbString;
2046         IsbListFuncs    [ t1 +IMMUTABLE ] = IsbString;
2047         Elm0ListFuncs   [ t1            ] = Elm0String;
2048         Elm0ListFuncs   [ t1 +IMMUTABLE ] = Elm0String;
2049         Elm0vListFuncs  [ t1            ] = Elm0vString;
2050         Elm0vListFuncs  [ t1 +IMMUTABLE ] = Elm0vString;
2051         ElmListFuncs    [ t1            ] = ElmString;
2052         ElmListFuncs    [ t1 +IMMUTABLE ] = ElmString;
2053         ElmvListFuncs   [ t1            ] = ElmvString;
2054         ElmvListFuncs   [ t1 +IMMUTABLE ] = ElmvString;
2055         ElmwListFuncs   [ t1            ] = ElmwString;
2056         ElmwListFuncs   [ t1 +IMMUTABLE ] = ElmwString;
2057         ElmsListFuncs   [ t1            ] = ElmsString;
2058         ElmsListFuncs   [ t1 +IMMUTABLE ] = ElmsString;
2059         UnbListFuncs    [ t1            ] = UnbString;
2060         AssListFuncs    [ t1            ] = AssString;
2061         AsssListFuncs   [ t1            ] = AsssString;
2062         IsDenseListFuncs[ t1            ] = AlwaysYes;
2063         IsDenseListFuncs[ t1 +IMMUTABLE ] = AlwaysYes;
2064         IsHomogListFuncs[ t1            ] = AlwaysYes;
2065         IsHomogListFuncs[ t1 +IMMUTABLE ] = AlwaysYes;
2066         IsTableListFuncs[ t1            ] = AlwaysNo;
2067         IsTableListFuncs[ t1 +IMMUTABLE ] = AlwaysNo;
2068         IsSSortListFuncs[ t1            ] = IsSSortString;
2069         IsSSortListFuncs[ t1 +IMMUTABLE ] = IsSSortString;
2070         IsPossListFuncs [ t1            ] = IsPossString;
2071         IsPossListFuncs [ t1 +IMMUTABLE ] = IsPossString;
2072         PosListFuncs    [ t1            ] = PosString;
2073         PosListFuncs    [ t1 +IMMUTABLE ] = PosString;
2074         PlainListFuncs  [ t1            ] = PlainString;
2075         PlainListFuncs  [ t1 +IMMUTABLE ] = PlainString;
2076     }
2077     IsSSortListFuncs[ T_STRING_NSORT            ] = AlwaysNo;
2078     IsSSortListFuncs[ T_STRING_NSORT +IMMUTABLE ] = AlwaysNo;
2079     IsSSortListFuncs[ T_STRING_SSORT            ] = AlwaysYes;
2080     IsSSortListFuncs[ T_STRING_SSORT +IMMUTABLE ] = AlwaysYes;
2081 
2082 
2083     /* install the `IsString' functions                                    */
2084     for ( t1 = FIRST_REAL_TNUM; t1 <= LAST_REAL_TNUM; t1++ ) {
2085         assert(IsStringFuncs[ t1 ] == 0);
2086         IsStringFuncs[ t1 ] = AlwaysNo;
2087     }
2088 
2089     IsStringFuncs[ T_PLIST                 ] = IsStringList;
2090     IsStringFuncs[ T_PLIST      +IMMUTABLE ] = IsStringList;
2091     IsStringFuncs[ T_PLIST_DENSE           ] = IsStringList;
2092     IsStringFuncs[ T_PLIST_DENSE+IMMUTABLE ] = IsStringList;
2093     IsStringFuncs[ T_PLIST_EMPTY           ] = AlwaysYes;
2094     IsStringFuncs[ T_PLIST_EMPTY+IMMUTABLE ] = AlwaysYes;
2095 
2096     for ( t1 = T_PLIST_HOM; t1 <= T_PLIST_HOM_SSORT; t1 += 2 ) {
2097         IsStringFuncs[ t1            ] = IsStringListHom;
2098         IsStringFuncs[ t1 +IMMUTABLE ] = IsStringListHom;
2099     }
2100 
2101     for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1++ ) {
2102         IsStringFuncs[ t1 ] = AlwaysYes;
2103     }
2104 
2105     for ( t1 = FIRST_EXTERNAL_TNUM; t1 <= LAST_EXTERNAL_TNUM; t1++ ) {
2106         IsStringFuncs[ t1 ] = IsStringObject;
2107     }
2108 
2109     MakeImmutableObjFuncs[ T_STRING       ] = MakeImmutableNoRecurse;
2110     MakeImmutableObjFuncs[ T_STRING_SSORT ] = MakeImmutableNoRecurse;
2111     MakeImmutableObjFuncs[ T_STRING_NSORT ] = MakeImmutableNoRecurse;
2112 
2113     /* return success                                                      */
2114     return 0;
2115 }
2116 
2117 
2118 /****************************************************************************
2119 **
2120 *F  InitLibrary( <module> ) . . . . . . .  initialise library data structures
2121 */
InitLibrary(StructInitInfo * module)2122 static Int InitLibrary (
2123     StructInitInfo *    module )
2124 {
2125     Int                 i;
2126 
2127 
2128     /* make all the character constants once and for all                   */
2129     for ( i = 0; i < 256; i++ ) {
2130         ObjsChar[i] = NewBag( T_CHAR, 1L );
2131         SET_CHAR_VALUE(ObjsChar[i], (UChar)i);
2132     }
2133 
2134     /* init filters and functions                                          */
2135     InitGVarFiltsFromTable( GVarFilts );
2136     InitGVarFuncsFromTable( GVarFuncs );
2137 
2138     /* return success                                                      */
2139     return 0;
2140 }
2141 
2142 
2143 /****************************************************************************
2144 **
2145 *F  InitInfoString()  . . . . . . . . . . . . . . . . table of init functions
2146 */
2147 static StructInitInfo module = {
2148     // init struct using C99 designated initializers; for a full list of
2149     // fields, please refer to the definition of StructInitInfo
2150     .type = MODULE_BUILTIN,
2151     .name = "string",
2152     .initKernel = InitKernel,
2153     .initLibrary = InitLibrary,
2154 };
2155 
InitInfoString(void)2156 StructInitInfo * InitInfoString ( void )
2157 {
2158     return &module;
2159 }
2160