1 /****************************************************************************
2 **
3 ** This file is part of GAP, a system for computational discrete algebra.
4 **
5 ** Copyright of GAP belongs to its developers, whose names are too numerous
6 ** to list here. Please refer to the COPYRIGHT file for details.
7 **
8 ** SPDX-License-Identifier: GPL-2.0-or-later
9 **
10 ** This file contains the functions which mainly deal with strings.
11 **
12 ** A *string* is a list that has no holes, and whose elements are all
13 ** characters. For the full definition of strings see chapter "Strings" in
14 ** the {\GAP} manual. Read also "More about Strings" about the string flag
15 ** and the compact representation of strings.
16 **
17 ** A list that is known to be a string is represented by a bag of type
18 ** 'T_STRING', which has the following format:
19 **
20 ** +--------+----+----+- - - -+----+----+
21 ** |length |1st |2nd | |last|null|
22 ** |as UInt |char|char| |char|char|
23 ** +--------+----+----+- - - -+----+----+
24 **
25 ** Each entry is a single character (of C type 'unsigned char'). The last
26 ** entry in the bag is the null character ('\0'), which terminates C
27 ** strings. We add this null character although the length is stored in the
28 ** object. This allows to use C routines with strings directly with null
29 ** character free strings (e.g., filenames).
30 **
31 ** Note that a list represented by a bag of type 'T_PLIST' might still be a
32 ** string. It is just that the kernel does not know this.
33 **
34 ** This package consists of three parts.
35 **
36 ** The first part consists of the functions 'NEW_STRING', 'CHARS_STRING' (or
37 ** 'CSTR_STRING'), 'GET_LEN_STRING', 'SET_LEN_STRING', and more. These and
38 ** the functions below use the detailed knowledge about the representation
39 ** of strings.
40 **
41 ** The second part consists of the functions 'LenString', 'ElmString',
42 ** 'ElmsStrings', 'AssString', 'AsssString', PlainString',
43 ** and 'IsPossString'. They are the functions required by the generic lists
44 ** package. Using these functions the other parts of the {\GAP} kernel can
45 ** access and modify strings without actually being aware that they are
46 ** dealing with a string.
47 **
48 ** The third part consists of the functions 'PrintString', which is called
49 ** by 'FuncPrint', and 'IsString', which test whether an arbitrary list is a
50 ** string, and if so converts it into the above format.
51 */
52
53 #include "stringobj.h"
54
55 #include "ariths.h"
56 #include "bool.h"
57 #include "error.h"
58 #include "gaputils.h"
59 #include "io.h"
60 #include "lists.h"
61 #include "modules.h"
62 #include "opers.h"
63 #include "plist.h"
64 #include "range.h"
65 #include "saveload.h"
66
67 #ifdef HPCGAP
68 #include "hpc/guards.h"
69 #endif
70
71
72 /****************************************************************************
73 **
74 *F * * * * * * * * * * * * * * character functions * * * * * * * * * * * * *
75 */
76
77 /****************************************************************************
78 **
79 *V ObjsChar[<chr>] . . . . . . . . . . . . . . . . table of character values
80 **
81 ** 'ObjsChar' contains all the character values. That way we do not need to
82 ** allocate new bags for new characters.
83 */
84 Obj ObjsChar [256];
85
86
87 /****************************************************************************
88 **
89 *F TypeChar( <chr> ) . . . . . . . . . . . . . . . type of a character value
90 **
91 ** 'TypeChar' returns the type of the character <chr>.
92 **
93 ** 'TypeChar' is the function in 'TypeObjFuncs' for character values.
94 */
95 static Obj TYPE_CHAR;
96
TypeChar(Obj chr)97 static Obj TypeChar(Obj chr)
98 {
99 return TYPE_CHAR;
100 }
101
102
103 /****************************************************************************
104 **
105 *F EqChar( <charL>, <charR> ) . . . . . . . . . . . compare two characters
106 **
107 ** 'EqChar' returns 'true' if the two characters <charL> and <charR> are
108 ** equal, and 'false' otherwise.
109 */
EqChar(Obj charL,Obj charR)110 static Int EqChar(Obj charL, Obj charR)
111 {
112 return CHAR_VALUE(charL) == CHAR_VALUE(charR);
113 }
114
115
116 /****************************************************************************
117 **
118 *F LtChar( <charL>, <charR> ) . . . . . . . . . . . compare two characters
119 **
120 ** 'LtChar' returns 'true' if the character <charL> is less than the
121 ** character <charR>, and 'false' otherwise.
122 */
LtChar(Obj charL,Obj charR)123 static Int LtChar(Obj charL, Obj charR)
124 {
125 return CHAR_VALUE(charL) < CHAR_VALUE(charR);
126 }
127
128
129 /****************************************************************************
130 **
131 *F PrintChar( <chr> ) . . . . . . . . . . . . . . . . . . print a character
132 **
133 ** 'PrChar' prints the character <chr>.
134 */
PrintChar(Obj val)135 static void PrintChar(Obj val)
136 {
137 UChar chr;
138
139 chr = CHAR_VALUE(val);
140 if ( chr == '\n' ) Pr("'\\n'",0L,0L);
141 else if ( chr == '\t' ) Pr("'\\t'",0L,0L);
142 else if ( chr == '\r' ) Pr("'\\r'",0L,0L);
143 else if ( chr == '\b' ) Pr("'\\b'",0L,0L);
144 else if ( chr == '\01' ) Pr("'\\>'",0L,0L);
145 else if ( chr == '\02' ) Pr("'\\<'",0L,0L);
146 else if ( chr == '\03' ) Pr("'\\c'",0L,0L);
147 else if ( chr == '\'' ) Pr("'\\''",0L,0L);
148 else if ( chr == '\\' ) Pr("'\\\\'",0L,0L);
149 /* print every non-printable on non-ASCII character in three digit
150 * notation */
151 /* old version (changed by FL)
152 else if ( chr == '\0' ) Pr("'\\0'",0L,0L);
153 else if ( chr < 8 ) Pr("'\\0%d'",(Int)(chr&7),0L);
154 else if ( chr < 32 ) Pr("'\\0%d%d'",(Int)(chr/8),(Int)(chr&7));*/
155 else if ( chr < 32 || chr > 126 ) {
156 Pr("'\\%d%d", (Int)((chr & 192) >> 6), (Int)((chr & 56) >> 3));
157 Pr("%d'", (Int)(chr&7), 0L);
158 }
159 else Pr("'%c'",(Int)chr,0L);
160 }
161
162
163 /****************************************************************************
164 **
165 *F SaveChar( <char> ) . . . . . . . . . . . . . . . . . . save a character
166 **
167 */
SaveChar(Obj c)168 static void SaveChar(Obj c)
169 {
170 SaveUInt1( CHAR_VALUE(c));
171 }
172
173
174 /****************************************************************************
175 **
176 *F LoadChar( <char> ) . . . . . . . . . . . . . . . . . . load a character
177 **
178 */
LoadChar(Obj c)179 static void LoadChar(Obj c)
180 {
181 SET_CHAR_VALUE(c, LoadUInt1());
182 }
183
184
185
186 /****************************************************************************
187 **
188 *F * * * * * * * * * * * * * * GAP level functions * * * * * * * * * * * * *
189 */
190
191
192 /****************************************************************************
193 **
194 *F FuncEmptyString( <self>, <len> ) . . . . . . . . empty string with space
195 **
196 ** Returns an empty string, with space for <len> characters preallocated.
197 **
198 */
FuncEmptyString(Obj self,Obj len)199 static Obj FuncEmptyString(Obj self, Obj len)
200 {
201 Obj new;
202 RequireNonnegativeSmallInt("EmptyString", len);
203 new = NEW_STRING(INT_INTOBJ(len));
204 SET_LEN_STRING(new, 0);
205 return new;
206 }
207
208 /****************************************************************************
209 **
210 *F FuncShrinkAllocationString( <self>, <str> ) . . give back unneeded memory
211 **
212 ** Shrinks the bag of <str> to minimal possible size (possibly converts to
213 ** compact representation).
214 **
215 */
FuncShrinkAllocationString(Obj self,Obj str)216 static Obj FuncShrinkAllocationString(Obj self, Obj str)
217 {
218 RequireStringRep("ShrinkAllocationString", str);
219 SHRINK_STRING(str);
220 return (Obj)0;
221 }
222
223 /****************************************************************************
224 **
225 *F FuncCHAR_INT( <self>, <int> ) . . . . . . . . . . . . . . char by integer
226 */
FuncCHAR_INT(Obj self,Obj val)227 static Obj FuncCHAR_INT(Obj self, Obj val)
228 {
229 Int chr;
230
231 /* get and check the integer value */
232 chr = GetSmallInt("CHAR_INT", val);
233 if ( 255 < chr || chr < 0 ) {
234 ErrorMayQuit("<val> must be an integer between 0 and 255", 0, 0);
235 }
236
237 /* return the character */
238 return ObjsChar[chr];
239 }
240
241
242 /****************************************************************************
243 **
244 *F FuncINT_CHAR( <self>, <char> ) . . . . . . . . . . . . . integer by char
245 */
FuncINT_CHAR(Obj self,Obj val)246 static Obj FuncINT_CHAR(Obj self, Obj val)
247 {
248 /* get and check the character */
249 if (TNUM_OBJ(val) != T_CHAR) {
250 RequireArgument("INT_CHAR", val, "must be a character");
251 }
252
253 /* return the character */
254 return INTOBJ_INT(CHAR_VALUE(val));
255 }
256
257 /****************************************************************************
258 **
259 *F FuncCHAR_SINT( <self>, <int> ) . . . . . . . . . . char by signed integer
260 */
FuncCHAR_SINT(Obj self,Obj val)261 static Obj FuncCHAR_SINT(Obj self, Obj val)
262 {
263 Int chr;
264
265 /* get and check the integer value */
266 chr = GetSmallInt("CHAR_SINT", val);
267 if (127 < chr || chr < -128) {
268 ErrorMayQuit("<val> must be an integer between -128 and 127", 0, 0);
269 }
270
271 /* return the character */
272 return ObjsChar[CHAR_SINT(chr)];
273 }
274
275
276 /****************************************************************************
277 **
278 *F FuncSINT_CHAR( <self>, <char> ) . . . . . . . . . signed integer by char
279 */
FuncSINT_CHAR(Obj self,Obj val)280 static Obj FuncSINT_CHAR(Obj self, Obj val)
281 {
282 /* get and check the character */
283 if (TNUM_OBJ(val) != T_CHAR) {
284 RequireArgument("SINT_CHAR", val, "must be a character");
285 }
286
287 /* return the character */
288 return INTOBJ_INT(SINT_CHAR(CHAR_VALUE(val)));
289 }
290
291 /****************************************************************************
292 **
293 *F FuncSINTLIST_STRING( <self>, <string> ) signed integer list by string
294 */
FuncINTLIST_STRING(Obj self,Obj val,Obj sign)295 static Obj FuncINTLIST_STRING(Obj self, Obj val, Obj sign)
296 {
297 UInt l,i;
298 Obj n, *addr;
299 const UInt1 *p;
300
301 /* test whether val is a string, convert to compact rep if necessary */
302 RequireStringRep("INTLIST_STRING", val);
303
304 l=GET_LEN_STRING(val);
305 n=NEW_PLIST(T_PLIST,l);
306 SET_LEN_PLIST(n,l);
307 p=CONST_CHARS_STRING(val);
308 addr=ADDR_OBJ(n);
309 /* signed or unsigned ? */
310 if (sign == INTOBJ_INT(1L)) {
311 for (i=1; i<=l; i++) {
312 addr[i] = INTOBJ_INT(p[i-1]);
313 }
314 }
315 else {
316 for (i=1; i<=l; i++) {
317 addr[i] = INTOBJ_INT(SINT_CHAR(p[i-1]));
318 }
319 }
320
321 CHANGED_BAG(n);
322 return n;
323 }
324
FuncSINTLIST_STRING(Obj self,Obj val)325 static Obj FuncSINTLIST_STRING(Obj self, Obj val)
326 {
327 return FuncINTLIST_STRING ( self, val, INTOBJ_INT(-1L) );
328 }
329
330 /****************************************************************************
331 **
332 *F FuncSTRING_SINTLIST( <self>, <string> ) string by signed integer list
333 */
FuncSTRING_SINTLIST(Obj self,Obj val)334 static Obj FuncSTRING_SINTLIST(Obj self, Obj val)
335 {
336 UInt l,i;
337 Int low, inc;
338 Obj n;
339 UInt1 *p;
340
341 /* there should be a test here, but how do I check cheaply for list of
342 * integers ? */
343
344 /* general code */
345 if (!IS_RANGE(val) && !IS_PLIST(val)) {
346 again:
347 RequireArgument("STRING_SINTLIST", val,
348 "must be a plain list of small integers or a range");
349 }
350 if (! IS_RANGE(val) ) {
351 l=LEN_PLIST(val);
352 n=NEW_STRING(l);
353 p=CHARS_STRING(n);
354 for (i=1;i<=l;i++) {
355 Obj x = ELM_PLIST(val,i);
356 if (!IS_INTOBJ(x))
357 goto again;
358 *p++=CHAR_SINT(INT_INTOBJ(x));
359 }
360 }
361 else {
362 l=GET_LEN_RANGE(val);
363 low=GET_LOW_RANGE(val);
364 inc=GET_INC_RANGE(val);
365 n=NEW_STRING(l);
366 p=CHARS_STRING(n);
367 for (i=1;i<=l;i++) {
368 *p++=CHAR_SINT(low);
369 low=low+inc;
370 }
371
372 }
373
374 return n;
375 }
376
377 /****************************************************************************
378 **
379 *F FuncREVNEG_STRING( <self>, <string> ) string by signed integer list
380 */
FuncREVNEG_STRING(Obj self,Obj val)381 static Obj FuncREVNEG_STRING(Obj self, Obj val)
382 {
383 UInt l,i,j;
384 Obj n;
385 const UInt1 *p;
386 UInt1 *q;
387
388 /* test whether val is a string, convert to compact rep if necessary */
389 RequireStringRep("REVNEG_STRING", val);
390
391 l=GET_LEN_STRING(val);
392 n=NEW_STRING(l);
393 p=CONST_CHARS_STRING(val);
394 q=CHARS_STRING(n);
395 j=l-1;
396 for (i=1;i<=l;i++) {
397 *q++=-p[j];
398 j--;
399 }
400
401 return n;
402 }
403
404 /****************************************************************************
405 **
406 *F * * * * * * * * * * * * * * * string functions * * * * * * * * * * * * * *
407 */
408
409 /****************************************************************************
410 **
411 *F NEW_STRING( <len> ) . . . returns new string with length <len>, first
412 ** character and "first behind last" set to zero
413 **
414 */
NEW_STRING(Int len)415 Obj NEW_STRING(Int len)
416 {
417 GAP_ASSERT(len >= 0);
418 if (len > INT_INTOBJ_MAX) {
419 ErrorQuit("NEW_STRING: length must be a small integer", 0, 0);
420 }
421 Obj res = NewBag(T_STRING, SIZEBAG_STRINGLEN(len));
422 SET_LEN_STRING(res, len);
423 return res;
424 }
425
426 /****************************************************************************
427 **
428 *F GrowString(<list>,<len>) . . . . . . make sure a string is large enough
429 **
430 ** returns the new length, but doesn't set SET_LEN_STRING.
431 */
GrowString(Obj list,UInt need)432 Int GrowString (
433 Obj list,
434 UInt need )
435 {
436 UInt len; /* new physical length */
437 UInt good; /* good new physical length */
438
439 if (need > INT_INTOBJ_MAX)
440 ErrorMayQuit("GrowString: string length too large", 0, 0);
441
442 /* find out how large the data area should become */
443 good = 5 * (GET_LEN_STRING(list)+3) / 4 + 1;
444 if (good > INT_INTOBJ_MAX)
445 good = INT_INTOBJ_MAX;
446
447 /* but maybe we need more */
448 if ( need < good ) { len = good; }
449 else { len = need; }
450
451 /* resize the bag */
452 ResizeBag( list, SIZEBAG_STRINGLEN(len) );
453
454 /* return the new maximal length */
455 return (Int) len;
456 }
457
458 /****************************************************************************
459 **
460 *F TypeString(<list>) . . . . . . . . . . . . . . . . . . type of a string
461 **
462 ** 'TypeString' returns the type of the string <list>.
463 **
464 ** 'TypeString' is the function in 'TypeObjFuncs' for strings.
465 */
466 static Obj TYPES_STRING;
467
468
TypeString(Obj list)469 static Obj TypeString(Obj list)
470 {
471 return ELM_PLIST(TYPES_STRING, TNUM_OBJ(list) - T_STRING + 1);
472 }
473
474
475
476 /****************************************************************************
477 **
478 *F * * * * * * * * * * * * * * copy functions * * * * * * * * * * * * * * * *
479 */
480
481 #if !defined(USE_THREADSAFE_COPYING)
482
483 /****************************************************************************
484 **
485 *F CopyString( <list>, <mut> ) . . . . . . . . . . . . . . . . copy a string
486 **
487 ** 'CopyString' returns a structural (deep) copy of the string <list>, i.e.,
488 ** a recursive copy that preserves the structure.
489 **
490 ** If <list> has not yet been copied, it makes a copy, leaves a forward
491 ** pointer to the copy in the first entry of the string, where the size of
492 ** the string usually resides, and copies all the entries. If the string
493 ** has already been copied, it returns the value of the forwarding pointer.
494 **
495 ** 'CopyString' is the function in 'CopyObjFuncs' for strings.
496 */
CopyString(Obj list,Int mut)497 static Obj CopyString(Obj list, Int mut)
498 {
499 Obj copy; /* handle of the copy, result */
500
501 // immutable input is handled by COPY_OBJ
502 GAP_ASSERT(IS_MUTABLE_OBJ(list));
503
504 /* make object for copy */
505 copy = NewBag(TNUM_OBJ(list), SIZE_OBJ(list));
506 if (!mut)
507 MakeImmutableNoRecurse(copy);
508 ADDR_OBJ(copy)[0] = CONST_ADDR_OBJ(list)[0];
509
510 /* leave a forwarding pointer */
511 PrepareCopy(list, copy);
512
513 /* copy the subvalues */
514 memcpy(ADDR_OBJ(copy)+1, CONST_ADDR_OBJ(list)+1,
515 SIZE_OBJ(list)-sizeof(Obj) );
516
517 /* return the copy */
518 return copy;
519 }
520
521 #endif //!defined(USE_THREADSAFE_COPYING)
522
523
524 /****************************************************************************
525 **
526 *F * * * * * * * * * * * * * * list functions * * * * * * * * * * * * * * * *
527 */
528
529 /****************************************************************************
530 **
531 *F PrintString(<list>) . . . . . . . . . . . . . . . . . . . print a string
532 **
533 ** 'PrintString' prints the string with the handle <list>.
534 **
535 ** No linebreaks are allowed, if one must be inserted anyhow, it must
536 ** be escaped by a backslash '\', which is done in 'Pr'.
537 **
538 ** The buffer 'PrStrBuf' is used to protect 'Pr' against garbage collections
539 ** caused by printing to string streams, which might move the body of list.
540 **
541 ** The output uses octal number notation for non-ascii or non-printable
542 ** characters. The function can be used to print *any* string in a way
543 ** which can be read in by GAP afterwards.
544 */
PrintString(Obj list)545 void PrintString(Obj list)
546 {
547 char PrStrBuf[10007]; /* 7 for a \c\123 at the end */
548 UInt scanout, n;
549 UInt1 c;
550 UInt len = GET_LEN_STRING(list);
551 UInt off = 0;
552 Pr("\"", 0L, 0L);
553 while (off < len) {
554 scanout = 0;
555 do {
556 c = CONST_CHARS_STRING(list)[off++];
557 switch (c) {
558 case '\\':
559 PrStrBuf[scanout++] = '\\';
560 PrStrBuf[scanout++] = '\\';
561 break;
562 case '\"':
563 PrStrBuf[scanout++] = '\\';
564 PrStrBuf[scanout++] = '\"';
565 break;
566 case '\n':
567 PrStrBuf[scanout++] = '\\';
568 PrStrBuf[scanout++] = 'n';
569 break;
570 case '\t':
571 PrStrBuf[scanout++] = '\\';
572 PrStrBuf[scanout++] = 't';
573 break;
574 case '\r':
575 PrStrBuf[scanout++] = '\\';
576 PrStrBuf[scanout++] = 'r';
577 break;
578 case '\b':
579 PrStrBuf[scanout++] = '\\';
580 PrStrBuf[scanout++] = 'b';
581 break;
582 case '\01':
583 PrStrBuf[scanout++] = '\\';
584 PrStrBuf[scanout++] = '>';
585 break;
586 case '\02':
587 PrStrBuf[scanout++] = '\\';
588 PrStrBuf[scanout++] = '<';
589 break;
590 case '\03':
591 PrStrBuf[scanout++] = '\\';
592 PrStrBuf[scanout++] = 'c';
593 break;
594 default:
595 if (c < 32 || c > 126) {
596 PrStrBuf[scanout++] = '\\';
597 n = c / 64;
598 c = c - n * 64;
599 PrStrBuf[scanout++] = n + '0';
600 n = c / 8;
601 c = c - n * 8;
602 PrStrBuf[scanout++] = n + '0';
603 PrStrBuf[scanout++] = c + '0';
604 }
605 else
606 PrStrBuf[scanout++] = c;
607 }
608 } while (off < len && scanout < 10000);
609 PrStrBuf[scanout++] = '\0';
610 Pr("%s", (Int)PrStrBuf, 0L);
611 }
612 Pr("\"", 0L, 0L);
613 }
614
615
616 /****************************************************************************
617 **
618 *F PrintString1(<list>) . . . . . . . . . . . . print a string for 'Print'
619 **
620 ** 'PrintString1' prints the string constant in the format used by the
621 ** 'Print' and 'PrintTo' function.
622 */
PrintString1(Obj list)623 void PrintString1 (
624 Obj list )
625 {
626 Pr("%g", (Int)list, 0L);
627 }
628
629
630 /****************************************************************************
631 **
632 *F EqString(<listL>,<listR>) . . . . . . . . test whether strings are equal
633 **
634 ** 'EqString' returns 'true' if the two strings <listL> and <listR> are
635 ** equal and 'false' otherwise.
636 */
EqString(Obj listL,Obj listR)637 static Int EqString(Obj listL, Obj listR)
638 {
639 UInt lL, lR;
640 const UInt1 *pL, *pR;
641 lL = GET_LEN_STRING(listL);
642 lR = GET_LEN_STRING(listR);
643 if (lR != lL) return 0;
644 pL = CONST_CHARS_STRING(listL);
645 pR = CONST_CHARS_STRING(listR);
646 return memcmp(pL, pR, lL) == 0;
647 }
648
649
650 /****************************************************************************
651 **
652 *F LtString(<listL>,<listR>) . test whether one string is less than another
653 **
654 ** 'LtString' returns 'true' if the string <listL> is less than the string
655 ** <listR> and 'false' otherwise.
656 */
LtString(Obj listL,Obj listR)657 static Int LtString(Obj listL, Obj listR)
658 {
659 UInt lL, lR;
660 const UInt1 *pL, *pR;
661 lL = GET_LEN_STRING(listL);
662 lR = GET_LEN_STRING(listR);
663 pL = CONST_CHARS_STRING(listL);
664 pR = CONST_CHARS_STRING(listR);
665
666 Int res;
667 if (lL <= lR) {
668 res = memcmp(pL, pR, lL);
669 if (res == 0)
670 return lL < lR;
671 }
672 else {
673 res = memcmp(pL, pR, lR);
674 if (res == 0)
675 return 0;
676 }
677 return res < 0;
678 }
679
680
681 /****************************************************************************
682 **
683 *F LenString(<list>) . . . . . . . . . . . . . . . . . . length of a string
684 **
685 ** 'LenString' returns the length of the string <list> as a C integer.
686 **
687 ** 'LenString' is the function in 'LenListFuncs' for strings.
688 */
LenString(Obj list)689 static Int LenString(Obj list)
690 {
691 return GET_LEN_STRING( list );
692 }
693
694
695 /****************************************************************************
696 **
697 *F IsbString(<list>,<pos>) . . . . . . . . . test for an element of a string
698 **
699 ** 'IsbString' returns 1 if the string <list> contains
700 ** a character at the position <pos> and 0 otherwise.
701 ** It can rely on <pos> being a positive integer.
702 **
703 ** 'IsbString' is the function in 'IsbListFuncs' for strings.
704 */
IsbString(Obj list,Int pos)705 static Int IsbString(Obj list, Int pos)
706 {
707 /* since strings are dense, this must only test for the length */
708 return (pos <= GET_LEN_STRING(list));
709 }
710
711
712 /****************************************************************************
713 **
714 *F GET_ELM_STRING( <list>, <pos> ) . . . . . . select an element of a string
715 **
716 ** 'GET_ELM_STRING' returns the <pos>-th element of the string <list>.
717 ** <pos> must be a positive integer less than or equal to the length of
718 ** <list>.
719 */
GET_ELM_STRING(Obj list,Int pos)720 static inline Obj GET_ELM_STRING(Obj list, Int pos)
721 {
722 GAP_ASSERT(IS_STRING_REP(list));
723 GAP_ASSERT(pos > 0);
724 GAP_ASSERT((UInt) pos <= GET_LEN_STRING(list));
725 UChar c = CONST_CHARS_STRING(list)[pos - 1];
726 return ObjsChar[c];
727 }
728
729
730 /****************************************************************************
731 **
732 *F SET_ELM_STRING( <list>, <pos>, <val> ) . . . . set a character of a string
733 **
734 ** 'SET_ELM_STRING' sets the <pos>-th character of the string <list>.
735 ** <val> must be a character and <list> stay a string after the assignment.
736 */
SET_ELM_STRING(Obj list,Int pos,Obj val)737 static inline void SET_ELM_STRING(Obj list, Int pos, Obj val)
738 {
739 GAP_ASSERT(IS_STRING_REP(list));
740 GAP_ASSERT(pos > 0);
741 GAP_ASSERT((UInt) pos <= GET_LEN_STRING(list));
742 GAP_ASSERT(TNUM_OBJ(val) == T_CHAR);
743 UChar * ptr = CHARS_STRING(list) + (pos - 1);
744 *ptr = CHAR_VALUE(val);
745 }
746
747
748 /****************************************************************************
749 **
750 *F Elm0String(<list>,<pos>) . . . . . . . . . select an element of a string
751 *F Elm0vString(<list>,<pos>) . . . . . . . . . select an element of a string
752 **
753 ** 'Elm0String' returns the element at the position <pos> of the string
754 ** <list>, or returns 0 if <list> has no assigned object at <pos>.
755 ** It can rely on <pos> being a positive integer.
756 **
757 ** 'Elm0vString' does the same thing as 'Elm0String', but it can
758 ** also rely on <pos> not being larger than the length of <list>.
759 **
760 ** 'Elm0String' is the function on 'Elm0ListFuncs' for strings.
761 ** 'Elm0vString' is the function in 'Elm0vListFuncs' for strings.
762 */
Elm0String(Obj list,Int pos)763 static Obj Elm0String(Obj list, Int pos)
764 {
765 if ( pos <= GET_LEN_STRING( list ) ) {
766 return GET_ELM_STRING( list, pos );
767 }
768 else {
769 return 0;
770 }
771 }
772
Elm0vString(Obj list,Int pos)773 static Obj Elm0vString(Obj list, Int pos)
774 {
775 return GET_ELM_STRING( list, pos );
776 }
777
778
779 /****************************************************************************
780 **
781 *F ElmString(<list>,<pos>) . . . . . . . . . . select an element of a string
782 *F ElmvString(<list>,<pos>) . . . . . . . . . select an element of a string
783 **
784 ** 'ElmString' returns the element at the position <pos> of the string
785 ** <list>, or signals an error if <list> has no assigned object at <pos>.
786 ** It can rely on <pos> being a positive integer.
787 **
788 ** 'ElmvString' does the same thing as 'ElmString', but it can
789 ** also rely on <pos> not being larger than the length of <list>.
790 **
791 ** 'ElmwString' does the same thing as 'ElmString', but it can
792 ** also rely on <list> having an assigned object at <pos>.
793 **
794 ** 'ElmString' is the function in 'ElmListFuncs' for strings.
795 ** 'ElmfString' is the function in 'ElmfListFuncs' for strings.
796 ** 'ElmwString' is the function in 'ElmwListFuncs' for strings.
797 */
ElmString(Obj list,Int pos)798 static Obj ElmString(Obj list, Int pos)
799 {
800 /* check the position */
801 if ( GET_LEN_STRING( list ) < pos ) {
802 ErrorMayQuit("List Element: <list>[%d] must have an assigned value",
803 (Int)pos, 0);
804 }
805
806 /* return the selected element */
807 return GET_ELM_STRING( list, pos );
808 }
809
810 #define ElmvString Elm0vString
811
812 #define ElmwString Elm0vString
813
814
815 /****************************************************************************
816 **
817 *F ElmsString(<list>,<poss>) . . . . . . . . select a sublist from a string
818 **
819 ** 'ElmsString' returns a new list containing the elements at the positions
820 ** given in the list <poss> from the string <list>. It is the
821 ** responsibility of the called to ensure that <poss> is dense and contains
822 ** only positive integers. An error is signalled if an element of <poss> is
823 ** larger than the length of <list>.
824 **
825 ** 'ElmsString' is the function in 'ElmsListFuncs' for strings.
826 */
ElmsString(Obj list,Obj poss)827 static Obj ElmsString(Obj list, Obj poss)
828 {
829 Obj elms; /* selected sublist, result */
830 Int lenList; /* length of <list> */
831 Char elm; /* one element from <list> */
832 Int lenPoss; /* length of <positions> */
833 Int pos; /* <position> as integer */
834 Int inc; /* increment in a range */
835 Int i; /* loop variable */
836
837 /* general code */
838 if ( ! IS_RANGE(poss) ) {
839
840 /* get the length of <list> */
841 lenList = GET_LEN_STRING( list );
842
843 /* get the length of <positions> */
844 lenPoss = LEN_LIST( poss );
845
846 /* make the result list */
847 elms = NEW_STRING( lenPoss );
848
849 /* loop over the entries of <positions> and select */
850 for ( i = 1; i <= lenPoss; i++ ) {
851
852 /* get <position> */
853 Obj p = ELMW_LIST(poss, i);
854 if (!IS_INTOBJ(p)) {
855 ErrorMayQuit("List Elements: position is too large for "
856 "this type of list",
857 0, 0);
858 }
859 pos = INT_INTOBJ(p);
860
861 /* select the element */
862 if ( lenList < pos ) {
863 ErrorMayQuit(
864 "List Elements: <list>[%d] must have an assigned value",
865 (Int)pos, 0);
866 }
867
868 /* select the element */
869 elm = CONST_CHARS_STRING(list)[pos-1];
870
871 /* assign the element into <elms> */
872 CHARS_STRING(elms)[i-1] = elm;
873
874 }
875
876 }
877
878 /* special code for ranges */
879 else {
880
881 /* get the length of <list> */
882 lenList = GET_LEN_STRING( list );
883
884 /* get the length of <positions>, the first elements, and the inc. */
885 lenPoss = GET_LEN_RANGE( poss );
886 pos = GET_LOW_RANGE( poss );
887 inc = GET_INC_RANGE( poss );
888
889 /* check that no <position> is larger than 'LEN_LIST(<list>)' */
890 if ( lenList < pos ) {
891 ErrorMayQuit(
892 "List Elements: <list>[%d] must have an assigned value",
893 (Int)pos, 0);
894 }
895 if ( lenList < pos + (lenPoss-1) * inc ) {
896 ErrorMayQuit(
897 "List Elements: <list>[%d] must have an assigned value",
898 (Int)(pos + (lenPoss - 1) * inc), 0);
899 }
900
901 /* make the result list */
902 elms = NEW_STRING( lenPoss );
903
904 /* loop over the entries of <positions> and select */
905 const UInt1 * p = CONST_CHARS_STRING(list);
906 UInt1 * pn = CHARS_STRING(elms);
907 for ( i = 1; i <= lenPoss; i++, pos += inc ) {
908 pn[i - 1] = p[pos - 1];
909 }
910
911 }
912
913 /* return the result */
914 return elms;
915 }
916
917
918 /****************************************************************************
919 **
920 *F AssString(<list>,<pos>,<val>) . . . . . . . . . . . . assign to a string
921 **
922 ** 'AssString' assigns the value <val> to the string <list> at the position
923 ** <pos>. It is the responsibility of the caller to ensure that <pos> is
924 ** positive, and that <val> is not 0.
925 **
926 ** 'AssString' is the function in 'AssListFuncs' for strings.
927 **
928 ** 'AssString' keeps <list> in string representation if possible.
929 **
930 */
AssString(Obj list,Int pos,Obj val)931 static void AssString(Obj list, Int pos, Obj val)
932 {
933 UInt len = GET_LEN_STRING(list);
934
935 if (TNUM_OBJ(val) != T_CHAR || pos > len+1) {
936 /* convert the range into a plain list */
937 PLAIN_LIST(list);
938 CLEAR_FILTS_LIST(list);
939
940 /* resize the list if necessary */
941 if ( len < pos ) {
942 GROW_PLIST( list, pos );
943 SET_LEN_PLIST( list, pos );
944 }
945
946 /* now perform the assignment and return the assigned value */
947 SET_ELM_PLIST( list, pos, val );
948 CHANGED_BAG( list );
949 }
950 else {
951 CLEAR_FILTS_LIST(list);
952
953 /* resize the list if necessary */
954 if ( len < pos ) {
955 GROW_STRING( list, pos );
956 SET_LEN_STRING( list, pos );
957 CHARS_STRING(list)[pos] = (UInt1)0;
958 }
959
960 /* now perform the assignment and return the assigned value */
961 SET_ELM_STRING( list, pos, val );
962 }
963 }
964
965
966 /****************************************************************************
967 **
968 *F AsssString(<list>,<poss>,<vals>) . . assign several elements to a string
969 **
970 ** 'AsssString' assignes the values from the list <vals> at the positions
971 ** given in the list <poss> to the string <list>. It is the responsibility
972 ** of the caller to ensure that <poss> is dense and contains only positive
973 ** integers, that <poss> and <vals> have the same length, and that <vals> is
974 ** dense.
975 **
976 ** 'AsssString' is the function in 'AsssListFuncs' for strings.
977 **
978 ** 'AsssString' simply delegates to AssString. Note that the ordering of
979 ** <poss> can be important if <list> should stay in string representation.
980 **
981 */
AsssString(Obj list,Obj poss,Obj vals)982 static void AsssString(Obj list, Obj poss, Obj vals)
983 {
984 Int i, len = LEN_LIST(poss);
985 for (i = 1; i <= len; i++) {
986 ASS_LIST(list, INT_INTOBJ(ELM_LIST(poss, i)), ELM_LIST(vals, i));
987 }
988 }
989
990
991 /****************************************************************************
992 **
993 *F IsSSortString(<list>) . . . . . . . strictly sorted list test for strings
994 **
995 ** 'IsSSortString' returns 1 if the string <list> is strictly sorted and 0
996 ** otherwise.
997 **
998 ** 'IsSSortString' is the function in 'IsSSortListFuncs' for strings.
999 */
IsSSortString(Obj list)1000 static Int IsSSortString(Obj list)
1001 {
1002 Int len;
1003 Int i;
1004 const UInt1 * ptr;
1005
1006 /* test whether the string is strictly sorted */
1007 len = GET_LEN_STRING( list );
1008 ptr = CONST_CHARS_STRING(list);
1009 for ( i = 1; i < len; i++ ) {
1010 if ( ! (ptr[i-1] < ptr[i]) )
1011 break;
1012 }
1013
1014 /* retype according to the outcome */
1015 SET_FILT_LIST( list, (len <= i) ? FN_IS_SSORT : FN_IS_NSORT );
1016 return (len <= i);
1017 }
1018
1019
1020 /****************************************************************************
1021 **
1022 *F IsPossString(<list>) . . . . . positions list test function for strings
1023 **
1024 ** 'IsPossString' is the function in 'IsPossListFuncs' for strings.
1025 */
IsPossString(Obj list)1026 static Int IsPossString(Obj list)
1027 {
1028 return GET_LEN_STRING( list ) == 0;
1029 }
1030
1031
1032 /****************************************************************************
1033 **
1034 *F PosString(<list>,<val>,<pos>) . . . . position of an element in a string
1035 **
1036 ** 'PosString' returns the position of the value <val> in the string <list>
1037 ** after the first position <start> as a C integer. 0 is returned if <val>
1038 ** is not in the list.
1039 **
1040 ** 'PosString' is the function in 'PosListFuncs' for strings.
1041 */
PosString(Obj list,Obj val,Obj start)1042 static Obj PosString(Obj list, Obj val, Obj start)
1043 {
1044 Int lenList; /* length of <list> */
1045 Int i; /* loop variable */
1046 UInt1 valc; /* C characters */
1047 const UInt1 *p; /* pointer to chars of <list> */
1048 UInt istart;
1049
1050 /* if the starting position is too big to be a small int
1051 then there can't be anything to find */
1052 if (!IS_INTOBJ(start))
1053 return Fail;
1054
1055 istart = INT_INTOBJ(start);
1056
1057 /* get the length of <list> */
1058 lenList = GET_LEN_STRING( list );
1059
1060 /* a string contains only characters */
1061 if (TNUM_OBJ(val) != T_CHAR) return Fail;
1062
1063 /* val as C character */
1064 valc = CHAR_VALUE(val);
1065
1066 /* search entries in <list> */
1067 p = CONST_CHARS_STRING(list);
1068 for ( i = istart; i < lenList && p[i] != valc; i++ );
1069
1070 /* return the position (0 if <val> was not found) */
1071 return (lenList <= i ? Fail : INTOBJ_INT(i+1));
1072 }
1073
1074
1075 /****************************************************************************
1076 **
1077 *F PlainString(<list>) . . . . . . . . . . convert a string to a plain list
1078 **
1079 ** 'PlainString' converts the string <list> to a plain list. Not much work.
1080 **
1081 ** 'PlainString' is the function in 'PlainListFuncs' for strings.
1082 */
PlainString(Obj list)1083 static void PlainString(Obj list)
1084 {
1085 Int lenList; /* logical length of the string */
1086 Obj tmp; /* handle of the list */
1087 Int i; /* loop variable */
1088
1089 /* find the length and allocate a temporary copy */
1090 lenList = GET_LEN_STRING( list );
1091 tmp = NEW_PLIST_WITH_MUTABILITY(IS_MUTABLE_OBJ(list), T_PLIST, lenList);
1092 SET_LEN_PLIST( tmp, lenList );
1093
1094 /* copy the characters */
1095 for ( i = 1; i <= lenList; i++ ) {
1096 SET_ELM_PLIST( tmp, i, GET_ELM_STRING( list, i ) );
1097 }
1098
1099 /* change size and type of the string and copy back */
1100 ResizeBag( list, SIZE_OBJ(tmp) );
1101 RetypeBag( list, TNUM_OBJ(tmp) );
1102
1103 memcpy(ADDR_OBJ(list), CONST_ADDR_OBJ(tmp), SIZE_OBJ(tmp));
1104 CHANGED_BAG(list);
1105 }
1106
1107
1108 /****************************************************************************
1109 **
1110 *F IS_STRING( <obj> ) . . . . . . . . . . . . test if an object is a string
1111 **
1112 ** 'IS_STRING' returns 1 if the object <obj> is a string and 0 otherwise.
1113 ** It does not change the representation of <obj>.
1114 */
1115 Int (*IsStringFuncs [LAST_REAL_TNUM+1]) ( Obj obj );
1116
1117 static Obj IsStringFilt;
1118
IsStringList(Obj list)1119 static Int IsStringList(Obj list)
1120 {
1121 Int lenList;
1122 Obj elm;
1123 Int i;
1124
1125 lenList = LEN_LIST( list );
1126 for ( i = 1; i <= lenList; i++ ) {
1127 elm = ELMV0_LIST( list, i );
1128 if ( elm == 0 )
1129 break;
1130 #ifdef HPCGAP
1131 if ( !CheckReadAccess(elm) )
1132 break;
1133 #endif
1134 if ( TNUM_OBJ( elm ) != T_CHAR )
1135 break;
1136 }
1137
1138 return (lenList < i);
1139 }
1140
IsStringListHom(Obj list)1141 static Int IsStringListHom(Obj list)
1142 {
1143 return (TNUM_OBJ( ELM_LIST(list,1) ) == T_CHAR);
1144 }
1145
IsStringObject(Obj obj)1146 static Int IsStringObject(Obj obj)
1147 {
1148 return (DoFilter( IsStringFilt, obj ) != False);
1149 }
1150
1151
1152 /****************************************************************************
1153 **
1154 *F CopyToStringRep( <string> ) . . . copy a string to string representation
1155 **
1156 ** 'CopyToStringRep' copies the string <string> to a new string in string
1157 ** representation.
1158 */
CopyToStringRep(Obj string)1159 Obj CopyToStringRep(
1160 Obj string )
1161 {
1162 Int lenString; /* length of the string */
1163 Obj elm; /* one element of the string */
1164 Obj copy; /* temporary string */
1165 Int i; /* loop variable */
1166
1167 lenString = LEN_LIST(string);
1168 copy = NEW_STRING(lenString);
1169
1170 if ( IS_STRING_REP(string) ) {
1171 memcpy(CHARS_STRING(copy), CONST_CHARS_STRING(string),
1172 GET_LEN_STRING(string));
1173 /* XXX no error checks? */
1174 } else {
1175 /* copy the string to the string representation */
1176 for ( i = 1; i <= lenString; i++ ) {
1177 elm = ELMW_LIST( string, i );
1178 CHARS_STRING(copy)[i-1] = CHAR_VALUE(elm);
1179 }
1180 CHARS_STRING(copy)[lenString] = '\0';
1181 }
1182 return copy;
1183 }
1184
1185
1186 /****************************************************************************
1187 **
1188 *F ImmutableString( <string> ) . . . copy to immutable string in string rep.
1189 **
1190 ** 'ImmutableString' returns an immutable string in string representation
1191 ** equal to <string>. This may return <string> if it already satisfies these
1192 ** criteria.
1193 */
ImmutableString(Obj string)1194 Obj ImmutableString(Obj string)
1195 {
1196 if (!IS_STRING_REP(string) || IS_MUTABLE_OBJ(string)) {
1197 string = CopyToStringRep(string);
1198 MakeImmutableNoRecurse(string);
1199 }
1200 return string;
1201 }
1202
1203
1204 /****************************************************************************
1205 **
1206 *F ConvString( <string> ) . . . . convert a string to string representation
1207 **
1208 ** 'ConvString' converts the string <string> to string representation.
1209 */
ConvString(Obj string)1210 void ConvString (
1211 Obj string )
1212 {
1213 Int lenString; /* length of the string */
1214 Obj elm; /* one element of the string */
1215 Obj tmp; /* temporary string */
1216 Int i; /* loop variable */
1217
1218 /* do nothing if the string is already in the string representation */
1219 if ( IS_STRING_REP(string) )
1220 {
1221 return;
1222 }
1223
1224
1225 lenString = LEN_LIST(string);
1226 tmp = NEW_STRING(lenString);
1227
1228 /* copy the string to the string representation */
1229 for ( i = 1; i <= lenString; i++ ) {
1230 elm = ELMW_LIST( string, i );
1231 CHARS_STRING(tmp)[i-1] = CHAR_VALUE(elm);
1232 }
1233 CHARS_STRING(tmp)[lenString] = '\0';
1234
1235 /* copy back to string */
1236 RetypeBagSM( string, T_STRING );
1237 ResizeBag( string, SIZEBAG_STRINGLEN(lenString) );
1238 /* copy data area from tmp */
1239 memcpy(ADDR_OBJ(string), CONST_ADDR_OBJ(tmp), SIZE_OBJ(tmp));
1240 }
1241
1242
1243
1244 /****************************************************************************
1245 **
1246 *F IsStringConv( <obj> ) . . . . . test if an object is a string and convert
1247 **
1248 ** 'IsStringConv' returns 1 if the object <obj> is a string, and 0
1249 ** otherwise. If <obj> is a string it changes its representation to the
1250 ** string representation.
1251 */
IsStringConv(Obj obj)1252 Int IsStringConv (
1253 Obj obj )
1254 {
1255 Int res;
1256
1257 /* test whether the object is a string */
1258 res = IS_STRING( obj );
1259
1260 /* if so, convert it to the string representation */
1261 if ( res ) {
1262 ConvString( obj );
1263 }
1264
1265 /* return the result */
1266 return res;
1267 }
1268
1269
1270 /****************************************************************************
1271 **
1272 *F * * * * * * * * * * * * * * GAP level functions * * * * * * * * * * * * *
1273 */
1274
1275 /****************************************************************************
1276 **
1277 *F FiltIS_STRING( <self>, <obj> ) . . . . . . . . . test value is a string
1278 */
FiltIS_STRING(Obj self,Obj obj)1279 static Obj FiltIS_STRING(Obj self, Obj obj)
1280 {
1281 return (IS_STRING( obj ) ? True : False);
1282 }
1283
1284
1285 /****************************************************************************
1286 **
1287 *F FuncIS_STRING_CONV( <self>, <obj> ) . . . . . . . . . . check and convert
1288 */
FuncIS_STRING_CONV(Obj self,Obj obj)1289 static Obj FuncIS_STRING_CONV(Obj self, Obj obj)
1290 {
1291 /* return 'true' if <obj> is a string and 'false' otherwise */
1292 return (IsStringConv(obj) ? True : False);
1293 }
1294
1295
1296 /****************************************************************************
1297 **
1298 *F FuncCONV_STRING( <self>, <string> ) . . . . . . . . convert to string rep
1299 */
FuncCONV_STRING(Obj self,Obj string)1300 static Obj FuncCONV_STRING(Obj self, Obj string)
1301 {
1302 /* check whether <string> is a string */
1303 if (!IS_STRING(string)) {
1304 RequireArgument("ConvString", string, "must be a string");
1305 }
1306
1307 /* convert to the string representation */
1308 ConvString( string );
1309
1310 /* return nothing */
1311 return 0;
1312 }
1313
1314
1315 /****************************************************************************
1316 **
1317 *F FiltIS_STRING_REP( <self>, <obj> ) . . . . test if value is a string rep
1318 */
1319 static Obj IsStringRepFilt;
1320
FiltIS_STRING_REP(Obj self,Obj obj)1321 static Obj FiltIS_STRING_REP(Obj self, Obj obj)
1322 {
1323 return (IS_STRING_REP( obj ) ? True : False);
1324 }
1325
1326 /****************************************************************************
1327 **
1328 *F FuncCOPY_TO_STRING_REP( <self>, <obj> ) . copy a string into string rep
1329 */
FuncCOPY_TO_STRING_REP(Obj self,Obj string)1330 static Obj FuncCOPY_TO_STRING_REP(Obj self, Obj string)
1331 {
1332 /* check whether <string> is a string */
1333 if (!IS_STRING(string)) {
1334 RequireArgument("CopyToStringRep", string, "must be a string");
1335 }
1336 return CopyToStringRep(string);
1337 }
1338
1339 /****************************************************************************
1340 **
1341 *F FuncPOSITION_SUBSTRING( <self>, <string>, <substr>, <off> ) . position of
1342 ** substring
1343 **
1344 ** <str> and <substr> must be strings and <off> an integer. The position
1345 ** of first character of substring in string, search starting from
1346 ** <off>+1, is returned if such a substring exists. Otherwise `fail' is
1347 ** returned.
1348 */
FuncPOSITION_SUBSTRING(Obj self,Obj string,Obj substr,Obj off)1349 static Obj FuncPOSITION_SUBSTRING(Obj self, Obj string, Obj substr, Obj off)
1350 {
1351 Int ipos, i, j, lens, lenss, max;
1352 const UInt1 *s, *ss;
1353
1354 /* check whether <string> is a string */
1355 RequireStringRep("POSITION_SUBSTRING", string);
1356
1357 /* check whether <substr> is a string */
1358 RequireStringRep("POSITION_SUBSTRING", substr);
1359
1360 /* check wether <off> is a non-negative integer */
1361 RequireNonnegativeSmallInt("POSITION_SUBSTRING", off);
1362 ipos = INT_INTOBJ(off);
1363
1364 /* special case for the empty string */
1365 lenss = GET_LEN_STRING(substr);
1366 if ( lenss == 0 ) {
1367 return INTOBJ_INT(ipos + 1);
1368 }
1369
1370 lens = GET_LEN_STRING(string);
1371 max = lens - lenss + 1;
1372 s = CONST_CHARS_STRING(string);
1373 ss = CONST_CHARS_STRING(substr);
1374
1375 const UInt1 c = ss[0];
1376 for (i = ipos; i < max; i++) {
1377 if (c == s[i]) {
1378 for (j = 1; j < lenss; j++) {
1379 if (! (s[i+j] == ss[j]))
1380 break;
1381 }
1382 if (j == lenss)
1383 return INTOBJ_INT(i+1);
1384 }
1385 }
1386 return Fail;
1387 }
1388
1389 /****************************************************************************
1390 **
1391 *F FuncNormalizeWhitespace( <self>, <string> ) . . . . . normalize white
1392 ** space in place
1393 **
1394 ** Whitespace characters are " \r\t\n". Leading and trailing whitespace in
1395 ** string is removed. Intermediate sequences of whitespace characters are
1396 ** substituted by a single space.
1397 **
1398 */
FuncNormalizeWhitespace(Obj self,Obj string)1399 static Obj FuncNormalizeWhitespace(Obj self, Obj string)
1400 {
1401 UInt1 *s, c;
1402 Int i, j, len, white;
1403
1404 /* check whether <string> is a string */
1405 RequireStringRep("NormalizeWhitespace", string);
1406
1407 len = GET_LEN_STRING(string);
1408 s = CHARS_STRING(string);
1409 i = -1;
1410 white = 1;
1411 for (j = 0; j < len; j++) {
1412 c = s[j];
1413 if (c == ' ' || c == '\n' || c == '\t' || c == '\r') {
1414 if (! white) {
1415 i++;
1416 s[i] = ' ';
1417 white = 1;
1418 }
1419 }
1420 else {
1421 i++;
1422 s[i] = c;
1423 white = 0;
1424 }
1425 }
1426 if (white && i > -1)
1427 i--;
1428 s[i+1] = '\0';
1429 SET_LEN_STRING(string, i+1);
1430
1431 /* to make it useful as C-string */
1432 CHARS_STRING(string)[i+1] = (UInt1)0;
1433
1434 return (Obj)0;
1435 }
1436
1437
1438 /****************************************************************************
1439 **
1440 *F FuncREMOVE_CHARACTERS( <self>, <string>, <rem> ) . . . . . delete characters
1441 ** from <rem> in <string> in place
1442 **
1443 */
1444
FuncREMOVE_CHARACTERS(Obj self,Obj string,Obj rem)1445 static Obj FuncREMOVE_CHARACTERS(Obj self, Obj string, Obj rem)
1446 {
1447 UInt1 *s;
1448 Int i, j, len;
1449 UInt1 REMCHARLIST[256] = {0};
1450
1451 /* check whether <string> is a string */
1452 RequireStringRep("RemoveCharacters", string);
1453
1454 /* check whether <rem> is a string */
1455 RequireStringRep("RemoveCharacters", rem);
1456
1457 /* set REMCHARLIST by setting positions of characters in rem to 1 */
1458 len = GET_LEN_STRING(rem);
1459 s = CHARS_STRING(rem);
1460 for(i=0; i<len; i++) REMCHARLIST[s[i]] = 1;
1461
1462 /* now change string in place */
1463 len = GET_LEN_STRING(string);
1464 s = CHARS_STRING(string);
1465 i = -1;
1466 for (j = 0; j < len; j++) {
1467 if (REMCHARLIST[s[j]] == 0) {
1468 i++;
1469 s[i] = s[j];
1470 }
1471 }
1472 i++;
1473 s[i] = '\0';
1474 SET_LEN_STRING(string, i);
1475 SHRINK_STRING(string);
1476
1477 return (Obj)0;
1478 }
1479
1480
1481 /****************************************************************************
1482 **
1483 *F FuncTranslateString( <self>, <string>, <trans> ) . . . translate characters
1484 ** in <string> in place, <string>[i] = <trans>[<string>[i]]
1485 **
1486 */
FuncTranslateString(Obj self,Obj string,Obj trans)1487 static Obj FuncTranslateString(Obj self, Obj string, Obj trans)
1488 {
1489 Int j, len;
1490
1491 /* check whether <string> is a string */
1492 RequireStringRep("TranslateString", string);
1493
1494 // check whether <trans> is a string of length at least 256
1495 RequireStringRep("TranslateString", trans);
1496 if ( GET_LEN_STRING( trans ) < 256 ) {
1497 ErrorMayQuit("TranslateString: <trans> must have length >= 256",
1498 0, 0 );
1499 }
1500
1501 /* now change string in place */
1502 len = GET_LEN_STRING(string);
1503 UInt1 *s = CHARS_STRING(string);
1504 const UInt1 *t = CONST_CHARS_STRING(trans);
1505 for (j = 0; j < len; j++) {
1506 s[j] = t[s[j]];
1507 }
1508
1509 return (Obj)0;
1510 }
1511
1512
1513 /****************************************************************************
1514 **
1515 *F FuncSplitStringInternal( <self>, <string>, <seps>, <wspace> ) . . . . split string
1516 ** at characters in <seps> and <wspace>
1517 **
1518 ** The difference of <seps> and <wspace> is that characters in <wspace> don't
1519 ** separate empty strings.
1520 */
FuncSplitStringInternal(Obj self,Obj string,Obj seps,Obj wspace)1521 static Obj FuncSplitStringInternal(Obj self, Obj string, Obj seps, Obj wspace)
1522 {
1523 const UInt1 *s;
1524 Int i, a, z, l, pos, len;
1525 Obj res, part;
1526 UInt1 SPLITSTRINGSEPS[256] = { 0 };
1527 UInt1 SPLITSTRINGWSPACE[256] = { 0 };
1528
1529 /* check whether <string> is a string */
1530 RequireStringRep("SplitString", string);
1531
1532 /* check whether <seps> is a string */
1533 RequireStringRep("SplitString", seps);
1534
1535 /* check whether <wspace> is a string */
1536 RequireStringRep("SplitString", wspace);
1537
1538 /* set SPLITSTRINGSEPS by setting positions of characters in rem to 1 */
1539 len = GET_LEN_STRING(seps);
1540 s = CONST_CHARS_STRING(seps);
1541 for(i=0; i<len; i++) SPLITSTRINGSEPS[s[i]] = 1;
1542
1543 /* set SPLITSTRINGWSPACE by setting positions of characters in rem to 1 */
1544 len = GET_LEN_STRING(wspace);
1545 s = CONST_CHARS_STRING(wspace);
1546 for(i=0; i<len; i++) SPLITSTRINGWSPACE[s[i]] = 1;
1547
1548 /* create the result (list of strings) */
1549 res = NEW_PLIST(T_PLIST, 2);
1550 pos = 0;
1551
1552 /* now do the splitting */
1553 len = GET_LEN_STRING(string);
1554 s = CONST_CHARS_STRING(string);
1555 for (a=0, z=0; z<len; z++) {
1556 // Whenever we encounter a separator or a white space, the substring
1557 // starting after the last separator/white space is cut out. The
1558 // only difference between white spaces and separators is that white
1559 // spaces don't separate empty strings.
1560 if (SPLITSTRINGWSPACE[s[z]] == 1) {
1561 if (a<z) {
1562 l = z-a;
1563 part = NEW_STRING(l);
1564 // update s in case there was a garbage collection
1565 s = CONST_CHARS_STRING(string);
1566 COPY_CHARS(part, s + a, l);
1567 CHARS_STRING(part)[l] = 0;
1568 pos++;
1569 AssPlist(res, pos, part);
1570 s = CONST_CHARS_STRING(string);
1571 a = z+1;
1572 }
1573 else {
1574 a = z+1;
1575 }
1576 }
1577 else {
1578 if (SPLITSTRINGSEPS[s[z]] == 1) {
1579 l = z-a;
1580 part = NEW_STRING(l);
1581 // update s in case there was a garbage collection
1582 s = CONST_CHARS_STRING(string);
1583 COPY_CHARS(part, s + a, l);
1584 CHARS_STRING(part)[l] = 0;
1585 pos++;
1586 AssPlist(res, pos, part);
1587 s = CONST_CHARS_STRING(string);
1588 a = z+1;
1589 }
1590 }
1591 }
1592
1593 // Pick up a substring at the end of the string. Note that a trailing
1594 // separator does not produce an empty string.
1595 if (a<z) {
1596 /* copy until last position which is z-1 */
1597 l = z-a;
1598 part = NEW_STRING(l);
1599 s = CONST_CHARS_STRING(string);
1600 COPY_CHARS(part, s + a, l);
1601 CHARS_STRING(part)[l] = 0;
1602 pos++;
1603 AssPlist(res, pos, part);
1604 }
1605
1606 return res;
1607 }
1608
1609 #ifdef HPCGAP
1610
1611 /****************************************************************************
1612 **
1613 *F FuncFIND_ALL_IN_STRING( <self>, <string>, <chars> )
1614 **
1615 ** Kernel function to return a list of all occurrences of a set of characters
1616 ** within a string.
1617 */
1618
FuncFIND_ALL_IN_STRING(Obj self,Obj string,Obj chars)1619 static Obj FuncFIND_ALL_IN_STRING(Obj self, Obj string, Obj chars)
1620 {
1621 Obj result;
1622 UInt i, len, matches;
1623 unsigned char table[1<<(8*sizeof(char))];
1624 const UInt1 *s;
1625 if (!IsStringConv(string) || !IsStringConv(chars))
1626 ErrorQuit("FIND_ALL_IN_STRING: Requires two string arguments", 0L, 0L);
1627 memset(table, 0, sizeof(table));
1628 len = GET_LEN_STRING(chars);
1629 s = CONST_CHARS_STRING(chars);
1630 for (i=0; i<len; i++)
1631 table[s[i]] = 1;
1632 len = GET_LEN_STRING(string);
1633 s = CONST_CHARS_STRING(string);
1634 matches = 0;
1635 for (i = 0; i < len; i++)
1636 if (table[s[i]])
1637 matches++;
1638 result = NEW_PLIST(T_PLIST_DENSE, matches);
1639 SET_LEN_PLIST(result, matches);
1640 matches = 1;
1641 for (i = 0; i < len; i++)
1642 if (table[s[i]]) {
1643 SET_ELM_PLIST(result, matches, INTOBJ_INT(i+1));
1644 matches++;
1645 }
1646 return result;
1647 }
1648
1649 /****************************************************************************
1650 **
1651 *F FuncNORMALIZE_NEWLINES( <self>, <string> )
1652 **
1653 ** Kernel function to replace all occurrences of CR or CRLF within a
1654 ** string with LF characters. This function modifies its argument and
1655 ** returns it also as its result.
1656 */
1657
FuncNORMALIZE_NEWLINES(Obj self,Obj string)1658 static Obj FuncNORMALIZE_NEWLINES(Obj self, Obj string)
1659 {
1660 UInt i, j, len;
1661 Char *s;
1662 if (!IsStringConv(string) || !REGION(string))
1663 ErrorQuit("NORMALIZE_NEWLINES: Requires a mutable string argument", 0L, 0L);
1664 len = GET_LEN_STRING(string);
1665 s = CSTR_STRING(string);
1666 for (i = j = 0; i < len; i++) {
1667 if (s[i] == '\r') {
1668 s[j++] = '\n';
1669 if (i + 1 < len && s[i+1] == '\n')
1670 i++;
1671 } else {
1672 s[j++] = s[i];
1673 }
1674 }
1675 SET_LEN_STRING(string, j);
1676 return string;
1677 }
1678
1679 #endif
1680
1681 /****************************************************************************
1682 **
1683 *F FuncSMALLINT_STR( <self>, <string> )
1684 **
1685 ** Kernel function to extract parse small integers from strings. Needed before
1686 ** we can conveniently have Int working for things like parsing command line
1687 ** options
1688 */
1689
FuncSMALLINT_STR(Obj self,Obj str)1690 static Obj FuncSMALLINT_STR(Obj self, Obj str)
1691 {
1692 const Char *string = CONST_CSTR_STRING(str);
1693 Int x = 0;
1694 Int sign = 1;
1695 while (isspace((unsigned int)*string))
1696 string++;
1697 if (*string == '-') {
1698 sign = -1;
1699 string++;
1700 } else if (*string == '+') {
1701 string++;
1702 }
1703 while (IsDigit(*string)) {
1704 x *= 10;
1705 x += (*string - '0');
1706 string++;
1707 }
1708 return INTOBJ_INT(sign*x);
1709 }
1710
1711
1712 /****************************************************************************
1713 **
1714 *F UnbString( <string>, <pos> ) . . . . . . unbind an element from a string
1715 **
1716 ** This is to avoid unpacking of the string to a plain list when <pos> is
1717 ** larger or equal to the length of <string>.
1718 */
UnbString(Obj string,Int pos)1719 static void UnbString(Obj string, Int pos)
1720 {
1721 GAP_ASSERT(IS_MUTABLE_OBJ(string));
1722 const Int len = GET_LEN_STRING(string);
1723 if (len == pos) {
1724 // maybe the string becomes sorted
1725 CLEAR_FILTS_LIST(string);
1726 CHARS_STRING(string)[pos - 1] = (UInt1)0;
1727 SET_LEN_STRING(string, len - 1);
1728 }
1729 else if (pos < len) {
1730 PLAIN_LIST(string);
1731 UNB_LIST(string, pos);
1732 }
1733 }
1734
1735
1736 /****************************************************************************
1737 **
1738 *F * * * * * * * * * * * * * initialize module * * * * * * * * * * * * * * * */
1739
1740 /****************************************************************************
1741 **
1742 *V BagNames . . . . . . . . . . . . . . . . . . . . . . . list of bag names
1743 */
1744 static StructBagNames BagNames[] = {
1745 { T_CHAR, "character" },
1746 { T_STRING, "list (string)" },
1747 { T_STRING +IMMUTABLE, "list (string,imm)" },
1748 { T_STRING_SSORT, "list (string,ssort)" },
1749 { T_STRING_SSORT +IMMUTABLE, "list (string,ssort,imm)" },
1750 { T_STRING_NSORT, "list (string,nsort)" },
1751 { T_STRING_NSORT +IMMUTABLE, "list (string,nsort,imm)" },
1752 { -1, "" }
1753 };
1754
1755
1756 /****************************************************************************
1757 **
1758 *V ClearFiltsTab . . . . . . . . . . . . . . . . . . . . clear filter tnums
1759 */
1760 static Int ClearFiltsTab [] = {
1761 T_STRING, T_STRING,
1762 T_STRING_NSORT, T_STRING,
1763 T_STRING_SSORT, T_STRING,
1764 -1, -1
1765 };
1766
1767
1768 /****************************************************************************
1769 **
1770 *V HasFiltTab . . . . . . . . . . . . . . . . . . . . . tester filter tnum
1771 */
1772 static Int HasFiltTab [] = {
1773
1774 // string
1775 T_STRING, FN_IS_DENSE, 1,
1776 T_STRING, FN_IS_NDENSE, 0,
1777 T_STRING, FN_IS_HOMOG, 1,
1778 T_STRING, FN_IS_NHOMOG, 0,
1779 T_STRING, FN_IS_TABLE, 0,
1780 T_STRING, FN_IS_RECT, 0,
1781 T_STRING, FN_IS_SSORT, 0,
1782 T_STRING, FN_IS_NSORT, 0,
1783
1784 // ssort string
1785 T_STRING_SSORT, FN_IS_DENSE, 1,
1786 T_STRING_SSORT, FN_IS_NDENSE, 0,
1787 T_STRING_SSORT, FN_IS_HOMOG, 1,
1788 T_STRING_SSORT, FN_IS_NHOMOG, 0,
1789 T_STRING_SSORT, FN_IS_TABLE, 0,
1790 T_STRING_SSORT, FN_IS_RECT, 0,
1791 T_STRING_SSORT, FN_IS_SSORT, 1,
1792 T_STRING_SSORT, FN_IS_NSORT, 0,
1793
1794 // nsort string
1795 T_STRING_NSORT, FN_IS_DENSE, 1,
1796 T_STRING_NSORT, FN_IS_NDENSE, 0,
1797 T_STRING_NSORT, FN_IS_HOMOG, 1,
1798 T_STRING_NSORT, FN_IS_NHOMOG, 0,
1799 T_STRING_NSORT, FN_IS_TABLE, 0,
1800 T_STRING_NSORT, FN_IS_RECT, 0,
1801 T_STRING_NSORT, FN_IS_SSORT, 0,
1802 T_STRING_NSORT, FN_IS_NSORT, 1,
1803
1804 -1, -1, -1
1805 };
1806
1807
1808 /****************************************************************************
1809 **
1810 *V SetFiltTab . . . . . . . . . . . . . . . . . . . . . setter filter tnum
1811 */
1812 static Int SetFiltTab [] = {
1813
1814 // string
1815 T_STRING, FN_IS_DENSE, T_STRING,
1816 T_STRING, FN_IS_NDENSE, -1,
1817 T_STRING, FN_IS_HOMOG, T_STRING,
1818 T_STRING, FN_IS_NHOMOG, -1,
1819 T_STRING, FN_IS_TABLE, -1,
1820 T_STRING, FN_IS_RECT, -1,
1821 T_STRING, FN_IS_SSORT, T_STRING_SSORT,
1822 T_STRING, FN_IS_NSORT, T_STRING_NSORT,
1823
1824 // ssort string
1825 T_STRING_SSORT, FN_IS_DENSE, T_STRING_SSORT,
1826 T_STRING_SSORT, FN_IS_NDENSE, -1,
1827 T_STRING_SSORT, FN_IS_HOMOG, T_STRING_SSORT,
1828 T_STRING_SSORT, FN_IS_NHOMOG, -1,
1829 T_STRING_SSORT, FN_IS_TABLE, -1,
1830 T_STRING_SSORT, FN_IS_RECT, -1,
1831 T_STRING_SSORT, FN_IS_SSORT, T_STRING_SSORT,
1832 T_STRING_SSORT, FN_IS_NSORT, -1,
1833
1834 // nsort string
1835 T_STRING_NSORT, FN_IS_DENSE, T_STRING_NSORT,
1836 T_STRING_NSORT, FN_IS_NDENSE, -1,
1837 T_STRING_NSORT, FN_IS_HOMOG, T_STRING_NSORT,
1838 T_STRING_NSORT, FN_IS_NHOMOG, -1,
1839 T_STRING_NSORT, FN_IS_TABLE, -1,
1840 T_STRING_NSORT, FN_IS_RECT, -1,
1841 T_STRING_NSORT, FN_IS_SSORT, -1,
1842 T_STRING_NSORT, FN_IS_NSORT, T_STRING_NSORT,
1843
1844 -1, -1, -1
1845
1846 };
1847
1848
1849 /****************************************************************************
1850 **
1851 *V ResetFiltTab . . . . . . . . . . . . . . . . . . . unsetter filter tnum
1852 */
1853 static Int ResetFiltTab [] = {
1854
1855 // string
1856 T_STRING, FN_IS_DENSE, T_STRING,
1857 T_STRING, FN_IS_NDENSE, T_STRING,
1858 T_STRING, FN_IS_HOMOG, T_STRING,
1859 T_STRING, FN_IS_NHOMOG, T_STRING,
1860 T_STRING, FN_IS_TABLE, T_STRING,
1861 T_STRING, FN_IS_RECT, T_STRING,
1862 T_STRING, FN_IS_SSORT, T_STRING,
1863 T_STRING, FN_IS_NSORT, T_STRING,
1864
1865 // ssort string
1866 T_STRING_SSORT, FN_IS_DENSE, T_STRING_SSORT,
1867 T_STRING_SSORT, FN_IS_NDENSE, T_STRING_SSORT,
1868 T_STRING_SSORT, FN_IS_HOMOG, T_STRING_SSORT,
1869 T_STRING_SSORT, FN_IS_NHOMOG, T_STRING_SSORT,
1870 T_STRING_SSORT, FN_IS_TABLE, T_STRING_SSORT,
1871 T_STRING_SSORT, FN_IS_RECT, T_STRING_SSORT,
1872 T_STRING_SSORT, FN_IS_SSORT, T_STRING,
1873 T_STRING_SSORT, FN_IS_NSORT, T_STRING_SSORT,
1874
1875 // nsort string
1876 T_STRING_NSORT, FN_IS_DENSE, T_STRING_NSORT,
1877 T_STRING_NSORT, FN_IS_NDENSE, T_STRING_NSORT,
1878 T_STRING_NSORT, FN_IS_HOMOG, T_STRING_NSORT,
1879 T_STRING_NSORT, FN_IS_NHOMOG, T_STRING_NSORT,
1880 T_STRING_NSORT, FN_IS_TABLE, T_STRING_NSORT,
1881 T_STRING_NSORT, FN_IS_RECT, T_STRING_NSORT,
1882 T_STRING_NSORT, FN_IS_SSORT, T_STRING_NSORT,
1883 T_STRING_NSORT, FN_IS_NSORT, T_STRING,
1884
1885 -1, -1, -1
1886
1887 };
1888
1889
1890 /****************************************************************************
1891 **
1892 *V GVarFilts . . . . . . . . . . . . . . . . . . . list of filters to export
1893 */
1894 static StructGVarFilt GVarFilts [] = {
1895
1896 GVAR_FILT(IS_STRING, "obj", &IsStringFilt),
1897 GVAR_FILT(IS_STRING_REP, "obj", &IsStringRepFilt),
1898 { 0, 0, 0, 0, 0 }
1899
1900 };
1901
1902
1903 /****************************************************************************
1904 **
1905 *V GVarFuncs . . . . . . . . . . . . . . . . . . list of functions to export
1906 */
1907 static StructGVarFunc GVarFuncs [] = {
1908
1909 GVAR_FUNC(IS_STRING_CONV, 1, "string"),
1910 GVAR_FUNC(CONV_STRING, 1, "string"),
1911 GVAR_FUNC(COPY_TO_STRING_REP, 1, "string"),
1912 GVAR_FUNC(CHAR_INT, 1, "integer"),
1913 GVAR_FUNC(INT_CHAR, 1, "char"),
1914 GVAR_FUNC(CHAR_SINT, 1, "integer"),
1915 GVAR_FUNC(SINT_CHAR, 1, "char"),
1916 GVAR_FUNC(STRING_SINTLIST, 1, "list"),
1917 GVAR_FUNC(INTLIST_STRING, 2, "string, sign"),
1918 GVAR_FUNC(SINTLIST_STRING, 1, "string"),
1919 GVAR_FUNC(EmptyString, 1, "len"),
1920 GVAR_FUNC(ShrinkAllocationString, 1, "str"),
1921 GVAR_FUNC(REVNEG_STRING, 1, "string"),
1922 GVAR_FUNC(POSITION_SUBSTRING, 3, "string, substr, off"),
1923 #ifdef HPCGAP
1924 GVAR_FUNC(FIND_ALL_IN_STRING, 2, "string, characters"),
1925 GVAR_FUNC(NORMALIZE_NEWLINES, 1, "string"),
1926 #endif
1927 GVAR_FUNC(NormalizeWhitespace, 1, "string"),
1928 GVAR_FUNC(REMOVE_CHARACTERS, 2, "string, rem"),
1929 GVAR_FUNC(TranslateString, 2, "string, trans"),
1930 GVAR_FUNC(SplitStringInternal, 3, "string, seps, wspace"),
1931 GVAR_FUNC(SMALLINT_STR, 1, "string"),
1932 { 0, 0, 0, 0, 0 }
1933
1934 };
1935
1936
1937 /****************************************************************************
1938 **
1939 *F InitKernel( <module> ) . . . . . . . . initialise kernel data structures
1940 */
1941 static Char CharCookie[256][21];
1942
InitKernel(StructInitInfo * module)1943 static Int InitKernel (
1944 StructInitInfo * module )
1945 {
1946 UInt t1;
1947 UInt t2;
1948 Int i, j;
1949 const Char * cookie_base = "src/stringobj.c:Char";
1950
1951 /* GASMAN marking functions and GASMAN names */
1952 InitBagNamesFromTable( BagNames );
1953
1954 InitMarkFuncBags( T_CHAR , MarkNoSubBags );
1955 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
1956 InitMarkFuncBags( t1 , MarkNoSubBags );
1957 InitMarkFuncBags( t1 +IMMUTABLE , MarkNoSubBags );
1958 }
1959
1960 #ifdef HPCGAP
1961 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
1962 MakeBagTypePublic( t1 + IMMUTABLE );
1963 }
1964 MakeBagTypePublic(T_CHAR);
1965 #endif
1966
1967 /* make all the character constants once and for all */
1968 for ( i = 0; i < 256; i++ ) {
1969 for (j = 0; j < 17; j++ ) {
1970 CharCookie[i][j] = cookie_base[j];
1971 }
1972 CharCookie[i][j++] = '0' + i/100;
1973 CharCookie[i][j++] = '0' + (i % 100)/10;
1974 CharCookie[i][j++] = '0' + i % 10;
1975 CharCookie[i][j++] = '\0';
1976 InitGlobalBag( &ObjsChar[i], &(CharCookie[i][0]) );
1977 }
1978
1979 /* install the type method */
1980 ImportGVarFromLibrary( "TYPE_CHAR", &TYPE_CHAR );
1981 TypeObjFuncs[ T_CHAR ] = TypeChar;
1982
1983 /* install the type method */
1984 ImportGVarFromLibrary( "TYPES_STRING", &TYPES_STRING );
1985 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
1986 TypeObjFuncs[ t1 ] = TypeString;
1987 TypeObjFuncs[ t1 +IMMUTABLE ] = TypeString;
1988 }
1989
1990 /* init filters and functions */
1991 InitHdlrFiltsFromTable( GVarFilts );
1992 InitHdlrFuncsFromTable( GVarFuncs );
1993
1994 /* initialise list tables */
1995 InitClearFiltsTNumsFromTable ( ClearFiltsTab );
1996 InitHasFiltListTNumsFromTable ( HasFiltTab );
1997 InitSetFiltListTNumsFromTable ( SetFiltTab );
1998 InitResetFiltListTNumsFromTable( ResetFiltTab );
1999
2000 /* Install the saving function */
2001 SaveObjFuncs[ T_CHAR ] = SaveChar;
2002 LoadObjFuncs[ T_CHAR ] = LoadChar;
2003
2004 /* install the character functions */
2005 PrintObjFuncs[ T_CHAR ] = PrintChar;
2006 EqFuncs[ T_CHAR ][ T_CHAR ] = EqChar;
2007 LtFuncs[ T_CHAR ][ T_CHAR ] = LtChar;
2008
2009 /* install the saving method */
2010 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2011 SaveObjFuncs[ t1 ] = SaveString;
2012 SaveObjFuncs[ t1 +IMMUTABLE ] = SaveString;
2013 LoadObjFuncs[ t1 ] = LoadString;
2014 LoadObjFuncs[ t1 +IMMUTABLE ] = LoadString;
2015 }
2016
2017 #if !defined(USE_THREADSAFE_COPYING)
2018 /* install the copy method */
2019 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2020 CopyObjFuncs [ t1 ] = CopyString;
2021 CopyObjFuncs [ t1 +IMMUTABLE ] = CopyString;
2022 CleanObjFuncs[ t1 ] = 0;
2023 CleanObjFuncs[ t1 +IMMUTABLE ] = 0;
2024 }
2025 #endif
2026
2027 /* install the print method */
2028 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2029 PrintObjFuncs[ t1 ] = PrintString;
2030 PrintObjFuncs[ t1 +IMMUTABLE ] = PrintString;
2031 }
2032
2033 /* install the comparison methods */
2034 for ( t1 = T_STRING; t1 <= T_STRING_SSORT+IMMUTABLE; t1++ ) {
2035 for ( t2 = T_STRING; t2 <= T_STRING_SSORT+IMMUTABLE; t2++ ) {
2036 EqFuncs[ t1 ][ t2 ] = EqString;
2037 LtFuncs[ t1 ][ t2 ] = LtString;
2038 }
2039 }
2040
2041 /* install the list methods */
2042 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1 += 2 ) {
2043 LenListFuncs [ t1 ] = LenString;
2044 LenListFuncs [ t1 +IMMUTABLE ] = LenString;
2045 IsbListFuncs [ t1 ] = IsbString;
2046 IsbListFuncs [ t1 +IMMUTABLE ] = IsbString;
2047 Elm0ListFuncs [ t1 ] = Elm0String;
2048 Elm0ListFuncs [ t1 +IMMUTABLE ] = Elm0String;
2049 Elm0vListFuncs [ t1 ] = Elm0vString;
2050 Elm0vListFuncs [ t1 +IMMUTABLE ] = Elm0vString;
2051 ElmListFuncs [ t1 ] = ElmString;
2052 ElmListFuncs [ t1 +IMMUTABLE ] = ElmString;
2053 ElmvListFuncs [ t1 ] = ElmvString;
2054 ElmvListFuncs [ t1 +IMMUTABLE ] = ElmvString;
2055 ElmwListFuncs [ t1 ] = ElmwString;
2056 ElmwListFuncs [ t1 +IMMUTABLE ] = ElmwString;
2057 ElmsListFuncs [ t1 ] = ElmsString;
2058 ElmsListFuncs [ t1 +IMMUTABLE ] = ElmsString;
2059 UnbListFuncs [ t1 ] = UnbString;
2060 AssListFuncs [ t1 ] = AssString;
2061 AsssListFuncs [ t1 ] = AsssString;
2062 IsDenseListFuncs[ t1 ] = AlwaysYes;
2063 IsDenseListFuncs[ t1 +IMMUTABLE ] = AlwaysYes;
2064 IsHomogListFuncs[ t1 ] = AlwaysYes;
2065 IsHomogListFuncs[ t1 +IMMUTABLE ] = AlwaysYes;
2066 IsTableListFuncs[ t1 ] = AlwaysNo;
2067 IsTableListFuncs[ t1 +IMMUTABLE ] = AlwaysNo;
2068 IsSSortListFuncs[ t1 ] = IsSSortString;
2069 IsSSortListFuncs[ t1 +IMMUTABLE ] = IsSSortString;
2070 IsPossListFuncs [ t1 ] = IsPossString;
2071 IsPossListFuncs [ t1 +IMMUTABLE ] = IsPossString;
2072 PosListFuncs [ t1 ] = PosString;
2073 PosListFuncs [ t1 +IMMUTABLE ] = PosString;
2074 PlainListFuncs [ t1 ] = PlainString;
2075 PlainListFuncs [ t1 +IMMUTABLE ] = PlainString;
2076 }
2077 IsSSortListFuncs[ T_STRING_NSORT ] = AlwaysNo;
2078 IsSSortListFuncs[ T_STRING_NSORT +IMMUTABLE ] = AlwaysNo;
2079 IsSSortListFuncs[ T_STRING_SSORT ] = AlwaysYes;
2080 IsSSortListFuncs[ T_STRING_SSORT +IMMUTABLE ] = AlwaysYes;
2081
2082
2083 /* install the `IsString' functions */
2084 for ( t1 = FIRST_REAL_TNUM; t1 <= LAST_REAL_TNUM; t1++ ) {
2085 assert(IsStringFuncs[ t1 ] == 0);
2086 IsStringFuncs[ t1 ] = AlwaysNo;
2087 }
2088
2089 IsStringFuncs[ T_PLIST ] = IsStringList;
2090 IsStringFuncs[ T_PLIST +IMMUTABLE ] = IsStringList;
2091 IsStringFuncs[ T_PLIST_DENSE ] = IsStringList;
2092 IsStringFuncs[ T_PLIST_DENSE+IMMUTABLE ] = IsStringList;
2093 IsStringFuncs[ T_PLIST_EMPTY ] = AlwaysYes;
2094 IsStringFuncs[ T_PLIST_EMPTY+IMMUTABLE ] = AlwaysYes;
2095
2096 for ( t1 = T_PLIST_HOM; t1 <= T_PLIST_HOM_SSORT; t1 += 2 ) {
2097 IsStringFuncs[ t1 ] = IsStringListHom;
2098 IsStringFuncs[ t1 +IMMUTABLE ] = IsStringListHom;
2099 }
2100
2101 for ( t1 = T_STRING; t1 <= T_STRING_SSORT; t1++ ) {
2102 IsStringFuncs[ t1 ] = AlwaysYes;
2103 }
2104
2105 for ( t1 = FIRST_EXTERNAL_TNUM; t1 <= LAST_EXTERNAL_TNUM; t1++ ) {
2106 IsStringFuncs[ t1 ] = IsStringObject;
2107 }
2108
2109 MakeImmutableObjFuncs[ T_STRING ] = MakeImmutableNoRecurse;
2110 MakeImmutableObjFuncs[ T_STRING_SSORT ] = MakeImmutableNoRecurse;
2111 MakeImmutableObjFuncs[ T_STRING_NSORT ] = MakeImmutableNoRecurse;
2112
2113 /* return success */
2114 return 0;
2115 }
2116
2117
2118 /****************************************************************************
2119 **
2120 *F InitLibrary( <module> ) . . . . . . . initialise library data structures
2121 */
InitLibrary(StructInitInfo * module)2122 static Int InitLibrary (
2123 StructInitInfo * module )
2124 {
2125 Int i;
2126
2127
2128 /* make all the character constants once and for all */
2129 for ( i = 0; i < 256; i++ ) {
2130 ObjsChar[i] = NewBag( T_CHAR, 1L );
2131 SET_CHAR_VALUE(ObjsChar[i], (UChar)i);
2132 }
2133
2134 /* init filters and functions */
2135 InitGVarFiltsFromTable( GVarFilts );
2136 InitGVarFuncsFromTable( GVarFuncs );
2137
2138 /* return success */
2139 return 0;
2140 }
2141
2142
2143 /****************************************************************************
2144 **
2145 *F InitInfoString() . . . . . . . . . . . . . . . . table of init functions
2146 */
2147 static StructInitInfo module = {
2148 // init struct using C99 designated initializers; for a full list of
2149 // fields, please refer to the definition of StructInitInfo
2150 .type = MODULE_BUILTIN,
2151 .name = "string",
2152 .initKernel = InitKernel,
2153 .initLibrary = InitLibrary,
2154 };
2155
InitInfoString(void)2156 StructInitInfo * InitInfoString ( void )
2157 {
2158 return &module;
2159 }
2160