1 /*
2  * tclXstring.c --
3  *
4  *      Extended TCL string and character manipulation commands.
5  *-----------------------------------------------------------------------------
6  * Copyright 1991-1999 Karl Lehenbauer and Mark Diekhans.
7  *
8  * Permission to use, copy, modify, and distribute this software and its
9  * documentation for any purpose and without fee is hereby granted, provided
10  * that the above copyright notice appear in all copies.  Karl Lehenbauer and
11  * Mark Diekhans make no representations about the suitability of this
12  * software for any purpose.  It is provided "as is" without express or
13  * implied warranty.
14  *-----------------------------------------------------------------------------
15  * $Id: tclXstring.c,v 1.4 2005/11/21 18:38:51 hobbs Exp $
16  *-----------------------------------------------------------------------------
17  */
18 
19 /*FIX: Add creplace to overwrite characters in a string. */
20 
21 #include "tclExtdInt.h"
22 
23 
24 /*
25  * Prototypes of internal functions.
26  */
27 static int
28 CheckForUniCode _ANSI_ARGS_((Tcl_Interp *interp,
29                              char *str,
30                              int strLen,
31                              char *which));
32 
33 static unsigned int
34 ExpandString _ANSI_ARGS_((unsigned char *inStr,
35                           int            inLength,
36                           unsigned char  outStr [],
37                           int           *outLengthPtr));
38 
39 static int
40 TclX_CindexObjCmd _ANSI_ARGS_((ClientData clientData,
41                                Tcl_Interp *interp,
42                                int         objc,
43                                Tcl_Obj   *CONST objv[]));
44 
45 static int
46 TclX_ClengthObjCmd _ANSI_ARGS_((ClientData clientData,
47                                 Tcl_Interp *interp,
48                                 int         objc,
49                                 Tcl_Obj   *CONST objv[]));
50 
51 static int
52 TclX_CconcatObjCmd _ANSI_ARGS_((ClientData clientData,
53                                 Tcl_Interp *interp,
54                                 int         objc,
55                                 Tcl_Obj   *CONST objv[]));
56 
57 static int
58 TclX_CrangeObjCmd _ANSI_ARGS_((ClientData clientData,
59                                Tcl_Interp *interp,
60                                int         objc,
61                                Tcl_Obj   *CONST objv[]));
62 
63 static int
64 TclX_CcollateObjCmd _ANSI_ARGS_((ClientData clientData,
65                                  Tcl_Interp *interp,
66                                  int         objc,
67                                  Tcl_Obj   *CONST objv[]));
68 
69 static int
70 TclX_ReplicateObjCmd _ANSI_ARGS_((ClientData clientData,
71                                   Tcl_Interp *interp,
72                                   int         objc,
73                                   Tcl_Obj   *CONST objv[]));
74 
75 static int
76 TclX_TranslitObjCmd _ANSI_ARGS_((ClientData clientData,
77                                  Tcl_Interp *interp,
78                                  int         objc,
79                                  Tcl_Obj   *CONST objv[]));
80 
81 static int
82 TclX_CtypeObjCmd _ANSI_ARGS_((ClientData clientData,
83                               Tcl_Interp *interp,
84                               int         objc,
85                               Tcl_Obj   *CONST objv[]));
86 
87 static int
88 TclX_CtokenObjCmd _ANSI_ARGS_((ClientData clientData,
89                                Tcl_Interp *interp,
90                                int         objc,
91                                Tcl_Obj   *CONST objv[]));
92 
93 static int
94 TclX_CequalObjCmd _ANSI_ARGS_((ClientData clientData,
95                                Tcl_Interp *interp,
96                                int         objc,
97                                Tcl_Obj   *CONST objv[]));
98 
99 
100 /*-----------------------------------------------------------------------------
101  * TclX_CindexObjCmd --
102  *     Implements the cindex Tcl command:
103  *         cindex string indexExpr
104  *
105  * Results:
106  *      Returns the character indexed by  index  (zero  based)  from string.
107  *-----------------------------------------------------------------------------
108  */
109 static int
TclX_CindexObjCmd(dummy,interp,objc,objv)110 TclX_CindexObjCmd (dummy, interp, objc, objv)
111     ClientData   dummy;
112     Tcl_Interp  *interp;
113     int          objc;
114     Tcl_Obj    *CONST objv[];
115 {
116     int strLen, utfLen, idx, numBytes;
117     char *str, buf [TCL_UTF_MAX];
118 
119     if (objc != 3)
120         return TclX_WrongArgs (interp, objv[0], "string indexExpr");
121 
122     str = Tcl_GetStringFromObj (objv[1], &strLen);
123     utfLen = Tcl_NumUtfChars(str, strLen);
124 
125     if (TclX_RelativeExpr (interp, objv [2], utfLen, &idx) != TCL_OK) {
126         return TCL_ERROR;
127     }
128 
129     if ((idx < 0) || (idx >= utfLen))
130         return TCL_OK;
131 
132     numBytes = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(str, idx), buf);
133     Tcl_SetStringObj (Tcl_GetObjResult (interp), buf, numBytes);
134     return TCL_OK;
135 }
136 
137 
138 /*-----------------------------------------------------------------------------
139  * TclX_ClengthObjCmd --
140  *     Implements the clength Tcl command:
141  *         clength string
142  *
143  * Results:
144  *      Returns the length of string in characters.
145  *-----------------------------------------------------------------------------
146  */
147 static int
TclX_ClengthObjCmd(dummy,interp,objc,objv)148 TclX_ClengthObjCmd (dummy, interp, objc, objv)
149     ClientData   dummy;
150     Tcl_Interp  *interp;
151     int          objc;
152     Tcl_Obj    *CONST objv[];
153 {
154     char *str;
155     int strLen;
156 
157     if (objc != 2)
158         return TclX_WrongArgs (interp, objv[0], "string");
159 
160     str = Tcl_GetStringFromObj (objv[1], &strLen);
161     Tcl_SetIntObj (Tcl_GetObjResult (interp), Tcl_NumUtfChars(str, strLen));
162     return TCL_OK;
163 }
164 
165 
166 /*-----------------------------------------------------------------------------
167  * TclX_CconcatObjCmd --
168  *     Implements the cconcat TclX command:
169  *         cconcat ?string? ?string? ?...?
170  *
171  * Results:
172  *      The arguments concatenated.
173  *-----------------------------------------------------------------------------
174  */
175 static int
TclX_CconcatObjCmd(dummy,interp,objc,objv)176 TclX_CconcatObjCmd (dummy, interp, objc, objv)
177     ClientData   dummy;
178     Tcl_Interp  *interp;
179     int          objc;
180     Tcl_Obj    *CONST objv[];
181 {
182     Tcl_Obj *resultPtr = Tcl_GetObjResult(interp);
183     int idx, strLen;
184     char *str;
185 
186     for (idx = 1; idx < objc; idx++) {
187 	str = Tcl_GetStringFromObj(objv[idx], &strLen);
188 	Tcl_AppendToObj(resultPtr, str, strLen);
189     }
190     return TCL_OK;
191 }
192 
193 /*-----------------------------------------------------------------------------
194  * TclX_CrangeObjCmd --
195  *     Implements the crange and csubstr Tcl commands:
196  *         crange string firstExpr lastExpr
197  *         csubstr string firstExpr lengthExpr
198  *
199  * Results:
200  *      Standard Tcl result.
201  * Notes:
202  *   If clientData is TRUE its the range command, if its FALSE its csubstr.
203  *-----------------------------------------------------------------------------
204  */
205 static int
TclX_CrangeObjCmd(clientData,interp,objc,objv)206 TclX_CrangeObjCmd (clientData, interp, objc, objv)
207     ClientData   clientData;
208     Tcl_Interp  *interp;
209     int          objc;
210     Tcl_Obj    *CONST objv[];
211 {
212     int strLen, utfLen, first, subLen;
213     size_t isRange = (size_t) clientData;
214     char *str;
215     CONST84 char *start, *end;
216 
217     if (objc != 4) {
218         if (isRange)
219             return TclX_WrongArgs (interp, objv[0],
220                                    "string firstExpr lastExpr");
221         else
222             return TclX_WrongArgs (interp, objv[0],
223                                    "string firstExpr lengthExpr");
224     }
225 
226     str = Tcl_GetStringFromObj (objv [1], &strLen);
227     utfLen = Tcl_NumUtfChars(str, strLen);
228 
229     if (TclX_RelativeExpr (interp, objv [2], utfLen, &first) != TCL_OK) {
230         return TCL_ERROR;
231     }
232 
233     if ((first < 0) || (first >= utfLen))
234         return TCL_OK;
235 
236     if (TclX_RelativeExpr (interp, objv [3], utfLen, &subLen) != TCL_OK) {
237         return TCL_ERROR;
238     }
239 
240     if (isRange) {
241         if (subLen < first)
242             return TCL_OK;
243         subLen = subLen - first +1;
244     }
245 
246     if (first + subLen > utfLen)
247         subLen = utfLen - first;
248 
249     start = Tcl_UtfAtIndex(str, first);
250     end = Tcl_UtfAtIndex(start, subLen);
251     Tcl_SetStringObj(Tcl_GetObjResult(interp), start, end - start);
252     return TCL_OK;
253 }
254 
255 
256 /*-----------------------------------------------------------------------------
257  * TclX_CcollateObjCmd --
258  *     Implements ccollate Tcl commands:
259  *         ccollate [-local] string1 string2
260  *
261  * Results:
262  *      Standard Tcl result.
263  *-----------------------------------------------------------------------------
264  */
265 static int
TclX_CcollateObjCmd(dummy,interp,objc,objv)266 TclX_CcollateObjCmd (dummy, interp, objc, objv)
267     ClientData   dummy;
268     Tcl_Interp  *interp;
269     int          objc;
270     Tcl_Obj    *CONST objv[];
271 {
272     int argIndex, result, local = FALSE;
273     char *optionString;
274     char *string1;
275     int string1Len;
276     char *string2;
277     int string2Len;
278 
279     /*FIX: Not utf clean (FIXUTF), can it ever be... */
280     if ((objc < 3) || (objc > 4))
281         return TclX_WrongArgs (interp, objv[0], "?options? string1 string2");
282 
283     if (objc == 4) {
284         optionString = Tcl_GetStringFromObj (objv [1], NULL);
285         if (!STREQU (optionString, "-local")) {
286             TclX_AppendObjResult (interp, "Invalid option \"", optionString,
287                                   "\", expected \"-local\"", (char *) NULL);
288             return TCL_ERROR;
289         }
290         local = TRUE;
291     }
292     argIndex = objc - 2;
293 
294     string1 = Tcl_GetStringFromObj (objv [argIndex], &string1Len);
295     string2 = Tcl_GetStringFromObj (objv [argIndex + 1], &string2Len);
296     if ((strlen (string1) != (size_t) string1Len) ||
297 	(strlen (string1) != (size_t) string1Len)) {
298         TclX_AppendObjResult (interp, "The " ,
299                               Tcl_GetStringFromObj (objv [0], NULL),
300                               " command does not support binary data",
301                               (char *) NULL);
302         return TCL_ERROR;
303     }
304     if (local) {
305 #ifndef NO_STRCOLL
306         result = strcoll (string1, string2);
307 #else
308         result = strcmp (string1, string2);
309 #endif
310     } else {
311         result = strcmp (string1, string2);
312     }
313     Tcl_SetIntObj (Tcl_GetObjResult (interp),
314                    ((result == 0) ? 0 : ((result < 0) ? -1 : 1)));
315     return TCL_OK;
316 }
317 
318 /*-----------------------------------------------------------------------------
319  * TclX_ReplicateObjCmd --
320  *     Implements the replicate Tcl command:
321  *         replicate string countExpr
322  *
323  * Results:
324  *      Returns string replicated count times.
325  *-----------------------------------------------------------------------------
326  */
327 static int
TclX_ReplicateObjCmd(dummy,interp,objc,objv)328 TclX_ReplicateObjCmd (dummy, interp, objc, objv)
329     ClientData   dummy;
330     Tcl_Interp  *interp;
331     int          objc;
332     Tcl_Obj     *CONST objv[];
333 {
334     Tcl_Obj     *resultPtr = Tcl_GetObjResult (interp);
335     long         count;
336     long         repCount;
337     char        *stringPtr;
338     int          stringLength;
339 
340     if (objc != 3)
341         return TclX_WrongArgs (interp, objv[0], "string countExpr");
342 
343     if (Tcl_GetLongFromObj (interp, objv [2], &repCount) != TCL_OK)
344         return TCL_ERROR;
345 
346     stringPtr = Tcl_GetStringFromObj (objv [1], &stringLength);
347     for (count = 0; count < repCount; count++) {
348         Tcl_AppendToObj (resultPtr, stringPtr, stringLength);
349     }
350     return TCL_OK;
351 }
352 
353 /*-----------------------------------------------------------------------------
354  * TclX_CtokenObjCmd --
355  *     Implements the clength Tcl command:
356  *         ctoken strvar separators
357  *
358  * Results:
359  *      Returns the first token and removes it from the string variable.
360  * FIX: Add command to make a list.  Better yet, a new cparse command thats
361  * more flexable and includes this functionality.
362  *-----------------------------------------------------------------------------
363  */
364 static int
TclX_CtokenObjCmd(dummy,interp,objc,objv)365 TclX_CtokenObjCmd (dummy, interp, objc, objv)
366     ClientData   dummy;
367     Tcl_Interp  *interp;
368     int          objc;
369     Tcl_Obj    *CONST objv[];
370 {
371     Tcl_Obj* stringVarObj;
372     char* string;
373     int strByteLen;
374     int strByteIdx;
375     char* separators;
376     int separatorsLen;
377     int tokenByteIdx;
378     int tokenByteLen;
379     Tcl_DString token;
380     Tcl_UniChar uniChar;
381     int utfBytes;
382     Tcl_Obj *newVarValueObj;
383 
384     if (objc != 3) {
385         return TclX_WrongArgs(interp, objv[0], "strvar separators");
386     }
387 
388     stringVarObj = Tcl_ObjGetVar2(interp, objv[1], NULL,
389                                   TCL_LEAVE_ERR_MSG|TCL_PARSE_PART1);
390     if (stringVarObj == NULL) {
391         return TCL_ERROR;
392     }
393     string = Tcl_GetStringFromObj(stringVarObj, &strByteLen);
394     separators = Tcl_GetStringFromObj(objv[2], &separatorsLen);
395 
396     /* Find the start of the token */
397     strByteIdx = 0;
398     while (strByteIdx < strByteLen) {
399         utfBytes = Tcl_UtfToUniChar(string+strByteIdx, &uniChar);
400         if (Tcl_UtfFindFirst(separators, uniChar) == NULL) {
401             break;  /* Reached a separator */
402         }
403         strByteIdx += utfBytes;
404     }
405     tokenByteIdx = strByteIdx;
406 
407     /* Find end of the token */
408     while (strByteIdx < strByteLen) {
409         utfBytes = Tcl_UtfToUniChar(string+strByteIdx, &uniChar);
410         if (Tcl_UtfFindFirst(separators, uniChar) != NULL) {
411             break;  /* Reached a separator */
412         }
413         strByteIdx += utfBytes;
414     }
415     tokenByteLen = strByteIdx-tokenByteIdx;
416 
417     /* Copy token, before replacing variable, as its coming from old var */
418     Tcl_DStringInit(&token);
419     Tcl_DStringAppend(&token, string+tokenByteIdx, tokenByteLen);
420 
421     /* Set variable argument to new string. */
422     newVarValueObj = Tcl_NewStringObj(string+strByteIdx,
423                                       strByteLen-strByteIdx);
424     if (Tcl_SetVar2Ex(interp, Tcl_GetStringFromObj(objv[1], NULL), NULL,
425                       newVarValueObj,
426                       TCL_LEAVE_ERR_MSG|TCL_PARSE_PART1) == NULL) {
427         Tcl_DStringFree (&token);
428         Tcl_DecrRefCount (newVarValueObj);
429         return TCL_ERROR;
430     }
431 
432     Tcl_DStringResult(interp, &token);
433     return TCL_OK;
434 }
435 
436 /*-----------------------------------------------------------------------------
437  * TclX_CequalObjCmd --
438  *     Implements the cexpand Tcl command:
439  *         cequal string1 string2
440  *
441  * Results:
442  *   "0" or "1".
443  *-----------------------------------------------------------------------------
444  */
445 static int
TclX_CequalObjCmd(dummy,interp,objc,objv)446 TclX_CequalObjCmd (dummy, interp, objc, objv)
447     ClientData   dummy;
448     Tcl_Interp  *interp;
449     int          objc;
450     Tcl_Obj    *CONST objv[];
451 {
452     char *string1Ptr;
453     int string1Len;
454     char *string2Ptr;
455     int string2Len;
456 
457     if (objc != 3)
458         return TclX_WrongArgs (interp, objv[0], "string1 string2");
459 
460     string1Ptr = Tcl_GetStringFromObj (objv[1], &string1Len);
461     string2Ptr = Tcl_GetStringFromObj (objv[2], &string2Len);
462 
463     Tcl_SetBooleanObj (Tcl_GetObjResult (interp),
464                        ((string1Len == string2Len) &&
465                         (*string1Ptr == *string2Ptr) &&
466                         (memcmp (string1Ptr, string2Ptr, string1Len) == 0)));
467     return TCL_OK;
468 }
469 
470 /*-----------------------------------------------------------------------------
471  * Check for non-ascii characters in a translit string until we actually
472  * make it work for UniCode.
473  *-----------------------------------------------------------------------------
474  */
CheckForUniCode(interp,str,strLen,which)475 static int CheckForUniCode(interp, str, strLen, which)
476     Tcl_Interp  *interp;
477     char *str;
478     int strLen;
479     char *which;
480 {
481     int idx, nbytes;
482     Tcl_UniChar uc;
483 
484     for (idx = 0; idx < strLen; idx++) {
485         nbytes = Tcl_UtfToUniChar(&str[idx], &uc);
486         if (nbytes != 1) {
487             Tcl_AppendResult(interp, "Unicode character found in ", which,
488                              ", the translit command does not yet support Unicode",
489                              (char*)NULL);
490             return TCL_ERROR;
491         }
492     }
493     return TCL_OK;
494 }
495 
496 
497 
498 /*-----------------------------------------------------------------------------
499  * ExpandString --
500  *  Build an expand version of a translit range specification.
501  *
502  * Results:
503  *  The number of characters in the expansion buffer or < 0 if the maximum
504  * expansion has been exceeded.
505  *-----------------------------------------------------------------------------
506  */
507 #define MAX_EXPANSION 255
508 
509 static unsigned int
ExpandString(inStr,inLength,outStr,outLengthPtr)510 ExpandString(inStr, inLength, outStr, outLengthPtr)
511     unsigned char *inStr;
512     int            inLength;
513     unsigned char  outStr [];
514     int           *outLengthPtr;
515 {
516     int i, j;
517     unsigned char *s = inStr;
518     unsigned char *inStrLimit = inStr + inLength;
519 
520     i = 0;
521     while((s < inStrLimit) && (i < MAX_EXPANSION)) {
522         if ((s [1] == '-') && (s [2] > s [0])) {
523             for (j = s [0]; j <= s [2]; j++) {
524                 outStr [i++] = j;
525             }
526             s += 3;
527         } else {
528             outStr [i++] = *s++;
529         }
530     }
531     *outLengthPtr = i;
532     return (i < MAX_EXPANSION);
533 }
534 
535 /*-----------------------------------------------------------------------------
536  * TclX_TranslitObjCmd --
537  *     Implements the Tcl translit command:
538  *     translit inrange outrange string
539  *
540  * Results:
541  *  Standard Tcl results.
542  * FIXME:  Does not currently support non-ascii characters.
543  *-----------------------------------------------------------------------------
544  */
545 static int
TclX_TranslitObjCmd(dummy,interp,objc,objv)546 TclX_TranslitObjCmd (dummy, interp, objc, objv)
547     ClientData   dummy;
548     Tcl_Interp  *interp;
549     int          objc;
550     Tcl_Obj     *CONST objv[];
551 {
552     unsigned char from [MAX_EXPANSION+1];
553     int           fromLen;
554     unsigned char to   [MAX_EXPANSION+1];
555     int           toLen;
556     short         map [MAX_EXPANSION+1];
557     unsigned char *s;
558     char          *fromString;
559     int            fromStringLen;
560     char          *toString;
561     int            toStringLen;
562     Tcl_Obj       *transStringObj;
563     char          *transString;
564     int            transStringLen;
565     int            idx;
566     int            stringIndex;
567 
568     /*FIX: Not UTF-safe.(FIXUTF) */
569 
570     if (objc != 4)
571         return TclX_WrongArgs (interp, objv[0], "from to string");
572 
573     /*
574      * Expand ranges into descrete values.
575      */
576     fromString = Tcl_GetStringFromObj (objv[1], &fromStringLen);
577     if (CheckForUniCode(interp, fromString, fromStringLen,
578                         "in-range") != TCL_OK) {
579         return TCL_ERROR;
580     }
581     if (!ExpandString ((unsigned char *) fromString, fromStringLen,
582                        from, &fromLen)) {
583         TclX_AppendObjResult (interp, "inrange expansion too long",
584                               (char *) NULL);
585         return TCL_ERROR;
586     }
587 
588     toString = Tcl_GetStringFromObj (objv [2], &toStringLen);
589     if (CheckForUniCode(interp, toString, toStringLen,
590                         "out-range") != TCL_OK) {
591         return TCL_ERROR;
592     }
593     if (!ExpandString ((unsigned char *) toString, toStringLen,
594                        to, &toLen)) {
595         TclX_AppendObjResult (interp, "outrange expansion too long",
596                               (char *) NULL);
597         return TCL_ERROR;
598     }
599 
600     if (fromLen > toLen) {
601         TclX_AppendObjResult (interp, "inrange longer than outrange",
602                               (char *) NULL);
603         return TCL_ERROR;
604     }
605 
606     /*
607      * Build map.  Entries of -1 discard the char.  All other values are
608      * positive (hence its a short).
609      */
610     for (idx = 0; idx <= MAX_EXPANSION; idx++) {
611         map [idx] = idx;
612     }
613     for (idx = 0; (idx < toLen) && (idx < fromLen); idx++) {
614         map [from [idx]] = to [idx];
615     }
616     for (; idx < fromLen; idx++)
617         map [from [idx]] = -1;
618 
619     /*
620      * Get a string object to transform.
621      */
622     transString = Tcl_GetStringFromObj (objv[3], &transStringLen);
623     if (CheckForUniCode(interp, transString, transStringLen,
624                         "string to translate") != TCL_OK) {
625         return TCL_ERROR;
626     }
627 
628 
629     transStringObj = Tcl_NewStringObj (transString, transStringLen);
630     transString = Tcl_GetStringFromObj (transStringObj, &transStringLen);
631 
632     for (s = (unsigned char *) transString, stringIndex = 0;
633          stringIndex < transStringLen; stringIndex++) {
634         if (map [*s] >= 0) {
635             *s = (unsigned char) map [*s];
636             s++;
637         }
638     }
639 
640     Tcl_SetObjResult (interp, transStringObj);
641     return TCL_OK;
642 }
643 
644 /*-----------------------------------------------------------------------------
645  * TclX_CtypeObjCmd --
646  *
647  *      This function implements the 'ctype' command:
648  *      ctype ?-failindex? class string ?failIndexVar?
649  *
650  *      Where class is one of the following:
651  *        digit, xdigit, lower, upper, alpha, alnum,
652  *        space, cntrl,  punct, print, graph, ascii, char or ord.
653  *
654  * Results:
655  *       One or zero: Depending if all the characters in the string are of
656  *       the desired class.  Char and ord provide conversions and return the
657  *       converted value.
658  * FIX: Add check for legal number (can be negative, hex, etc).
659  *-----------------------------------------------------------------------------
660  */
661 static int
TclX_CtypeObjCmd(dummy,interp,objc,objv)662 TclX_CtypeObjCmd (dummy, interp, objc, objv)
663     ClientData   dummy;
664     Tcl_Interp  *interp;
665     int          objc;
666     Tcl_Obj    *CONST objv[];
667 {
668     int failIndex = FALSE;
669     char *optStr, *class, *charStr;
670     int charStrLen, cnt, idx;
671     char *failVar = NULL;
672     Tcl_Obj *classObj, *stringObj;
673     int number;
674     char charBuf[TCL_UTF_MAX];
675     Tcl_UniChar uniChar;
676 
677 #define IS_8BIT_UNICHAR(c) (c <= 255)
678 
679     if (TCL_UTF_MAX > sizeof(number)) {
680         panic("TclX_CtypeObjCmd: UTF character longer than a int");
681     }
682 
683     /*FIX: Split into multiple procs */
684     /*FIX: Should use UtfNext to walk string */
685 
686     if (objc < 3) {
687         goto wrongNumArgs;
688     }
689 
690     optStr = Tcl_GetStringFromObj(objv[1], NULL);
691     if (*optStr == '-') {
692         if (STREQU(optStr, "-failindex")) {
693             failIndex = TRUE;
694         } else {
695             TclX_AppendObjResult(interp, "invalid option \"",
696                                  Tcl_GetStringFromObj (objv [1], NULL),
697                                  "\", must be -failindex", (char *) NULL);
698             return TCL_ERROR;
699         }
700     }
701     if (failIndex) {
702         if (objc != 5) {
703             goto wrongNumArgs;
704         }
705         failVar = Tcl_GetStringFromObj(objv[2], NULL);
706         classObj = objv[3];
707         stringObj = objv[4];
708     } else {
709         if (objc != 3) {
710             goto wrongNumArgs;
711         }
712         classObj = objv[1];
713         stringObj = objv[2];
714     }
715     charStr = Tcl_GetStringFromObj(stringObj, &charStrLen);
716     charStrLen = Tcl_NumUtfChars(charStr, charStrLen);
717     class = Tcl_GetStringFromObj(classObj, NULL);
718 
719     /*
720      * Handle conversion requests.
721      */
722     if (STREQU(class, "char")) {
723         if (failIndex) {
724           goto failInvalid;
725         }
726         if (Tcl_GetIntFromObj(interp, stringObj, &number) != TCL_OK) {
727             return TCL_ERROR;
728         }
729         cnt = Tcl_UniCharToUtf(number, charBuf);
730         charBuf[cnt] = '\0';
731         Tcl_SetStringObj(Tcl_GetObjResult(interp), charBuf, cnt);
732         return TCL_OK;
733     }
734 
735     if (STREQU(class, "ord")) {
736         if (failIndex) {
737           goto failInvalid;
738         }
739         Tcl_UtfToUniChar(charStr, &uniChar);
740         Tcl_SetIntObj(Tcl_GetObjResult(interp), (int)uniChar);
741         return TCL_OK;
742     }
743 
744     /*
745      * The remainder of cases scan the string, stoping when their test case
746      * fails.  The value of `index' after the loops indicating if it succeeds
747      * or fails and where it fails.
748      */
749     if (STREQU(class, "alnum")) {
750         for (idx = 0; idx < charStrLen; idx++) {
751             if (!Tcl_UniCharIsAlnum(Tcl_UniCharAtIndex(charStr, idx))) {
752                 break;
753             }
754         }
755     } else if (STREQU(class, "alpha")) {
756         for (idx = 0; idx < charStrLen; idx++) {
757             if (!Tcl_UniCharIsAlpha(Tcl_UniCharAtIndex(charStr, idx))) {
758                 break;
759             }
760         }
761     } else if (STREQU(class, "ascii")) {
762         for (idx = 0; idx < charStrLen; idx++) {
763             uniChar = Tcl_UniCharAtIndex(charStr, idx);
764             if (!IS_8BIT_UNICHAR(uniChar)
765                 || !isascii(UCHAR(uniChar))) {
766                 break;
767             }
768         }
769     } else if (STREQU(class, "cntrl")) {
770         for (idx = 0; idx < charStrLen; idx++) {
771             uniChar = Tcl_UniCharAtIndex(charStr, idx);
772             /* Only accepts ascii controls */
773             if (!IS_8BIT_UNICHAR(uniChar)
774                 || !iscntrl(UCHAR(uniChar))) {
775                 break;
776             }
777         }
778     } else if (STREQU(class, "digit")) {
779         for (idx = 0; idx < charStrLen; idx++) {
780             if (!Tcl_UniCharIsDigit(Tcl_UniCharAtIndex(charStr, idx))) {
781                 break;
782             }
783         }
784     } else if (STREQU(class, "graph")) {
785         for (idx = 0; idx < charStrLen; idx++) {
786             uniChar = Tcl_UniCharAtIndex(charStr, idx);
787             if (!IS_8BIT_UNICHAR(uniChar)) {
788                 goto notSupportedUni;
789             }
790             if (!isgraph(UCHAR(uniChar))) {
791                 break;
792             }
793         }
794     } else if (STREQU(class, "lower")) {
795         for (idx = 0; idx < charStrLen; idx++) {
796             if (!Tcl_UniCharIsLower(Tcl_UniCharAtIndex(charStr, idx))) {
797                 break;
798             }
799         }
800     } else if (STREQU(class, "print")) {
801         for (idx = 0; idx < charStrLen; idx++) {
802             uniChar = Tcl_UniCharAtIndex(charStr, idx);
803             if (!IS_8BIT_UNICHAR(uniChar)) {
804                 goto notSupportedUni;
805             }
806             if (!isprint(UCHAR(uniChar))) {
807                 break;
808             }
809         }
810     } else if (STREQU(class, "punct")) {
811         for (idx = 0; idx < charStrLen; idx++) {
812             uniChar = Tcl_UniCharAtIndex(charStr, idx);
813             if (!IS_8BIT_UNICHAR(uniChar)) {
814                 goto notSupportedUni;
815             }
816             if (!ispunct(UCHAR(uniChar))) {
817                 break;
818             }
819         }
820     } else if (STREQU(class, "space")) {
821         for (idx = 0; idx < charStrLen; idx++) {
822             if (!Tcl_UniCharIsSpace(Tcl_UniCharAtIndex(charStr, idx))) {
823                 break;
824             }
825         }
826     } else if (STREQU(class, "upper")) {
827         for (idx = 0; idx < charStrLen; idx++) {
828             if (!Tcl_UniCharIsUpper(Tcl_UniCharAtIndex(charStr, idx))) {
829                 break;
830             }
831         }
832     } else if (STREQU(class, "xdigit")) {
833         for (idx = 0; idx < charStrLen; idx++) {
834             uniChar = Tcl_UniCharAtIndex(charStr, idx);
835             if (!IS_8BIT_UNICHAR(uniChar)) {
836                 goto notSupportedUni;
837             }
838             if (!isxdigit(UCHAR(uniChar))) {
839                 break;
840             }
841         }
842     } else {
843         TclX_AppendObjResult (interp, "unrecognized class specification: \"",
844                               class,
845                               "\", expected one of: alnum, alpha, ascii, ",
846                               "char, cntrl, digit, graph, lower, ord, ",
847                               "print, punct, space, upper or xdigit",
848                               (char *) NULL);
849         return TCL_ERROR;
850     }
851 
852     /*
853      * Return true or false, depending if the end was reached.  Always return
854      * false for a null string.  Optionally return the failed index if there
855      * is no match.
856      */
857     if ((idx != 0) && (idx == charStrLen)) {
858         Tcl_SetBooleanObj (Tcl_GetObjResult (interp), TRUE);
859     } else {
860         /*
861          * If the fail index was requested, set the variable here.
862          */
863         if (failIndex) {
864             Tcl_Obj *iObj = Tcl_NewIntObj (idx);
865 
866             if (Tcl_SetVar2Ex(interp, failVar, NULL,
867                               iObj, TCL_LEAVE_ERR_MSG|TCL_PARSE_PART1) == NULL) {
868                 Tcl_DecrRefCount (iObj);
869                 return TCL_ERROR;
870             }
871         }
872         Tcl_SetBooleanObj (Tcl_GetObjResult (interp), FALSE);
873     }
874     return TCL_OK;
875 
876   wrongNumArgs:
877     return TclX_WrongArgs (interp, objv[0], "?-failindex var? class string");
878 
879   failInvalid:
880     TclX_AppendObjResult (interp, "-failindex option is invalid for class \"",
881                           class, "\"", (char *) NULL);
882     return TCL_ERROR;
883 
884  notSupportedUni:
885     TclX_AppendObjResult (interp, "unicode characters not supported for class \"",
886                           class, "\"", (char *) NULL);
887     return TCL_ERROR;
888 }
889 
890 /*-----------------------------------------------------------------------------
891  * TclX_StringInit --
892  *   Initialize the list commands in an interpreter.
893  *
894  * Parameters:
895  *   o interp - Interpreter to add commands to.
896  *-----------------------------------------------------------------------------
897  */
898 void
TclX_StringInit(interp)899 TclX_StringInit (interp)
900     Tcl_Interp *interp;
901 {
902     Tcl_CreateObjCommand (interp,
903 			  "cindex",
904                           TclX_CindexObjCmd,
905 			  (ClientData) 0,
906                           (Tcl_CmdDeleteProc*) NULL);
907 
908     Tcl_CreateObjCommand (interp,
909 			  "clength",
910                           TclX_ClengthObjCmd,
911 			  (ClientData) 0,
912                           (Tcl_CmdDeleteProc *)NULL);
913 
914     Tcl_CreateObjCommand (interp,
915 			  "cconcat",
916                           TclX_CconcatObjCmd,
917 			  (ClientData) 0,
918                           (Tcl_CmdDeleteProc *)NULL);
919 
920     Tcl_CreateObjCommand (interp,
921 			  "crange",
922                           TclX_CrangeObjCmd,
923 			  (ClientData) TRUE,
924                           (Tcl_CmdDeleteProc*) NULL);
925 
926     Tcl_CreateObjCommand (interp,
927 			  "csubstr",
928                           TclX_CrangeObjCmd,
929 			  (ClientData) FALSE,
930                           (Tcl_CmdDeleteProc*) NULL);
931 
932     Tcl_CreateObjCommand (interp,
933 			  "ccollate",
934                           TclX_CcollateObjCmd,
935 			  (ClientData) 0,
936                           (Tcl_CmdDeleteProc*) NULL);
937 
938     Tcl_CreateObjCommand (interp,
939 			  "replicate",
940                           TclX_ReplicateObjCmd,
941 			  (ClientData) 0,
942                           (Tcl_CmdDeleteProc*) NULL);
943 
944     Tcl_CreateObjCommand (interp,
945 			  "translit",
946                           TclX_TranslitObjCmd,
947 			  (ClientData) 0,
948                           (Tcl_CmdDeleteProc*) NULL);
949 
950     Tcl_CreateObjCommand (interp,
951 			  "ctype",
952                           TclX_CtypeObjCmd,
953 			  (ClientData) 0,
954                           (Tcl_CmdDeleteProc*) NULL);
955 
956     Tcl_CreateObjCommand (interp,
957 			  "ctoken",
958                           TclX_CtokenObjCmd,
959 			  (ClientData) 0,
960                           (Tcl_CmdDeleteProc*) NULL);
961 
962     Tcl_CreateObjCommand (interp,
963 			  "cequal",
964 			  TclX_CequalObjCmd,
965 			  (ClientData) 0,
966                           (Tcl_CmdDeleteProc*) NULL);
967 
968 }
969 
970 
971