1 /*
2  * tclStringObj.c --
3  *
4  *	This file contains functions that implement string operations on Tcl
5  *	objects. Some string operations work with UTF strings and others
6  *	require Unicode format. Functions that require knowledge of the width
7  *	of each character, such as indexing, operate on Unicode data.
8  *
9  *	A Unicode string is an internationalized string. Conceptually, a
10  *	Unicode string is an array of 16-bit quantities organized as a
11  *	sequence of properly formed UTF-8 characters. There is a one-to-one
12  *	map between Unicode and UTF characters. Because Unicode characters
13  *	have a fixed width, operations such as indexing operate on Unicode
14  *	data. The String object is optimized for the case where each UTF char
15  *	in a string is only one byte. In this case, we store the value of
16  *	numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
17  *	is explicitly called).
18  *
19  *	The String object type stores one or both formats. The default
20  *	behavior is to store UTF. Once Unicode is calculated by a function, it
21  *	is stored in the internal rep for future access (without an additional
22  *	O(n) cost).
23  *
24  *	To allow many appends to be done to an object without constantly
25  *	reallocating the space for the string or Unicode representation, we
26  *	allocate double the space for the string or Unicode and use the
27  *	internal representation to keep track of how much space is used vs.
28  *	allocated.
29  *
30  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
31  * Copyright (c) 1999 by Scriptics Corporation.
32  *
33  * See the file "license.terms" for information on usage and redistribution of
34  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
35  */
36 
37 #include "tclInt.h"
38 #include "tommath.h"
39 #include "tclStringRep.h"
40 
41 /*
42  * Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5.
43  * This is an escape hatch in case the changes have some unexpected unwelcome
44  * impact on performance. If things go well, this mechanism can go away when
45  * post-8.6 development begins.
46  */
47 
48 #define COMPAT 0
49 
50 /*
51  * Prototypes for functions defined later in this file:
52  */
53 
54 static void		AppendPrintfToObjVA(Tcl_Obj *objPtr,
55 			    const char *format, va_list argList);
56 static void		AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr,
57 			    const Tcl_UniChar *unicode, int appendNumChars);
58 static void		AppendUnicodeToUtfRep(Tcl_Obj *objPtr,
59 			    const Tcl_UniChar *unicode, int numChars);
60 static void		AppendUtfToUnicodeRep(Tcl_Obj *objPtr,
61 			    const char *bytes, int numBytes);
62 static void		AppendUtfToUtfRep(Tcl_Obj *objPtr,
63 			    const char *bytes, int numBytes);
64 static void		DupStringInternalRep(Tcl_Obj *objPtr,
65 			    Tcl_Obj *copyPtr);
66 static int		ExtendStringRepWithUnicode(Tcl_Obj *objPtr,
67 			    const Tcl_UniChar *unicode, int numChars);
68 static void		ExtendUnicodeRepWithString(Tcl_Obj *objPtr,
69 			    const char *bytes, int numBytes,
70 			    int numAppendChars);
71 static void		FillUnicodeRep(Tcl_Obj *objPtr);
72 static void		FreeStringInternalRep(Tcl_Obj *objPtr);
73 static void		GrowStringBuffer(Tcl_Obj *objPtr, int needed, int flag);
74 static void		GrowUnicodeBuffer(Tcl_Obj *objPtr, int needed);
75 static int		SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
76 static void		SetUnicodeObj(Tcl_Obj *objPtr,
77 			    const Tcl_UniChar *unicode, int numChars);
78 static int		UnicodeLength(const Tcl_UniChar *unicode);
79 static void		UpdateStringOfString(Tcl_Obj *objPtr);
80 
81 /*
82  * The structure below defines the string Tcl object type by means of
83  * functions that can be invoked by generic object code.
84  */
85 
86 const Tcl_ObjType tclStringType = {
87     "string",			/* name */
88     FreeStringInternalRep,	/* freeIntRepPro */
89     DupStringInternalRep,	/* dupIntRepProc */
90     UpdateStringOfString,	/* updateStringProc */
91     SetStringFromAny		/* setFromAnyProc */
92 };
93 
94 /*
95  * TCL STRING GROWTH ALGORITHM
96  *
97  * When growing strings (during an append, for example), the following growth
98  * algorithm is used:
99  *
100  *   Attempt to allocate 2 * (originalLength + appendLength)
101  *   On failure:
102  *	attempt to allocate originalLength + 2*appendLength + TCL_MIN_GROWTH
103  *
104  * This algorithm allows very good performance, as it rapidly increases the
105  * memory allocated for a given string, which minimizes the number of
106  * reallocations that must be performed. However, using only the doubling
107  * algorithm can lead to a significant waste of memory. In particular, it may
108  * fail even when there is sufficient memory available to complete the append
109  * request (but there is not 2*totalLength memory available). So when the
110  * doubling fails (because there is not enough memory available), the
111  * algorithm requests a smaller amount of memory, which is still enough to
112  * cover the request, but which hopefully will be less than the total
113  * available memory.
114  *
115  * The addition of TCL_MIN_GROWTH allows for efficient handling of very
116  * small appends. Without this extra slush factor, a sequence of several small
117  * appends would cause several memory allocations. As long as
118  * TCL_MIN_GROWTH is a reasonable size, we can avoid that behavior.
119  *
120  * The growth algorithm can be tuned by adjusting the following parameters:
121  *
122  * TCL_MIN_GROWTH		Additional space, in bytes, to allocate when
123  *				the double allocation has failed. Default is
124  *				1024 (1 kilobyte).  See tclInt.h.
125  */
126 
127 #ifndef TCL_MIN_UNICHAR_GROWTH
128 #define TCL_MIN_UNICHAR_GROWTH	TCL_MIN_GROWTH/sizeof(Tcl_UniChar)
129 #endif
130 
131 static void
GrowStringBuffer(Tcl_Obj * objPtr,int needed,int flag)132 GrowStringBuffer(
133     Tcl_Obj *objPtr,
134     int needed,
135     int flag)
136 {
137     /*
138      * Pre-conditions:
139      *	objPtr->typePtr == &tclStringType
140      *	needed > stringPtr->allocated
141      *	flag || objPtr->bytes != NULL
142      */
143 
144     String *stringPtr = GET_STRING(objPtr);
145     char *ptr = NULL;
146     int attempt;
147 
148     if (objPtr->bytes == tclEmptyStringRep) {
149 	objPtr->bytes = NULL;
150     }
151     if (flag == 0 || stringPtr->allocated > 0) {
152 	if (needed <= INT_MAX / 2) {
153 	    attempt = 2 * needed;
154 	    ptr = (char *)attemptckrealloc(objPtr->bytes, attempt + 1);
155 	}
156 	if (ptr == NULL) {
157 	    /*
158 	     * Take care computing the amount of modest growth to avoid
159 	     * overflow into invalid argument values for attempt.
160 	     */
161 
162 	    unsigned int limit = INT_MAX - needed;
163 	    unsigned int extra = needed - objPtr->length + TCL_MIN_GROWTH;
164 	    int growth = (int) ((extra > limit) ? limit : extra);
165 
166 	    attempt = needed + growth;
167 	    ptr = (char *)attemptckrealloc(objPtr->bytes, attempt + 1);
168 	}
169     }
170     if (ptr == NULL) {
171 	/*
172 	 * First allocation - just big enough; or last chance fallback.
173 	 */
174 
175 	attempt = needed;
176 	ptr = (char *)ckrealloc(objPtr->bytes, attempt + 1);
177     }
178     objPtr->bytes = ptr;
179     stringPtr->allocated = attempt;
180 }
181 
182 static void
GrowUnicodeBuffer(Tcl_Obj * objPtr,int needed)183 GrowUnicodeBuffer(
184     Tcl_Obj *objPtr,
185     int needed)
186 {
187     /*
188      * Pre-conditions:
189      *	objPtr->typePtr == &tclStringType
190      *	needed > stringPtr->maxChars
191      *	needed < STRING_MAXCHARS
192      */
193 
194     String *ptr = NULL, *stringPtr = GET_STRING(objPtr);
195     int attempt;
196 
197     if (stringPtr->maxChars > 0) {
198 	/*
199 	 * Subsequent appends - apply the growth algorithm.
200 	 */
201 
202 	if (needed <= STRING_MAXCHARS / 2) {
203 	    attempt = 2 * needed;
204 	    ptr = stringAttemptRealloc(stringPtr, attempt);
205 	}
206 	if (ptr == NULL) {
207 	    /*
208 	     * Take care computing the amount of modest growth to avoid
209 	     * overflow into invalid argument values for attempt.
210 	     */
211 
212 	    unsigned int limit = STRING_MAXCHARS - needed;
213 	    unsigned int extra = needed - stringPtr->numChars
214 		    + TCL_MIN_UNICHAR_GROWTH;
215 	    int growth = (int) ((extra > limit) ? limit : extra);
216 
217 	    attempt = needed + growth;
218 	    ptr = stringAttemptRealloc(stringPtr, attempt);
219 	}
220     }
221     if (ptr == NULL) {
222 	/*
223 	 * First allocation - just big enough; or last chance fallback.
224 	 */
225 
226 	attempt = needed;
227 	ptr = stringRealloc(stringPtr, attempt);
228     }
229     stringPtr = ptr;
230     stringPtr->maxChars = attempt;
231     SET_STRING(objPtr, stringPtr);
232 }
233 
234 /*
235  *----------------------------------------------------------------------
236  *
237  * Tcl_NewStringObj --
238  *
239  *	This function is normally called when not debugging: i.e., when
240  *	TCL_MEM_DEBUG is not defined. It creates a new string object and
241  *	initializes it from the byte pointer and length arguments.
242  *
243  *	When TCL_MEM_DEBUG is defined, this function just returns the result
244  *	of calling the debugging version Tcl_DbNewStringObj.
245  *
246  * Results:
247  *	A newly created string object is returned that has ref count zero.
248  *
249  * Side effects:
250  *	The new object's internal string representation will be set to a copy
251  *	of the length bytes starting at "bytes". If "length" is negative, use
252  *	bytes up to the first NUL byte; i.e., assume "bytes" points to a
253  *	C-style NUL-terminated string. The object's type is set to NULL. An
254  *	extra NUL is added to the end of the new object's byte array.
255  *
256  *----------------------------------------------------------------------
257  */
258 
259 #ifdef TCL_MEM_DEBUG
260 #undef Tcl_NewStringObj
261 Tcl_Obj *
Tcl_NewStringObj(const char * bytes,int length)262 Tcl_NewStringObj(
263     const char *bytes,		/* Points to the first of the length bytes
264 				 * used to initialize the new object. */
265     int length)			/* The number of bytes to copy from "bytes"
266 				 * when initializing the new object. If
267 				 * negative, use bytes up to the first NUL
268 				 * byte. */
269 {
270     return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
271 }
272 #else /* if not TCL_MEM_DEBUG */
273 Tcl_Obj *
Tcl_NewStringObj(const char * bytes,int length)274 Tcl_NewStringObj(
275     const char *bytes,		/* Points to the first of the length bytes
276 				 * used to initialize the new object. */
277     int length)			/* The number of bytes to copy from "bytes"
278 				 * when initializing the new object. If
279 				 * negative, use bytes up to the first NUL
280 				 * byte. */
281 {
282     Tcl_Obj *objPtr;
283 
284     if (length < 0) {
285 	length = (bytes? strlen(bytes) : 0);
286     }
287     TclNewStringObj(objPtr, bytes, length);
288     return objPtr;
289 }
290 #endif /* TCL_MEM_DEBUG */
291 
292 /*
293  *----------------------------------------------------------------------
294  *
295  * Tcl_DbNewStringObj --
296  *
297  *	This function is normally called when debugging: i.e., when
298  *	TCL_MEM_DEBUG is defined. It creates new string objects. It is the
299  *	same as the Tcl_NewStringObj function above except that it calls
300  *	Tcl_DbCkalloc directly with the file name and line number from its
301  *	caller. This simplifies debugging since then the [memory active]
302  *	command will report the correct file name and line number when
303  *	reporting objects that haven't been freed.
304  *
305  *	When TCL_MEM_DEBUG is not defined, this function just returns the
306  *	result of calling Tcl_NewStringObj.
307  *
308  * Results:
309  *	A newly created string object is returned that has ref count zero.
310  *
311  * Side effects:
312  *	The new object's internal string representation will be set to a copy
313  *	of the length bytes starting at "bytes". If "length" is negative, use
314  *	bytes up to the first NUL byte; i.e., assume "bytes" points to a
315  *	C-style NUL-terminated string. The object's type is set to NULL. An
316  *	extra NUL is added to the end of the new object's byte array.
317  *
318  *----------------------------------------------------------------------
319  */
320 
321 #ifdef TCL_MEM_DEBUG
322 Tcl_Obj *
Tcl_DbNewStringObj(const char * bytes,int length,const char * file,int line)323 Tcl_DbNewStringObj(
324     const char *bytes,		/* Points to the first of the length bytes
325 				 * used to initialize the new object. */
326     int length,			/* The number of bytes to copy from "bytes"
327 				 * when initializing the new object. If
328 				 * negative, use bytes up to the first NUL
329 				 * byte. */
330     const char *file,		/* The name of the source file calling this
331 				 * function; used for debugging. */
332     int line)			/* Line number in the source file; used for
333 				 * debugging. */
334 {
335     Tcl_Obj *objPtr;
336 
337     if (length < 0) {
338 	length = (bytes? strlen(bytes) : 0);
339     }
340     TclDbNewObj(objPtr, file, line);
341     TclInitStringRep(objPtr, bytes, length);
342     return objPtr;
343 }
344 #else /* if not TCL_MEM_DEBUG */
345 Tcl_Obj *
Tcl_DbNewStringObj(const char * bytes,int length,const char * file,int line)346 Tcl_DbNewStringObj(
347     const char *bytes,		/* Points to the first of the length bytes
348 				 * used to initialize the new object. */
349     int length,			/* The number of bytes to copy from "bytes"
350 				 * when initializing the new object. If
351 				 * negative, use bytes up to the first NUL
352 				 * byte. */
353     const char *file,		/* The name of the source file calling this
354 				 * function; used for debugging. */
355     int line)			/* Line number in the source file; used for
356 				 * debugging. */
357 {
358     return Tcl_NewStringObj(bytes, length);
359 }
360 #endif /* TCL_MEM_DEBUG */
361 
362 /*
363  *---------------------------------------------------------------------------
364  *
365  * Tcl_NewUnicodeObj --
366  *
367  *	This function is creates a new String object and initializes it from
368  *	the given Unicode String. If the Utf String is the same size as the
369  *	Unicode string, don't duplicate the data.
370  *
371  * Results:
372  *	The newly created object is returned. This object will have no initial
373  *	string representation. The returned object has a ref count of 0.
374  *
375  * Side effects:
376  *	Memory allocated for new object and copy of Unicode argument.
377  *
378  *---------------------------------------------------------------------------
379  */
380 
381 Tcl_Obj *
Tcl_NewUnicodeObj(const Tcl_UniChar * unicode,int numChars)382 Tcl_NewUnicodeObj(
383     const Tcl_UniChar *unicode,	/* The unicode string used to initialize the
384 				 * new object. */
385     int numChars)		/* Number of characters in the unicode
386 				 * string. */
387 {
388     Tcl_Obj *objPtr;
389 
390     TclNewObj(objPtr);
391     SetUnicodeObj(objPtr, unicode, numChars);
392     return objPtr;
393 }
394 
395 /*
396  *----------------------------------------------------------------------
397  *
398  * Tcl_GetCharLength --
399  *
400  *	Get the length of the Unicode string from the Tcl object.
401  *
402  * Results:
403  *	Pointer to unicode string representing the unicode object.
404  *
405  * Side effects:
406  *	Frees old internal rep. Allocates memory for new "String" internal
407  *	rep.
408  *
409  *----------------------------------------------------------------------
410  */
411 
412 int
Tcl_GetCharLength(Tcl_Obj * objPtr)413 Tcl_GetCharLength(
414     Tcl_Obj *objPtr)		/* The String object to get the num chars
415 				 * of. */
416 {
417     String *stringPtr;
418     int numChars;
419 
420     /*
421      * Quick, no-shimmer return for short string reps.
422      */
423 
424     if ((objPtr->bytes) && (objPtr->length < 2)) {
425 	/* 0 bytes -> 0 chars; 1 byte -> 1 char */
426 	return objPtr->length;
427     }
428 
429     /*
430      * Optimize the case where we're really dealing with a bytearray object;
431      * we don't need to convert to a string to perform the get-length operation.
432      *
433      * NOTE that we do not need the bytearray to be "pure".  A ByteArray value
434      * with a string rep cannot be trusted to represent the same value as the
435      * string rep, but it *can* be trusted to have the same character length
436      * as the string rep, which is all this routine cares about.
437      */
438 
439     if (objPtr->typePtr == &tclByteArrayType) {
440 	int length;
441 
442 	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
443 	return length;
444     }
445 
446     /*
447      * OK, need to work with the object as a string.
448      */
449 
450     SetStringFromAny(NULL, objPtr);
451     stringPtr = GET_STRING(objPtr);
452     numChars = stringPtr->numChars;
453 
454     /*
455      * If numChars is unknown, compute it.
456      */
457 
458     if (numChars == -1) {
459 	TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
460 	stringPtr->numChars = numChars;
461 
462 #if COMPAT
463 	if (numChars < objPtr->length) {
464 	    /*
465 	     * Since we've just computed the number of chars, and not all UTF
466 	     * chars are 1-byte long, go ahead and populate the unicode
467 	     * string.
468 	     */
469 
470 	    FillUnicodeRep(objPtr);
471 	}
472 #endif
473     }
474     return numChars;
475 }
476 
477 /*
478  *----------------------------------------------------------------------
479  *
480  * TclCheckEmptyString --
481  *
482  *	Determine whether the string value of an object is or would be the
483  *	empty string, without generating a string representation.
484  *
485  * Results:
486  *	Returns 1 if empty, 0 if not, and -1 if unknown.
487  *
488  * Side effects:
489  *	None.
490  *
491  *----------------------------------------------------------------------
492  */
493 int
TclCheckEmptyString(Tcl_Obj * objPtr)494 TclCheckEmptyString (
495     Tcl_Obj *objPtr)
496 {
497     int length = -1;
498 
499     if (objPtr->bytes == tclEmptyStringRep) {
500 	return TCL_EMPTYSTRING_YES;
501     }
502 
503     if (TclIsPureList(objPtr)) {
504 	Tcl_ListObjLength(NULL, objPtr, &length);
505 	return length == 0;
506     }
507 
508     if (TclIsPureDict(objPtr)) {
509 	Tcl_DictObjSize(NULL, objPtr, &length);
510 	return length == 0;
511     }
512 
513     if (objPtr->bytes == NULL) {
514 	return TCL_EMPTYSTRING_UNKNOWN;
515     }
516     return objPtr->length == 0;
517 }
518 
519 /*
520  *----------------------------------------------------------------------
521  *
522  * Tcl_GetUniChar/TclGetUCS4 --
523  *
524  *	Get the index'th Unicode character from the String object. If index
525  *	is out of range, the result = 0xFFFD (Tcl_GetUniChar) resp. -1 (TclGetUCS4)
526  *
527  * Results:
528  *	Returns the index'th Unicode character in the Object.
529  *
530  * Side effects:
531  *	Fills unichar with the index'th Unicode character.
532  *
533  *----------------------------------------------------------------------
534  */
535 
536 Tcl_UniChar
Tcl_GetUniChar(Tcl_Obj * objPtr,int index)537 Tcl_GetUniChar(
538     Tcl_Obj *objPtr,		/* The object to get the Unicode charater
539 				 * from. */
540     int index)			/* Get the index'th Unicode character. */
541 {
542     String *stringPtr;
543     int length;
544 
545     if (index < 0) {
546 	return 0xFFFD;
547     }
548 
549     /*
550      * Optimize the case where we're really dealing with a bytearray object
551      * we don't need to convert to a string to perform the indexing operation.
552      */
553 
554     if (TclIsPureByteArray(objPtr)) {
555 	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
556 	if (index >= length) {
557 		return 0xFFFD;
558 	}
559 
560 	return (Tcl_UniChar) bytes[index];
561     }
562 
563     /*
564      * OK, need to work with the object as a string.
565      */
566 
567     SetStringFromAny(NULL, objPtr);
568     stringPtr = GET_STRING(objPtr);
569 
570     if (stringPtr->hasUnicode == 0) {
571 	/*
572 	 * If numChars is unknown, compute it.
573 	 */
574 
575 	if (stringPtr->numChars == -1) {
576 	    TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
577 	}
578 	if (stringPtr->numChars == objPtr->length) {
579 	    return (Tcl_UniChar) objPtr->bytes[index];
580 	}
581 	FillUnicodeRep(objPtr);
582 	stringPtr = GET_STRING(objPtr);
583     }
584 
585     if (index >= stringPtr->numChars) {
586 	return 0xFFFD;
587     }
588     return stringPtr->unicode[index];
589 }
590 
591 #if TCL_UTF_MAX == 4
592 int
TclGetUCS4(Tcl_Obj * objPtr,int index)593 TclGetUCS4(
594     Tcl_Obj *objPtr,		/* The object to get the Unicode charater
595 				 * from. */
596     int index)			/* Get the index'th Unicode character. */
597 {
598     String *stringPtr;
599     int ch, length;
600 
601     if (index < 0) {
602 	return -1;
603     }
604 
605     /*
606      * Optimize the case where we're really dealing with a bytearray object
607      * we don't need to convert to a string to perform the indexing operation.
608      */
609 
610     if (TclIsPureByteArray(objPtr)) {
611 	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
612 	if (index >= length) {
613 		return -1;
614 	}
615 
616 	return (int) bytes[index];
617     }
618 
619     /*
620      * OK, need to work with the object as a string.
621      */
622 
623     SetStringFromAny(NULL, objPtr);
624     stringPtr = GET_STRING(objPtr);
625 
626     if (stringPtr->hasUnicode == 0) {
627 	/*
628 	 * If numChars is unknown, compute it.
629 	 */
630 
631 	if (stringPtr->numChars == -1) {
632 	    TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
633 	}
634 	if (stringPtr->numChars == objPtr->length) {
635 	    return (Tcl_UniChar) objPtr->bytes[index];
636 	}
637 	FillUnicodeRep(objPtr);
638 	stringPtr = GET_STRING(objPtr);
639     }
640 
641     if (index >= stringPtr->numChars) {
642 	return -1;
643     }
644     ch = stringPtr->unicode[index];
645 #if TCL_UTF_MAX <= 4
646     /* See: bug [11ae2be95dac9417] */
647     if ((ch & 0xF800) == 0xD800) {
648 	if (ch & 0x400) {
649 	    if ((index > 0)
650 		    && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
651 		ch = -1; /* low surrogate preceded by high surrogate */
652 	    }
653 	} else if ((++index < stringPtr->numChars)
654 		&& ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) {
655 	    /* high surrogate followed by low surrogate */
656 	    ch = (((ch & 0x3FF) << 10) |
657 			(stringPtr->unicode[index] & 0x3FF)) + 0x10000;
658 	}
659     }
660 #endif
661     return ch;
662 }
663 #endif
664 
665 /*
666  *----------------------------------------------------------------------
667  *
668  * Tcl_GetUnicode --
669  *
670  *	Get the Unicode form of the String object. If the object is not
671  *	already a String object, it will be converted to one. If the String
672  *	object does not have a Unicode rep, then one is created from the UTF
673  *	string format.
674  *
675  * Results:
676  *	Returns a pointer to the object's internal Unicode string.
677  *
678  * Side effects:
679  *	Converts the object to have the String internal rep.
680  *
681  *----------------------------------------------------------------------
682  */
683 
684 Tcl_UniChar *
Tcl_GetUnicode(Tcl_Obj * objPtr)685 Tcl_GetUnicode(
686     Tcl_Obj *objPtr)		/* The object to find the unicode string
687 				 * for. */
688 {
689     return Tcl_GetUnicodeFromObj(objPtr, NULL);
690 }
691 
692 /*
693  *----------------------------------------------------------------------
694  *
695  * Tcl_GetUnicodeFromObj --
696  *
697  *	Get the Unicode form of the String object with length. If the object
698  *	is not already a String object, it will be converted to one. If the
699  *	String object does not have a Unicode rep, then one is create from the
700  *	UTF string format.
701  *
702  * Results:
703  *	Returns a pointer to the object's internal Unicode string.
704  *
705  * Side effects:
706  *	Converts the object to have the String internal rep.
707  *
708  *----------------------------------------------------------------------
709  */
710 
711 Tcl_UniChar *
Tcl_GetUnicodeFromObj(Tcl_Obj * objPtr,int * lengthPtr)712 Tcl_GetUnicodeFromObj(
713     Tcl_Obj *objPtr,		/* The object to find the unicode string
714 				 * for. */
715     int *lengthPtr)		/* If non-NULL, the location where the string
716 				 * rep's unichar length should be stored. If
717 				 * NULL, no length is stored. */
718 {
719     String *stringPtr;
720 
721     SetStringFromAny(NULL, objPtr);
722     stringPtr = GET_STRING(objPtr);
723 
724     if (stringPtr->hasUnicode == 0) {
725 	FillUnicodeRep(objPtr);
726 	stringPtr = GET_STRING(objPtr);
727     }
728 
729     if (lengthPtr != NULL) {
730 	*lengthPtr = stringPtr->numChars;
731     }
732     return stringPtr->unicode;
733 }
734 
735 /*
736  *----------------------------------------------------------------------
737  *
738  * Tcl_GetRange --
739  *
740  *	Create a Tcl Object that contains the chars between first and last of
741  *	the object indicated by "objPtr". If the object is not already a
742  *	String object, convert it to one. The first and last indices are
743  *	assumed to be in the appropriate range.
744  *
745  * Results:
746  *	Returns a new Tcl Object of the String type.
747  *
748  * Side effects:
749  *	Changes the internal rep of "objPtr" to the String type.
750  *
751  *----------------------------------------------------------------------
752  */
753 
754 Tcl_Obj *
Tcl_GetRange(Tcl_Obj * objPtr,int first,int last)755 Tcl_GetRange(
756     Tcl_Obj *objPtr,		/* The Tcl object to find the range of. */
757     int first,			/* First index of the range. */
758     int last)			/* Last index of the range. */
759 {
760     Tcl_Obj *newObjPtr;		/* The Tcl object to find the range of. */
761     String *stringPtr;
762     int length;
763 
764     if (first < 0) {
765 	first = 0;
766     }
767 
768     /*
769      * Optimize the case where we're really dealing with a bytearray object
770      * we don't need to convert to a string to perform the substring operation.
771      */
772 
773     if (TclIsPureByteArray(objPtr)) {
774 	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
775 
776 	if (last >= length) {
777 	    last = length - 1;
778 	}
779 	if (last < first) {
780 	    return Tcl_NewObj();
781 	}
782 	return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
783     }
784 
785     /*
786      * OK, need to work with the object as a string.
787      */
788 
789     SetStringFromAny(NULL, objPtr);
790     stringPtr = GET_STRING(objPtr);
791 
792     if (stringPtr->hasUnicode == 0) {
793 	/*
794 	 * If numChars is unknown, compute it.
795 	 */
796 
797 	if (stringPtr->numChars == -1) {
798 	    TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
799 	}
800 	if (stringPtr->numChars == objPtr->length) {
801 	    if (last >= stringPtr->numChars) {
802 		last = stringPtr->numChars - 1;
803 	    }
804 	    if (last < first) {
805 		return Tcl_NewObj();
806 	    }
807 	    newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1);
808 
809 	    /*
810 	     * Since we know the char length of the result, store it.
811 	     */
812 
813 	    SetStringFromAny(NULL, newObjPtr);
814 	    stringPtr = GET_STRING(newObjPtr);
815 	    stringPtr->numChars = newObjPtr->length;
816 	    return newObjPtr;
817 	}
818 	FillUnicodeRep(objPtr);
819 	stringPtr = GET_STRING(objPtr);
820     }
821     if (last > stringPtr->numChars) {
822 	last = stringPtr->numChars;
823     }
824     if (last < first) {
825 	return Tcl_NewObj();
826     }
827 #if TCL_UTF_MAX == 4
828     /* See: bug [11ae2be95dac9417] */
829     if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
830 		&& ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
831 	++first;
832     }
833     if ((last + 1 < stringPtr->numChars)
834 		&& ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00)
835 		&& ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) {
836 	++last;
837     }
838 #endif
839     return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
840 }
841 
842 /*
843  *----------------------------------------------------------------------
844  *
845  * Tcl_SetStringObj --
846  *
847  *	Modify an object to hold a string that is a copy of the bytes
848  *	indicated by the byte pointer and length arguments.
849  *
850  * Results:
851  *	None.
852  *
853  * Side effects:
854  *	The object's string representation will be set to a copy of the
855  *	"length" bytes starting at "bytes". If "length" is negative, use bytes
856  *	up to the first NUL byte; i.e., assume "bytes" points to a C-style
857  *	NUL-terminated string. The object's old string and internal
858  *	representations are freed and the object's type is set NULL.
859  *
860  *----------------------------------------------------------------------
861  */
862 
863 void
Tcl_SetStringObj(Tcl_Obj * objPtr,const char * bytes,int length)864 Tcl_SetStringObj(
865     Tcl_Obj *objPtr,		/* Object whose internal rep to init. */
866     const char *bytes,		/* Points to the first of the length bytes
867 				 * used to initialize the object. */
868     int length)			/* The number of bytes to copy from "bytes"
869 				 * when initializing the object. If negative,
870 				 * use bytes up to the first NUL byte.*/
871 {
872     if (Tcl_IsShared(objPtr)) {
873 	Tcl_Panic("%s called with shared object", "Tcl_SetStringObj");
874     }
875 
876     /*
877      * Set the type to NULL and free any internal rep for the old type.
878      */
879 
880     TclFreeIntRep(objPtr);
881 
882     /*
883      * Free any old string rep, then set the string rep to a copy of the
884      * length bytes starting at "bytes".
885      */
886 
887     TclInvalidateStringRep(objPtr);
888     if (length < 0) {
889 	length = (bytes? strlen(bytes) : 0);
890     }
891     TclInitStringRep(objPtr, bytes, length);
892 }
893 
894 /*
895  *----------------------------------------------------------------------
896  *
897  * Tcl_SetObjLength --
898  *
899  *	This function changes the length of the string representation of an
900  *	object.
901  *
902  * Results:
903  *	None.
904  *
905  * Side effects:
906  *	If the size of objPtr's string representation is greater than length,
907  *	then it is reduced to length and a new terminating null byte is stored
908  *	in the strength. If the length of the string representation is greater
909  *	than length, the storage space is reallocated to the given length; a
910  *	null byte is stored at the end, but other bytes past the end of the
911  *	original string representation are undefined. The object's internal
912  *	representation is changed to "expendable string".
913  *
914  *----------------------------------------------------------------------
915  */
916 
917 void
Tcl_SetObjLength(Tcl_Obj * objPtr,int length)918 Tcl_SetObjLength(
919     Tcl_Obj *objPtr,		/* Pointer to object. This object must not
920 				 * currently be shared. */
921     int length)			/* Number of bytes desired for string
922 				 * representation of object, not including
923 				 * terminating null byte. */
924 {
925     String *stringPtr;
926 
927     if (length < 0) {
928 	/*
929 	 * Setting to a negative length is nonsense. This is probably the
930 	 * result of overflowing the signed integer range.
931 	 */
932 
933 	Tcl_Panic("Tcl_SetObjLength: negative length requested: "
934 		"%d (integer overflow?)", length);
935     }
936     if (Tcl_IsShared(objPtr)) {
937 	Tcl_Panic("%s called with shared object", "Tcl_SetObjLength");
938     }
939 
940     if (objPtr->bytes && objPtr->length == length) {
941 	return;
942     }
943 
944     SetStringFromAny(NULL, objPtr);
945     stringPtr = GET_STRING(objPtr);
946 
947     if (objPtr->bytes != NULL) {
948 	/*
949 	 * Change length of an existing string rep.
950 	 */
951 	if (length > stringPtr->allocated) {
952 	    /*
953 	     * Need to enlarge the buffer.
954 	     */
955 	    if (objPtr->bytes == tclEmptyStringRep) {
956 		objPtr->bytes = (char *)ckalloc(length + 1);
957 	    } else {
958 		objPtr->bytes = (char *)ckrealloc(objPtr->bytes, length + 1);
959 	    }
960 	    stringPtr->allocated = length;
961 	}
962 
963 	objPtr->length = length;
964 	objPtr->bytes[length] = 0;
965 
966 	/*
967 	 * Invalidate the unicode data.
968 	 */
969 
970 	stringPtr->numChars = -1;
971 	stringPtr->hasUnicode = 0;
972     } else {
973 	/*
974 	 * Changing length of pure unicode string.
975 	 */
976 
977 	stringCheckLimits(length);
978 	if (length > stringPtr->maxChars) {
979 	    stringPtr = stringRealloc(stringPtr, length);
980 	    SET_STRING(objPtr, stringPtr);
981 	    stringPtr->maxChars = length;
982 	}
983 
984 	/*
985 	 * Mark the new end of the unicode string
986 	 */
987 
988 	stringPtr->numChars = length;
989 	stringPtr->unicode[length] = 0;
990 	stringPtr->hasUnicode = 1;
991 
992 	/*
993 	 * Can only get here when objPtr->bytes == NULL. No need to invalidate
994 	 * the string rep.
995 	 */
996     }
997 }
998 
999 /*
1000  *----------------------------------------------------------------------
1001  *
1002  * Tcl_AttemptSetObjLength --
1003  *
1004  *	This function changes the length of the string representation of an
1005  *	object. It uses the attempt* (non-panic'ing) memory allocators.
1006  *
1007  * Results:
1008  *	1 if the requested memory was allocated, 0 otherwise.
1009  *
1010  * Side effects:
1011  *	If the size of objPtr's string representation is greater than length,
1012  *	then it is reduced to length and a new terminating null byte is stored
1013  *	in the strength. If the length of the string representation is greater
1014  *	than length, the storage space is reallocated to the given length; a
1015  *	null byte is stored at the end, but other bytes past the end of the
1016  *	original string representation are undefined. The object's internal
1017  *	representation is changed to "expendable string".
1018  *
1019  *----------------------------------------------------------------------
1020  */
1021 
1022 int
Tcl_AttemptSetObjLength(Tcl_Obj * objPtr,int length)1023 Tcl_AttemptSetObjLength(
1024     Tcl_Obj *objPtr,		/* Pointer to object. This object must not
1025 				 * currently be shared. */
1026     int length)			/* Number of bytes desired for string
1027 				 * representation of object, not including
1028 				 * terminating null byte. */
1029 {
1030     String *stringPtr;
1031 
1032     if (length < 0) {
1033 	/*
1034 	 * Setting to a negative length is nonsense. This is probably the
1035 	 * result of overflowing the signed integer range.
1036 	 */
1037 
1038 	return 0;
1039     }
1040     if (Tcl_IsShared(objPtr)) {
1041 	Tcl_Panic("%s called with shared object", "Tcl_AttemptSetObjLength");
1042     }
1043     if (objPtr->bytes && objPtr->length == length) {
1044 	return 1;
1045     }
1046 
1047     SetStringFromAny(NULL, objPtr);
1048     stringPtr = GET_STRING(objPtr);
1049 
1050     if (objPtr->bytes != NULL) {
1051 	/*
1052 	 * Change length of an existing string rep.
1053 	 */
1054 	if (length > stringPtr->allocated) {
1055 	    /*
1056 	     * Need to enlarge the buffer.
1057 	     */
1058 
1059 	    char *newBytes;
1060 
1061 	    if (objPtr->bytes == tclEmptyStringRep) {
1062 		newBytes = (char *)attemptckalloc(length + 1);
1063 	    } else {
1064 		newBytes = (char *)attemptckrealloc(objPtr->bytes, length + 1);
1065 	    }
1066 	    if (newBytes == NULL) {
1067 		return 0;
1068 	    }
1069 	    objPtr->bytes = newBytes;
1070 	    stringPtr->allocated = length;
1071 	}
1072 
1073 	objPtr->length = length;
1074 	objPtr->bytes[length] = 0;
1075 
1076 	/*
1077 	 * Invalidate the unicode data.
1078 	 */
1079 
1080 	stringPtr->numChars = -1;
1081 	stringPtr->hasUnicode = 0;
1082     } else {
1083 	/*
1084 	 * Changing length of pure unicode string.
1085 	 */
1086 
1087 	if (length > STRING_MAXCHARS) {
1088 	    return 0;
1089 	}
1090 	if (length > stringPtr->maxChars) {
1091 	    stringPtr = stringAttemptRealloc(stringPtr, length);
1092 	    if (stringPtr == NULL) {
1093 		return 0;
1094 	    }
1095 	    SET_STRING(objPtr, stringPtr);
1096 	    stringPtr->maxChars = length;
1097 	}
1098 
1099 	/*
1100 	 * Mark the new end of the unicode string.
1101 	 */
1102 
1103 	stringPtr->unicode[length] = 0;
1104 	stringPtr->numChars = length;
1105 	stringPtr->hasUnicode = 1;
1106 
1107 	/*
1108 	 * Can only get here when objPtr->bytes == NULL. No need to invalidate
1109 	 * the string rep.
1110 	 */
1111     }
1112     return 1;
1113 }
1114 
1115 /*
1116  *---------------------------------------------------------------------------
1117  *
1118  * Tcl_SetUnicodeObj --
1119  *
1120  *	Modify an object to hold the Unicode string indicated by "unicode".
1121  *
1122  * Results:
1123  *	None.
1124  *
1125  * Side effects:
1126  *	Memory allocated for new "String" internal rep.
1127  *
1128  *---------------------------------------------------------------------------
1129  */
1130 
1131 void
Tcl_SetUnicodeObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1132 Tcl_SetUnicodeObj(
1133     Tcl_Obj *objPtr,		/* The object to set the string of. */
1134     const Tcl_UniChar *unicode,	/* The unicode string used to initialize the
1135 				 * object. */
1136     int numChars)		/* Number of characters in the unicode
1137 				 * string. */
1138 {
1139     if (Tcl_IsShared(objPtr)) {
1140 	Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj");
1141     }
1142     TclFreeIntRep(objPtr);
1143     SetUnicodeObj(objPtr, unicode, numChars);
1144 }
1145 
1146 static int
UnicodeLength(const Tcl_UniChar * unicode)1147 UnicodeLength(
1148     const Tcl_UniChar *unicode)
1149 {
1150     int numChars = 0;
1151 
1152     if (unicode) {
1153 	while (numChars >= 0 && unicode[numChars] != 0) {
1154 	    numChars++;
1155 	}
1156     }
1157     stringCheckLimits(numChars);
1158     return numChars;
1159 }
1160 
1161 static void
SetUnicodeObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1162 SetUnicodeObj(
1163     Tcl_Obj *objPtr,		/* The object to set the string of. */
1164     const Tcl_UniChar *unicode,	/* The unicode string used to initialize the
1165 				 * object. */
1166     int numChars)		/* Number of characters in the unicode
1167 				 * string. */
1168 {
1169     String *stringPtr;
1170 
1171     if (numChars < 0) {
1172 	numChars = UnicodeLength(unicode);
1173     }
1174 
1175     /*
1176      * Allocate enough space for the String structure + Unicode string.
1177      */
1178 
1179     stringCheckLimits(numChars);
1180     stringPtr = stringAlloc(numChars);
1181     SET_STRING(objPtr, stringPtr);
1182     objPtr->typePtr = &tclStringType;
1183 
1184     stringPtr->maxChars = numChars;
1185     memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar));
1186     stringPtr->unicode[numChars] = 0;
1187     stringPtr->numChars = numChars;
1188     stringPtr->hasUnicode = 1;
1189 
1190     TclInvalidateStringRep(objPtr);
1191     stringPtr->allocated = 0;
1192 }
1193 
1194 /*
1195  *----------------------------------------------------------------------
1196  *
1197  * Tcl_AppendLimitedToObj --
1198  *
1199  *	This function appends a limited number of bytes from a sequence of
1200  *	bytes to an object, marking any limitation with an ellipsis.
1201  *
1202  * Results:
1203  *	None.
1204  *
1205  * Side effects:
1206  *	The bytes at *bytes are appended to the string representation of
1207  *	objPtr.
1208  *
1209  *----------------------------------------------------------------------
1210  */
1211 
1212 void
Tcl_AppendLimitedToObj(Tcl_Obj * objPtr,const char * bytes,int length,int limit,const char * ellipsis)1213 Tcl_AppendLimitedToObj(
1214     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1215     const char *bytes,		/* Points to the bytes to append to the
1216 				 * object. */
1217     int length,			/* The number of bytes available to be
1218 				 * appended from "bytes". If < 0, then all
1219 				 * bytes up to a NUL byte are available. */
1220     int limit,			/* The maximum number of bytes to append to
1221 				 * the object. */
1222     const char *ellipsis)	/* Ellipsis marker string, appended to the
1223 				 * object to indicate not all available bytes
1224 				 * at "bytes" were appended. */
1225 {
1226     String *stringPtr;
1227     int toCopy = 0;
1228     int eLen = 0;
1229 
1230     if (length < 0) {
1231 	length = (bytes ? strlen(bytes) : 0);
1232     }
1233     if (length == 0) {
1234 	return;
1235     }
1236     if (limit <= 0) {
1237 	return;
1238     }
1239 
1240     if (length <= limit) {
1241 	toCopy = length;
1242     } else {
1243 	if (ellipsis == NULL) {
1244 	    ellipsis = "...";
1245 	}
1246 	eLen = strlen(ellipsis);
1247 	while (eLen > limit) {
1248 	    eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
1249 	}
1250 
1251 	toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
1252     }
1253 
1254     /*
1255      * If objPtr has a valid Unicode rep, then append the Unicode conversion
1256      * of "bytes" to the objPtr's Unicode rep, otherwise append "bytes" to
1257      * objPtr's string rep.
1258      */
1259 
1260     if (Tcl_IsShared(objPtr)) {
1261 	Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj");
1262     }
1263 
1264     SetStringFromAny(NULL, objPtr);
1265     stringPtr = GET_STRING(objPtr);
1266 
1267     if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
1268 	AppendUtfToUnicodeRep(objPtr, bytes, toCopy);
1269     } else {
1270 	AppendUtfToUtfRep(objPtr, bytes, toCopy);
1271     }
1272 
1273     if (length <= limit) {
1274 	return;
1275     }
1276 
1277     stringPtr = GET_STRING(objPtr);
1278     if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
1279 	AppendUtfToUnicodeRep(objPtr, ellipsis, eLen);
1280     } else {
1281 	AppendUtfToUtfRep(objPtr, ellipsis, eLen);
1282     }
1283 }
1284 
1285 /*
1286  *----------------------------------------------------------------------
1287  *
1288  * Tcl_AppendToObj --
1289  *
1290  *	This function appends a sequence of bytes to an object.
1291  *
1292  * Results:
1293  *	None.
1294  *
1295  * Side effects:
1296  *	The bytes at *bytes are appended to the string representation of
1297  *	objPtr.
1298  *
1299  *----------------------------------------------------------------------
1300  */
1301 
1302 void
Tcl_AppendToObj(Tcl_Obj * objPtr,const char * bytes,int length)1303 Tcl_AppendToObj(
1304     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1305     const char *bytes,		/* Points to the bytes to append to the
1306 				 * object. */
1307     int length)			/* The number of bytes to append from "bytes".
1308 				 * If < 0, then append all bytes up to NUL
1309 				 * byte. */
1310 {
1311     Tcl_AppendLimitedToObj(objPtr, bytes, length, INT_MAX, NULL);
1312 }
1313 
1314 /*
1315  *----------------------------------------------------------------------
1316  *
1317  * Tcl_AppendUnicodeToObj --
1318  *
1319  *	This function appends a Unicode string to an object in the most
1320  *	efficient manner possible. Length must be >= 0.
1321  *
1322  * Results:
1323  *	None.
1324  *
1325  * Side effects:
1326  *	Invalidates the string rep and creates a new Unicode string.
1327  *
1328  *----------------------------------------------------------------------
1329  */
1330 
1331 void
Tcl_AppendUnicodeToObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int length)1332 Tcl_AppendUnicodeToObj(
1333     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1334     const Tcl_UniChar *unicode,	/* The unicode string to append to the
1335 				 * object. */
1336     int length)			/* Number of chars in "unicode". */
1337 {
1338     String *stringPtr;
1339 
1340     if (Tcl_IsShared(objPtr)) {
1341 	Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
1342     }
1343 
1344     if (length == 0) {
1345 	return;
1346     }
1347 
1348     SetStringFromAny(NULL, objPtr);
1349     stringPtr = GET_STRING(objPtr);
1350 
1351     /*
1352      * If objPtr has a valid Unicode rep, then append the "unicode" to the
1353      * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to
1354      * objPtr's string rep.
1355      */
1356 
1357     if (stringPtr->hasUnicode
1358 #if COMPAT
1359 		&& stringPtr->numChars > 0
1360 #endif
1361 	    ) {
1362 	AppendUnicodeToUnicodeRep(objPtr, unicode, length);
1363     } else {
1364 	AppendUnicodeToUtfRep(objPtr, unicode, length);
1365     }
1366 }
1367 
1368 /*
1369  *----------------------------------------------------------------------
1370  *
1371  * Tcl_AppendObjToObj --
1372  *
1373  *	This function appends the string rep of one object to another.
1374  *	"objPtr" cannot be a shared object.
1375  *
1376  * Results:
1377  *	None.
1378  *
1379  * Side effects:
1380  *	The string rep of appendObjPtr is appended to the string
1381  *	representation of objPtr.
1382  *	IMPORTANT: This routine does not and MUST NOT shimmer appendObjPtr.
1383  *	Callers are counting on that.
1384  *
1385  *----------------------------------------------------------------------
1386  */
1387 
1388 void
Tcl_AppendObjToObj(Tcl_Obj * objPtr,Tcl_Obj * appendObjPtr)1389 Tcl_AppendObjToObj(
1390     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1391     Tcl_Obj *appendObjPtr)	/* Object to append. */
1392 {
1393     String *stringPtr;
1394     int length, numChars, appendNumChars = -1;
1395     const char *bytes;
1396 
1397     /*
1398      * Special case: second object is standard-empty is fast case. We know
1399      * that appending nothing to anything leaves that starting anything...
1400      */
1401 
1402     if (appendObjPtr->bytes == tclEmptyStringRep) {
1403 	return;
1404     }
1405 
1406     /*
1407      * Handle append of one bytearray object to another as a special case.
1408      * Note that we only do this when the objects don't have string reps; if
1409      * it did, then appending the byte arrays together could well lose
1410      * information; this is a special-case optimization only.
1411      */
1412 
1413     if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep)
1414 	    && TclIsPureByteArray(appendObjPtr)) {
1415 	/*
1416 	 * You might expect the code here to be
1417 	 *
1418 	 *  bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
1419 	 *  TclAppendBytesToByteArray(objPtr, bytes, length);
1420 	 *
1421 	 * and essentially all of the time that would be fine. However, it
1422 	 * would run into trouble in the case where objPtr and appendObjPtr
1423 	 * point to the same thing. That may never be a good idea. It seems to
1424 	 * violate Copy On Write, and we don't have any tests for the
1425 	 * situation, since making any Tcl commands that call
1426 	 * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On
1427 	 * Write!). For the sake of extensions that go off into that realm,
1428 	 * though, here's a more complex approach that can handle all the
1429 	 * cases.
1430 	 *
1431 	 * First, get the lengths.
1432 	 */
1433 
1434 	int lengthSrc;
1435 
1436 	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
1437 	(void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc);
1438 
1439 	/*
1440 	 * Grow buffer enough for the append.
1441 	 */
1442 
1443 	TclAppendBytesToByteArray(objPtr, NULL, lengthSrc);
1444 
1445 	/*
1446 	 * Reset objPtr back to the original value.
1447 	 */
1448 
1449 	Tcl_SetByteArrayLength(objPtr, length);
1450 
1451 	/*
1452 	 * Now do the append knowing that buffer growth cannot cause any
1453 	 * trouble.
1454 	 */
1455 
1456 	TclAppendBytesToByteArray(objPtr,
1457 		Tcl_GetByteArrayFromObj(appendObjPtr, NULL), lengthSrc);
1458 	return;
1459     }
1460 
1461     /*
1462      * Must append as strings.
1463      */
1464 
1465     SetStringFromAny(NULL, objPtr);
1466     stringPtr = GET_STRING(objPtr);
1467 
1468     /*
1469      * If objPtr has a valid Unicode rep, then get a Unicode string from
1470      * appendObjPtr and append it.
1471      */
1472 
1473     if (stringPtr->hasUnicode
1474 #if COMPAT
1475 		&& stringPtr->numChars > 0
1476 #endif
1477 	    ) {
1478 	/*
1479 	 * If appendObjPtr is not of the "String" type, don't convert it.
1480 	 */
1481 
1482 	if (appendObjPtr->typePtr == &tclStringType) {
1483 	    Tcl_UniChar *unicode =
1484 		    Tcl_GetUnicodeFromObj(appendObjPtr, &numChars);
1485 
1486 	    AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
1487 	} else {
1488 	    bytes = TclGetStringFromObj(appendObjPtr, &length);
1489 	    AppendUtfToUnicodeRep(objPtr, bytes, length);
1490 	}
1491 	return;
1492     }
1493 
1494     /*
1495      * Append to objPtr's UTF string rep. If we know the number of characters
1496      * in both objects before appending, then set the combined number of
1497      * characters in the final (appended-to) object.
1498      */
1499 
1500     bytes = TclGetStringFromObj(appendObjPtr, &length);
1501 
1502     numChars = stringPtr->numChars;
1503     if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
1504 	String *appendStringPtr = GET_STRING(appendObjPtr);
1505 
1506 	appendNumChars = appendStringPtr->numChars;
1507     }
1508 
1509     AppendUtfToUtfRep(objPtr, bytes, length);
1510 
1511     if (numChars >= 0 && appendNumChars >= 0
1512 #if COMPAT
1513 		&& appendNumChars == length
1514 #endif
1515 	    ) {
1516 	stringPtr->numChars = numChars + appendNumChars;
1517     }
1518 }
1519 
1520 /*
1521  *----------------------------------------------------------------------
1522  *
1523  * AppendUnicodeToUnicodeRep --
1524  *
1525  *	This function appends the contents of "unicode" to the Unicode rep of
1526  *	"objPtr". objPtr must already have a valid Unicode rep.
1527  *
1528  * Results:
1529  *	None.
1530  *
1531  * Side effects:
1532  *	objPtr's internal rep is reallocated.
1533  *
1534  *----------------------------------------------------------------------
1535  */
1536 
1537 static void
AppendUnicodeToUnicodeRep(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int appendNumChars)1538 AppendUnicodeToUnicodeRep(
1539     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1540     const Tcl_UniChar *unicode,	/* String to append. */
1541     int appendNumChars)		/* Number of chars of "unicode" to append. */
1542 {
1543     String *stringPtr;
1544     int numChars;
1545 
1546     if (appendNumChars < 0) {
1547 	appendNumChars = UnicodeLength(unicode);
1548     }
1549     if (appendNumChars == 0) {
1550 	return;
1551     }
1552 
1553     SetStringFromAny(NULL, objPtr);
1554     stringPtr = GET_STRING(objPtr);
1555 
1556     /*
1557      * If not enough space has been allocated for the unicode rep, reallocate
1558      * the internal rep object with additional space. First try to double the
1559      * required allocation; if that fails, try a more modest increase. See the
1560      * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an
1561      * explanation of this growth algorithm.
1562      */
1563 
1564     numChars = stringPtr->numChars + appendNumChars;
1565     stringCheckLimits(numChars);
1566 
1567     if (numChars > stringPtr->maxChars) {
1568 	int offset = -1;
1569 
1570 	/*
1571 	 * Protect against case where unicode points into the existing
1572 	 * stringPtr->unicode array. Force it to follow any relocations due to
1573 	 * the reallocs below.
1574 	 */
1575 
1576 	if (unicode && unicode >= stringPtr->unicode
1577 		&& unicode <= stringPtr->unicode + stringPtr->maxChars) {
1578 	    offset = unicode - stringPtr->unicode;
1579 	}
1580 
1581 	GrowUnicodeBuffer(objPtr, numChars);
1582 	stringPtr = GET_STRING(objPtr);
1583 
1584 	/*
1585 	 * Relocate unicode if needed; see above.
1586 	 */
1587 
1588 	if (offset >= 0) {
1589 	    unicode = stringPtr->unicode + offset;
1590 	}
1591     }
1592 
1593     /*
1594      * Copy the new string onto the end of the old string, then add the
1595      * trailing null.
1596      */
1597 
1598     if (unicode) {
1599 	memmove(stringPtr->unicode + stringPtr->numChars, unicode,
1600 		appendNumChars * sizeof(Tcl_UniChar));
1601     }
1602     stringPtr->unicode[numChars] = 0;
1603     stringPtr->numChars = numChars;
1604     stringPtr->allocated = 0;
1605 
1606     TclInvalidateStringRep(objPtr);
1607 }
1608 
1609 /*
1610  *----------------------------------------------------------------------
1611  *
1612  * AppendUnicodeToUtfRep --
1613  *
1614  *	This function converts the contents of "unicode" to UTF and appends
1615  *	the UTF to the string rep of "objPtr".
1616  *
1617  * Results:
1618  *	None.
1619  *
1620  * Side effects:
1621  *	objPtr's internal rep is reallocated.
1622  *
1623  *----------------------------------------------------------------------
1624  */
1625 
1626 static void
AppendUnicodeToUtfRep(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1627 AppendUnicodeToUtfRep(
1628     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1629     const Tcl_UniChar *unicode,	/* String to convert to UTF. */
1630     int numChars)		/* Number of chars of "unicode" to convert. */
1631 {
1632     String *stringPtr = GET_STRING(objPtr);
1633 
1634     numChars = ExtendStringRepWithUnicode(objPtr, unicode, numChars);
1635 
1636     if (stringPtr->numChars != -1) {
1637 	stringPtr->numChars += numChars;
1638     }
1639 
1640 #if COMPAT
1641     /*
1642      * Invalidate the unicode rep.
1643      */
1644 
1645     stringPtr->hasUnicode = 0;
1646 #endif
1647 }
1648 
1649 /*
1650  *----------------------------------------------------------------------
1651  *
1652  * AppendUtfToUnicodeRep --
1653  *
1654  *	This function converts the contents of "bytes" to Unicode and appends
1655  *	the Unicode to the Unicode rep of "objPtr". objPtr must already have a
1656  *	valid Unicode rep. numBytes must be non-negative.
1657  *
1658  * Results:
1659  *	None.
1660  *
1661  * Side effects:
1662  *	objPtr's internal rep is reallocated.
1663  *
1664  *----------------------------------------------------------------------
1665  */
1666 
1667 static void
AppendUtfToUnicodeRep(Tcl_Obj * objPtr,const char * bytes,int numBytes)1668 AppendUtfToUnicodeRep(
1669     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1670     const char *bytes,		/* String to convert to Unicode. */
1671     int numBytes)		/* Number of bytes of "bytes" to convert. */
1672 {
1673     String *stringPtr;
1674 
1675     if (numBytes == 0) {
1676 	return;
1677     }
1678 
1679     ExtendUnicodeRepWithString(objPtr, bytes, numBytes, -1);
1680     TclInvalidateStringRep(objPtr);
1681     stringPtr = GET_STRING(objPtr);
1682     stringPtr->allocated = 0;
1683 }
1684 
1685 /*
1686  *----------------------------------------------------------------------
1687  *
1688  * AppendUtfToUtfRep --
1689  *
1690  *	This function appends "numBytes" bytes of "bytes" to the UTF string
1691  *	rep of "objPtr". objPtr must already have a valid String rep.
1692  *	numBytes must be non-negative.
1693  *
1694  * Results:
1695  *	None.
1696  *
1697  * Side effects:
1698  *	objPtr's internal rep is reallocated.
1699  *
1700  *----------------------------------------------------------------------
1701  */
1702 
1703 static void
AppendUtfToUtfRep(Tcl_Obj * objPtr,const char * bytes,int numBytes)1704 AppendUtfToUtfRep(
1705     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1706     const char *bytes,		/* String to append. */
1707     int numBytes)		/* Number of bytes of "bytes" to append. */
1708 {
1709     String *stringPtr;
1710     int newLength, oldLength;
1711 
1712     if (numBytes == 0) {
1713 	return;
1714     }
1715 
1716     /*
1717      * Copy the new string onto the end of the old string, then add the
1718      * trailing null.
1719      */
1720 
1721     if (objPtr->bytes == NULL) {
1722 	objPtr->length = 0;
1723     }
1724     oldLength = objPtr->length;
1725     newLength = numBytes + oldLength;
1726     if (newLength < 0) {
1727 	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
1728     }
1729 
1730     stringPtr = GET_STRING(objPtr);
1731     if (newLength > stringPtr->allocated) {
1732 	int offset = -1;
1733 
1734 	/*
1735 	 * Protect against case where unicode points into the existing
1736 	 * stringPtr->unicode array. Force it to follow any relocations due to
1737 	 * the reallocs below.
1738 	 */
1739 
1740 	if (bytes && bytes >= objPtr->bytes
1741 		&& bytes <= objPtr->bytes + objPtr->length) {
1742 	    offset = bytes - objPtr->bytes;
1743 	}
1744 
1745 	/*
1746 	 * TODO: consider passing flag=1: no overalloc on first append. This
1747 	 * would make test stringObj-8.1 fail.
1748 	 */
1749 
1750 	GrowStringBuffer(objPtr, newLength, 0);
1751 
1752 	/*
1753 	 * Relocate bytes if needed; see above.
1754 	 */
1755 
1756 	if (offset >= 0) {
1757 	    bytes = objPtr->bytes + offset;
1758 	}
1759     }
1760 
1761     /*
1762      * Invalidate the unicode data.
1763      */
1764 
1765     stringPtr->numChars = -1;
1766     stringPtr->hasUnicode = 0;
1767 
1768     if (bytes) {
1769 	memmove(objPtr->bytes + oldLength, bytes, numBytes);
1770     }
1771     objPtr->bytes[newLength] = 0;
1772     objPtr->length = newLength;
1773 }
1774 
1775 /*
1776  *----------------------------------------------------------------------
1777  *
1778  * Tcl_AppendStringsToObjVA --
1779  *
1780  *	This function appends one or more null-terminated strings to an
1781  *	object.
1782  *
1783  * Results:
1784  *	None.
1785  *
1786  * Side effects:
1787  *	The contents of all the string arguments are appended to the string
1788  *	representation of objPtr.
1789  *
1790  *----------------------------------------------------------------------
1791  */
1792 
1793 void
Tcl_AppendStringsToObjVA(Tcl_Obj * objPtr,va_list argList)1794 Tcl_AppendStringsToObjVA(
1795     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1796     va_list argList)		/* Variable argument list. */
1797 {
1798     if (Tcl_IsShared(objPtr)) {
1799 	Tcl_Panic("%s called with shared object", "Tcl_AppendStringsToObj");
1800     }
1801 
1802     while (1) {
1803 	const char *bytes = va_arg(argList, char *);
1804 
1805 	if (bytes == NULL) {
1806 	    break;
1807 	}
1808 	Tcl_AppendToObj(objPtr, bytes, -1);
1809     }
1810 }
1811 
1812 /*
1813  *----------------------------------------------------------------------
1814  *
1815  * Tcl_AppendStringsToObj --
1816  *
1817  *	This function appends one or more null-terminated strings to an
1818  *	object.
1819  *
1820  * Results:
1821  *	None.
1822  *
1823  * Side effects:
1824  *	The contents of all the string arguments are appended to the string
1825  *	representation of objPtr.
1826  *
1827  *----------------------------------------------------------------------
1828  */
1829 
1830 void
Tcl_AppendStringsToObj(Tcl_Obj * objPtr,...)1831 Tcl_AppendStringsToObj(
1832     Tcl_Obj *objPtr,
1833     ...)
1834 {
1835     va_list argList;
1836 
1837     va_start(argList, objPtr);
1838     Tcl_AppendStringsToObjVA(objPtr, argList);
1839     va_end(argList);
1840 }
1841 
1842 /*
1843  *----------------------------------------------------------------------
1844  *
1845  * Tcl_AppendFormatToObj --
1846  *
1847  *	This function appends a list of Tcl_Obj's to a Tcl_Obj according to
1848  *	the formatting instructions embedded in the format string. The
1849  *	formatting instructions are inspired by sprintf(). Returns TCL_OK when
1850  *	successful. If there's an error in the arguments, TCL_ERROR is
1851  *	returned, and an error message is written to the interp, if non-NULL.
1852  *
1853  * Results:
1854  *	A standard Tcl result.
1855  *
1856  * Side effects:
1857  *	None.
1858  *
1859  *----------------------------------------------------------------------
1860  */
1861 
1862 int
Tcl_AppendFormatToObj(Tcl_Interp * interp,Tcl_Obj * appendObj,const char * format,int objc,Tcl_Obj * const objv[])1863 Tcl_AppendFormatToObj(
1864     Tcl_Interp *interp,
1865     Tcl_Obj *appendObj,
1866     const char *format,
1867     int objc,
1868     Tcl_Obj *const objv[])
1869 {
1870     const char *span = format, *msg, *errCode;
1871     int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0;
1872     int originalLength, limit;
1873     Tcl_UniChar ch = 0;
1874     static const char *mixedXPG =
1875 	    "cannot mix \"%\" and \"%n$\" conversion specifiers";
1876     static const char *const badIndex[2] = {
1877 	"not enough arguments for all format specifiers",
1878 	"\"%n$\" argument index out of range"
1879     };
1880     static const char *overflow = "max size for a Tcl value exceeded";
1881 
1882     if (Tcl_IsShared(appendObj)) {
1883 	Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
1884     }
1885     TclGetStringFromObj(appendObj, &originalLength);
1886     limit = INT_MAX - originalLength;
1887 
1888     /*
1889      * Format string is NUL-terminated.
1890      */
1891 
1892     while (*format != '\0') {
1893 	char *end;
1894 	int gotMinus = 0, gotHash = 0, gotZero = 0, gotSpace = 0, gotPlus = 0;
1895 	int width, gotPrecision, precision, sawFlag, useShort = 0, useBig = 0;
1896 #ifndef TCL_WIDE_INT_IS_LONG
1897 	int useWide = 0;
1898 #endif
1899 	int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes;
1900 	Tcl_Obj *segment;
1901 	int step = TclUtfToUniChar(format, &ch);
1902 
1903 	format += step;
1904 	if (ch != '%') {
1905 	    numBytes += step;
1906 	    continue;
1907 	}
1908 	if (numBytes) {
1909 	    if (numBytes > limit) {
1910 		msg = overflow;
1911 		errCode = "OVERFLOW";
1912 		goto errorMsg;
1913 	    }
1914 	    Tcl_AppendToObj(appendObj, span, numBytes);
1915 	    limit -= numBytes;
1916 	    numBytes = 0;
1917 	}
1918 
1919 	/*
1920 	 * Saw a % : process the format specifier.
1921 	 *
1922 	 * Step 0. Handle special case of escaped format marker (i.e., %%).
1923 	 */
1924 
1925 	step = TclUtfToUniChar(format, &ch);
1926 	if (ch == '%') {
1927 	    span = format;
1928 	    numBytes = step;
1929 	    format += step;
1930 	    continue;
1931 	}
1932 
1933 	/*
1934 	 * Step 1. XPG3 position specifier
1935 	 */
1936 
1937 	newXpg = 0;
1938 	if (isdigit(UCHAR(ch))) {
1939 	    int position = strtoul(format, &end, 10);
1940 
1941 	    if (*end == '$') {
1942 		newXpg = 1;
1943 		objIndex = position - 1;
1944 		format = end + 1;
1945 		step = TclUtfToUniChar(format, &ch);
1946 	    }
1947 	}
1948 	if (newXpg) {
1949 	    if (gotSequential) {
1950 		msg = mixedXPG;
1951 		errCode = "MIXEDSPECTYPES";
1952 		goto errorMsg;
1953 	    }
1954 	    gotXpg = 1;
1955 	} else {
1956 	    if (gotXpg) {
1957 		msg = mixedXPG;
1958 		errCode = "MIXEDSPECTYPES";
1959 		goto errorMsg;
1960 	    }
1961 	    gotSequential = 1;
1962 	}
1963 	if ((objIndex < 0) || (objIndex >= objc)) {
1964 	    msg = badIndex[gotXpg];
1965 	    errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH";
1966 	    goto errorMsg;
1967 	}
1968 
1969 	/*
1970 	 * Step 2. Set of flags.
1971 	 */
1972 
1973 	sawFlag = 1;
1974 	do {
1975 	    switch (ch) {
1976 	    case '-':
1977 		gotMinus = 1;
1978 		break;
1979 	    case '#':
1980 		gotHash = 1;
1981 		break;
1982 	    case '0':
1983 		gotZero = 1;
1984 		break;
1985 	    case ' ':
1986 		gotSpace = 1;
1987 		break;
1988 	    case '+':
1989 		gotPlus = 1;
1990 		break;
1991 	    default:
1992 		sawFlag = 0;
1993 	    }
1994 	    if (sawFlag) {
1995 		format += step;
1996 		step = TclUtfToUniChar(format, &ch);
1997 	    }
1998 	} while (sawFlag);
1999 
2000 	/*
2001 	 * Step 3. Minimum field width.
2002 	 */
2003 
2004 	width = 0;
2005 	if (isdigit(UCHAR(ch))) {
2006 	    width = strtoul(format, &end, 10);
2007 	    if (width < 0) {
2008 		msg = overflow;
2009 		errCode = "OVERFLOW";
2010 		goto errorMsg;
2011 	    }
2012 	    format = end;
2013 	    step = TclUtfToUniChar(format, &ch);
2014 	} else if (ch == '*') {
2015 	    if (objIndex >= objc - 1) {
2016 		msg = badIndex[gotXpg];
2017 		errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH";
2018 		goto errorMsg;
2019 	    }
2020 	    if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) {
2021 		goto error;
2022 	    }
2023 	    if (width < 0) {
2024 		width = -width;
2025 		gotMinus = 1;
2026 	    }
2027 	    objIndex++;
2028 	    format += step;
2029 	    step = TclUtfToUniChar(format, &ch);
2030 	}
2031 	if (width > limit) {
2032 	    msg = overflow;
2033 	    errCode = "OVERFLOW";
2034 	    goto errorMsg;
2035 	}
2036 
2037 	/*
2038 	 * Step 4. Precision.
2039 	 */
2040 
2041 	gotPrecision = precision = 0;
2042 	if (ch == '.') {
2043 	    gotPrecision = 1;
2044 	    format += step;
2045 	    step = TclUtfToUniChar(format, &ch);
2046 	}
2047 	if (isdigit(UCHAR(ch))) {
2048 	    precision = strtoul(format, &end, 10);
2049 	    format = end;
2050 	    step = TclUtfToUniChar(format, &ch);
2051 	} else if (ch == '*') {
2052 	    if (objIndex >= objc - 1) {
2053 		msg = badIndex[gotXpg];
2054 		errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH";
2055 		goto errorMsg;
2056 	    }
2057 	    if (TclGetIntFromObj(interp, objv[objIndex], &precision)
2058 		    != TCL_OK) {
2059 		goto error;
2060 	    }
2061 
2062 	    /*
2063 	     * TODO: Check this truncation logic.
2064 	     */
2065 
2066 	    if (precision < 0) {
2067 		precision = 0;
2068 	    }
2069 	    objIndex++;
2070 	    format += step;
2071 	    step = TclUtfToUniChar(format, &ch);
2072 	}
2073 
2074 	/*
2075 	 * Step 5. Length modifier.
2076 	 */
2077 
2078 	if (ch == 'h') {
2079 	    useShort = 1;
2080 	    format += step;
2081 	    step = TclUtfToUniChar(format, &ch);
2082 	} else if (ch == 'l') {
2083 	    format += step;
2084 	    step = TclUtfToUniChar(format, &ch);
2085 	    if (ch == 'l') {
2086 		useBig = 1;
2087 		format += step;
2088 		step = TclUtfToUniChar(format, &ch);
2089 #ifndef TCL_WIDE_INT_IS_LONG
2090 	    } else {
2091 		useWide = 1;
2092 #endif
2093 	    }
2094 	}
2095 
2096 	format += step;
2097 	span = format;
2098 
2099 	/*
2100 	 * Step 6. The actual conversion character.
2101 	 */
2102 
2103 	segment = objv[objIndex];
2104 	numChars = -1;
2105 	if (ch == 'i') {
2106 	    ch = 'd';
2107 	}
2108 	switch (ch) {
2109 	case '\0':
2110 	    msg = "format string ended in middle of field specifier";
2111 	    errCode = "INCOMPLETE";
2112 	    goto errorMsg;
2113 	case 's':
2114 	    if (gotPrecision) {
2115 		numChars = Tcl_GetCharLength(segment);
2116 		if (precision < numChars) {
2117 		    segment = Tcl_GetRange(segment, 0, precision - 1);
2118 		    numChars = precision;
2119 		    Tcl_IncrRefCount(segment);
2120 		    allocSegment = 1;
2121 		}
2122 	    }
2123 	    break;
2124 	case 'c': {
2125 	    char buf[4] = "";
2126 	    int code, length;
2127 
2128 	    if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
2129 		goto error;
2130 	    }
2131 	    length = Tcl_UniCharToUtf(code, buf);
2132 #if TCL_UTF_MAX > 3
2133 	    if ((code >= 0xD800) && (length < 3)) {
2134 		/* Special case for handling high surrogates. */
2135 		length += Tcl_UniCharToUtf(-1, buf + length);
2136 	    }
2137 #endif
2138 	    segment = Tcl_NewStringObj(buf, length);
2139 	    Tcl_IncrRefCount(segment);
2140 	    allocSegment = 1;
2141 	    break;
2142 	}
2143 
2144 	case 'u':
2145 	    if (useBig) {
2146 		msg = "unsigned bignum format is invalid";
2147 		errCode = "BADUNSIGNED";
2148 		goto errorMsg;
2149 	    }
2150 	    /* FALLTHRU */
2151 	case 'd':
2152 	case 'o':
2153 	case 'x':
2154 	case 'X':
2155 	case 'b': {
2156 	    short s = 0;	/* Silence compiler warning; only defined and
2157 				 * used when useShort is true. */
2158 	    long l;
2159 	    Tcl_WideInt w;
2160 	    mp_int big;
2161 	    int toAppend, isNegative = 0;
2162 
2163 	    if (useBig) {
2164 		if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
2165 		    goto error;
2166 		}
2167 		isNegative = (mp_cmp_d(&big, 0) == MP_LT);
2168 #ifndef TCL_WIDE_INT_IS_LONG
2169 	    } else if (useWide) {
2170 		if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
2171 		    Tcl_Obj *objPtr;
2172 
2173 		    if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
2174 			goto error;
2175 		    }
2176 		    mp_mod_2d(&big, CHAR_BIT*sizeof(Tcl_WideInt), &big);
2177 		    objPtr = Tcl_NewBignumObj(&big);
2178 		    Tcl_IncrRefCount(objPtr);
2179 		    Tcl_GetWideIntFromObj(NULL, objPtr, &w);
2180 		    Tcl_DecrRefCount(objPtr);
2181 		}
2182 		isNegative = (w < (Tcl_WideInt) 0);
2183 #endif
2184 	    } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
2185 		if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
2186 		    Tcl_Obj *objPtr;
2187 
2188 		    if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
2189 			goto error;
2190 		    }
2191 		    mp_mod_2d(&big, CHAR_BIT * sizeof(long), &big);
2192 		    objPtr = Tcl_NewBignumObj(&big);
2193 		    Tcl_IncrRefCount(objPtr);
2194 		    TclGetLongFromObj(NULL, objPtr, &l);
2195 		    Tcl_DecrRefCount(objPtr);
2196 		} else {
2197 		    l = Tcl_WideAsLong(w);
2198 		}
2199 		if (useShort) {
2200 		    s = (short) l;
2201 		    isNegative = (s < (short) 0);
2202 		} else {
2203 		    isNegative = (l < (long) 0);
2204 		}
2205 	    } else if (useShort) {
2206 		s = (short) l;
2207 		isNegative = (s < (short) 0);
2208 	    } else {
2209 		isNegative = (l < (long) 0);
2210 	    }
2211 
2212 	    TclNewObj(segment);
2213 	    allocSegment = 1;
2214 	    segmentLimit = INT_MAX;
2215 	    Tcl_IncrRefCount(segment);
2216 
2217 	    if ((isNegative || gotPlus || gotSpace) && (useBig || ch=='d')) {
2218 		Tcl_AppendToObj(segment,
2219 			(isNegative ? "-" : gotPlus ? "+" : " "), 1);
2220 		segmentLimit -= 1;
2221 	    }
2222 
2223 	    if (gotHash) {
2224 		switch (ch) {
2225 		case 'o':
2226 		    Tcl_AppendToObj(segment, "0", 1);
2227 		    segmentLimit -= 1;
2228 		    precision--;
2229 		    break;
2230 		case 'X':
2231 		    Tcl_AppendToObj(segment, "0X", 2);
2232 		    segmentLimit -= 2;
2233 		    break;
2234 		case 'x':
2235 		    Tcl_AppendToObj(segment, "0x", 2);
2236 		    segmentLimit -= 2;
2237 		    break;
2238 		case 'b':
2239 		    Tcl_AppendToObj(segment, "0b", 2);
2240 		    segmentLimit -= 2;
2241 		    break;
2242 		}
2243 	    }
2244 
2245 	    switch (ch) {
2246 	    case 'd': {
2247 		int length;
2248 		Tcl_Obj *pure;
2249 		const char *bytes;
2250 
2251 		if (useShort) {
2252 		    TclNewIntObj(pure, (int) s);
2253 #ifndef TCL_WIDE_INT_IS_LONG
2254 		} else if (useWide) {
2255 		    pure = Tcl_NewWideIntObj(w);
2256 #endif
2257 		} else if (useBig) {
2258 		    pure = Tcl_NewBignumObj(&big);
2259 		} else {
2260 		    pure = Tcl_NewLongObj(l);
2261 		}
2262 		Tcl_IncrRefCount(pure);
2263 		bytes = TclGetStringFromObj(pure, &length);
2264 
2265 		/*
2266 		 * Already did the sign above.
2267 		 */
2268 
2269 		if (*bytes == '-') {
2270 		    length--;
2271 		    bytes++;
2272 		}
2273 		toAppend = length;
2274 
2275 		/*
2276 		 * Canonical decimal string reps for integers are composed
2277 		 * entirely of one-byte encoded characters, so "length" is the
2278 		 * number of chars.
2279 		 */
2280 
2281 		if (gotPrecision) {
2282 		    if (length < precision) {
2283 			segmentLimit -= precision - length;
2284 		    }
2285 		    while (length < precision) {
2286 			Tcl_AppendToObj(segment, "0", 1);
2287 			length++;
2288 		    }
2289 		    gotZero = 0;
2290 		}
2291 		if (gotZero) {
2292 		    length += Tcl_GetCharLength(segment);
2293 		    if (length < width) {
2294 			segmentLimit -= width - length;
2295 		    }
2296 		    while (length < width) {
2297 			Tcl_AppendToObj(segment, "0", 1);
2298 			length++;
2299 		    }
2300 		}
2301 		if (toAppend > segmentLimit) {
2302 		    msg = overflow;
2303 		    errCode = "OVERFLOW";
2304 		    goto errorMsg;
2305 		}
2306 		Tcl_AppendToObj(segment, bytes, toAppend);
2307 		Tcl_DecrRefCount(pure);
2308 		break;
2309 	    }
2310 
2311 	    case 'u':
2312 	    case 'o':
2313 	    case 'x':
2314 	    case 'X':
2315 	    case 'b': {
2316 		Tcl_WideUInt bits = (Tcl_WideUInt) 0;
2317 		Tcl_WideInt numDigits = (Tcl_WideInt) 0;
2318 		int length, numBits = 4, base = 16, index = 0, shift = 0;
2319 		Tcl_Obj *pure;
2320 		char *bytes;
2321 
2322 		if (ch == 'u') {
2323 		    base = 10;
2324 		} else if (ch == 'o') {
2325 		    base = 8;
2326 		    numBits = 3;
2327 		} else if (ch == 'b') {
2328 		    base = 2;
2329 		    numBits = 1;
2330 		}
2331 		if (useShort) {
2332 		    unsigned short us = (unsigned short) s;
2333 
2334 		    bits = (Tcl_WideUInt) us;
2335 		    while (us) {
2336 			numDigits++;
2337 			us /= base;
2338 		    }
2339 #ifndef TCL_WIDE_INT_IS_LONG
2340 		} else if (useWide) {
2341 		    Tcl_WideUInt uw = (Tcl_WideUInt) w;
2342 
2343 		    bits = uw;
2344 		    while (uw) {
2345 			numDigits++;
2346 			uw /= base;
2347 		    }
2348 #endif
2349 		} else if (useBig && big.used) {
2350 		    int leftover = (big.used * MP_DIGIT_BIT) % numBits;
2351 		    mp_digit mask = (~(mp_digit)0) << (MP_DIGIT_BIT-leftover);
2352 
2353 		    numDigits = 1 +
2354 			    (((Tcl_WideInt) big.used * MP_DIGIT_BIT) / numBits);
2355 		    while ((mask & big.dp[big.used-1]) == 0) {
2356 			numDigits--;
2357 			mask >>= numBits;
2358 		    }
2359 		    if (numDigits > INT_MAX) {
2360 			msg = overflow;
2361 			errCode = "OVERFLOW";
2362 			goto errorMsg;
2363 		    }
2364 		} else if (!useBig) {
2365 		    unsigned long ul = (unsigned long) l;
2366 
2367 		    bits = (Tcl_WideUInt) ul;
2368 		    while (ul) {
2369 			numDigits++;
2370 			ul /= base;
2371 		    }
2372 		}
2373 
2374 		/*
2375 		 * Need to be sure zero becomes "0", not "".
2376 		 */
2377 
2378 		if ((numDigits == 0) && !((ch == 'o') && gotHash)) {
2379 		    numDigits = 1;
2380 		}
2381 		TclNewObj(pure);
2382 		Tcl_SetObjLength(pure, (int) numDigits);
2383 		bytes = TclGetString(pure);
2384 		toAppend = length = (int) numDigits;
2385 		while (numDigits--) {
2386 		    int digitOffset;
2387 
2388 		    if (useBig && big.used) {
2389 			if (index < big.used && (size_t) shift <
2390 				CHAR_BIT*sizeof(Tcl_WideUInt) - MP_DIGIT_BIT) {
2391 			    bits |= ((Tcl_WideUInt) big.dp[index++]) << shift;
2392 			    shift += MP_DIGIT_BIT;
2393 			}
2394 			shift -= numBits;
2395 		    }
2396 		    digitOffset = (int) (bits % base);
2397 		    if (digitOffset > 9) {
2398 			if (ch == 'X') {
2399 			    bytes[numDigits] = 'A' + digitOffset - 10;
2400 			} else {
2401 			    bytes[numDigits] = 'a' + digitOffset - 10;
2402 			}
2403 		    } else {
2404 			bytes[numDigits] = '0' + digitOffset;
2405 		    }
2406 		    bits /= base;
2407 		}
2408 		if (useBig) {
2409 		    mp_clear(&big);
2410 		}
2411 		if (gotPrecision) {
2412 		    if (length < precision) {
2413 			segmentLimit -= precision - length;
2414 		    }
2415 		    while (length < precision) {
2416 			Tcl_AppendToObj(segment, "0", 1);
2417 			length++;
2418 		    }
2419 		    gotZero = 0;
2420 		}
2421 		if (gotZero) {
2422 		    length += Tcl_GetCharLength(segment);
2423 		    if (length < width) {
2424 			segmentLimit -= width - length;
2425 		    }
2426 		    while (length < width) {
2427 			Tcl_AppendToObj(segment, "0", 1);
2428 			length++;
2429 		    }
2430 		}
2431 		if (toAppend > segmentLimit) {
2432 		    msg = overflow;
2433 		    errCode = "OVERFLOW";
2434 		    goto errorMsg;
2435 		}
2436 		Tcl_AppendObjToObj(segment, pure);
2437 		Tcl_DecrRefCount(pure);
2438 		break;
2439 	    }
2440 
2441 	    }
2442 	    break;
2443 	}
2444 
2445 	case 'e':
2446 	case 'E':
2447 	case 'f':
2448 	case 'g':
2449 	case 'G': {
2450 #define MAX_FLOAT_SIZE 320
2451 	    char spec[2*TCL_INTEGER_SPACE + 9], *p = spec;
2452 	    double d;
2453 	    int length = MAX_FLOAT_SIZE;
2454 	    char *bytes;
2455 
2456 	    if (Tcl_GetDoubleFromObj(interp, segment, &d) != TCL_OK) {
2457 		/* TODO: Figure out ACCEPT_NAN here */
2458 		goto error;
2459 	    }
2460 	    *p++ = '%';
2461 	    if (gotMinus) {
2462 		*p++ = '-';
2463 	    }
2464 	    if (gotHash) {
2465 		*p++ = '#';
2466 	    }
2467 	    if (gotZero) {
2468 		*p++ = '0';
2469 	    }
2470 	    if (gotSpace) {
2471 		*p++ = ' ';
2472 	    }
2473 	    if (gotPlus) {
2474 		*p++ = '+';
2475 	    }
2476 	    if (width) {
2477 		p += sprintf(p, "%d", width);
2478 		if (width > length) {
2479 		    length = width;
2480 		}
2481 	    }
2482 	    if (gotPrecision) {
2483 		*p++ = '.';
2484 		p += sprintf(p, "%d", precision);
2485 		if (precision > INT_MAX - length) {
2486 		    msg = overflow;
2487 		    errCode = "OVERFLOW";
2488 		    goto errorMsg;
2489 		}
2490 		length += precision;
2491 	    }
2492 
2493 	    /*
2494 	     * Don't pass length modifiers!
2495 	     */
2496 
2497 	    *p++ = (char) ch;
2498 	    *p = '\0';
2499 
2500 	    TclNewObj(segment);
2501 	    allocSegment = 1;
2502 	    if (!Tcl_AttemptSetObjLength(segment, length)) {
2503 		msg = overflow;
2504 		errCode = "OVERFLOW";
2505 		goto errorMsg;
2506 	    }
2507 	    bytes = TclGetString(segment);
2508 	    if (!Tcl_AttemptSetObjLength(segment, sprintf(bytes, spec, d))) {
2509 		msg = overflow;
2510 		errCode = "OVERFLOW";
2511 		goto errorMsg;
2512 	    }
2513 	    break;
2514 	}
2515 	default:
2516 	    if (interp != NULL) {
2517 		Tcl_SetObjResult(interp,
2518 			Tcl_ObjPrintf("bad field specifier \"%c\"", ch));
2519 		Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BADTYPE", NULL);
2520 	    }
2521 	    goto error;
2522 	}
2523 
2524 	if (width>0 && numChars<0) {
2525 	    numChars = Tcl_GetCharLength(segment);
2526 	}
2527 	if (!gotMinus && width>0) {
2528 	    if (numChars < width) {
2529 		limit -= width - numChars;
2530 	    }
2531 	    while (numChars < width) {
2532 		Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
2533 		numChars++;
2534 	    }
2535 	}
2536 
2537 	TclGetStringFromObj(segment, &segmentNumBytes);
2538 	if (segmentNumBytes > limit) {
2539 	    if (allocSegment) {
2540 		Tcl_DecrRefCount(segment);
2541 	    }
2542 	    msg = overflow;
2543 	    errCode = "OVERFLOW";
2544 	    goto errorMsg;
2545 	}
2546 	Tcl_AppendObjToObj(appendObj, segment);
2547 	limit -= segmentNumBytes;
2548 	if (allocSegment) {
2549 	    Tcl_DecrRefCount(segment);
2550 	}
2551 	if (width > 0) {
2552 	    if (numChars < width) {
2553 		limit -= width-numChars;
2554 	    }
2555 	    while (numChars < width) {
2556 		Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
2557 		numChars++;
2558 	    }
2559 	}
2560 
2561 	objIndex += gotSequential;
2562     }
2563     if (numBytes) {
2564 	if (numBytes > limit) {
2565 	    msg = overflow;
2566 	    errCode = "OVERFLOW";
2567 	    goto errorMsg;
2568 	}
2569 	Tcl_AppendToObj(appendObj, span, numBytes);
2570 	limit -= numBytes;
2571 	numBytes = 0;
2572     }
2573 
2574     return TCL_OK;
2575 
2576   errorMsg:
2577     if (interp != NULL) {
2578 	Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1));
2579 	Tcl_SetErrorCode(interp, "TCL", "FORMAT", errCode, NULL);
2580     }
2581   error:
2582     Tcl_SetObjLength(appendObj, originalLength);
2583     return TCL_ERROR;
2584 }
2585 
2586 /*
2587  *---------------------------------------------------------------------------
2588  *
2589  * Tcl_Format --
2590  *
2591  * Results:
2592  *	A refcount zero Tcl_Obj.
2593  *
2594  * Side effects:
2595  *	None.
2596  *
2597  *---------------------------------------------------------------------------
2598  */
2599 
2600 Tcl_Obj *
Tcl_Format(Tcl_Interp * interp,const char * format,int objc,Tcl_Obj * const objv[])2601 Tcl_Format(
2602     Tcl_Interp *interp,
2603     const char *format,
2604     int objc,
2605     Tcl_Obj *const objv[])
2606 {
2607     int result;
2608     Tcl_Obj *objPtr;
2609 
2610     TclNewObj(objPtr);
2611     result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv);
2612     if (result != TCL_OK) {
2613 	Tcl_DecrRefCount(objPtr);
2614 	return NULL;
2615     }
2616     return objPtr;
2617 }
2618 
2619 /*
2620  *---------------------------------------------------------------------------
2621  *
2622  * AppendPrintfToObjVA --
2623  *
2624  * Results:
2625  *
2626  * Side effects:
2627  *
2628  *---------------------------------------------------------------------------
2629  */
2630 
2631 static void
AppendPrintfToObjVA(Tcl_Obj * objPtr,const char * format,va_list argList)2632 AppendPrintfToObjVA(
2633     Tcl_Obj *objPtr,
2634     const char *format,
2635     va_list argList)
2636 {
2637     int code, objc;
2638     Tcl_Obj **objv, *list;
2639     const char *p;
2640 
2641     TclNewObj(list);
2642     p = format;
2643     Tcl_IncrRefCount(list);
2644     while (*p != '\0') {
2645 	int size = 0, seekingConversion = 1, gotPrecision = 0;
2646 	int lastNum = -1;
2647 
2648 	if (*p++ != '%') {
2649 	    continue;
2650 	}
2651 	if (*p == '%') {
2652 	    p++;
2653 	    continue;
2654 	}
2655 	do {
2656 	    switch (*p) {
2657 	    case '\0':
2658 		seekingConversion = 0;
2659 		break;
2660 	    case 's': {
2661 		const char *q, *end, *bytes = va_arg(argList, char *);
2662 		seekingConversion = 0;
2663 
2664 		/*
2665 		 * The buffer to copy characters from starts at bytes and ends
2666 		 * at either the first NUL byte, or after lastNum bytes, when
2667 		 * caller has indicated a limit.
2668 		 */
2669 
2670 		end = bytes;
2671 		while ((!gotPrecision || lastNum--) && (*end != '\0')) {
2672 		    end++;
2673 		}
2674 
2675 		/*
2676 		 * Within that buffer, we trim both ends if needed so that we
2677 		 * copy only whole characters, and avoid copying any partial
2678 		 * multi-byte characters.
2679 		 */
2680 
2681 		q = TclUtfPrev(end, bytes);
2682 		if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
2683 		    end = q;
2684 		}
2685 
2686 		q = bytes + TCL_UTF_MAX;
2687 		while ((bytes < end) && (bytes < q)
2688 			&& ((*bytes & 0xC0) == 0x80)) {
2689 		    bytes++;
2690 		}
2691 
2692 		Tcl_ListObjAppendElement(NULL, list,
2693 			Tcl_NewStringObj(bytes , (int)(end - bytes)));
2694 
2695 		break;
2696 	    }
2697 	    case 'c':
2698 	    case 'i':
2699 	    case 'u':
2700 	    case 'd':
2701 	    case 'o':
2702 	    case 'x':
2703 	    case 'X':
2704 		seekingConversion = 0;
2705 		switch (size) {
2706 		case -1:
2707 		case 0:
2708 		    Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
2709 			    (long) va_arg(argList, int)));
2710 		    break;
2711 		case 1:
2712 		    Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
2713 			    va_arg(argList, long)));
2714 		    break;
2715 		}
2716 		break;
2717 	    case 'e':
2718 	    case 'E':
2719 	    case 'f':
2720 	    case 'g':
2721 	    case 'G':
2722 		Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
2723 			va_arg(argList, double)));
2724 		seekingConversion = 0;
2725 		break;
2726 	    case '*':
2727 		lastNum = (int) va_arg(argList, int);
2728 		Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum));
2729 		p++;
2730 		break;
2731 	    case '0': case '1': case '2': case '3': case '4':
2732 	    case '5': case '6': case '7': case '8': case '9': {
2733 		char *end;
2734 
2735 		lastNum = (int) strtoul(p, &end, 10);
2736 		p = end;
2737 		break;
2738 	    }
2739 	    case '.':
2740 		gotPrecision = 1;
2741 		p++;
2742 		break;
2743 	    /* TODO: support for wide (and bignum?) arguments */
2744 	    case 'l':
2745 		size = 1;
2746 		p++;
2747 		break;
2748 	    case 'h':
2749 		size = -1;
2750 		/* FALLTHRU */
2751 	    default:
2752 		p++;
2753 	    }
2754 	} while (seekingConversion);
2755     }
2756     TclListObjGetElements(NULL, list, &objc, &objv);
2757     code = Tcl_AppendFormatToObj(NULL, objPtr, format, objc, objv);
2758     if (code != TCL_OK) {
2759 	Tcl_AppendPrintfToObj(objPtr,
2760 		"Unable to format \"%s\" with supplied arguments: %s",
2761 		format, Tcl_GetString(list));
2762     }
2763     Tcl_DecrRefCount(list);
2764 }
2765 
2766 /*
2767  *---------------------------------------------------------------------------
2768  *
2769  * Tcl_AppendPrintfToObj --
2770  *
2771  * Results:
2772  *	A standard Tcl result.
2773  *
2774  * Side effects:
2775  *	None.
2776  *
2777  *---------------------------------------------------------------------------
2778  */
2779 
2780 void
Tcl_AppendPrintfToObj(Tcl_Obj * objPtr,const char * format,...)2781 Tcl_AppendPrintfToObj(
2782     Tcl_Obj *objPtr,
2783     const char *format,
2784     ...)
2785 {
2786     va_list argList;
2787 
2788     va_start(argList, format);
2789     AppendPrintfToObjVA(objPtr, format, argList);
2790     va_end(argList);
2791 }
2792 
2793 /*
2794  *---------------------------------------------------------------------------
2795  *
2796  * Tcl_ObjPrintf --
2797  *
2798  * Results:
2799  *	A refcount zero Tcl_Obj.
2800  *
2801  * Side effects:
2802  *	None.
2803  *
2804  *---------------------------------------------------------------------------
2805  */
2806 
2807 Tcl_Obj *
Tcl_ObjPrintf(const char * format,...)2808 Tcl_ObjPrintf(
2809     const char *format,
2810     ...)
2811 {
2812     va_list argList;
2813     Tcl_Obj *objPtr;
2814 
2815     TclNewObj(objPtr);
2816     va_start(argList, format);
2817     AppendPrintfToObjVA(objPtr, format, argList);
2818     va_end(argList);
2819     return objPtr;
2820 }
2821 
2822 /*
2823  *---------------------------------------------------------------------------
2824  *
2825  * TclGetStringStorage --
2826  *
2827  *	Returns the string storage space of a Tcl_Obj.
2828  *
2829  * Results:
2830  *	The pointer value objPtr->bytes is returned and the number of bytes
2831  *	allocated there is written to *sizePtr (if known).
2832  *
2833  * Side effects:
2834  *	May set objPtr->bytes.
2835  *
2836  *---------------------------------------------------------------------------
2837  */
2838 
2839 char *
TclGetStringStorage(Tcl_Obj * objPtr,unsigned int * sizePtr)2840 TclGetStringStorage(
2841     Tcl_Obj *objPtr,
2842     unsigned int *sizePtr)
2843 {
2844     String *stringPtr;
2845 
2846     if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) {
2847 	return TclGetStringFromObj(objPtr, (int *)sizePtr);
2848     }
2849 
2850     stringPtr = GET_STRING(objPtr);
2851     *sizePtr = stringPtr->allocated;
2852     return objPtr->bytes;
2853 }
2854 /*
2855  *---------------------------------------------------------------------------
2856  *
2857  * TclStringReverse --
2858  *
2859  *	Implements the [string reverse] operation.
2860  *
2861  * Results:
2862  *	An unshared Tcl value which is the [string reverse] of the argument
2863  *	supplied. When sharing rules permit, the returned value might be the
2864  *	argument with modifications done in place.
2865  *
2866  * Side effects:
2867  *	May allocate a new Tcl_Obj.
2868  *
2869  *---------------------------------------------------------------------------
2870  */
2871 
2872 static void
ReverseBytes(unsigned char * to,unsigned char * from,int count)2873 ReverseBytes(
2874     unsigned char *to,		/* Copy bytes into here... */
2875     unsigned char *from,	/* ...from here... */
2876     int count)			/* Until this many are copied, */
2877 				/* reversing as you go. */
2878 {
2879     unsigned char *src = from + count;
2880 
2881     if (to == from) {
2882 	/* Reversing in place */
2883 	while (--src > to) {
2884 	    unsigned char c = *src;
2885 
2886 	    *src = *to;
2887 	    *to++ = c;
2888 	}
2889     } else {
2890 	while (--src >= from) {
2891 	    *to++ = *src;
2892 	}
2893     }
2894 }
2895 
2896 Tcl_Obj *
TclStringReverse(Tcl_Obj * objPtr)2897 TclStringReverse(
2898     Tcl_Obj *objPtr)
2899 {
2900     String *stringPtr;
2901     Tcl_UniChar ch = 0;
2902 #if TCL_UTF_MAX <= 4
2903     int needFlip = 0;
2904 #endif
2905 
2906     if (TclIsPureByteArray(objPtr)) {
2907 	int numBytes;
2908 	unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
2909 
2910 	if (Tcl_IsShared(objPtr)) {
2911 	    objPtr = Tcl_NewByteArrayObj(NULL, numBytes);
2912 	}
2913 	ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes);
2914 	return objPtr;
2915     }
2916 
2917     SetStringFromAny(NULL, objPtr);
2918     stringPtr = GET_STRING(objPtr);
2919 
2920     if (stringPtr->hasUnicode) {
2921 	Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
2922 	Tcl_UniChar *src = from + stringPtr->numChars;
2923 	Tcl_UniChar *to;
2924 
2925 	if (Tcl_IsShared(objPtr)) {
2926 	    /*
2927 	     * Create a non-empty, pure unicode value, so we can coax
2928 	     * Tcl_SetObjLength into growing the unicode rep buffer.
2929 	     */
2930 
2931 	    objPtr = Tcl_NewUnicodeObj(&ch, 1);
2932 	    Tcl_SetObjLength(objPtr, stringPtr->numChars);
2933 	    to = Tcl_GetUnicode(objPtr);
2934 	    while (--src >= from) {
2935 #if TCL_UTF_MAX <= 4
2936 		ch = *src;
2937 		if ((ch & 0xF800) == 0xD800) {
2938 		    needFlip = 1;
2939 		}
2940 		*to++ = ch;
2941 #else
2942 		*to++ = *src;
2943 #endif
2944 	    }
2945 	} else {
2946 	    /*
2947 	     * Reversing in place.
2948 	     */
2949 
2950 #if TCL_UTF_MAX <= 4
2951 	    to = src;
2952 #endif
2953 	    while (--src > from) {
2954 		ch = *src;
2955 #if TCL_UTF_MAX <= 4
2956 		if ((ch & 0xF800) == 0xD800) {
2957 		    needFlip = 1;
2958 		}
2959 #endif
2960 		*src = *from;
2961 		*from++ = ch;
2962 	    }
2963 	}
2964 #if TCL_UTF_MAX <= 4
2965 	if (needFlip) {
2966 	    /*
2967 	     * Flip back surrogate pairs.
2968 	     */
2969 
2970 	    from = to - stringPtr->numChars;
2971 	    while (--to >= from) {
2972 		ch = *to;
2973 		if ((ch & 0xFC00) == 0xD800) {
2974 		    if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) {
2975 			to[0] = to[-1];
2976 			to[-1] = ch;
2977 			--to;
2978 		    }
2979 		}
2980 	    }
2981 	}
2982 #endif
2983     }
2984 
2985     if (objPtr->bytes) {
2986 	int numChars = stringPtr->numChars;
2987 	int numBytes = objPtr->length;
2988 	char *to, *from = objPtr->bytes;
2989 
2990 	if (Tcl_IsShared(objPtr)) {
2991 	    TclNewObj(objPtr);
2992 	    Tcl_SetObjLength(objPtr, numBytes);
2993 	}
2994 	to = objPtr->bytes;
2995 
2996 	if (numChars < numBytes) {
2997 	    /*
2998 	     * Either numChars == -1 and we don't know how many chars are
2999 	     * represented by objPtr->bytes and we need Pass 1 just in case,
3000 	     * or numChars >= 0 and we know we have fewer chars than bytes, so
3001 	     * we know there's a multibyte character needing Pass 1.
3002 	     *
3003 	     * Pass 1. Reverse the bytes of each multi-byte character.
3004 	     */
3005 
3006 	    int bytesLeft = numBytes;
3007 	    int chw;
3008 
3009 	    while (bytesLeft) {
3010 		/*
3011 		 * NOTE: We know that the from buffer is NUL-terminated. It's
3012 		 * part of the contract for objPtr->bytes values. Thus, we can
3013 		 * skip calling Tcl_UtfCharComplete() here.
3014 		 */
3015 
3016 		int bytesInChar = TclUtfToUCS4(from, &chw);
3017 
3018 		ReverseBytes((unsigned char *)to, (unsigned char *)from,
3019 			bytesInChar);
3020 		to += bytesInChar;
3021 		from += bytesInChar;
3022 		bytesLeft -= bytesInChar;
3023 	    }
3024 
3025 	    from = to = objPtr->bytes;
3026 	}
3027 	/* Pass 2. Reverse all the bytes. */
3028 	ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);
3029     }
3030 
3031     return objPtr;
3032 }
3033 
3034 /*
3035  *---------------------------------------------------------------------------
3036  *
3037  * FillUnicodeRep --
3038  *
3039  *	Populate the Unicode internal rep with the Unicode form of its string
3040  *	rep. The object must alread have a "String" internal rep.
3041  *
3042  * Results:
3043  *	None.
3044  *
3045  * Side effects:
3046  *	Reallocates the String internal rep.
3047  *
3048  *---------------------------------------------------------------------------
3049  */
3050 
3051 static void
FillUnicodeRep(Tcl_Obj * objPtr)3052 FillUnicodeRep(
3053     Tcl_Obj *objPtr)		/* The object in which to fill the unicode
3054 				 * rep. */
3055 {
3056     String *stringPtr = GET_STRING(objPtr);
3057 
3058     ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length,
3059 	    stringPtr->numChars);
3060 }
3061 
3062 static void
ExtendUnicodeRepWithString(Tcl_Obj * objPtr,const char * bytes,int numBytes,int numAppendChars)3063 ExtendUnicodeRepWithString(
3064     Tcl_Obj *objPtr,
3065     const char *bytes,
3066     int numBytes,
3067     int numAppendChars)
3068 {
3069     String *stringPtr = GET_STRING(objPtr);
3070     int needed, numOrigChars = 0;
3071     Tcl_UniChar *dst, unichar = 0;
3072 
3073     if (stringPtr->hasUnicode) {
3074 	numOrigChars = stringPtr->numChars;
3075     }
3076     if (numAppendChars == -1) {
3077 	TclNumUtfChars(numAppendChars, bytes, numBytes);
3078     }
3079     needed = numOrigChars + numAppendChars;
3080     stringCheckLimits(needed);
3081 
3082     if (needed > stringPtr->maxChars) {
3083 	GrowUnicodeBuffer(objPtr, needed);
3084 	stringPtr = GET_STRING(objPtr);
3085     }
3086 
3087     stringPtr->hasUnicode = 1;
3088     if (bytes) {
3089 	stringPtr->numChars = needed;
3090     } else {
3091 	numAppendChars = 0;
3092     }
3093     for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
3094 	bytes += TclUtfToUniChar(bytes, &unichar);
3095 	*dst = unichar;
3096     }
3097     *dst = 0;
3098 }
3099 
3100 /*
3101  *----------------------------------------------------------------------
3102  *
3103  * DupStringInternalRep --
3104  *
3105  *	Initialize the internal representation of a new Tcl_Obj to a copy of
3106  *	the internal representation of an existing string object.
3107  *
3108  * Results:
3109  *	None.
3110  *
3111  * Side effects:
3112  *	copyPtr's internal rep is set to a copy of srcPtr's internal
3113  *	representation.
3114  *
3115  *----------------------------------------------------------------------
3116  */
3117 
3118 static void
DupStringInternalRep(Tcl_Obj * srcPtr,Tcl_Obj * copyPtr)3119 DupStringInternalRep(
3120     Tcl_Obj *srcPtr,		/* Object with internal rep to copy. Must have
3121 				 * an internal rep of type "String". */
3122     Tcl_Obj *copyPtr)		/* Object with internal rep to set. Must not
3123 				 * currently have an internal rep.*/
3124 {
3125     String *srcStringPtr = GET_STRING(srcPtr);
3126     String *copyStringPtr = NULL;
3127 
3128 #if COMPAT==0
3129     if (srcStringPtr->numChars == -1) {
3130 	/*
3131 	 * The String struct in the source value holds zero useful data. Don't
3132 	 * bother copying it. Don't even bother allocating space in which to
3133 	 * copy it. Just let the copy be untyped.
3134 	 */
3135 
3136 	return;
3137     }
3138 
3139     if (srcStringPtr->hasUnicode) {
3140 	int copyMaxChars;
3141 
3142 	if (srcStringPtr->maxChars / 2 >= srcStringPtr->numChars) {
3143 	    copyMaxChars = 2 * srcStringPtr->numChars;
3144 	} else {
3145 	    copyMaxChars = srcStringPtr->maxChars;
3146 	}
3147 	copyStringPtr = stringAttemptAlloc(copyMaxChars);
3148 	if (copyStringPtr == NULL) {
3149 	    copyMaxChars = srcStringPtr->numChars;
3150 	    copyStringPtr = stringAlloc(copyMaxChars);
3151 	}
3152 	copyStringPtr->maxChars = copyMaxChars;
3153 	memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
3154 		srcStringPtr->numChars * sizeof(Tcl_UniChar));
3155 	copyStringPtr->unicode[srcStringPtr->numChars] = 0;
3156     } else {
3157 	copyStringPtr = stringAlloc(0);
3158 	copyStringPtr->maxChars = 0;
3159 	copyStringPtr->unicode[0] = 0;
3160     }
3161     copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
3162     copyStringPtr->numChars = srcStringPtr->numChars;
3163 
3164     /*
3165      * Tricky point: the string value was copied by generic object management
3166      * code, so it doesn't contain any extra bytes that might exist in the
3167      * source object.
3168      */
3169 
3170     copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0;
3171 #else /* COMPAT!=0 */
3172     /*
3173      * If the src obj is a string of 1-byte Utf chars, then copy the string
3174      * rep of the source object and create an "empty" Unicode internal rep for
3175      * the new object. Otherwise, copy Unicode internal rep, and invalidate
3176      * the string rep of the new object.
3177      */
3178 
3179     if (srcStringPtr->hasUnicode && srcStringPtr->numChars > 0) {
3180 	/*
3181 	 * Copy the full allocation for the Unicode buffer.
3182 	 */
3183 
3184 	copyStringPtr = stringAlloc(srcStringPtr->maxChars);
3185 	copyStringPtr->maxChars = srcStringPtr->maxChars;
3186 	memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
3187 		srcStringPtr->numChars * sizeof(Tcl_UniChar));
3188 	copyStringPtr->unicode[srcStringPtr->numChars] = 0;
3189 	copyStringPtr->allocated = 0;
3190     } else {
3191 	copyStringPtr = stringAlloc(0);
3192 	copyStringPtr->unicode[0] = 0;
3193 	copyStringPtr->maxChars = 0;
3194 
3195 	/*
3196 	 * Tricky point: the string value was copied by generic object
3197 	 * management code, so it doesn't contain any extra bytes that might
3198 	 * exist in the source object.
3199 	 */
3200 
3201 	copyStringPtr->allocated = copyPtr->length;
3202     }
3203     copyStringPtr->numChars = srcStringPtr->numChars;
3204     copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
3205 #endif /* COMPAT==0 */
3206 
3207     SET_STRING(copyPtr, copyStringPtr);
3208     copyPtr->typePtr = &tclStringType;
3209 }
3210 
3211 /*
3212  *----------------------------------------------------------------------
3213  *
3214  * SetStringFromAny --
3215  *
3216  *	Create an internal representation of type "String" for an object.
3217  *
3218  * Results:
3219  *	This operation always succeeds and returns TCL_OK.
3220  *
3221  * Side effects:
3222  *	Any old internal reputation for objPtr is freed and the internal
3223  *	representation is set to "String".
3224  *
3225  *----------------------------------------------------------------------
3226  */
3227 
3228 static int
SetStringFromAny(Tcl_Interp * interp,Tcl_Obj * objPtr)3229 SetStringFromAny(
3230     Tcl_Interp *interp,		/* Used for error reporting if not NULL. */
3231     Tcl_Obj *objPtr)		/* The object to convert. */
3232 {
3233     if (objPtr->typePtr != &tclStringType) {
3234 	String *stringPtr = stringAlloc(0);
3235 
3236 	/*
3237 	 * Convert whatever we have into an untyped value. Just A String.
3238 	 */
3239 
3240 	(void) TclGetString(objPtr);
3241 	TclFreeIntRep(objPtr);
3242 
3243 	/*
3244 	 * Create a basic String internalrep that just points to the UTF-8 string
3245 	 * already in place at objPtr->bytes.
3246 	 */
3247 
3248 	stringPtr->numChars = -1;
3249 	stringPtr->allocated = objPtr->length;
3250 	stringPtr->maxChars = 0;
3251 	stringPtr->hasUnicode = 0;
3252 	SET_STRING(objPtr, stringPtr);
3253 	objPtr->typePtr = &tclStringType;
3254     }
3255     return TCL_OK;
3256 }
3257 
3258 /*
3259  *----------------------------------------------------------------------
3260  *
3261  * UpdateStringOfString --
3262  *
3263  *	Update the string representation for an object whose internal
3264  *	representation is "String".
3265  *
3266  * Results:
3267  *	None.
3268  *
3269  * Side effects:
3270  *	The object's string may be set by converting its Unicode represention
3271  *	to UTF format.
3272  *
3273  *----------------------------------------------------------------------
3274  */
3275 
3276 static void
UpdateStringOfString(Tcl_Obj * objPtr)3277 UpdateStringOfString(
3278     Tcl_Obj *objPtr)		/* Object with string rep to update. */
3279 {
3280     String *stringPtr = GET_STRING(objPtr);
3281 
3282     /*
3283      * This routine is only called when we need to generate the
3284      * string rep objPtr->bytes because it does not exist -- it is NULL.
3285      * In that circumstance, any lingering claim about the size of
3286      * memory pointed to by that NULL pointer is clearly bogus, and
3287      * needs a reset.
3288      */
3289 
3290     stringPtr->allocated = 0;
3291 
3292     if (stringPtr->numChars == 0) {
3293 	TclInitStringRep(objPtr, tclEmptyStringRep, 0);
3294     } else {
3295 	(void) ExtendStringRepWithUnicode(objPtr, stringPtr->unicode,
3296 		stringPtr->numChars);
3297     }
3298 }
3299 
3300 static int
ExtendStringRepWithUnicode(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)3301 ExtendStringRepWithUnicode(
3302     Tcl_Obj *objPtr,
3303     const Tcl_UniChar *unicode,
3304     int numChars)
3305 {
3306     /*
3307      * Pre-condition: this is the "string" Tcl_ObjType.
3308      */
3309 
3310     int i, origLength, size = 0;
3311     char *dst, buf[4] = "";
3312     String *stringPtr = GET_STRING(objPtr);
3313 
3314     if (numChars < 0) {
3315 	numChars = UnicodeLength(unicode);
3316     }
3317 
3318     if (numChars == 0) {
3319 	return 0;
3320     }
3321 
3322     if (objPtr->bytes == NULL) {
3323 	objPtr->length = 0;
3324     }
3325     size = origLength = objPtr->length;
3326 
3327     /*
3328      * Quick cheap check in case we have more than enough room.
3329      */
3330 
3331     if (numChars <= (INT_MAX - size)/TCL_UTF_MAX
3332 	    && stringPtr->allocated >= size + numChars * TCL_UTF_MAX) {
3333 	goto copyBytes;
3334     }
3335 
3336     for (i = 0; i < numChars && size >= 0; i++) {
3337 	size += Tcl_UniCharToUtf((int) unicode[i], buf);
3338     }
3339     if (size < 0) {
3340 	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
3341     }
3342 
3343     /*
3344      * Grow space if needed.
3345      */
3346 
3347     if (size > stringPtr->allocated) {
3348 	GrowStringBuffer(objPtr, size, 1);
3349     }
3350 
3351   copyBytes:
3352     dst = objPtr->bytes + origLength;
3353     for (i = 0; i < numChars; i++) {
3354 	dst += Tcl_UniCharToUtf(unicode[i], dst);
3355     }
3356     *dst = '\0';
3357     objPtr->length = dst - objPtr->bytes;
3358     return numChars;
3359 }
3360 
3361 /*
3362  *----------------------------------------------------------------------
3363  *
3364  * FreeStringInternalRep --
3365  *
3366  *	Deallocate the storage associated with a String data object's internal
3367  *	representation.
3368  *
3369  * Results:
3370  *	None.
3371  *
3372  * Side effects:
3373  *	Frees memory.
3374  *
3375  *----------------------------------------------------------------------
3376  */
3377 
3378 static void
FreeStringInternalRep(Tcl_Obj * objPtr)3379 FreeStringInternalRep(
3380     Tcl_Obj *objPtr)		/* Object with internal rep to free. */
3381 {
3382     ckfree(GET_STRING(objPtr));
3383     objPtr->typePtr = NULL;
3384 }
3385 
3386 /*
3387  * Local Variables:
3388  * mode: c
3389  * c-basic-offset: 4
3390  * fill-column: 78
3391  * End:
3392  */
3393