1 /*
2  * tclStringObj.c --
3  *
4  *	This file contains functions that implement string operations on Tcl
5  *	objects. Some string operations work with UTF strings and others
6  *	require Unicode format. Functions that require knowledge of the width
7  *	of each character, such as indexing, operate on Unicode data.
8  *
9  *	A Unicode string is an internationalized string. Conceptually, a
10  *	Unicode string is an array of 16-bit quantities organized as a
11  *	sequence of properly formed UTF-8 characters. There is a one-to-one
12  *	map between Unicode and UTF characters. Because Unicode characters
13  *	have a fixed width, operations such as indexing operate on Unicode
14  *	data. The String object is optimized for the case where each UTF char
15  *	in a string is only one byte. In this case, we store the value of
16  *	numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
17  *	is explicitly called).
18  *
19  *	The String object type stores one or both formats. The default
20  *	behavior is to store UTF. Once Unicode is calculated by a function, it
21  *	is stored in the internal rep for future access (without an additional
22  *	O(n) cost).
23  *
24  *	To allow many appends to be done to an object without constantly
25  *	reallocating the space for the string or Unicode representation, we
26  *	allocate double the space for the string or Unicode and use the
27  *	internal representation to keep track of how much space is used vs.
28  *	allocated.
29  *
30  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
31  * Copyright (c) 1999 by Scriptics Corporation.
32  *
33  * See the file "license.terms" for information on usage and redistribution of
34  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
35  */
36 
37 #include "tclInt.h"
38 #include "tommath.h"
39 
40 /*
41  * Prototypes for functions defined later in this file:
42  */
43 
44 static void		AppendPrintfToObjVA(Tcl_Obj *objPtr,
45 			    const char *format, va_list argList);
46 static void		AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr,
47 			    const Tcl_UniChar *unicode, int appendNumChars);
48 static void		AppendUnicodeToUtfRep(Tcl_Obj *objPtr,
49 			    const Tcl_UniChar *unicode, int numChars);
50 static void		AppendUtfToUnicodeRep(Tcl_Obj *objPtr,
51 			    const char *bytes, int numBytes);
52 static void		AppendUtfToUtfRep(Tcl_Obj *objPtr,
53 			    const char *bytes, int numBytes);
54 static void		DupStringInternalRep(Tcl_Obj *objPtr,
55 			    Tcl_Obj *copyPtr);
56 static void		FillUnicodeRep(Tcl_Obj *objPtr);
57 static void		FreeStringInternalRep(Tcl_Obj *objPtr);
58 static void		GrowUnicodeBuffer(Tcl_Obj *objPtr, int needed);
59 static int		SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
60 static void		SetUnicodeObj(Tcl_Obj *objPtr,
61 			    const Tcl_UniChar *unicode, int numChars);
62 static int		UnicodeLength(const Tcl_UniChar *unicode);
63 static void		UpdateStringOfString(Tcl_Obj *objPtr);
64 
65 /*
66  * The structure below defines the string Tcl object type by means of
67  * functions that can be invoked by generic object code.
68  */
69 
70 Tcl_ObjType tclStringType = {
71     "string",			/* name */
72     FreeStringInternalRep,	/* freeIntRepPro */
73     DupStringInternalRep,	/* dupIntRepProc */
74     UpdateStringOfString,	/* updateStringProc */
75     SetStringFromAny		/* setFromAnyProc */
76 };
77 
78 /*
79  * The following structure is the internal rep for a String object. It keeps
80  * track of how much memory has been used and how much has been allocated for
81  * the Unicode and UTF string to enable growing and shrinking of the UTF and
82  * Unicode reps of the String object with fewer mallocs. To optimize string
83  * length and indexing operations, this structure also stores the number of
84  * characters (same of UTF and Unicode!) once that value has been computed.
85  *
86  * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
87  * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
88  * can be officially modified by altering the definition of Tcl_UniChar in
89  * tcl.h, but do not do that unless you are sure what you're doing!
90  */
91 
92 typedef struct String {
93     int numChars;		/* The number of chars in the string. -1 means
94 				 * this value has not been calculated. >= 0
95 				 * means that there is a valid Unicode rep, or
96 				 * that the number of UTF bytes == the number
97 				 * of chars. */
98     size_t allocated;		/* The amount of space actually allocated for
99 				 * the UTF string (minus 1 byte for the
100 				 * termination char). */
101     size_t uallocated;		/* The amount of space actually allocated for
102 				 * the Unicode string (minus 2 bytes for the
103 				 * termination char). */
104     int hasUnicode;		/* Boolean determining whether the string has
105 				 * a Unicode representation. */
106     Tcl_UniChar unicode[2];	/* The array of Unicode chars. The actual size
107 				 * of this field depends on the 'uallocated'
108 				 * field above. */
109 } String;
110 
111 #define STRING_MAXCHARS \
112 	(1 + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)))
113 #define STRING_UALLOC(numChars)	\
114 	((numChars) * sizeof(Tcl_UniChar))
115 #define STRING_SIZE(ualloc) \
116     ((unsigned) ((ualloc) \
117 	? (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)) \
118 	: sizeof(String)))
119 #define stringCheckLimits(numChars) \
120     if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
121 	Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
122 		STRING_MAXCHARS); \
123     }
124 #define stringRealloc(ptr, numChars) \
125 	(String *) ckrealloc((char *) ptr, \
126 		(unsigned) STRING_SIZE(STRING_UALLOC(numChars)) )
127 #define stringAttemptRealloc(ptr, numChars) \
128 	(String *) attemptckrealloc((char *) ptr, \
129 		(unsigned) STRING_SIZE(STRING_UALLOC(numChars)) )
130 #define GET_STRING(objPtr) \
131 	((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
132 #define SET_STRING(objPtr, stringPtr) \
133 	((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
134 
135 /*
136  * TCL STRING GROWTH ALGORITHM
137  *
138  * When growing strings (during an append, for example), the following growth
139  * algorithm is used:
140  *
141  *   Attempt to allocate 2 * (originalLength + appendLength)
142  *   On failure:
143  *	attempt to allocate originalLength + 2*appendLength +
144  *			TCL_GROWTH_MIN_ALLOC
145  *
146  * This algorithm allows very good performance, as it rapidly increases the
147  * memory allocated for a given string, which minimizes the number of
148  * reallocations that must be performed. However, using only the doubling
149  * algorithm can lead to a significant waste of memory. In particular, it may
150  * fail even when there is sufficient memory available to complete the append
151  * request (but there is not 2*totalLength memory available). So when the
152  * doubling fails (because there is not enough memory available), the
153  * algorithm requests a smaller amount of memory, which is still enough to
154  * cover the request, but which hopefully will be less than the total
155  * available memory.
156  *
157  * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very
158  * small appends. Without this extra slush factor, a sequence of several small
159  * appends would cause several memory allocations. As long as
160  * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior.
161  *
162  * The growth algorithm can be tuned by adjusting the following parameters:
163  *
164  * TCL_GROWTH_MIN_ALLOC		Additional space, in bytes, to allocate when
165  *				the double allocation has failed. Default is
166  *				1024 (1 kilobyte).
167  */
168 
169 #ifndef TCL_GROWTH_MIN_ALLOC
170 #define TCL_GROWTH_MIN_ALLOC	1024
171 #endif
172 
173 static void
GrowUnicodeBuffer(Tcl_Obj * objPtr,int needed)174 GrowUnicodeBuffer(
175     Tcl_Obj *objPtr,
176     int needed)
177 {
178     /* Pre-conditions:
179      *  objPtr->typePtr == &tclStringType
180      *  STRING_UALLOC(needed) > stringPtr->uallocated
181      *  needed < STRING_MAXCHARS
182      */
183     String *ptr = NULL, *stringPtr = GET_STRING(objPtr);
184     int attempt;
185 
186     if (stringPtr->uallocated > 0) {
187 	/* Subsequent appends - apply the growth algorithm. */
188 	attempt = 2 * needed;
189 	if (attempt >= 0 && attempt <= STRING_MAXCHARS) {
190 	    ptr = stringAttemptRealloc(stringPtr, attempt);
191 	}
192 	if (ptr == NULL) {
193 	    /*
194 	     * Take care computing the amount of modest growth to avoid
195 	     * overflow into invalid argument values for attempt.
196 	     */
197 	    unsigned int limit = STRING_MAXCHARS - needed;
198 	    unsigned int extra = needed - stringPtr->numChars
199 		    + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar);
200 	    int growth = (int) ((extra > limit) ? limit : extra);
201 	    attempt = needed + growth;
202 	    ptr = stringAttemptRealloc(stringPtr, attempt);
203 	}
204     }
205     if (ptr == NULL) {
206 	/* First allocation - just big enough; or last chance fallback. */
207 	attempt = needed;
208 	ptr = stringRealloc(stringPtr, attempt);
209     }
210     stringPtr = ptr;
211     stringPtr->uallocated = STRING_UALLOC(attempt);
212     SET_STRING(objPtr, stringPtr);
213 }
214 
215 
216 /*
217  *----------------------------------------------------------------------
218  *
219  * Tcl_NewStringObj --
220  *
221  *	This function is normally called when not debugging: i.e., when
222  *	TCL_MEM_DEBUG is not defined. It creates a new string object and
223  *	initializes it from the byte pointer and length arguments.
224  *
225  *	When TCL_MEM_DEBUG is defined, this function just returns the result
226  *	of calling the debugging version Tcl_DbNewStringObj.
227  *
228  * Results:
229  *	A newly created string object is returned that has ref count zero.
230  *
231  * Side effects:
232  *	The new object's internal string representation will be set to a copy
233  *	of the length bytes starting at "bytes". If "length" is negative, use
234  *	bytes up to the first NUL byte; i.e., assume "bytes" points to a
235  *	C-style NUL-terminated string. The object's type is set to NULL. An
236  *	extra NUL is added to the end of the new object's byte array.
237  *
238  *----------------------------------------------------------------------
239  */
240 
241 #ifdef TCL_MEM_DEBUG
242 #undef Tcl_NewStringObj
243 Tcl_Obj *
Tcl_NewStringObj(const char * bytes,int length)244 Tcl_NewStringObj(
245     const char *bytes,		/* Points to the first of the length bytes
246 				 * used to initialize the new object. */
247     int length)			/* The number of bytes to copy from "bytes"
248 				 * when initializing the new object. If
249 				 * negative, use bytes up to the first NUL
250 				 * byte. */
251 {
252     return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
253 }
254 #else /* if not TCL_MEM_DEBUG */
255 Tcl_Obj *
Tcl_NewStringObj(const char * bytes,int length)256 Tcl_NewStringObj(
257     const char *bytes,		/* Points to the first of the length bytes
258 				 * used to initialize the new object. */
259     int length)			/* The number of bytes to copy from "bytes"
260 				 * when initializing the new object. If
261 				 * negative, use bytes up to the first NUL
262 				 * byte. */
263 {
264     register Tcl_Obj *objPtr;
265 
266     if (length < 0) {
267 	length = (bytes? strlen(bytes) : 0);
268     }
269     TclNewStringObj(objPtr, bytes, length);
270     return objPtr;
271 }
272 #endif /* TCL_MEM_DEBUG */
273 
274 /*
275  *----------------------------------------------------------------------
276  *
277  * Tcl_DbNewStringObj --
278  *
279  *	This function is normally called when debugging: i.e., when
280  *	TCL_MEM_DEBUG is defined. It creates new string objects. It is the
281  *	same as the Tcl_NewStringObj function above except that it calls
282  *	Tcl_DbCkalloc directly with the file name and line number from its
283  *	caller. This simplifies debugging since then the [memory active]
284  *	command will report the correct file name and line number when
285  *	reporting objects that haven't been freed.
286  *
287  *	When TCL_MEM_DEBUG is not defined, this function just returns the
288  *	result of calling Tcl_NewStringObj.
289  *
290  * Results:
291  *	A newly created string object is returned that has ref count zero.
292  *
293  * Side effects:
294  *	The new object's internal string representation will be set to a copy
295  *	of the length bytes starting at "bytes". If "length" is negative, use
296  *	bytes up to the first NUL byte; i.e., assume "bytes" points to a
297  *	C-style NUL-terminated string. The object's type is set to NULL. An
298  *	extra NUL is added to the end of the new object's byte array.
299  *
300  *----------------------------------------------------------------------
301  */
302 
303 #ifdef TCL_MEM_DEBUG
304 Tcl_Obj *
Tcl_DbNewStringObj(const char * bytes,int length,const char * file,int line)305 Tcl_DbNewStringObj(
306     const char *bytes,		/* Points to the first of the length bytes
307 				 * used to initialize the new object. */
308     int length,			/* The number of bytes to copy from "bytes"
309 				 * when initializing the new object. If
310 				 * negative, use bytes up to the first NUL
311 				 * byte. */
312     const char *file,		/* The name of the source file calling this
313 				 * function; used for debugging. */
314     int line)			/* Line number in the source file; used for
315 				 * debugging. */
316 {
317     register Tcl_Obj *objPtr;
318 
319     if (length < 0) {
320 	length = (bytes? strlen(bytes) : 0);
321     }
322     TclDbNewObj(objPtr, file, line);
323     TclInitStringRep(objPtr, bytes, length);
324     return objPtr;
325 }
326 #else /* if not TCL_MEM_DEBUG */
327 Tcl_Obj *
Tcl_DbNewStringObj(const char * bytes,register int length,const char * file,int line)328 Tcl_DbNewStringObj(
329     const char *bytes,		/* Points to the first of the length bytes
330 				 * used to initialize the new object. */
331     register int length,	/* The number of bytes to copy from "bytes"
332 				 * when initializing the new object. If
333 				 * negative, use bytes up to the first NUL
334 				 * byte. */
335     const char *file,		/* The name of the source file calling this
336 				 * function; used for debugging. */
337     int line)			/* Line number in the source file; used for
338 				 * debugging. */
339 {
340     return Tcl_NewStringObj(bytes, length);
341 }
342 #endif /* TCL_MEM_DEBUG */
343 
344 /*
345  *---------------------------------------------------------------------------
346  *
347  * Tcl_NewUnicodeObj --
348  *
349  *	This function is creates a new String object and initializes it from
350  *	the given Unicode String. If the Utf String is the same size as the
351  *	Unicode string, don't duplicate the data.
352  *
353  * Results:
354  *	The newly created object is returned. This object will have no initial
355  *	string representation. The returned object has a ref count of 0.
356  *
357  * Side effects:
358  *	Memory allocated for new object and copy of Unicode argument.
359  *
360  *---------------------------------------------------------------------------
361  */
362 
363 Tcl_Obj *
Tcl_NewUnicodeObj(const Tcl_UniChar * unicode,int numChars)364 Tcl_NewUnicodeObj(
365     const Tcl_UniChar *unicode,	/* The unicode string used to initialize the
366 				 * new object. */
367     int numChars)		/* Number of characters in the unicode
368 				 * string. */
369 {
370     Tcl_Obj *objPtr;
371 
372     TclNewObj(objPtr);
373     SetUnicodeObj(objPtr, unicode, numChars);
374     return objPtr;
375 }
376 
377 /*
378  *----------------------------------------------------------------------
379  *
380  * Tcl_GetCharLength --
381  *
382  *	Get the length of the Unicode string from the Tcl object.
383  *
384  * Results:
385  *	Pointer to unicode string representing the unicode object.
386  *
387  * Side effects:
388  *	Frees old internal rep. Allocates memory for new "String" internal
389  *	rep.
390  *
391  *----------------------------------------------------------------------
392  */
393 
394 int
Tcl_GetCharLength(Tcl_Obj * objPtr)395 Tcl_GetCharLength(
396     Tcl_Obj *objPtr)		/* The String object to get the num chars
397 				 * of. */
398 {
399     String *stringPtr;
400 
401     SetStringFromAny(NULL, objPtr);
402     stringPtr = GET_STRING(objPtr);
403 
404     /*
405      * If numChars is unknown, then calculate the number of characaters while
406      * populating the Unicode string.
407      */
408 
409     if (stringPtr->numChars == -1) {
410 	register int i = objPtr->length;
411 	register unsigned char *str = (unsigned char *) objPtr->bytes;
412 
413 	/*
414 	 * This is a speed sensitive function, so run specially over the
415 	 * string to count continuous ascii characters before resorting to the
416 	 * Tcl_NumUtfChars call. This is a long form of:
417 	 stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes,objPtr->length);
418 	 *
419 	 * TODO: Consider macro-izing this.
420 	 */
421 
422 	while (i && (*str < 0xC0)) {
423 	    i--;
424 	    str++;
425 	}
426 	stringPtr->numChars = objPtr->length - i;
427 	if (i) {
428 	    stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes
429 		    + (objPtr->length - i), i);
430 	}
431 
432 	if (stringPtr->numChars == objPtr->length) {
433 	    /*
434 	     * Since we've just calculated the number of chars, and all UTF
435 	     * chars are 1-byte long, we don't need to store the unicode
436 	     * string.
437 	     */
438 
439 	    stringPtr->hasUnicode = 0;
440 	} else {
441 	    /*
442 	     * Since we've just calucalated the number of chars, and not all
443 	     * UTF chars are 1-byte long, go ahead and populate the unicode
444 	     * string.
445 	     */
446 
447 	    FillUnicodeRep(objPtr);
448 
449 	    /*
450 	     * We need to fetch the pointer again because we have just
451 	     * reallocated the structure to make room for the Unicode data.
452 	     */
453 
454 	    stringPtr = GET_STRING(objPtr);
455 	}
456     }
457     return stringPtr->numChars;
458 }
459 
460 /*
461  *----------------------------------------------------------------------
462  *
463  * Tcl_GetUniChar --
464  *
465  *	Get the index'th Unicode character from the String object. The index
466  *	is assumed to be in the appropriate range.
467  *
468  * Results:
469  *	Returns the index'th Unicode character in the Object.
470  *
471  * Side effects:
472  *	Fills unichar with the index'th Unicode character.
473  *
474  *----------------------------------------------------------------------
475  */
476 
477 Tcl_UniChar
Tcl_GetUniChar(Tcl_Obj * objPtr,int index)478 Tcl_GetUniChar(
479     Tcl_Obj *objPtr,		/* The object to get the Unicode charater
480 				 * from. */
481     int index)			/* Get the index'th Unicode character. */
482 {
483     Tcl_UniChar unichar;
484     String *stringPtr;
485 
486     SetStringFromAny(NULL, objPtr);
487     stringPtr = GET_STRING(objPtr);
488 
489     if (stringPtr->numChars == -1) {
490 	/*
491 	 * We haven't yet calculated the length, so we don't have the Unicode
492 	 * str. We need to know the number of chars before we can do indexing.
493 	 */
494 
495 	Tcl_GetCharLength(objPtr);
496 
497 	/*
498 	 * We need to fetch the pointer again because we may have just
499 	 * reallocated the structure.
500 	 */
501 
502 	stringPtr = GET_STRING(objPtr);
503     }
504     if (stringPtr->hasUnicode == 0) {
505 	/*
506 	 * All of the characters in the Utf string are 1 byte chars, so we
507 	 * don't store the unicode char. We get the Utf string and convert the
508 	 * index'th byte to a Unicode character.
509 	 */
510 
511 	unichar = (Tcl_UniChar) objPtr->bytes[index];
512     } else {
513 	unichar = stringPtr->unicode[index];
514     }
515     return unichar;
516 }
517 
518 /*
519  *----------------------------------------------------------------------
520  *
521  * Tcl_GetUnicode --
522  *
523  *	Get the Unicode form of the String object. If the object is not
524  *	already a String object, it will be converted to one. If the String
525  *	object does not have a Unicode rep, then one is create from the UTF
526  *	string format.
527  *
528  * Results:
529  *	Returns a pointer to the object's internal Unicode string.
530  *
531  * Side effects:
532  *	Converts the object to have the String internal rep.
533  *
534  *----------------------------------------------------------------------
535  */
536 
537 Tcl_UniChar *
Tcl_GetUnicode(Tcl_Obj * objPtr)538 Tcl_GetUnicode(
539     Tcl_Obj *objPtr)		/* The object to find the unicode string
540 				 * for. */
541 {
542     String *stringPtr;
543 
544     SetStringFromAny(NULL, objPtr);
545     stringPtr = GET_STRING(objPtr);
546 
547     if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
548 	/*
549 	 * We haven't yet calculated the length, or all of the characters in
550 	 * the Utf string are 1 byte chars (so we didn't store the unicode
551 	 * str). Since this function must return a unicode string, and one has
552 	 * not yet been stored, force the Unicode to be calculated and stored
553 	 * now.
554 	 */
555 
556 	FillUnicodeRep(objPtr);
557 
558 	/*
559 	 * We need to fetch the pointer again because we have just reallocated
560 	 * the structure to make room for the Unicode data.
561 	 */
562 
563 	stringPtr = GET_STRING(objPtr);
564     }
565     return stringPtr->unicode;
566 }
567 
568 /*
569  *----------------------------------------------------------------------
570  *
571  * Tcl_GetUnicodeFromObj --
572  *
573  *	Get the Unicode form of the String object with length. If the object
574  *	is not already a String object, it will be converted to one. If the
575  *	String object does not have a Unicode rep, then one is create from the
576  *	UTF string format.
577  *
578  * Results:
579  *	Returns a pointer to the object's internal Unicode string.
580  *
581  * Side effects:
582  *	Converts the object to have the String internal rep.
583  *
584  *----------------------------------------------------------------------
585  */
586 
587 Tcl_UniChar *
Tcl_GetUnicodeFromObj(Tcl_Obj * objPtr,int * lengthPtr)588 Tcl_GetUnicodeFromObj(
589     Tcl_Obj *objPtr,		/* The object to find the unicode string
590 				 * for. */
591     int *lengthPtr)		/* If non-NULL, the location where the string
592 				 * rep's unichar length should be stored. If
593 				 * NULL, no length is stored. */
594 {
595     String *stringPtr;
596 
597     SetStringFromAny(NULL, objPtr);
598     stringPtr = GET_STRING(objPtr);
599 
600     if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
601 	/*
602 	 * We haven't yet calculated the length, or all of the characters in
603 	 * the Utf string are 1 byte chars (so we didn't store the unicode
604 	 * str). Since this function must return a unicode string, and one has
605 	 * not yet been stored, force the Unicode to be calculated and stored
606 	 * now.
607 	 */
608 
609 	FillUnicodeRep(objPtr);
610 
611 	/*
612 	 * We need to fetch the pointer again because we have just reallocated
613 	 * the structure to make room for the Unicode data.
614 	 */
615 
616 	stringPtr = GET_STRING(objPtr);
617     }
618 
619     if (lengthPtr != NULL) {
620 	*lengthPtr = stringPtr->numChars;
621     }
622     return stringPtr->unicode;
623 }
624 
625 /*
626  *----------------------------------------------------------------------
627  *
628  * Tcl_GetRange --
629  *
630  *	Create a Tcl Object that contains the chars between first and last of
631  *	the object indicated by "objPtr". If the object is not already a
632  *	String object, convert it to one. The first and last indices are
633  *	assumed to be in the appropriate range.
634  *
635  * Results:
636  *	Returns a new Tcl Object of the String type.
637  *
638  * Side effects:
639  *	Changes the internal rep of "objPtr" to the String type.
640  *
641  *----------------------------------------------------------------------
642  */
643 
644 Tcl_Obj *
Tcl_GetRange(Tcl_Obj * objPtr,int first,int last)645 Tcl_GetRange(
646     Tcl_Obj *objPtr,		/* The Tcl object to find the range of. */
647     int first,			/* First index of the range. */
648     int last)			/* Last index of the range. */
649 {
650     Tcl_Obj *newObjPtr;		/* The Tcl object to find the range of. */
651     String *stringPtr;
652 
653     SetStringFromAny(NULL, objPtr);
654     stringPtr = GET_STRING(objPtr);
655 
656     if (stringPtr->numChars == -1) {
657 	/*
658 	 * We haven't yet calculated the length, so we don't have the Unicode
659 	 * str. We need to know the number of chars before we can do indexing.
660 	 */
661 
662 	Tcl_GetCharLength(objPtr);
663 
664 	/*
665 	 * We need to fetch the pointer again because we may have just
666 	 * reallocated the structure.
667 	 */
668 
669 	stringPtr = GET_STRING(objPtr);
670     }
671 
672     if (objPtr->bytes && (stringPtr->numChars == objPtr->length)) {
673 	char *str = TclGetString(objPtr);
674 
675 	/*
676 	 * All of the characters in the Utf string are 1 byte chars, so we
677 	 * don't store the unicode char. Create a new string object containing
678 	 * the specified range of chars.
679 	 */
680 
681 	newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
682 
683 	/*
684 	 * Since we know the new string only has 1-byte chars, we can set it's
685 	 * numChars field.
686 	 */
687 
688 	SetStringFromAny(NULL, newObjPtr);
689 	stringPtr = GET_STRING(newObjPtr);
690 	stringPtr->numChars = last-first+1;
691     } else {
692 	newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first,
693 		last-first+1);
694     }
695     return newObjPtr;
696 }
697 
698 /*
699  *----------------------------------------------------------------------
700  *
701  * Tcl_SetStringObj --
702  *
703  *	Modify an object to hold a string that is a copy of the bytes
704  *	indicated by the byte pointer and length arguments.
705  *
706  * Results:
707  *	None.
708  *
709  * Side effects:
710  *	The object's string representation will be set to a copy of the
711  *	"length" bytes starting at "bytes". If "length" is negative, use bytes
712  *	up to the first NUL byte; i.e., assume "bytes" points to a C-style
713  *	NUL-terminated string. The object's old string and internal
714  *	representations are freed and the object's type is set NULL.
715  *
716  *----------------------------------------------------------------------
717  */
718 
719 void
Tcl_SetStringObj(register Tcl_Obj * objPtr,const char * bytes,register int length)720 Tcl_SetStringObj(
721     register Tcl_Obj *objPtr,	/* Object whose internal rep to init. */
722     const char *bytes,		/* Points to the first of the length bytes
723 				 * used to initialize the object. */
724     register int length)	/* The number of bytes to copy from "bytes"
725 				 * when initializing the object. If negative,
726 				 * use bytes up to the first NUL byte.*/
727 {
728     if (Tcl_IsShared(objPtr)) {
729 	Tcl_Panic("%s called with shared object", "Tcl_SetStringObj");
730     }
731 
732     /*
733      * Set the type to NULL and free any internal rep for the old type.
734      */
735 
736     TclFreeIntRep(objPtr);
737     objPtr->typePtr = NULL;
738 
739     /*
740      * Free any old string rep, then set the string rep to a copy of the
741      * length bytes starting at "bytes".
742      */
743 
744     TclInvalidateStringRep(objPtr);
745     if (length < 0) {
746 	length = (bytes? strlen(bytes) : 0);
747     }
748     TclInitStringRep(objPtr, bytes, length);
749 }
750 
751 /*
752  *----------------------------------------------------------------------
753  *
754  * Tcl_SetObjLength --
755  *
756  *	This function changes the length of the string representation of an
757  *	object.
758  *
759  * Results:
760  *	None.
761  *
762  * Side effects:
763  *	If the size of objPtr's string representation is greater than length,
764  *	then it is reduced to length and a new terminating null byte is stored
765  *	in the strength. If the length of the string representation is greater
766  *	than length, the storage space is reallocated to the given length; a
767  *	null byte is stored at the end, but other bytes past the end of the
768  *	original string representation are undefined. The object's internal
769  *	representation is changed to "expendable string".
770  *
771  *----------------------------------------------------------------------
772  */
773 
774 void
Tcl_SetObjLength(register Tcl_Obj * objPtr,register int length)775 Tcl_SetObjLength(
776     register Tcl_Obj *objPtr,	/* Pointer to object. This object must not
777 				 * currently be shared. */
778     register int length)	/* Number of bytes desired for string
779 				 * representation of object, not including
780 				 * terminating null byte. */
781 {
782     String *stringPtr;
783 
784     if (length < 0) {
785 	/*
786 	 * Setting to a negative length is nonsense.  This is probably the
787 	 * result of overflowing the signed integer range.
788 	 */
789 	Tcl_Panic("Tcl_SetObjLength: negative length requested: "
790 		"%d (integer overflow?)", length);
791     }
792     if (Tcl_IsShared(objPtr)) {
793 	Tcl_Panic("%s called with shared object", "Tcl_SetObjLength");
794     }
795     SetStringFromAny(NULL, objPtr);
796 
797     stringPtr = GET_STRING(objPtr);
798 
799     /*
800      * Check that we're not extending a pure unicode string.
801      */
802 
803     if ((size_t)length > stringPtr->allocated &&
804 	    (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
805 	/*
806 	 * Not enough space in current string. Reallocate the string space and
807 	 * free the old string.
808 	 */
809 
810 	if (objPtr->bytes != tclEmptyStringRep) {
811 	    objPtr->bytes = ckrealloc((char *) objPtr->bytes,
812 		    (unsigned) (length + 1));
813 	} else {
814 	    char *newBytes = ckalloc((unsigned) (length+1));
815 
816 	    if (objPtr->bytes != NULL && objPtr->length != 0) {
817 		memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length);
818 		TclInvalidateStringRep(objPtr);
819 	    }
820 	    objPtr->bytes = newBytes;
821 	}
822 	stringPtr->allocated = length;
823 
824 	/*
825 	 * Invalidate the unicode data.
826 	 */
827 
828 	stringPtr->hasUnicode = 0;
829     }
830 
831     if (objPtr->bytes != NULL) {
832 	objPtr->length = length;
833 	if (objPtr->bytes != tclEmptyStringRep) {
834 	    /*
835 	     * Ensure the string is NUL-terminated.
836 	     */
837 
838 	    objPtr->bytes[length] = 0;
839 	}
840 
841 	/*
842 	 * Invalidate the unicode data.
843 	 */
844 
845 	stringPtr->numChars = -1;
846 	stringPtr->hasUnicode = 0;
847     } else {
848 	/*
849 	 * Changing length of pure unicode string.
850 	 */
851 
852 	size_t uallocated = STRING_UALLOC(length);
853 
854 	stringCheckLimits(length);
855 	if (uallocated > stringPtr->uallocated) {
856 	    stringPtr = stringRealloc(stringPtr, length);
857 	    SET_STRING(objPtr, stringPtr);
858 	    stringPtr->uallocated = uallocated;
859 	}
860 	stringPtr->numChars = length;
861 	stringPtr->hasUnicode = (length > 0);
862 
863 	/*
864 	 * Ensure the string is NUL-terminated.
865 	 */
866 
867 	stringPtr->unicode[length] = 0;
868 	stringPtr->allocated = 0;
869 	objPtr->length = 0;
870     }
871 }
872 
873 /*
874  *----------------------------------------------------------------------
875  *
876  * Tcl_AttemptSetObjLength --
877  *
878  *	This function changes the length of the string representation of an
879  *	object. It uses the attempt* (non-panic'ing) memory allocators.
880  *
881  * Results:
882  *	1 if the requested memory was allocated, 0 otherwise.
883  *
884  * Side effects:
885  *	If the size of objPtr's string representation is greater than length,
886  *	then it is reduced to length and a new terminating null byte is stored
887  *	in the strength. If the length of the string representation is greater
888  *	than length, the storage space is reallocated to the given length; a
889  *	null byte is stored at the end, but other bytes past the end of the
890  *	original string representation are undefined. The object's internal
891  *	representation is changed to "expendable string".
892  *
893  *----------------------------------------------------------------------
894  */
895 
896 int
Tcl_AttemptSetObjLength(register Tcl_Obj * objPtr,register int length)897 Tcl_AttemptSetObjLength(
898     register Tcl_Obj *objPtr,	/* Pointer to object. This object must not
899 				 * currently be shared. */
900     register int length)	/* Number of bytes desired for string
901 				 * representation of object, not including
902 				 * terminating null byte. */
903 {
904     String *stringPtr;
905 
906     if (length < 0) {
907 	/*
908 	 * Setting to a negative length is nonsense.  This is probably the
909 	 * result of overflowing the signed integer range.
910 	 */
911 	return 0;
912     }
913     if (Tcl_IsShared(objPtr)) {
914 	Tcl_Panic("%s called with shared object", "Tcl_AttemptSetObjLength");
915     }
916     SetStringFromAny(NULL, objPtr);
917 
918     stringPtr = GET_STRING(objPtr);
919 
920     /*
921      * Check that we're not extending a pure unicode string.
922      */
923 
924     if (length > (int) stringPtr->allocated &&
925 	    (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
926 	char *newBytes;
927 
928 	/*
929 	 * Not enough space in current string. Reallocate the string space and
930 	 * free the old string.
931 	 */
932 
933 	if (objPtr->bytes != tclEmptyStringRep) {
934 	    newBytes = attemptckrealloc(objPtr->bytes,
935 		    (unsigned)(length + 1));
936 	    if (newBytes == NULL) {
937 		return 0;
938 	    }
939 	} else {
940 	    newBytes = attemptckalloc((unsigned) (length + 1));
941 	    if (newBytes == NULL) {
942 		return 0;
943 	    }
944 	    if (objPtr->bytes != NULL && objPtr->length != 0) {
945 		memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length);
946 		TclInvalidateStringRep(objPtr);
947 	    }
948 	}
949 	objPtr->bytes = newBytes;
950 	stringPtr->allocated = length;
951 
952 	/*
953 	 * Invalidate the unicode data.
954 	 */
955 
956 	stringPtr->hasUnicode = 0;
957     }
958 
959     if (objPtr->bytes != NULL) {
960 	objPtr->length = length;
961 	if (objPtr->bytes != tclEmptyStringRep) {
962 	    /*
963 	     * Ensure the string is NULL-terminated.
964 	     */
965 
966 	    objPtr->bytes[length] = 0;
967 	}
968 
969 	/*
970 	 * Invalidate the unicode data.
971 	 */
972 
973 	stringPtr->numChars = -1;
974 	stringPtr->hasUnicode = 0;
975     } else {
976 	/*
977 	 * Changing length of pure unicode string.
978 	 */
979 
980 	size_t uallocated = STRING_UALLOC(length);
981 	if (length > STRING_MAXCHARS) {
982 	    return 0;
983 	}
984 
985 	if (uallocated > stringPtr->uallocated) {
986 	    stringPtr = stringAttemptRealloc(stringPtr, length);
987 	    if (stringPtr == NULL) {
988 		return 0;
989 	    }
990 	    SET_STRING(objPtr, stringPtr);
991 	    stringPtr->uallocated = uallocated;
992 	}
993 	stringPtr->numChars = length;
994 	stringPtr->hasUnicode = (length > 0);
995 
996 	/*
997 	 * Ensure the string is NUL-terminated.
998 	 */
999 
1000 	stringPtr->unicode[length] = 0;
1001 	stringPtr->allocated = 0;
1002 	objPtr->length = 0;
1003     }
1004     return 1;
1005 }
1006 
1007 /*
1008  *---------------------------------------------------------------------------
1009  *
1010  * Tcl_SetUnicodeObj --
1011  *
1012  *	Modify an object to hold the Unicode string indicated by "unicode".
1013  *
1014  * Results:
1015  *	None.
1016  *
1017  * Side effects:
1018  *	Memory allocated for new "String" internal rep.
1019  *
1020  *---------------------------------------------------------------------------
1021  */
1022 
1023 void
Tcl_SetUnicodeObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1024 Tcl_SetUnicodeObj(
1025     Tcl_Obj *objPtr,		/* The object to set the string of. */
1026     const Tcl_UniChar *unicode,	/* The unicode string used to initialize the
1027 				 * object. */
1028     int numChars)		/* Number of characters in the unicode
1029 				 * string. */
1030 {
1031     if (Tcl_IsShared(objPtr)) {
1032 	Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj");
1033     }
1034     TclFreeIntRep(objPtr);
1035     SetUnicodeObj(objPtr, unicode, numChars);
1036 }
1037 
1038 static int
UnicodeLength(const Tcl_UniChar * unicode)1039 UnicodeLength(
1040     const Tcl_UniChar *unicode)
1041 {
1042     int numChars = 0;
1043 
1044     if (unicode) {
1045 	while (numChars >= 0 && unicode[numChars] != 0) {
1046 	    numChars++;
1047 	}
1048     }
1049     stringCheckLimits(numChars);
1050     return numChars;
1051 }
1052 
1053 static void
SetUnicodeObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1054 SetUnicodeObj(
1055     Tcl_Obj *objPtr,		/* The object to set the string of. */
1056     const Tcl_UniChar *unicode,	/* The unicode string used to initialize the
1057 				 * object. */
1058     int numChars)		/* Number of characters in the unicode
1059 				 * string. */
1060 {
1061     String *stringPtr;
1062     size_t uallocated;
1063 
1064     if (numChars < 0) {
1065 	numChars = UnicodeLength(unicode);
1066     }
1067 
1068     /*
1069      * Allocate enough space for the String structure + Unicode string.
1070      */
1071 
1072     stringCheckLimits(numChars);
1073     uallocated = STRING_UALLOC(numChars);
1074     stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
1075 
1076     stringPtr->numChars = numChars;
1077     stringPtr->uallocated = uallocated;
1078     stringPtr->hasUnicode = (numChars > 0);
1079     stringPtr->allocated = 0;
1080     memcpy(stringPtr->unicode, unicode, uallocated);
1081     stringPtr->unicode[numChars] = 0;
1082 
1083     TclInvalidateStringRep(objPtr);
1084     objPtr->typePtr = &tclStringType;
1085     SET_STRING(objPtr, stringPtr);
1086 }
1087 
1088 /*
1089  *----------------------------------------------------------------------
1090  *
1091  * Tcl_AppendLimitedToObj --
1092  *
1093  *	This function appends a limited number of bytes from a sequence of
1094  *	bytes to an object, marking any limitation with an ellipsis.
1095  *
1096  * Results:
1097  *	None.
1098  *
1099  * Side effects:
1100  *	The bytes at *bytes are appended to the string representation of
1101  *	objPtr.
1102  *
1103  *----------------------------------------------------------------------
1104  */
1105 
1106 void
Tcl_AppendLimitedToObj(register Tcl_Obj * objPtr,const char * bytes,register int length,register int limit,const char * ellipsis)1107 Tcl_AppendLimitedToObj(
1108     register Tcl_Obj *objPtr,	/* Points to the object to append to. */
1109     const char *bytes,		/* Points to the bytes to append to the
1110 				 * object. */
1111     register int length,	/* The number of bytes available to be
1112 				 * appended from "bytes". If < 0, then all
1113 				 * bytes up to a NUL byte are available. */
1114     register int limit,		/* The maximum number of bytes to append to
1115 				 * the object. */
1116     const char *ellipsis)	/* Ellipsis marker string, appended to the
1117 				 * object to indicate not all available bytes
1118 				 * at "bytes" were appended. */
1119 {
1120     String *stringPtr;
1121     int toCopy = 0;
1122 
1123     if (Tcl_IsShared(objPtr)) {
1124 	Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj");
1125     }
1126 
1127     SetStringFromAny(NULL, objPtr);
1128 
1129     if (length < 0) {
1130 	length = (bytes ? strlen(bytes) : 0);
1131     }
1132     if (length == 0) {
1133 	return;
1134     }
1135 
1136     if (length <= limit) {
1137 	toCopy = length;
1138     } else {
1139 	if (ellipsis == NULL) {
1140 	    ellipsis = "...";
1141 	}
1142 	toCopy = (bytes == NULL) ? limit
1143 		: Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
1144     }
1145 
1146     /*
1147      * If objPtr has a valid Unicode rep, then append the Unicode conversion
1148      * of "bytes" to the objPtr's Unicode rep, otherwise append "bytes" to
1149      * objPtr's string rep.
1150      */
1151 
1152     stringPtr = GET_STRING(objPtr);
1153     if (stringPtr->hasUnicode != 0) {
1154 	AppendUtfToUnicodeRep(objPtr, bytes, toCopy);
1155     } else {
1156 	AppendUtfToUtfRep(objPtr, bytes, toCopy);
1157     }
1158 
1159     if (length <= limit) {
1160 	return;
1161     }
1162 
1163     stringPtr = GET_STRING(objPtr);
1164     if (stringPtr->hasUnicode != 0) {
1165 	AppendUtfToUnicodeRep(objPtr, ellipsis, -1);
1166     } else {
1167 	AppendUtfToUtfRep(objPtr, ellipsis, -1);
1168     }
1169 }
1170 
1171 /*
1172  *----------------------------------------------------------------------
1173  *
1174  * Tcl_AppendToObj --
1175  *
1176  *	This function appends a sequence of bytes to an object.
1177  *
1178  * Results:
1179  *	None.
1180  *
1181  * Side effects:
1182  *	The bytes at *bytes are appended to the string representation of
1183  *	objPtr.
1184  *
1185  *----------------------------------------------------------------------
1186  */
1187 
1188 void
Tcl_AppendToObj(register Tcl_Obj * objPtr,const char * bytes,register int length)1189 Tcl_AppendToObj(
1190     register Tcl_Obj *objPtr,	/* Points to the object to append to. */
1191     const char *bytes,		/* Points to the bytes to append to the
1192 				 * object. */
1193     register int length)	/* The number of bytes to append from "bytes".
1194 				 * If < 0, then append all bytes up to NUL
1195 				 * byte. */
1196 {
1197     Tcl_AppendLimitedToObj(objPtr, bytes, length, INT_MAX, NULL);
1198 }
1199 
1200 /*
1201  *----------------------------------------------------------------------
1202  *
1203  * Tcl_AppendUnicodeToObj --
1204  *
1205  *	This function appends a Unicode string to an object in the most
1206  *	efficient manner possible. Length must be >= 0.
1207  *
1208  * Results:
1209  *	None.
1210  *
1211  * Side effects:
1212  *	Invalidates the string rep and creates a new Unicode string.
1213  *
1214  *----------------------------------------------------------------------
1215  */
1216 
1217 void
Tcl_AppendUnicodeToObj(register Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int length)1218 Tcl_AppendUnicodeToObj(
1219     register Tcl_Obj *objPtr,	/* Points to the object to append to. */
1220     const Tcl_UniChar *unicode,	/* The unicode string to append to the
1221 				 * object. */
1222     int length)			/* Number of chars in "unicode". */
1223 {
1224     String *stringPtr;
1225 
1226     if (Tcl_IsShared(objPtr)) {
1227 	Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
1228     }
1229 
1230     if (length == 0) {
1231 	return;
1232     }
1233 
1234     SetStringFromAny(NULL, objPtr);
1235     stringPtr = GET_STRING(objPtr);
1236 
1237     /*
1238      * If objPtr has a valid Unicode rep, then append the "unicode" to the
1239      * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to
1240      * objPtr's string rep.
1241      */
1242 
1243     if (stringPtr->hasUnicode != 0) {
1244 	AppendUnicodeToUnicodeRep(objPtr, unicode, length);
1245     } else {
1246 	AppendUnicodeToUtfRep(objPtr, unicode, length);
1247     }
1248 }
1249 
1250 /*
1251  *----------------------------------------------------------------------
1252  *
1253  * Tcl_AppendObjToObj --
1254  *
1255  *	This function appends the string rep of one object to another.
1256  *	"objPtr" cannot be a shared object.
1257  *
1258  * Results:
1259  *	None.
1260  *
1261  * Side effects:
1262  *	The string rep of appendObjPtr is appended to the string
1263  *	representation of objPtr.
1264  *
1265  *----------------------------------------------------------------------
1266  */
1267 
1268 void
Tcl_AppendObjToObj(Tcl_Obj * objPtr,Tcl_Obj * appendObjPtr)1269 Tcl_AppendObjToObj(
1270     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1271     Tcl_Obj *appendObjPtr)	/* Object to append. */
1272 {
1273     String *stringPtr;
1274     int length, numChars, allOneByteChars;
1275     char *bytes;
1276 
1277     SetStringFromAny(NULL, objPtr);
1278 
1279     /*
1280      * If objPtr has a valid Unicode rep, then get a Unicode string from
1281      * appendObjPtr and append it.
1282      */
1283 
1284     stringPtr = GET_STRING(objPtr);
1285     if (stringPtr->hasUnicode != 0) {
1286 	/*
1287 	 * If appendObjPtr is not of the "String" type, don't convert it.
1288 	 */
1289 
1290 	if (appendObjPtr->typePtr == &tclStringType) {
1291 	    stringPtr = GET_STRING(appendObjPtr);
1292 	    if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
1293 		/*
1294 		 * If appendObjPtr is a string obj with no valid Unicode rep,
1295 		 * then fill its unicode rep.
1296 		 */
1297 
1298 		FillUnicodeRep(appendObjPtr);
1299 		stringPtr = GET_STRING(appendObjPtr);
1300 	    }
1301 	    AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode,
1302 		    stringPtr->numChars);
1303 	} else {
1304 	    bytes = TclGetStringFromObj(appendObjPtr, &length);
1305 	    AppendUtfToUnicodeRep(objPtr, bytes, length);
1306 	}
1307 	return;
1308     }
1309 
1310     /*
1311      * Append to objPtr's UTF string rep. If we know the number of characters
1312      * in both objects before appending, then set the combined number of
1313      * characters in the final (appended-to) object.
1314      */
1315 
1316     bytes = TclGetStringFromObj(appendObjPtr, &length);
1317 
1318     allOneByteChars = 0;
1319     numChars = stringPtr->numChars;
1320     if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
1321 	stringPtr = GET_STRING(appendObjPtr);
1322 	if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) {
1323 	    numChars += stringPtr->numChars;
1324 	    allOneByteChars = 1;
1325 	}
1326     }
1327 
1328     AppendUtfToUtfRep(objPtr, bytes, length);
1329 
1330     if (allOneByteChars) {
1331 	stringPtr = GET_STRING(objPtr);
1332 	stringPtr->numChars = numChars;
1333     }
1334 }
1335 
1336 /*
1337  *----------------------------------------------------------------------
1338  *
1339  * AppendUnicodeToUnicodeRep --
1340  *
1341  *	This function appends the contents of "unicode" to the Unicode rep of
1342  *	"objPtr". objPtr must already have a valid Unicode rep.
1343  *
1344  * Results:
1345  *	None.
1346  *
1347  * Side effects:
1348  *	objPtr's internal rep is reallocated.
1349  *
1350  *----------------------------------------------------------------------
1351  */
1352 
1353 static void
AppendUnicodeToUnicodeRep(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int appendNumChars)1354 AppendUnicodeToUnicodeRep(
1355     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1356     const Tcl_UniChar *unicode,	/* String to append. */
1357     int appendNumChars)		/* Number of chars of "unicode" to append. */
1358 {
1359     String *stringPtr;
1360     int numChars;
1361 
1362     if (appendNumChars < 0) {
1363 	appendNumChars = UnicodeLength(unicode);
1364     }
1365     if (appendNumChars == 0) {
1366 	return;
1367     }
1368 
1369     SetStringFromAny(NULL, objPtr);
1370     stringPtr = GET_STRING(objPtr);
1371 
1372     /*
1373      * If not enough space has been allocated for the unicode rep, reallocate
1374      * the internal rep object with additional space. First try to double the
1375      * required allocation; if that fails, try a more modest increase. See the
1376      * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an
1377      * explanation of this growth algorithm.
1378      */
1379 
1380     numChars = stringPtr->numChars + appendNumChars;
1381     stringCheckLimits(numChars);
1382 
1383     if (STRING_UALLOC(numChars) > stringPtr->uallocated) {
1384 	/*
1385 	 * Protect against case where unicode points into the existing
1386 	 * stringPtr->unicode array.  Force it to follow any relocations
1387 	 * due to the reallocs below.
1388 	 */
1389 	int offset = -1;
1390 	if (unicode && unicode >= stringPtr->unicode
1391 		&& unicode <= stringPtr->unicode
1392 		+ stringPtr->uallocated / sizeof(Tcl_UniChar)) {
1393 	    offset = unicode - stringPtr->unicode;
1394 	}
1395 
1396 	GrowUnicodeBuffer(objPtr, numChars);
1397 	stringPtr = GET_STRING(objPtr);
1398 
1399 	/* Relocate unicode if needed; see above. */
1400 	if (offset >= 0) {
1401 	    unicode = stringPtr->unicode + offset;
1402 	}
1403     }
1404 
1405     /*
1406      * Copy the new string onto the end of the old string, then add the
1407      * trailing null.
1408      */
1409 
1410     if (unicode) {
1411 	memcpy(stringPtr->unicode + stringPtr->numChars, unicode,
1412 		appendNumChars * sizeof(Tcl_UniChar));
1413     }
1414     stringPtr->unicode[numChars] = 0;
1415     stringPtr->numChars = numChars;
1416     stringPtr->allocated = 0;
1417 
1418     TclInvalidateStringRep(objPtr);
1419 }
1420 
1421 /*
1422  *----------------------------------------------------------------------
1423  *
1424  * AppendUnicodeToUtfRep --
1425  *
1426  *	This function converts the contents of "unicode" to UTF and appends
1427  *	the UTF to the string rep of "objPtr".
1428  *
1429  * Results:
1430  *	None.
1431  *
1432  * Side effects:
1433  *	objPtr's internal rep is reallocated.
1434  *
1435  *----------------------------------------------------------------------
1436  */
1437 
1438 static void
AppendUnicodeToUtfRep(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1439 AppendUnicodeToUtfRep(
1440     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1441     const Tcl_UniChar *unicode,	/* String to convert to UTF. */
1442     int numChars)		/* Number of chars of "unicode" to convert. */
1443 {
1444     Tcl_DString dsPtr;
1445     const char *bytes;
1446 
1447     if (numChars < 0) {
1448 	numChars = UnicodeLength(unicode);
1449     }
1450     if (numChars == 0) {
1451 	return;
1452     }
1453 
1454     Tcl_DStringInit(&dsPtr);
1455     bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr);
1456     AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr));
1457     Tcl_DStringFree(&dsPtr);
1458 }
1459 
1460 /*
1461  *----------------------------------------------------------------------
1462  *
1463  * AppendUtfToUnicodeRep --
1464  *
1465  *	This function converts the contents of "bytes" to Unicode and appends
1466  *	the Unicode to the Unicode rep of "objPtr". objPtr must already have a
1467  *	valid Unicode rep.
1468  *
1469  * Results:
1470  *	None.
1471  *
1472  * Side effects:
1473  *	objPtr's internal rep is reallocated.
1474  *
1475  *----------------------------------------------------------------------
1476  */
1477 
1478 static void
AppendUtfToUnicodeRep(Tcl_Obj * objPtr,const char * bytes,int numBytes)1479 AppendUtfToUnicodeRep(
1480     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1481     const char *bytes,		/* String to convert to Unicode. */
1482     int numBytes)		/* Number of bytes of "bytes" to convert. */
1483 {
1484     Tcl_DString dsPtr;
1485     int numChars = numBytes;
1486     Tcl_UniChar *unicode = NULL;
1487 
1488     if (numBytes < 0) {
1489 	numBytes = (bytes ? strlen(bytes) : 0);
1490     }
1491     if (numBytes == 0) {
1492 	return;
1493     }
1494 
1495     Tcl_DStringInit(&dsPtr);
1496     if (bytes) {
1497 	numChars = Tcl_NumUtfChars(bytes, numBytes);
1498 	unicode = (Tcl_UniChar *) Tcl_UtfToUniCharDString(bytes, numBytes,
1499 		&dsPtr);
1500     }
1501     AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
1502     Tcl_DStringFree(&dsPtr);
1503 }
1504 
1505 /*
1506  *----------------------------------------------------------------------
1507  *
1508  * AppendUtfToUtfRep --
1509  *
1510  *	This function appends "numBytes" bytes of "bytes" to the UTF string
1511  *	rep of "objPtr". objPtr must already have a valid String rep.
1512  *
1513  * Results:
1514  *	None.
1515  *
1516  * Side effects:
1517  *	objPtr's internal rep is reallocated.
1518  *
1519  *----------------------------------------------------------------------
1520  */
1521 
1522 static void
AppendUtfToUtfRep(Tcl_Obj * objPtr,const char * bytes,int numBytes)1523 AppendUtfToUtfRep(
1524     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1525     const char *bytes,		/* String to append. */
1526     int numBytes)		/* Number of bytes of "bytes" to append. */
1527 {
1528     String *stringPtr;
1529     int newLength, oldLength;
1530 
1531     if (numBytes < 0) {
1532 	numBytes = (bytes ? strlen(bytes) : 0);
1533     }
1534     if (numBytes == 0) {
1535 	return;
1536     }
1537 
1538     /*
1539      * Copy the new string onto the end of the old string, then add the
1540      * trailing null.
1541      */
1542 
1543     oldLength = objPtr->length;
1544     newLength = numBytes + oldLength;
1545     if (newLength < 0) {
1546 	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
1547     }
1548 
1549     stringPtr = GET_STRING(objPtr);
1550     if (newLength > (int) stringPtr->allocated) {
1551 	/*
1552 	 * Protect against case where unicode points into the existing
1553 	 * stringPtr->unicode array.  Force it to follow any relocations
1554 	 * due to the reallocs below.
1555 	 */
1556 	int offset = -1;
1557 	if (bytes && bytes >= objPtr->bytes
1558 		&& bytes <= objPtr->bytes + objPtr->length) {
1559 	    offset = bytes - objPtr->bytes;
1560 	}
1561 
1562 	/*
1563 	 * There isn't currently enough space in the string representation so
1564 	 * allocate additional space. First, try to double the length
1565 	 * required. If that fails, try a more modest allocation. See the "TCL
1566 	 * STRING GROWTH ALGORITHM" comment at the top of this file for an
1567 	 * explanation of this growth algorithm.
1568 	 */
1569 
1570 	if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) {
1571 	    /*
1572 	     * Take care computing the amount of modest growth to avoid
1573 	     * overflow into invalid argument values for Tcl_SetObjLength.
1574 	     */
1575 	    unsigned int limit = INT_MAX - newLength;
1576 	    unsigned int extra = numBytes + TCL_GROWTH_MIN_ALLOC;
1577 	    int growth = (int) ((extra > limit) ? limit : extra);
1578 
1579 	    Tcl_SetObjLength(objPtr, newLength + growth);
1580 	}
1581 
1582 	/* Relocate bytes if needed; see above. */
1583 	if (offset >=0) {
1584 	    bytes = objPtr->bytes + offset;
1585 	}
1586     }
1587 
1588     /*
1589      * Invalidate the unicode data.
1590      */
1591 
1592     stringPtr->numChars = -1;
1593     stringPtr->hasUnicode = 0;
1594 
1595     if (bytes) {
1596 	memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes);
1597     }
1598     objPtr->bytes[newLength] = 0;
1599     objPtr->length = newLength;
1600 }
1601 
1602 /*
1603  *----------------------------------------------------------------------
1604  *
1605  * Tcl_AppendStringsToObjVA --
1606  *
1607  *	This function appends one or more null-terminated strings to an
1608  *	object.
1609  *
1610  * Results:
1611  *	None.
1612  *
1613  * Side effects:
1614  *	The contents of all the string arguments are appended to the string
1615  *	representation of objPtr.
1616  *
1617  *----------------------------------------------------------------------
1618  */
1619 
1620 void
Tcl_AppendStringsToObjVA(Tcl_Obj * objPtr,va_list argList)1621 Tcl_AppendStringsToObjVA(
1622     Tcl_Obj *objPtr,		/* Points to the object to append to. */
1623     va_list argList)		/* Variable argument list. */
1624 {
1625 #define STATIC_LIST_SIZE 16
1626     String *stringPtr;
1627     int newLength, oldLength, attemptLength;
1628     register char *string, *dst;
1629     char *static_list[STATIC_LIST_SIZE];
1630     char **args = static_list;
1631     int nargs_space = STATIC_LIST_SIZE;
1632     int nargs, i;
1633 
1634     if (Tcl_IsShared(objPtr)) {
1635 	Tcl_Panic("%s called with shared object", "Tcl_AppendStringsToObj");
1636     }
1637 
1638     SetStringFromAny(NULL, objPtr);
1639 
1640     /*
1641      * Force the existence of a string rep. so we avoid crashes operating
1642      * on a pure unicode value.  [Bug 2597185]
1643      */
1644 
1645     (void) Tcl_GetStringFromObj(objPtr, &oldLength);
1646 
1647     /*
1648      * Figure out how much space is needed for all the strings, and expand the
1649      * string representation if it isn't big enough. If no bytes would be
1650      * appended, just return. Note that on some platforms (notably OS/390) the
1651      * argList is an array so we need to use memcpy.
1652      */
1653 
1654     nargs = 0;
1655     newLength = 0;
1656     while (1) {
1657 	string = va_arg(argList, char *);
1658 	if (string == NULL) {
1659 	    break;
1660 	}
1661 	if (nargs >= nargs_space) {
1662 	    /*
1663 	     * Expand the args buffer.
1664 	     */
1665 
1666 	    nargs_space += STATIC_LIST_SIZE;
1667 	    if (args == static_list) {
1668 		args = (void *) ckalloc(nargs_space * sizeof(char *));
1669 		for (i = 0; i < nargs; ++i) {
1670 		    args[i] = static_list[i];
1671 		}
1672 	    } else {
1673 		args = (void *) ckrealloc((void *) args,
1674 			nargs_space * sizeof(char *));
1675 	    }
1676 	}
1677 	newLength += strlen(string);
1678 	args[nargs++] = string;
1679     }
1680     if (newLength == 0) {
1681 	goto done;
1682     }
1683 
1684     stringPtr = GET_STRING(objPtr);
1685     if (oldLength + newLength > (int) stringPtr->allocated) {
1686 	/*
1687 	 * There isn't currently enough space in the string representation, so
1688 	 * allocate additional space. If the current string representation
1689 	 * isn't empty (i.e. it looks like we're doing a series of appends)
1690 	 * then try to allocate extra space to accomodate future growth: first
1691 	 * try to double the required memory; if that fails, try a more modest
1692 	 * allocation. See the "TCL STRING GROWTH ALGORITHM" comment at the
1693 	 * top of this file for an explanation of this growth algorithm.
1694 	 * Otherwise, if the current string representation is empty, exactly
1695 	 * enough memory is allocated.
1696 	 */
1697 
1698 	if (oldLength == 0) {
1699 	    Tcl_SetObjLength(objPtr, newLength);
1700 	} else {
1701 	    attemptLength = 2 * (oldLength + newLength);
1702 	    if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) {
1703 		attemptLength = oldLength + (2 * newLength) +
1704 			TCL_GROWTH_MIN_ALLOC;
1705 		Tcl_SetObjLength(objPtr, attemptLength);
1706 	    }
1707 	}
1708     }
1709 
1710     /*
1711      * Make a second pass through the arguments, appending all the strings to
1712      * the object.
1713      */
1714 
1715     dst = objPtr->bytes + oldLength;
1716     for (i = 0; i < nargs; ++i) {
1717 	string = args[i];
1718 	if (string == NULL) {
1719 	    break;
1720 	}
1721 	while (*string != 0) {
1722 	    *dst = *string;
1723 	    dst++;
1724 	    string++;
1725 	}
1726     }
1727 
1728     /*
1729      * Add a null byte to terminate the string. However, be careful: it's
1730      * possible that the object is totally empty (if it was empty originally
1731      * and there was nothing to append). In this case dst is NULL; just leave
1732      * everything alone.
1733      */
1734 
1735     if (dst != NULL) {
1736 	*dst = 0;
1737     }
1738     objPtr->length = oldLength + newLength;
1739 
1740   done:
1741     /*
1742      * If we had to allocate a buffer from the heap, free it now.
1743      */
1744 
1745     if (args != static_list) {
1746 	ckfree((void *) args);
1747     }
1748 #undef STATIC_LIST_SIZE
1749 }
1750 
1751 /*
1752  *----------------------------------------------------------------------
1753  *
1754  * Tcl_AppendStringsToObj --
1755  *
1756  *	This function appends one or more null-terminated strings to an
1757  *	object.
1758  *
1759  * Results:
1760  *	None.
1761  *
1762  * Side effects:
1763  *	The contents of all the string arguments are appended to the string
1764  *	representation of objPtr.
1765  *
1766  *----------------------------------------------------------------------
1767  */
1768 
1769 void
Tcl_AppendStringsToObj(Tcl_Obj * objPtr,...)1770 Tcl_AppendStringsToObj(
1771     Tcl_Obj *objPtr,
1772     ...)
1773 {
1774     va_list argList;
1775 
1776     va_start(argList, objPtr);
1777     Tcl_AppendStringsToObjVA(objPtr, argList);
1778     va_end(argList);
1779 }
1780 
1781 /*
1782  *----------------------------------------------------------------------
1783  *
1784  * Tcl_AppendFormatToObj --
1785  *
1786  *	This function appends a list of Tcl_Obj's to a Tcl_Obj according to
1787  *	the formatting instructions embedded in the format string. The
1788  *	formatting instructions are inspired by sprintf(). Returns TCL_OK when
1789  *	successful. If there's an error in the arguments, TCL_ERROR is
1790  *	returned, and an error message is written to the interp, if non-NULL.
1791  *
1792  * Results:
1793  *	A standard Tcl result.
1794  *
1795  * Side effects:
1796  *	None.
1797  *
1798  *----------------------------------------------------------------------
1799  */
1800 
1801 int
Tcl_AppendFormatToObj(Tcl_Interp * interp,Tcl_Obj * appendObj,const char * format,int objc,Tcl_Obj * const objv[])1802 Tcl_AppendFormatToObj(
1803     Tcl_Interp *interp,
1804     Tcl_Obj *appendObj,
1805     const char *format,
1806     int objc,
1807     Tcl_Obj *const objv[])
1808 {
1809     const char *span = format, *msg;
1810     int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0;
1811     int originalLength, limit;
1812     static const char *mixedXPG =
1813 	    "cannot mix \"%\" and \"%n$\" conversion specifiers";
1814     static const char *badIndex[2] = {
1815 	"not enough arguments for all format specifiers",
1816 	"\"%n$\" argument index out of range"
1817     };
1818     static const char *overflow = "max size for a Tcl value exceeded";
1819 
1820     if (Tcl_IsShared(appendObj)) {
1821 	Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
1822     }
1823     TclGetStringFromObj(appendObj, &originalLength);
1824     limit = INT_MAX - originalLength;
1825 
1826     /*
1827      * Format string is NUL-terminated.
1828      */
1829 
1830     while (*format != '\0') {
1831 	char *end;
1832 	int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag;
1833 	int width, gotPrecision, precision, useShort, useWide, useBig;
1834 	int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes;
1835 	Tcl_Obj *segment;
1836 	Tcl_UniChar ch;
1837 	int step = Tcl_UtfToUniChar(format, &ch);
1838 
1839 	format += step;
1840 	if (ch != '%') {
1841 	    numBytes += step;
1842 	    continue;
1843 	}
1844 	if (numBytes) {
1845 	    if (numBytes > limit) {
1846 		msg = overflow;
1847 		goto errorMsg;
1848 	    }
1849 	    Tcl_AppendToObj(appendObj, span, numBytes);
1850 	    limit -= numBytes;
1851 	    numBytes = 0;
1852 	}
1853 
1854 	/*
1855 	 * Saw a % : process the format specifier.
1856 	 *
1857 	 * Step 0. Handle special case of escaped format marker (i.e., %%).
1858 	 */
1859 
1860 	step = Tcl_UtfToUniChar(format, &ch);
1861 	if (ch == '%') {
1862 	    span = format;
1863 	    numBytes = step;
1864 	    format += step;
1865 	    continue;
1866 	}
1867 
1868 	/*
1869 	 * Step 1. XPG3 position specifier
1870 	 */
1871 
1872 	newXpg = 0;
1873 	if (isdigit(UCHAR(ch))) {
1874 	    int position = strtoul(format, &end, 10);
1875 	    if (*end == '$') {
1876 		newXpg = 1;
1877 		objIndex = position - 1;
1878 		format = end + 1;
1879 		step = Tcl_UtfToUniChar(format, &ch);
1880 	    }
1881 	}
1882 	if (newXpg) {
1883 	    if (gotSequential) {
1884 		msg = mixedXPG;
1885 		goto errorMsg;
1886 	    }
1887 	    gotXpg = 1;
1888 	} else {
1889 	    if (gotXpg) {
1890 		msg = mixedXPG;
1891 		goto errorMsg;
1892 	    }
1893 	    gotSequential = 1;
1894 	}
1895 	if ((objIndex < 0) || (objIndex >= objc)) {
1896 	    msg = badIndex[gotXpg];
1897 	    goto errorMsg;
1898 	}
1899 
1900 	/*
1901 	 * Step 2. Set of flags.
1902 	 */
1903 
1904 	gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0;
1905 	sawFlag = 1;
1906 	do {
1907 	    switch (ch) {
1908 	    case '-':
1909 		gotMinus = 1;
1910 		break;
1911 	    case '#':
1912 		gotHash = 1;
1913 		break;
1914 	    case '0':
1915 		gotZero = 1;
1916 		break;
1917 	    case ' ':
1918 		gotSpace = 1;
1919 		break;
1920 	    case '+':
1921 		gotPlus = 1;
1922 		break;
1923 	    default:
1924 		sawFlag = 0;
1925 	    }
1926 	    if (sawFlag) {
1927 		format += step;
1928 		step = Tcl_UtfToUniChar(format, &ch);
1929 	    }
1930 	} while (sawFlag);
1931 
1932 	/*
1933 	 * Step 3. Minimum field width.
1934 	 */
1935 
1936 	width = 0;
1937 	if (isdigit(UCHAR(ch))) {
1938 	    width = strtoul(format, &end, 10);
1939 	    format = end;
1940 	    step = Tcl_UtfToUniChar(format, &ch);
1941 	} else if (ch == '*') {
1942 	    if (objIndex >= objc - 1) {
1943 		msg = badIndex[gotXpg];
1944 		goto errorMsg;
1945 	    }
1946 	    if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) {
1947 		goto error;
1948 	    }
1949 	    if (width < 0) {
1950 		width = -width;
1951 		gotMinus = 1;
1952 	    }
1953 	    objIndex++;
1954 	    format += step;
1955 	    step = Tcl_UtfToUniChar(format, &ch);
1956 	}
1957 	if (width > limit) {
1958 	    msg = overflow;
1959 	    goto errorMsg;
1960 	}
1961 
1962 	/*
1963 	 * Step 4. Precision.
1964 	 */
1965 
1966 	gotPrecision = precision = 0;
1967 	if (ch == '.') {
1968 	    gotPrecision = 1;
1969 	    format += step;
1970 	    step = Tcl_UtfToUniChar(format, &ch);
1971 	}
1972 	if (isdigit(UCHAR(ch))) {
1973 	    precision = strtoul(format, &end, 10);
1974 	    format = end;
1975 	    step = Tcl_UtfToUniChar(format, &ch);
1976 	} else if (ch == '*') {
1977 	    if (objIndex >= objc - 1) {
1978 		msg = badIndex[gotXpg];
1979 		goto errorMsg;
1980 	    }
1981 	    if (TclGetIntFromObj(interp, objv[objIndex], &precision)
1982 		    != TCL_OK) {
1983 		goto error;
1984 	    }
1985 
1986 	    /*
1987 	     * TODO: Check this truncation logic.
1988 	     */
1989 
1990 	    if (precision < 0) {
1991 		precision = 0;
1992 	    }
1993 	    objIndex++;
1994 	    format += step;
1995 	    step = Tcl_UtfToUniChar(format, &ch);
1996 	}
1997 
1998 	/*
1999 	 * Step 5. Length modifier.
2000 	 */
2001 
2002 	useShort = useWide = useBig = 0;
2003 	if (ch == 'h') {
2004 	    useShort = 1;
2005 	    format += step;
2006 	    step = Tcl_UtfToUniChar(format, &ch);
2007 	} else if (ch == 'l') {
2008 	    format += step;
2009 	    step = Tcl_UtfToUniChar(format, &ch);
2010 	    if (ch == 'l') {
2011 		useBig = 1;
2012 		format += step;
2013 		step = Tcl_UtfToUniChar(format, &ch);
2014 	    } else {
2015 #ifndef TCL_WIDE_INT_IS_LONG
2016 		useWide = 1;
2017 #endif
2018 	    }
2019 	}
2020 
2021 	format += step;
2022 	span = format;
2023 
2024 	/*
2025 	 * Step 6. The actual conversion character.
2026 	 */
2027 
2028 	segment = objv[objIndex];
2029 	numChars = -1;
2030 	if (ch == 'i') {
2031 	    ch = 'd';
2032 	}
2033 	switch (ch) {
2034 	case '\0':
2035 	    msg = "format string ended in middle of field specifier";
2036 	    goto errorMsg;
2037 	case 's':
2038 	    if (gotPrecision) {
2039 		numChars = Tcl_GetCharLength(segment);
2040 		if (precision < numChars) {
2041 		    segment = Tcl_GetRange(segment, 0, precision - 1);
2042 		    numChars = precision;
2043 		    Tcl_IncrRefCount(segment);
2044 		    allocSegment = 1;
2045 		}
2046 	    }
2047 	    break;
2048 	case 'c': {
2049 	    char buf[TCL_UTF_MAX];
2050 	    int code, length;
2051 
2052 	    if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
2053 		goto error;
2054 	    }
2055 	    length = Tcl_UniCharToUtf(code, buf);
2056 	    segment = Tcl_NewStringObj(buf, length);
2057 	    Tcl_IncrRefCount(segment);
2058 	    allocSegment = 1;
2059 	    break;
2060 	}
2061 
2062 	case 'u':
2063 	    if (useBig) {
2064 		msg = "unsigned bignum format is invalid";
2065 		goto errorMsg;
2066 	    }
2067 	case 'd':
2068 	case 'o':
2069 	case 'x':
2070 	case 'X': {
2071 	    short int s = 0;	/* Silence compiler warning; only defined and
2072 				 * used when useShort is true. */
2073 	    long l;
2074 	    Tcl_WideInt w;
2075 	    mp_int big;
2076 	    int toAppend, isNegative = 0;
2077 
2078 	    if (useBig) {
2079 		if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
2080 		    goto error;
2081 		}
2082 		isNegative = (mp_cmp_d(&big, 0) == MP_LT);
2083 	    } else if (useWide) {
2084 		if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
2085 		    Tcl_Obj *objPtr;
2086 
2087 		    if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
2088 			goto error;
2089 		    }
2090 		    mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big);
2091 		    objPtr = Tcl_NewBignumObj(&big);
2092 		    Tcl_IncrRefCount(objPtr);
2093 		    Tcl_GetWideIntFromObj(NULL, objPtr, &w);
2094 		    Tcl_DecrRefCount(objPtr);
2095 		}
2096 		isNegative = (w < (Tcl_WideInt)0);
2097 	    } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
2098 		if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
2099 		    Tcl_Obj *objPtr;
2100 
2101 		    if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
2102 			goto error;
2103 		    }
2104 		    mp_mod_2d(&big, (int) CHAR_BIT * sizeof(long), &big);
2105 		    objPtr = Tcl_NewBignumObj(&big);
2106 		    Tcl_IncrRefCount(objPtr);
2107 		    TclGetLongFromObj(NULL, objPtr, &l);
2108 		    Tcl_DecrRefCount(objPtr);
2109 		} else {
2110 		    l = Tcl_WideAsLong(w);
2111 		}
2112 		if (useShort) {
2113 		    s = (short int) l;
2114 		    isNegative = (s < (short int)0);
2115 		} else {
2116 		    isNegative = (l < (long)0);
2117 		}
2118 	    } else if (useShort) {
2119 		s = (short int) l;
2120 		isNegative = (s < (short int)0);
2121 	    } else {
2122 		isNegative = (l < (long)0);
2123 	    }
2124 
2125 	    segment = Tcl_NewObj();
2126 	    allocSegment = 1;
2127 	    segmentLimit = INT_MAX;
2128 	    Tcl_IncrRefCount(segment);
2129 
2130 	    if ((isNegative || gotPlus || gotSpace) && (useBig || (ch == 'd'))) {
2131 		Tcl_AppendToObj(segment, (isNegative ? "-" : gotPlus ? "+" : " "), 1);
2132 		segmentLimit -= 1;
2133 	    }
2134 
2135 	    if (gotHash) {
2136 		switch (ch) {
2137 		case 'o':
2138 		    Tcl_AppendToObj(segment, "0", 1);
2139 		    segmentLimit -= 1;
2140 		    precision--;
2141 		    break;
2142 		case 'x':
2143 		case 'X':
2144 		    Tcl_AppendToObj(segment, "0x", 2);
2145 		    segmentLimit -= 2;
2146 		    break;
2147 		}
2148 	    }
2149 
2150 	    switch (ch) {
2151 	    case 'd': {
2152 		int length;
2153 		Tcl_Obj *pure;
2154 		const char *bytes;
2155 
2156 		if (useShort) {
2157 		    pure = Tcl_NewIntObj((int)(s));
2158 		} else if (useWide) {
2159 		    pure = Tcl_NewWideIntObj(w);
2160 		} else if (useBig) {
2161 		    pure = Tcl_NewBignumObj(&big);
2162 		} else {
2163 		    pure = Tcl_NewLongObj(l);
2164 		}
2165 		Tcl_IncrRefCount(pure);
2166 		bytes = TclGetStringFromObj(pure, &length);
2167 
2168 		/*
2169 		 * Already did the sign above.
2170 		 */
2171 
2172 		if (*bytes == '-') {
2173 		    length--;
2174 		    bytes++;
2175 		}
2176 		toAppend = length;
2177 
2178 		/*
2179 		 * Canonical decimal string reps for integers are composed
2180 		 * entirely of one-byte encoded characters, so "length" is the
2181 		 * number of chars.
2182 		 */
2183 
2184 		if (gotPrecision) {
2185 		    if (length < precision) {
2186 			segmentLimit -= (precision - length);
2187 		    }
2188 		    while (length < precision) {
2189 			Tcl_AppendToObj(segment, "0", 1);
2190 			length++;
2191 		    }
2192 		    gotZero = 0;
2193 		}
2194 		if (gotZero) {
2195 		    length += Tcl_GetCharLength(segment);
2196 		    if (length < width) {
2197 			segmentLimit -= (width - length);
2198 		    }
2199 		    while (length < width) {
2200 			Tcl_AppendToObj(segment, "0", 1);
2201 			length++;
2202 		    }
2203 		}
2204 		if (toAppend > segmentLimit) {
2205 		    msg = overflow;
2206 		    goto errorMsg;
2207 		}
2208 		Tcl_AppendToObj(segment, bytes, toAppend);
2209 		Tcl_DecrRefCount(pure);
2210 		break;
2211 	    }
2212 
2213 	    case 'u':
2214 	    case 'o':
2215 	    case 'x':
2216 	    case 'X': {
2217 		Tcl_WideUInt bits = (Tcl_WideUInt)0;
2218 		Tcl_WideInt numDigits = (Tcl_WideInt)0;
2219 		int length, numBits = 4, base = 16;
2220 		int index = 0, shift = 0;
2221 		Tcl_Obj *pure;
2222 		char *bytes;
2223 
2224 		if (ch == 'u') {
2225 		    base = 10;
2226 		}
2227 		if (ch == 'o') {
2228 		    base = 8;
2229 		    numBits = 3;
2230 		}
2231 		if (useShort) {
2232 		    unsigned short int us = (unsigned short int) s;
2233 
2234 		    bits = (Tcl_WideUInt) us;
2235 		    while (us) {
2236 			numDigits++;
2237 			us /= base;
2238 		    }
2239 		} else if (useWide) {
2240 		    Tcl_WideUInt uw = (Tcl_WideUInt) w;
2241 
2242 		    bits = uw;
2243 		    while (uw) {
2244 			numDigits++;
2245 			uw /= base;
2246 		    }
2247 		} else if (useBig && big.used) {
2248 		    int leftover = (big.used * DIGIT_BIT) % numBits;
2249 		    mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover);
2250 
2251 		    numDigits = 1 +
2252 			    (((Tcl_WideInt)big.used * DIGIT_BIT) / numBits);
2253 		    while ((mask & big.dp[big.used-1]) == 0) {
2254 			numDigits--;
2255 			mask >>= numBits;
2256 		    }
2257 		    if (numDigits > INT_MAX) {
2258 			msg = overflow;
2259 			goto errorMsg;
2260 		    }
2261 		} else if (!useBig) {
2262 		    unsigned long int ul = (unsigned long int) l;
2263 
2264 		    bits = (Tcl_WideUInt) ul;
2265 		    while (ul) {
2266 			numDigits++;
2267 			ul /= base;
2268 		    }
2269 		}
2270 
2271 		/*
2272 		 * Need to be sure zero becomes "0", not "".
2273 		 */
2274 
2275 		if ((numDigits == 0) && !((ch == 'o') && gotHash)) {
2276 		    numDigits = 1;
2277 		}
2278 		pure = Tcl_NewObj();
2279 		Tcl_SetObjLength(pure, (int)numDigits);
2280 		bytes = TclGetString(pure);
2281 		toAppend = length = (int)numDigits;
2282 		while (numDigits--) {
2283 		    int digitOffset;
2284 
2285 		    if (useBig && big.used) {
2286 			if (index < big.used && (size_t) shift <
2287 				CHAR_BIT*sizeof(Tcl_WideUInt) - DIGIT_BIT) {
2288 			    bits |= (((Tcl_WideUInt)big.dp[index++]) <<shift);
2289 			    shift += DIGIT_BIT;
2290 			}
2291 			shift -= numBits;
2292 		    }
2293 		    digitOffset = (int) (bits % base);
2294 		    if (digitOffset > 9) {
2295 			bytes[numDigits] = 'a' + digitOffset - 10;
2296 		    } else {
2297 			bytes[numDigits] = '0' + digitOffset;
2298 		    }
2299 		    bits /= base;
2300 		}
2301 		if (useBig) {
2302 		    mp_clear(&big);
2303 		}
2304 		if (gotPrecision) {
2305 		    if (length < precision) {
2306 			segmentLimit -= (precision - length);
2307 		    }
2308 		    while (length < precision) {
2309 			Tcl_AppendToObj(segment, "0", 1);
2310 			length++;
2311 		    }
2312 		    gotZero = 0;
2313 		}
2314 		if (gotZero) {
2315 		    length += Tcl_GetCharLength(segment);
2316 		    if (length < width) {
2317 			segmentLimit -= (width - length);
2318 		    }
2319 		    while (length < width) {
2320 			Tcl_AppendToObj(segment, "0", 1);
2321 			length++;
2322 		    }
2323 		}
2324 		if (toAppend > segmentLimit) {
2325 		    msg = overflow;
2326 		    goto errorMsg;
2327 		}
2328 		Tcl_AppendObjToObj(segment, pure);
2329 		Tcl_DecrRefCount(pure);
2330 		break;
2331 	    }
2332 
2333 	    }
2334 	    break;
2335 	}
2336 
2337 	case 'e':
2338 	case 'E':
2339 	case 'f':
2340 	case 'g':
2341 	case 'G': {
2342 #define MAX_FLOAT_SIZE 320
2343 	    char spec[2*TCL_INTEGER_SPACE + 9], *p = spec;
2344 	    double d;
2345 	    int length = MAX_FLOAT_SIZE;
2346 	    char *bytes;
2347 
2348 	    if (Tcl_GetDoubleFromObj(interp, segment, &d) != TCL_OK) {
2349 		/* TODO: Figure out ACCEPT_NAN here */
2350 		goto error;
2351 	    }
2352 	    *p++ = '%';
2353 	    if (gotMinus) {
2354 		*p++ = '-';
2355 	    }
2356 	    if (gotHash) {
2357 		*p++ = '#';
2358 	    }
2359 	    if (gotZero) {
2360 		*p++ = '0';
2361 	    }
2362 	    if (gotSpace) {
2363 		*p++ = ' ';
2364 	    }
2365 	    if (gotPlus) {
2366 		*p++ = '+';
2367 	    }
2368 	    if (width) {
2369 		p += sprintf(p, "%d", width);
2370 		if (width > length) {
2371 		    length = width;
2372 		}
2373 	    }
2374 	    if (gotPrecision) {
2375 		*p++ = '.';
2376 		p += sprintf(p, "%d", precision);
2377 		if (precision > INT_MAX - length) {
2378 		    msg=overflow;
2379 		    goto errorMsg;
2380 		}
2381 		length += precision;
2382 	    }
2383 
2384 	    /*
2385 	     * Don't pass length modifiers!
2386 	     */
2387 
2388 	    *p++ = (char) ch;
2389 	    *p = '\0';
2390 
2391 	    segment = Tcl_NewObj();
2392 	    allocSegment = 1;
2393 	    if (!Tcl_AttemptSetObjLength(segment, length)) {
2394 		msg = overflow;
2395 		goto errorMsg;
2396 	    }
2397 	    bytes = TclGetString(segment);
2398 	    if (!Tcl_AttemptSetObjLength(segment, sprintf(bytes, spec, d))) {
2399 		msg = overflow;
2400 		goto errorMsg;
2401 	    }
2402 	    break;
2403 	}
2404 	default:
2405 	    if (interp != NULL) {
2406 		Tcl_SetObjResult(interp,
2407 			Tcl_ObjPrintf("bad field specifier \"%c\"", ch));
2408 	    }
2409 	    goto error;
2410 	}
2411 
2412 	switch (ch) {
2413 	case 'E':
2414 	case 'G':
2415 	case 'X': {
2416 	    Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment)));
2417 	}
2418 	}
2419 
2420 	if (width > 0) {
2421 	    if (numChars < 0) {
2422 		numChars = Tcl_GetCharLength(segment);
2423 	    }
2424 	    if (!gotMinus) {
2425 		if (numChars < width) {
2426 		    limit -= (width - numChars);
2427 		}
2428 		while (numChars < width) {
2429 		    Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
2430 		    numChars++;
2431 		}
2432 	    }
2433 	}
2434 
2435 	Tcl_GetStringFromObj(segment, &segmentNumBytes);
2436 	if (segmentNumBytes > limit) {
2437 	    if (allocSegment) {
2438 		Tcl_DecrRefCount(segment);
2439 	    }
2440 	    msg = overflow;
2441 	    goto errorMsg;
2442 	}
2443 	Tcl_AppendObjToObj(appendObj, segment);
2444 	limit -= segmentNumBytes;
2445 	if (allocSegment) {
2446 	    Tcl_DecrRefCount(segment);
2447 	}
2448 	if (width > 0) {
2449 	    if (numChars < width) {
2450 		limit -= (width - numChars);
2451 	    }
2452 	    while (numChars < width) {
2453 		Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
2454 		numChars++;
2455 	    }
2456 	}
2457 
2458 	objIndex += gotSequential;
2459     }
2460     if (numBytes) {
2461 	if (numBytes > limit) {
2462 	    msg = overflow;
2463 	    goto errorMsg;
2464 	}
2465 	Tcl_AppendToObj(appendObj, span, numBytes);
2466 	limit -= numBytes;
2467 	numBytes = 0;
2468     }
2469 
2470     return TCL_OK;
2471 
2472   errorMsg:
2473     if (interp != NULL) {
2474 	Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1));
2475     }
2476   error:
2477     Tcl_SetObjLength(appendObj, originalLength);
2478     return TCL_ERROR;
2479 }
2480 
2481 /*
2482  *---------------------------------------------------------------------------
2483  *
2484  * Tcl_Format--
2485  *
2486  * Results:
2487  *	A refcount zero Tcl_Obj.
2488  *
2489  * Side effects:
2490  * 	None.
2491  *
2492  *---------------------------------------------------------------------------
2493  */
2494 
2495 Tcl_Obj *
Tcl_Format(Tcl_Interp * interp,const char * format,int objc,Tcl_Obj * const objv[])2496 Tcl_Format(
2497     Tcl_Interp *interp,
2498     const char *format,
2499     int objc,
2500     Tcl_Obj *const objv[])
2501 {
2502     int result;
2503     Tcl_Obj *objPtr = Tcl_NewObj();
2504     result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv);
2505     if (result != TCL_OK) {
2506 	Tcl_DecrRefCount(objPtr);
2507 	return NULL;
2508     }
2509     return objPtr;
2510 }
2511 
2512 /*
2513  *---------------------------------------------------------------------------
2514  *
2515  * AppendPrintfToObjVA --
2516  *
2517  * Results:
2518  *
2519  * Side effects:
2520  *
2521  *---------------------------------------------------------------------------
2522  */
2523 
2524 static void
AppendPrintfToObjVA(Tcl_Obj * objPtr,const char * format,va_list argList)2525 AppendPrintfToObjVA(
2526     Tcl_Obj *objPtr,
2527     const char *format,
2528     va_list argList)
2529 {
2530     int code, objc;
2531     Tcl_Obj **objv, *list = Tcl_NewObj();
2532     const char *p;
2533     char *end;
2534 
2535     p = format;
2536     Tcl_IncrRefCount(list);
2537     while (*p != '\0') {
2538 	int size = 0, seekingConversion = 1, gotPrecision = 0;
2539 	int lastNum = -1;
2540 
2541 	if (*p++ != '%') {
2542 	    continue;
2543 	}
2544 	if (*p == '%') {
2545 	    p++;
2546 	    continue;
2547 	}
2548 	do {
2549 	    switch (*p) {
2550 
2551 	    case '\0':
2552 		seekingConversion = 0;
2553 		break;
2554 	    case 's': {
2555 		const char *q, *end, *bytes = va_arg(argList, char *);
2556 		seekingConversion = 0;
2557 
2558 		/*
2559 		 * The buffer to copy characters from starts at bytes and ends
2560 		 * at either the first NUL byte, or after lastNum bytes, when
2561 		 * caller has indicated a limit.
2562 		 */
2563 
2564 		end = bytes;
2565 		while ((!gotPrecision || lastNum--) && (*end != '\0')) {
2566 		    end++;
2567 		}
2568 
2569 		/*
2570 		 * Within that buffer, we trim both ends if needed so that we
2571 		 * copy only whole characters, and avoid copying any partial
2572 		 * multi-byte characters.
2573 		 */
2574 
2575 		q = Tcl_UtfPrev(end, bytes);
2576 		if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
2577 		    end = q;
2578 		}
2579 
2580 		q = bytes + TCL_UTF_MAX;
2581 		while ((bytes < end) && (bytes < q)
2582 			&& ((*bytes & 0xC0) == 0x80)) {
2583 		    bytes++;
2584 		}
2585 
2586 		Tcl_ListObjAppendElement(NULL, list,
2587 			Tcl_NewStringObj(bytes , (int)(end - bytes)));
2588 
2589 		break;
2590 	    }
2591 	    case 'c':
2592 	    case 'i':
2593 	    case 'u':
2594 	    case 'd':
2595 	    case 'o':
2596 	    case 'x':
2597 	    case 'X':
2598 		seekingConversion = 0;
2599 		switch (size) {
2600 		case -1:
2601 		case 0:
2602 		    Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
2603 			    (long int)va_arg(argList, int)));
2604 		    break;
2605 		case 1:
2606 		    Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
2607 			    va_arg(argList, long int)));
2608 		    break;
2609 		}
2610 		break;
2611 	    case 'e':
2612 	    case 'E':
2613 	    case 'f':
2614 	    case 'g':
2615 	    case 'G':
2616 		Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
2617 			va_arg(argList, double)));
2618 		seekingConversion = 0;
2619 		break;
2620 	    case '*':
2621 		lastNum = (int)va_arg(argList, int);
2622 		Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum));
2623 		p++;
2624 		break;
2625 	    case '0': case '1': case '2': case '3': case '4':
2626 	    case '5': case '6': case '7': case '8': case '9':
2627 		lastNum = (int) strtoul(p, &end, 10);
2628 		p = end;
2629 		break;
2630 	    case '.':
2631 		gotPrecision = 1;
2632 		p++;
2633 		break;
2634 	    /* TODO: support for wide (and bignum?) arguments */
2635 	    case 'l':
2636 		size = 1;
2637 		p++;
2638 		break;
2639 	    case 'h':
2640 		size = -1;
2641 	    default:
2642 		p++;
2643 	    }
2644 	} while (seekingConversion);
2645     }
2646     TclListObjGetElements(NULL, list, &objc, &objv);
2647     code = Tcl_AppendFormatToObj(NULL, objPtr, format, objc, objv);
2648     if (code != TCL_OK) {
2649 	Tcl_AppendPrintfToObj(objPtr,
2650 		"Unable to format \"%s\" with supplied arguments: %s",
2651 		format, Tcl_GetString(list));
2652     }
2653     Tcl_DecrRefCount(list);
2654 }
2655 
2656 /*
2657  *---------------------------------------------------------------------------
2658  *
2659  * Tcl_AppendPrintfToObj --
2660  *
2661  * Results:
2662  *	A standard Tcl result.
2663  *
2664  * Side effects:
2665  * 	None.
2666  *
2667  *---------------------------------------------------------------------------
2668  */
2669 
2670 void
Tcl_AppendPrintfToObj(Tcl_Obj * objPtr,const char * format,...)2671 Tcl_AppendPrintfToObj(
2672     Tcl_Obj *objPtr,
2673     const char *format,
2674     ...)
2675 {
2676     va_list argList;
2677 
2678     va_start(argList, format);
2679     AppendPrintfToObjVA(objPtr, format, argList);
2680     va_end(argList);
2681 }
2682 
2683 /*
2684  *---------------------------------------------------------------------------
2685  *
2686  * Tcl_ObjPrintf --
2687  *
2688  * Results:
2689  *	A refcount zero Tcl_Obj.
2690  *
2691  * Side effects:
2692  * 	None.
2693  *
2694  *---------------------------------------------------------------------------
2695  */
2696 
2697 Tcl_Obj *
Tcl_ObjPrintf(const char * format,...)2698 Tcl_ObjPrintf(
2699     const char *format,
2700     ...)
2701 {
2702     va_list argList;
2703     Tcl_Obj *objPtr = Tcl_NewObj();
2704 
2705     va_start(argList, format);
2706     AppendPrintfToObjVA(objPtr, format, argList);
2707     va_end(argList);
2708     return objPtr;
2709 }
2710 
2711 /*
2712  *---------------------------------------------------------------------------
2713  *
2714  * TclGetStringStorage --
2715  *
2716  *	Returns the string storage space of a Tcl_Obj.
2717  *
2718  * Results:
2719  *	The pointer value objPtr->bytes is returned and the number of bytes
2720  *	allocated there is written to *sizePtr (if known).
2721  *
2722  * Side effects:
2723  *	May set objPtr->bytes.
2724  *
2725  *---------------------------------------------------------------------------
2726  */
2727 
2728 char *
TclGetStringStorage(Tcl_Obj * objPtr,unsigned int * sizePtr)2729 TclGetStringStorage(
2730     Tcl_Obj *objPtr,
2731     unsigned int *sizePtr)
2732 {
2733     String *stringPtr;
2734 
2735     if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) {
2736 	return TclGetStringFromObj(objPtr, (int *)sizePtr);
2737     }
2738 
2739     stringPtr = GET_STRING(objPtr);
2740     *sizePtr = stringPtr->allocated;
2741     return objPtr->bytes;
2742 }
2743 /*
2744  *---------------------------------------------------------------------------
2745  *
2746  * TclStringObjReverse --
2747  *
2748  *	Implements the [string reverse] operation.
2749  *
2750  * Results:
2751  *	An unshared Tcl value which is the [string reverse] of the argument
2752  *	supplied.  When sharing rules permit, the returned value might be
2753  *	the argument with modifications done in place.
2754  *
2755  * Side effects:
2756  *	May allocate a new Tcl_Obj.
2757  *
2758  *---------------------------------------------------------------------------
2759  */
2760 
2761 Tcl_Obj *
TclStringObjReverse(Tcl_Obj * objPtr)2762 TclStringObjReverse(
2763     Tcl_Obj *objPtr)
2764 {
2765     String *stringPtr;
2766     int numChars = Tcl_GetCharLength(objPtr);
2767     int i = 0, lastCharIdx = numChars - 1;
2768     char *bytes;
2769 
2770     if (numChars <= 1) {
2771 	return objPtr;
2772     }
2773 
2774     stringPtr = GET_STRING(objPtr);
2775     if (stringPtr->hasUnicode) {
2776 	Tcl_UniChar *source = stringPtr->unicode;
2777 
2778 	if (Tcl_IsShared(objPtr)) {
2779 	    Tcl_UniChar *dest, ch = 0;
2780 
2781 	    /*
2782 	     * Create a non-empty, pure unicode value, so we can coax
2783 	     * Tcl_SetObjLength into growing the unicode rep buffer.
2784 	     */
2785 
2786 	    Tcl_Obj *resultPtr = Tcl_NewUnicodeObj(&ch, 1);
2787 	    Tcl_SetObjLength(resultPtr, numChars);
2788 	    dest = Tcl_GetUnicode(resultPtr);
2789 
2790 	    while (i < numChars) {
2791 		dest[i++] = source[lastCharIdx--];
2792 	    }
2793 	    return resultPtr;
2794 	}
2795 
2796 	while (i < lastCharIdx) {
2797 	    Tcl_UniChar tmp = source[lastCharIdx];
2798 	    source[lastCharIdx--] = source[i];
2799 	    source[i++] = tmp;
2800 	}
2801 	TclInvalidateStringRep(objPtr);
2802 	stringPtr->allocated = 0;
2803 	return objPtr;
2804     }
2805 
2806     bytes = TclGetString(objPtr);
2807     if (Tcl_IsShared(objPtr)) {
2808 	char *dest;
2809 	Tcl_Obj *resultPtr = Tcl_NewObj();
2810 	Tcl_SetObjLength(resultPtr, numChars);
2811 	dest = TclGetString(resultPtr);
2812 	while (i < numChars) {
2813 	    dest[i++] = bytes[lastCharIdx--];
2814 	}
2815 	return resultPtr;
2816     }
2817 
2818     while (i < lastCharIdx) {
2819 	char tmp = bytes[lastCharIdx];
2820 	bytes[lastCharIdx--] = bytes[i];
2821 	bytes[i++] = tmp;
2822     }
2823     return objPtr;
2824 }
2825 
2826 /*
2827  *---------------------------------------------------------------------------
2828  *
2829  * FillUnicodeRep --
2830  *
2831  *	Populate the Unicode internal rep with the Unicode form of its string
2832  *	rep. The object must alread have a "String" internal rep.
2833  *
2834  * Results:
2835  *	None.
2836  *
2837  * Side effects:
2838  *	Reallocates the String internal rep.
2839  *
2840  *---------------------------------------------------------------------------
2841  */
2842 
2843 static void
FillUnicodeRep(Tcl_Obj * objPtr)2844 FillUnicodeRep(
2845     Tcl_Obj *objPtr)		/* The object in which to fill the unicode
2846 				 * rep. */
2847 {
2848     String *stringPtr;
2849     size_t uallocated;
2850     char *srcEnd, *src = objPtr->bytes;
2851     Tcl_UniChar *dst;
2852 
2853     stringPtr = GET_STRING(objPtr);
2854     if (stringPtr->numChars == -1) {
2855 	stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length);
2856     }
2857     stringPtr->hasUnicode = (stringPtr->numChars > 0);
2858 
2859     stringCheckLimits(stringPtr->numChars);
2860     uallocated = STRING_UALLOC(stringPtr->numChars);
2861     if (uallocated > stringPtr->uallocated) {
2862 	GrowUnicodeBuffer(objPtr, stringPtr->numChars);
2863 	stringPtr = GET_STRING(objPtr);
2864     }
2865 
2866     /*
2867      * Convert src to Unicode and store the coverted data in "unicode".
2868      */
2869 
2870     srcEnd = src + objPtr->length;
2871     for (dst = stringPtr->unicode; src < srcEnd; dst++) {
2872 	src += TclUtfToUniChar(src, dst);
2873     }
2874     *dst = 0;
2875 
2876     SET_STRING(objPtr, stringPtr);
2877 }
2878 
2879 /*
2880  *----------------------------------------------------------------------
2881  *
2882  * DupStringInternalRep --
2883  *
2884  *	Initialize the internal representation of a new Tcl_Obj to a copy of
2885  *	the internal representation of an existing string object.
2886  *
2887  * Results:
2888  *	None.
2889  *
2890  * Side effects:
2891  *	copyPtr's internal rep is set to a copy of srcPtr's internal
2892  *	representation.
2893  *
2894  *----------------------------------------------------------------------
2895  */
2896 
2897 static void
DupStringInternalRep(register Tcl_Obj * srcPtr,register Tcl_Obj * copyPtr)2898 DupStringInternalRep(
2899     register Tcl_Obj *srcPtr,	/* Object with internal rep to copy. Must have
2900 				 * an internal rep of type "String". */
2901     register Tcl_Obj *copyPtr)	/* Object with internal rep to set. Must not
2902 				 * currently have an internal rep.*/
2903 {
2904     String *srcStringPtr = GET_STRING(srcPtr);
2905     String *copyStringPtr = NULL;
2906 
2907     /*
2908      * If the src obj is a string of 1-byte Utf chars, then copy the string
2909      * rep of the source object and create an "empty" Unicode internal rep for
2910      * the new object. Otherwise, copy Unicode internal rep, and invalidate
2911      * the string rep of the new object.
2912      */
2913 
2914     if (srcStringPtr->hasUnicode == 0) {
2915 	copyStringPtr = (String *) ckalloc(sizeof(String));
2916 	copyStringPtr->uallocated = 0;
2917     } else {
2918 	copyStringPtr = (String *) ckalloc(
2919 		STRING_SIZE(srcStringPtr->uallocated));
2920 	copyStringPtr->uallocated = srcStringPtr->uallocated;
2921 
2922 	memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
2923 		(size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar));
2924 	copyStringPtr->unicode[srcStringPtr->numChars] = 0;
2925     }
2926     copyStringPtr->numChars = srcStringPtr->numChars;
2927     copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
2928     copyStringPtr->allocated = srcStringPtr->allocated;
2929 
2930     /*
2931      * Tricky point: the string value was copied by generic object management
2932      * code, so it doesn't contain any extra bytes that might exist in the
2933      * source object.
2934      */
2935 
2936     copyStringPtr->allocated = copyPtr->length;
2937 
2938     SET_STRING(copyPtr, copyStringPtr);
2939     copyPtr->typePtr = &tclStringType;
2940 }
2941 
2942 /*
2943  *----------------------------------------------------------------------
2944  *
2945  * SetStringFromAny --
2946  *
2947  *	Create an internal representation of type "String" for an object.
2948  *
2949  * Results:
2950  *	This operation always succeeds and returns TCL_OK.
2951  *
2952  * Side effects:
2953  *	Any old internal reputation for objPtr is freed and the internal
2954  *	representation is set to "String".
2955  *
2956  *----------------------------------------------------------------------
2957  */
2958 
2959 static int
SetStringFromAny(Tcl_Interp * interp,register Tcl_Obj * objPtr)2960 SetStringFromAny(
2961     Tcl_Interp *interp,		/* Used for error reporting if not NULL. */
2962     register Tcl_Obj *objPtr)	/* The object to convert. */
2963 {
2964     /*
2965      * The Unicode object is optimized for the case where each UTF char in a
2966      * string is only one byte. In this case, we store the value of numChars,
2967      * but we don't copy the bytes to the unicodeObj->unicode.
2968      */
2969 
2970     if (objPtr->typePtr != &tclStringType) {
2971 	String *stringPtr;
2972 
2973 	if (objPtr->typePtr != NULL) {
2974 	    if (objPtr->bytes == NULL) {
2975 		objPtr->typePtr->updateStringProc(objPtr);
2976 	    }
2977 	    TclFreeIntRep(objPtr);
2978 	}
2979 	objPtr->typePtr = &tclStringType;
2980 
2981 	/*
2982 	 * Allocate enough space for the basic String structure.
2983 	 */
2984 
2985 	stringPtr = (String *) ckalloc(sizeof(String));
2986 	stringPtr->numChars = -1;
2987 	stringPtr->uallocated = 0;
2988 	stringPtr->hasUnicode = 0;
2989 
2990 	if (objPtr->bytes != NULL) {
2991 	    stringPtr->allocated = objPtr->length;
2992             if (objPtr->bytes != tclEmptyStringRep) {
2993 	        objPtr->bytes[objPtr->length] = 0;
2994             }
2995 	} else {
2996 	    objPtr->length = 0;
2997 	}
2998 	SET_STRING(objPtr, stringPtr);
2999     }
3000     return TCL_OK;
3001 }
3002 
3003 /*
3004  *----------------------------------------------------------------------
3005  *
3006  * UpdateStringOfString --
3007  *
3008  *	Update the string representation for an object whose internal
3009  *	representation is "String".
3010  *
3011  * Results:
3012  *	None.
3013  *
3014  * Side effects:
3015  *	The object's string may be set by converting its Unicode represention
3016  *	to UTF format.
3017  *
3018  *----------------------------------------------------------------------
3019  */
3020 
3021 static void
UpdateStringOfString(Tcl_Obj * objPtr)3022 UpdateStringOfString(
3023     Tcl_Obj *objPtr)		/* Object with string rep to update. */
3024 {
3025     int i, size;
3026     Tcl_UniChar *unicode;
3027     char dummy[TCL_UTF_MAX];
3028     char *dst;
3029     String *stringPtr;
3030 
3031     stringPtr = GET_STRING(objPtr);
3032     if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) {
3033 	if (stringPtr->numChars <= 0) {
3034 	    /*
3035 	     * If there is no Unicode rep, or the string has 0 chars, then set
3036 	     * the string rep to an empty string.
3037 	     */
3038 
3039 	    objPtr->bytes = tclEmptyStringRep;
3040 	    objPtr->length = 0;
3041 	    return;
3042 	}
3043 
3044 	unicode = stringPtr->unicode;
3045 
3046 	/*
3047 	 * Translate the Unicode string to UTF. "size" will hold the amount of
3048 	 * space the UTF string needs.
3049 	 */
3050 
3051 	if (stringPtr->numChars <= INT_MAX/TCL_UTF_MAX
3052 	    && stringPtr->allocated >= stringPtr->numChars * (size_t)TCL_UTF_MAX) {
3053 	    goto copyBytes;
3054 	}
3055 
3056 	size = 0;
3057 	for (i = 0; i < stringPtr->numChars && size >= 0; i++) {
3058 	    size += Tcl_UniCharToUtf((int) unicode[i], dummy);
3059 	}
3060 	if (size < 0) {
3061 	    Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
3062 	}
3063 
3064 	objPtr->bytes = (char *) ckalloc((unsigned) (size + 1));
3065 	objPtr->length = size;
3066 	stringPtr->allocated = size;
3067 
3068     copyBytes:
3069 	dst = objPtr->bytes;
3070 	for (i = 0; i < stringPtr->numChars; i++) {
3071 	    dst += Tcl_UniCharToUtf(unicode[i], dst);
3072 	}
3073 	*dst = '\0';
3074     }
3075     return;
3076 }
3077 
3078 /*
3079  *----------------------------------------------------------------------
3080  *
3081  * FreeStringInternalRep --
3082  *
3083  *	Deallocate the storage associated with a String data object's internal
3084  *	representation.
3085  *
3086  * Results:
3087  *	None.
3088  *
3089  * Side effects:
3090  *	Frees memory.
3091  *
3092  *----------------------------------------------------------------------
3093  */
3094 
3095 static void
FreeStringInternalRep(Tcl_Obj * objPtr)3096 FreeStringInternalRep(
3097     Tcl_Obj *objPtr)		/* Object with internal rep to free. */
3098 {
3099     ckfree((char *) GET_STRING(objPtr));
3100     objPtr->typePtr = NULL;
3101 }
3102 
3103 /*
3104  * Local Variables:
3105  * mode: c
3106  * c-basic-offset: 4
3107  * fill-column: 78
3108  * End:
3109  */
3110