1 /*
2 * tclStringObj.c --
3 *
4 * This file contains functions that implement string operations on Tcl
5 * objects. Some string operations work with UTF strings and others
6 * require Unicode format. Functions that require knowledge of the width
7 * of each character, such as indexing, operate on Unicode data.
8 *
9 * A Unicode string is an internationalized string. Conceptually, a
10 * Unicode string is an array of 16-bit quantities organized as a
11 * sequence of properly formed UTF-8 characters. There is a one-to-one
12 * map between Unicode and UTF characters. Because Unicode characters
13 * have a fixed width, operations such as indexing operate on Unicode
14 * data. The String object is optimized for the case where each UTF char
15 * in a string is only one byte. In this case, we store the value of
16 * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
17 * is explicitly called).
18 *
19 * The String object type stores one or both formats. The default
20 * behavior is to store UTF. Once Unicode is calculated by a function, it
21 * is stored in the internal rep for future access (without an additional
22 * O(n) cost).
23 *
24 * To allow many appends to be done to an object without constantly
25 * reallocating the space for the string or Unicode representation, we
26 * allocate double the space for the string or Unicode and use the
27 * internal representation to keep track of how much space is used vs.
28 * allocated.
29 *
30 * Copyright (c) 1995-1997 Sun Microsystems, Inc.
31 * Copyright (c) 1999 by Scriptics Corporation.
32 *
33 * See the file "license.terms" for information on usage and redistribution of
34 * this file, and for a DISCLAIMER OF ALL WARRANTIES.
35 */
36
37 #include "tclInt.h"
38 #include "tommath.h"
39
40 /*
41 * Prototypes for functions defined later in this file:
42 */
43
44 static void AppendPrintfToObjVA(Tcl_Obj *objPtr,
45 const char *format, va_list argList);
46 static void AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr,
47 const Tcl_UniChar *unicode, int appendNumChars);
48 static void AppendUnicodeToUtfRep(Tcl_Obj *objPtr,
49 const Tcl_UniChar *unicode, int numChars);
50 static void AppendUtfToUnicodeRep(Tcl_Obj *objPtr,
51 const char *bytes, int numBytes);
52 static void AppendUtfToUtfRep(Tcl_Obj *objPtr,
53 const char *bytes, int numBytes);
54 static void DupStringInternalRep(Tcl_Obj *objPtr,
55 Tcl_Obj *copyPtr);
56 static void FillUnicodeRep(Tcl_Obj *objPtr);
57 static void FreeStringInternalRep(Tcl_Obj *objPtr);
58 static void GrowUnicodeBuffer(Tcl_Obj *objPtr, int needed);
59 static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
60 static void SetUnicodeObj(Tcl_Obj *objPtr,
61 const Tcl_UniChar *unicode, int numChars);
62 static int UnicodeLength(const Tcl_UniChar *unicode);
63 static void UpdateStringOfString(Tcl_Obj *objPtr);
64
65 /*
66 * The structure below defines the string Tcl object type by means of
67 * functions that can be invoked by generic object code.
68 */
69
70 Tcl_ObjType tclStringType = {
71 "string", /* name */
72 FreeStringInternalRep, /* freeIntRepPro */
73 DupStringInternalRep, /* dupIntRepProc */
74 UpdateStringOfString, /* updateStringProc */
75 SetStringFromAny /* setFromAnyProc */
76 };
77
78 /*
79 * The following structure is the internal rep for a String object. It keeps
80 * track of how much memory has been used and how much has been allocated for
81 * the Unicode and UTF string to enable growing and shrinking of the UTF and
82 * Unicode reps of the String object with fewer mallocs. To optimize string
83 * length and indexing operations, this structure also stores the number of
84 * characters (same of UTF and Unicode!) once that value has been computed.
85 *
86 * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
87 * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
88 * can be officially modified by altering the definition of Tcl_UniChar in
89 * tcl.h, but do not do that unless you are sure what you're doing!
90 */
91
92 typedef struct String {
93 int numChars; /* The number of chars in the string. -1 means
94 * this value has not been calculated. >= 0
95 * means that there is a valid Unicode rep, or
96 * that the number of UTF bytes == the number
97 * of chars. */
98 size_t allocated; /* The amount of space actually allocated for
99 * the UTF string (minus 1 byte for the
100 * termination char). */
101 size_t uallocated; /* The amount of space actually allocated for
102 * the Unicode string (minus 2 bytes for the
103 * termination char). */
104 int hasUnicode; /* Boolean determining whether the string has
105 * a Unicode representation. */
106 Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual size
107 * of this field depends on the 'uallocated'
108 * field above. */
109 } String;
110
111 #define STRING_MAXCHARS \
112 (1 + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)))
113 #define STRING_UALLOC(numChars) \
114 ((numChars) * sizeof(Tcl_UniChar))
115 #define STRING_SIZE(ualloc) \
116 ((unsigned) ((ualloc) \
117 ? (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)) \
118 : sizeof(String)))
119 #define stringCheckLimits(numChars) \
120 if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
121 Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
122 STRING_MAXCHARS); \
123 }
124 #define stringRealloc(ptr, numChars) \
125 (String *) ckrealloc((char *) ptr, \
126 (unsigned) STRING_SIZE(STRING_UALLOC(numChars)) )
127 #define stringAttemptRealloc(ptr, numChars) \
128 (String *) attemptckrealloc((char *) ptr, \
129 (unsigned) STRING_SIZE(STRING_UALLOC(numChars)) )
130 #define GET_STRING(objPtr) \
131 ((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
132 #define SET_STRING(objPtr, stringPtr) \
133 ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
134
135 /*
136 * TCL STRING GROWTH ALGORITHM
137 *
138 * When growing strings (during an append, for example), the following growth
139 * algorithm is used:
140 *
141 * Attempt to allocate 2 * (originalLength + appendLength)
142 * On failure:
143 * attempt to allocate originalLength + 2*appendLength +
144 * TCL_GROWTH_MIN_ALLOC
145 *
146 * This algorithm allows very good performance, as it rapidly increases the
147 * memory allocated for a given string, which minimizes the number of
148 * reallocations that must be performed. However, using only the doubling
149 * algorithm can lead to a significant waste of memory. In particular, it may
150 * fail even when there is sufficient memory available to complete the append
151 * request (but there is not 2*totalLength memory available). So when the
152 * doubling fails (because there is not enough memory available), the
153 * algorithm requests a smaller amount of memory, which is still enough to
154 * cover the request, but which hopefully will be less than the total
155 * available memory.
156 *
157 * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very
158 * small appends. Without this extra slush factor, a sequence of several small
159 * appends would cause several memory allocations. As long as
160 * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior.
161 *
162 * The growth algorithm can be tuned by adjusting the following parameters:
163 *
164 * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when
165 * the double allocation has failed. Default is
166 * 1024 (1 kilobyte).
167 */
168
169 #ifndef TCL_GROWTH_MIN_ALLOC
170 #define TCL_GROWTH_MIN_ALLOC 1024
171 #endif
172
173 static void
GrowUnicodeBuffer(Tcl_Obj * objPtr,int needed)174 GrowUnicodeBuffer(
175 Tcl_Obj *objPtr,
176 int needed)
177 {
178 /* Pre-conditions:
179 * objPtr->typePtr == &tclStringType
180 * STRING_UALLOC(needed) > stringPtr->uallocated
181 * needed < STRING_MAXCHARS
182 */
183 String *ptr = NULL, *stringPtr = GET_STRING(objPtr);
184 int attempt;
185
186 if (stringPtr->uallocated > 0) {
187 /* Subsequent appends - apply the growth algorithm. */
188 attempt = 2 * needed;
189 if (attempt >= 0 && attempt <= STRING_MAXCHARS) {
190 ptr = stringAttemptRealloc(stringPtr, attempt);
191 }
192 if (ptr == NULL) {
193 /*
194 * Take care computing the amount of modest growth to avoid
195 * overflow into invalid argument values for attempt.
196 */
197 unsigned int limit = STRING_MAXCHARS - needed;
198 unsigned int extra = needed - stringPtr->numChars
199 + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar);
200 int growth = (int) ((extra > limit) ? limit : extra);
201 attempt = needed + growth;
202 ptr = stringAttemptRealloc(stringPtr, attempt);
203 }
204 }
205 if (ptr == NULL) {
206 /* First allocation - just big enough; or last chance fallback. */
207 attempt = needed;
208 ptr = stringRealloc(stringPtr, attempt);
209 }
210 stringPtr = ptr;
211 stringPtr->uallocated = STRING_UALLOC(attempt);
212 SET_STRING(objPtr, stringPtr);
213 }
214
215
216 /*
217 *----------------------------------------------------------------------
218 *
219 * Tcl_NewStringObj --
220 *
221 * This function is normally called when not debugging: i.e., when
222 * TCL_MEM_DEBUG is not defined. It creates a new string object and
223 * initializes it from the byte pointer and length arguments.
224 *
225 * When TCL_MEM_DEBUG is defined, this function just returns the result
226 * of calling the debugging version Tcl_DbNewStringObj.
227 *
228 * Results:
229 * A newly created string object is returned that has ref count zero.
230 *
231 * Side effects:
232 * The new object's internal string representation will be set to a copy
233 * of the length bytes starting at "bytes". If "length" is negative, use
234 * bytes up to the first NUL byte; i.e., assume "bytes" points to a
235 * C-style NUL-terminated string. The object's type is set to NULL. An
236 * extra NUL is added to the end of the new object's byte array.
237 *
238 *----------------------------------------------------------------------
239 */
240
241 #ifdef TCL_MEM_DEBUG
242 #undef Tcl_NewStringObj
243 Tcl_Obj *
Tcl_NewStringObj(const char * bytes,int length)244 Tcl_NewStringObj(
245 const char *bytes, /* Points to the first of the length bytes
246 * used to initialize the new object. */
247 int length) /* The number of bytes to copy from "bytes"
248 * when initializing the new object. If
249 * negative, use bytes up to the first NUL
250 * byte. */
251 {
252 return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
253 }
254 #else /* if not TCL_MEM_DEBUG */
255 Tcl_Obj *
Tcl_NewStringObj(const char * bytes,int length)256 Tcl_NewStringObj(
257 const char *bytes, /* Points to the first of the length bytes
258 * used to initialize the new object. */
259 int length) /* The number of bytes to copy from "bytes"
260 * when initializing the new object. If
261 * negative, use bytes up to the first NUL
262 * byte. */
263 {
264 register Tcl_Obj *objPtr;
265
266 if (length < 0) {
267 length = (bytes? strlen(bytes) : 0);
268 }
269 TclNewStringObj(objPtr, bytes, length);
270 return objPtr;
271 }
272 #endif /* TCL_MEM_DEBUG */
273
274 /*
275 *----------------------------------------------------------------------
276 *
277 * Tcl_DbNewStringObj --
278 *
279 * This function is normally called when debugging: i.e., when
280 * TCL_MEM_DEBUG is defined. It creates new string objects. It is the
281 * same as the Tcl_NewStringObj function above except that it calls
282 * Tcl_DbCkalloc directly with the file name and line number from its
283 * caller. This simplifies debugging since then the [memory active]
284 * command will report the correct file name and line number when
285 * reporting objects that haven't been freed.
286 *
287 * When TCL_MEM_DEBUG is not defined, this function just returns the
288 * result of calling Tcl_NewStringObj.
289 *
290 * Results:
291 * A newly created string object is returned that has ref count zero.
292 *
293 * Side effects:
294 * The new object's internal string representation will be set to a copy
295 * of the length bytes starting at "bytes". If "length" is negative, use
296 * bytes up to the first NUL byte; i.e., assume "bytes" points to a
297 * C-style NUL-terminated string. The object's type is set to NULL. An
298 * extra NUL is added to the end of the new object's byte array.
299 *
300 *----------------------------------------------------------------------
301 */
302
303 #ifdef TCL_MEM_DEBUG
304 Tcl_Obj *
Tcl_DbNewStringObj(const char * bytes,int length,const char * file,int line)305 Tcl_DbNewStringObj(
306 const char *bytes, /* Points to the first of the length bytes
307 * used to initialize the new object. */
308 int length, /* The number of bytes to copy from "bytes"
309 * when initializing the new object. If
310 * negative, use bytes up to the first NUL
311 * byte. */
312 const char *file, /* The name of the source file calling this
313 * function; used for debugging. */
314 int line) /* Line number in the source file; used for
315 * debugging. */
316 {
317 register Tcl_Obj *objPtr;
318
319 if (length < 0) {
320 length = (bytes? strlen(bytes) : 0);
321 }
322 TclDbNewObj(objPtr, file, line);
323 TclInitStringRep(objPtr, bytes, length);
324 return objPtr;
325 }
326 #else /* if not TCL_MEM_DEBUG */
327 Tcl_Obj *
Tcl_DbNewStringObj(const char * bytes,register int length,const char * file,int line)328 Tcl_DbNewStringObj(
329 const char *bytes, /* Points to the first of the length bytes
330 * used to initialize the new object. */
331 register int length, /* The number of bytes to copy from "bytes"
332 * when initializing the new object. If
333 * negative, use bytes up to the first NUL
334 * byte. */
335 const char *file, /* The name of the source file calling this
336 * function; used for debugging. */
337 int line) /* Line number in the source file; used for
338 * debugging. */
339 {
340 return Tcl_NewStringObj(bytes, length);
341 }
342 #endif /* TCL_MEM_DEBUG */
343
344 /*
345 *---------------------------------------------------------------------------
346 *
347 * Tcl_NewUnicodeObj --
348 *
349 * This function is creates a new String object and initializes it from
350 * the given Unicode String. If the Utf String is the same size as the
351 * Unicode string, don't duplicate the data.
352 *
353 * Results:
354 * The newly created object is returned. This object will have no initial
355 * string representation. The returned object has a ref count of 0.
356 *
357 * Side effects:
358 * Memory allocated for new object and copy of Unicode argument.
359 *
360 *---------------------------------------------------------------------------
361 */
362
363 Tcl_Obj *
Tcl_NewUnicodeObj(const Tcl_UniChar * unicode,int numChars)364 Tcl_NewUnicodeObj(
365 const Tcl_UniChar *unicode, /* The unicode string used to initialize the
366 * new object. */
367 int numChars) /* Number of characters in the unicode
368 * string. */
369 {
370 Tcl_Obj *objPtr;
371
372 TclNewObj(objPtr);
373 SetUnicodeObj(objPtr, unicode, numChars);
374 return objPtr;
375 }
376
377 /*
378 *----------------------------------------------------------------------
379 *
380 * Tcl_GetCharLength --
381 *
382 * Get the length of the Unicode string from the Tcl object.
383 *
384 * Results:
385 * Pointer to unicode string representing the unicode object.
386 *
387 * Side effects:
388 * Frees old internal rep. Allocates memory for new "String" internal
389 * rep.
390 *
391 *----------------------------------------------------------------------
392 */
393
394 int
Tcl_GetCharLength(Tcl_Obj * objPtr)395 Tcl_GetCharLength(
396 Tcl_Obj *objPtr) /* The String object to get the num chars
397 * of. */
398 {
399 String *stringPtr;
400
401 SetStringFromAny(NULL, objPtr);
402 stringPtr = GET_STRING(objPtr);
403
404 /*
405 * If numChars is unknown, then calculate the number of characaters while
406 * populating the Unicode string.
407 */
408
409 if (stringPtr->numChars == -1) {
410 register int i = objPtr->length;
411 register unsigned char *str = (unsigned char *) objPtr->bytes;
412
413 /*
414 * This is a speed sensitive function, so run specially over the
415 * string to count continuous ascii characters before resorting to the
416 * Tcl_NumUtfChars call. This is a long form of:
417 stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes,objPtr->length);
418 *
419 * TODO: Consider macro-izing this.
420 */
421
422 while (i && (*str < 0xC0)) {
423 i--;
424 str++;
425 }
426 stringPtr->numChars = objPtr->length - i;
427 if (i) {
428 stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes
429 + (objPtr->length - i), i);
430 }
431
432 if (stringPtr->numChars == objPtr->length) {
433 /*
434 * Since we've just calculated the number of chars, and all UTF
435 * chars are 1-byte long, we don't need to store the unicode
436 * string.
437 */
438
439 stringPtr->hasUnicode = 0;
440 } else {
441 /*
442 * Since we've just calucalated the number of chars, and not all
443 * UTF chars are 1-byte long, go ahead and populate the unicode
444 * string.
445 */
446
447 FillUnicodeRep(objPtr);
448
449 /*
450 * We need to fetch the pointer again because we have just
451 * reallocated the structure to make room for the Unicode data.
452 */
453
454 stringPtr = GET_STRING(objPtr);
455 }
456 }
457 return stringPtr->numChars;
458 }
459
460 /*
461 *----------------------------------------------------------------------
462 *
463 * Tcl_GetUniChar --
464 *
465 * Get the index'th Unicode character from the String object. The index
466 * is assumed to be in the appropriate range.
467 *
468 * Results:
469 * Returns the index'th Unicode character in the Object.
470 *
471 * Side effects:
472 * Fills unichar with the index'th Unicode character.
473 *
474 *----------------------------------------------------------------------
475 */
476
477 Tcl_UniChar
Tcl_GetUniChar(Tcl_Obj * objPtr,int index)478 Tcl_GetUniChar(
479 Tcl_Obj *objPtr, /* The object to get the Unicode charater
480 * from. */
481 int index) /* Get the index'th Unicode character. */
482 {
483 Tcl_UniChar unichar;
484 String *stringPtr;
485
486 SetStringFromAny(NULL, objPtr);
487 stringPtr = GET_STRING(objPtr);
488
489 if (stringPtr->numChars == -1) {
490 /*
491 * We haven't yet calculated the length, so we don't have the Unicode
492 * str. We need to know the number of chars before we can do indexing.
493 */
494
495 Tcl_GetCharLength(objPtr);
496
497 /*
498 * We need to fetch the pointer again because we may have just
499 * reallocated the structure.
500 */
501
502 stringPtr = GET_STRING(objPtr);
503 }
504 if (stringPtr->hasUnicode == 0) {
505 /*
506 * All of the characters in the Utf string are 1 byte chars, so we
507 * don't store the unicode char. We get the Utf string and convert the
508 * index'th byte to a Unicode character.
509 */
510
511 unichar = (Tcl_UniChar) objPtr->bytes[index];
512 } else {
513 unichar = stringPtr->unicode[index];
514 }
515 return unichar;
516 }
517
518 /*
519 *----------------------------------------------------------------------
520 *
521 * Tcl_GetUnicode --
522 *
523 * Get the Unicode form of the String object. If the object is not
524 * already a String object, it will be converted to one. If the String
525 * object does not have a Unicode rep, then one is create from the UTF
526 * string format.
527 *
528 * Results:
529 * Returns a pointer to the object's internal Unicode string.
530 *
531 * Side effects:
532 * Converts the object to have the String internal rep.
533 *
534 *----------------------------------------------------------------------
535 */
536
537 Tcl_UniChar *
Tcl_GetUnicode(Tcl_Obj * objPtr)538 Tcl_GetUnicode(
539 Tcl_Obj *objPtr) /* The object to find the unicode string
540 * for. */
541 {
542 String *stringPtr;
543
544 SetStringFromAny(NULL, objPtr);
545 stringPtr = GET_STRING(objPtr);
546
547 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
548 /*
549 * We haven't yet calculated the length, or all of the characters in
550 * the Utf string are 1 byte chars (so we didn't store the unicode
551 * str). Since this function must return a unicode string, and one has
552 * not yet been stored, force the Unicode to be calculated and stored
553 * now.
554 */
555
556 FillUnicodeRep(objPtr);
557
558 /*
559 * We need to fetch the pointer again because we have just reallocated
560 * the structure to make room for the Unicode data.
561 */
562
563 stringPtr = GET_STRING(objPtr);
564 }
565 return stringPtr->unicode;
566 }
567
568 /*
569 *----------------------------------------------------------------------
570 *
571 * Tcl_GetUnicodeFromObj --
572 *
573 * Get the Unicode form of the String object with length. If the object
574 * is not already a String object, it will be converted to one. If the
575 * String object does not have a Unicode rep, then one is create from the
576 * UTF string format.
577 *
578 * Results:
579 * Returns a pointer to the object's internal Unicode string.
580 *
581 * Side effects:
582 * Converts the object to have the String internal rep.
583 *
584 *----------------------------------------------------------------------
585 */
586
587 Tcl_UniChar *
Tcl_GetUnicodeFromObj(Tcl_Obj * objPtr,int * lengthPtr)588 Tcl_GetUnicodeFromObj(
589 Tcl_Obj *objPtr, /* The object to find the unicode string
590 * for. */
591 int *lengthPtr) /* If non-NULL, the location where the string
592 * rep's unichar length should be stored. If
593 * NULL, no length is stored. */
594 {
595 String *stringPtr;
596
597 SetStringFromAny(NULL, objPtr);
598 stringPtr = GET_STRING(objPtr);
599
600 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
601 /*
602 * We haven't yet calculated the length, or all of the characters in
603 * the Utf string are 1 byte chars (so we didn't store the unicode
604 * str). Since this function must return a unicode string, and one has
605 * not yet been stored, force the Unicode to be calculated and stored
606 * now.
607 */
608
609 FillUnicodeRep(objPtr);
610
611 /*
612 * We need to fetch the pointer again because we have just reallocated
613 * the structure to make room for the Unicode data.
614 */
615
616 stringPtr = GET_STRING(objPtr);
617 }
618
619 if (lengthPtr != NULL) {
620 *lengthPtr = stringPtr->numChars;
621 }
622 return stringPtr->unicode;
623 }
624
625 /*
626 *----------------------------------------------------------------------
627 *
628 * Tcl_GetRange --
629 *
630 * Create a Tcl Object that contains the chars between first and last of
631 * the object indicated by "objPtr". If the object is not already a
632 * String object, convert it to one. The first and last indices are
633 * assumed to be in the appropriate range.
634 *
635 * Results:
636 * Returns a new Tcl Object of the String type.
637 *
638 * Side effects:
639 * Changes the internal rep of "objPtr" to the String type.
640 *
641 *----------------------------------------------------------------------
642 */
643
644 Tcl_Obj *
Tcl_GetRange(Tcl_Obj * objPtr,int first,int last)645 Tcl_GetRange(
646 Tcl_Obj *objPtr, /* The Tcl object to find the range of. */
647 int first, /* First index of the range. */
648 int last) /* Last index of the range. */
649 {
650 Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
651 String *stringPtr;
652
653 SetStringFromAny(NULL, objPtr);
654 stringPtr = GET_STRING(objPtr);
655
656 if (stringPtr->numChars == -1) {
657 /*
658 * We haven't yet calculated the length, so we don't have the Unicode
659 * str. We need to know the number of chars before we can do indexing.
660 */
661
662 Tcl_GetCharLength(objPtr);
663
664 /*
665 * We need to fetch the pointer again because we may have just
666 * reallocated the structure.
667 */
668
669 stringPtr = GET_STRING(objPtr);
670 }
671
672 if (objPtr->bytes && (stringPtr->numChars == objPtr->length)) {
673 char *str = TclGetString(objPtr);
674
675 /*
676 * All of the characters in the Utf string are 1 byte chars, so we
677 * don't store the unicode char. Create a new string object containing
678 * the specified range of chars.
679 */
680
681 newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
682
683 /*
684 * Since we know the new string only has 1-byte chars, we can set it's
685 * numChars field.
686 */
687
688 SetStringFromAny(NULL, newObjPtr);
689 stringPtr = GET_STRING(newObjPtr);
690 stringPtr->numChars = last-first+1;
691 } else {
692 newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first,
693 last-first+1);
694 }
695 return newObjPtr;
696 }
697
698 /*
699 *----------------------------------------------------------------------
700 *
701 * Tcl_SetStringObj --
702 *
703 * Modify an object to hold a string that is a copy of the bytes
704 * indicated by the byte pointer and length arguments.
705 *
706 * Results:
707 * None.
708 *
709 * Side effects:
710 * The object's string representation will be set to a copy of the
711 * "length" bytes starting at "bytes". If "length" is negative, use bytes
712 * up to the first NUL byte; i.e., assume "bytes" points to a C-style
713 * NUL-terminated string. The object's old string and internal
714 * representations are freed and the object's type is set NULL.
715 *
716 *----------------------------------------------------------------------
717 */
718
719 void
Tcl_SetStringObj(register Tcl_Obj * objPtr,const char * bytes,register int length)720 Tcl_SetStringObj(
721 register Tcl_Obj *objPtr, /* Object whose internal rep to init. */
722 const char *bytes, /* Points to the first of the length bytes
723 * used to initialize the object. */
724 register int length) /* The number of bytes to copy from "bytes"
725 * when initializing the object. If negative,
726 * use bytes up to the first NUL byte.*/
727 {
728 if (Tcl_IsShared(objPtr)) {
729 Tcl_Panic("%s called with shared object", "Tcl_SetStringObj");
730 }
731
732 /*
733 * Set the type to NULL and free any internal rep for the old type.
734 */
735
736 TclFreeIntRep(objPtr);
737 objPtr->typePtr = NULL;
738
739 /*
740 * Free any old string rep, then set the string rep to a copy of the
741 * length bytes starting at "bytes".
742 */
743
744 TclInvalidateStringRep(objPtr);
745 if (length < 0) {
746 length = (bytes? strlen(bytes) : 0);
747 }
748 TclInitStringRep(objPtr, bytes, length);
749 }
750
751 /*
752 *----------------------------------------------------------------------
753 *
754 * Tcl_SetObjLength --
755 *
756 * This function changes the length of the string representation of an
757 * object.
758 *
759 * Results:
760 * None.
761 *
762 * Side effects:
763 * If the size of objPtr's string representation is greater than length,
764 * then it is reduced to length and a new terminating null byte is stored
765 * in the strength. If the length of the string representation is greater
766 * than length, the storage space is reallocated to the given length; a
767 * null byte is stored at the end, but other bytes past the end of the
768 * original string representation are undefined. The object's internal
769 * representation is changed to "expendable string".
770 *
771 *----------------------------------------------------------------------
772 */
773
774 void
Tcl_SetObjLength(register Tcl_Obj * objPtr,register int length)775 Tcl_SetObjLength(
776 register Tcl_Obj *objPtr, /* Pointer to object. This object must not
777 * currently be shared. */
778 register int length) /* Number of bytes desired for string
779 * representation of object, not including
780 * terminating null byte. */
781 {
782 String *stringPtr;
783
784 if (length < 0) {
785 /*
786 * Setting to a negative length is nonsense. This is probably the
787 * result of overflowing the signed integer range.
788 */
789 Tcl_Panic("Tcl_SetObjLength: negative length requested: "
790 "%d (integer overflow?)", length);
791 }
792 if (Tcl_IsShared(objPtr)) {
793 Tcl_Panic("%s called with shared object", "Tcl_SetObjLength");
794 }
795 SetStringFromAny(NULL, objPtr);
796
797 stringPtr = GET_STRING(objPtr);
798
799 /*
800 * Check that we're not extending a pure unicode string.
801 */
802
803 if ((size_t)length > stringPtr->allocated &&
804 (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
805 /*
806 * Not enough space in current string. Reallocate the string space and
807 * free the old string.
808 */
809
810 if (objPtr->bytes != tclEmptyStringRep) {
811 objPtr->bytes = ckrealloc((char *) objPtr->bytes,
812 (unsigned) (length + 1));
813 } else {
814 char *newBytes = ckalloc((unsigned) (length+1));
815
816 if (objPtr->bytes != NULL && objPtr->length != 0) {
817 memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length);
818 TclInvalidateStringRep(objPtr);
819 }
820 objPtr->bytes = newBytes;
821 }
822 stringPtr->allocated = length;
823
824 /*
825 * Invalidate the unicode data.
826 */
827
828 stringPtr->hasUnicode = 0;
829 }
830
831 if (objPtr->bytes != NULL) {
832 objPtr->length = length;
833 if (objPtr->bytes != tclEmptyStringRep) {
834 /*
835 * Ensure the string is NUL-terminated.
836 */
837
838 objPtr->bytes[length] = 0;
839 }
840
841 /*
842 * Invalidate the unicode data.
843 */
844
845 stringPtr->numChars = -1;
846 stringPtr->hasUnicode = 0;
847 } else {
848 /*
849 * Changing length of pure unicode string.
850 */
851
852 size_t uallocated = STRING_UALLOC(length);
853
854 stringCheckLimits(length);
855 if (uallocated > stringPtr->uallocated) {
856 stringPtr = stringRealloc(stringPtr, length);
857 SET_STRING(objPtr, stringPtr);
858 stringPtr->uallocated = uallocated;
859 }
860 stringPtr->numChars = length;
861 stringPtr->hasUnicode = (length > 0);
862
863 /*
864 * Ensure the string is NUL-terminated.
865 */
866
867 stringPtr->unicode[length] = 0;
868 stringPtr->allocated = 0;
869 objPtr->length = 0;
870 }
871 }
872
873 /*
874 *----------------------------------------------------------------------
875 *
876 * Tcl_AttemptSetObjLength --
877 *
878 * This function changes the length of the string representation of an
879 * object. It uses the attempt* (non-panic'ing) memory allocators.
880 *
881 * Results:
882 * 1 if the requested memory was allocated, 0 otherwise.
883 *
884 * Side effects:
885 * If the size of objPtr's string representation is greater than length,
886 * then it is reduced to length and a new terminating null byte is stored
887 * in the strength. If the length of the string representation is greater
888 * than length, the storage space is reallocated to the given length; a
889 * null byte is stored at the end, but other bytes past the end of the
890 * original string representation are undefined. The object's internal
891 * representation is changed to "expendable string".
892 *
893 *----------------------------------------------------------------------
894 */
895
896 int
Tcl_AttemptSetObjLength(register Tcl_Obj * objPtr,register int length)897 Tcl_AttemptSetObjLength(
898 register Tcl_Obj *objPtr, /* Pointer to object. This object must not
899 * currently be shared. */
900 register int length) /* Number of bytes desired for string
901 * representation of object, not including
902 * terminating null byte. */
903 {
904 String *stringPtr;
905
906 if (length < 0) {
907 /*
908 * Setting to a negative length is nonsense. This is probably the
909 * result of overflowing the signed integer range.
910 */
911 return 0;
912 }
913 if (Tcl_IsShared(objPtr)) {
914 Tcl_Panic("%s called with shared object", "Tcl_AttemptSetObjLength");
915 }
916 SetStringFromAny(NULL, objPtr);
917
918 stringPtr = GET_STRING(objPtr);
919
920 /*
921 * Check that we're not extending a pure unicode string.
922 */
923
924 if (length > (int) stringPtr->allocated &&
925 (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
926 char *newBytes;
927
928 /*
929 * Not enough space in current string. Reallocate the string space and
930 * free the old string.
931 */
932
933 if (objPtr->bytes != tclEmptyStringRep) {
934 newBytes = attemptckrealloc(objPtr->bytes,
935 (unsigned)(length + 1));
936 if (newBytes == NULL) {
937 return 0;
938 }
939 } else {
940 newBytes = attemptckalloc((unsigned) (length + 1));
941 if (newBytes == NULL) {
942 return 0;
943 }
944 if (objPtr->bytes != NULL && objPtr->length != 0) {
945 memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length);
946 TclInvalidateStringRep(objPtr);
947 }
948 }
949 objPtr->bytes = newBytes;
950 stringPtr->allocated = length;
951
952 /*
953 * Invalidate the unicode data.
954 */
955
956 stringPtr->hasUnicode = 0;
957 }
958
959 if (objPtr->bytes != NULL) {
960 objPtr->length = length;
961 if (objPtr->bytes != tclEmptyStringRep) {
962 /*
963 * Ensure the string is NULL-terminated.
964 */
965
966 objPtr->bytes[length] = 0;
967 }
968
969 /*
970 * Invalidate the unicode data.
971 */
972
973 stringPtr->numChars = -1;
974 stringPtr->hasUnicode = 0;
975 } else {
976 /*
977 * Changing length of pure unicode string.
978 */
979
980 size_t uallocated = STRING_UALLOC(length);
981 if (length > STRING_MAXCHARS) {
982 return 0;
983 }
984
985 if (uallocated > stringPtr->uallocated) {
986 stringPtr = stringAttemptRealloc(stringPtr, length);
987 if (stringPtr == NULL) {
988 return 0;
989 }
990 SET_STRING(objPtr, stringPtr);
991 stringPtr->uallocated = uallocated;
992 }
993 stringPtr->numChars = length;
994 stringPtr->hasUnicode = (length > 0);
995
996 /*
997 * Ensure the string is NUL-terminated.
998 */
999
1000 stringPtr->unicode[length] = 0;
1001 stringPtr->allocated = 0;
1002 objPtr->length = 0;
1003 }
1004 return 1;
1005 }
1006
1007 /*
1008 *---------------------------------------------------------------------------
1009 *
1010 * Tcl_SetUnicodeObj --
1011 *
1012 * Modify an object to hold the Unicode string indicated by "unicode".
1013 *
1014 * Results:
1015 * None.
1016 *
1017 * Side effects:
1018 * Memory allocated for new "String" internal rep.
1019 *
1020 *---------------------------------------------------------------------------
1021 */
1022
1023 void
Tcl_SetUnicodeObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1024 Tcl_SetUnicodeObj(
1025 Tcl_Obj *objPtr, /* The object to set the string of. */
1026 const Tcl_UniChar *unicode, /* The unicode string used to initialize the
1027 * object. */
1028 int numChars) /* Number of characters in the unicode
1029 * string. */
1030 {
1031 if (Tcl_IsShared(objPtr)) {
1032 Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj");
1033 }
1034 TclFreeIntRep(objPtr);
1035 SetUnicodeObj(objPtr, unicode, numChars);
1036 }
1037
1038 static int
UnicodeLength(const Tcl_UniChar * unicode)1039 UnicodeLength(
1040 const Tcl_UniChar *unicode)
1041 {
1042 int numChars = 0;
1043
1044 if (unicode) {
1045 while (numChars >= 0 && unicode[numChars] != 0) {
1046 numChars++;
1047 }
1048 }
1049 stringCheckLimits(numChars);
1050 return numChars;
1051 }
1052
1053 static void
SetUnicodeObj(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1054 SetUnicodeObj(
1055 Tcl_Obj *objPtr, /* The object to set the string of. */
1056 const Tcl_UniChar *unicode, /* The unicode string used to initialize the
1057 * object. */
1058 int numChars) /* Number of characters in the unicode
1059 * string. */
1060 {
1061 String *stringPtr;
1062 size_t uallocated;
1063
1064 if (numChars < 0) {
1065 numChars = UnicodeLength(unicode);
1066 }
1067
1068 /*
1069 * Allocate enough space for the String structure + Unicode string.
1070 */
1071
1072 stringCheckLimits(numChars);
1073 uallocated = STRING_UALLOC(numChars);
1074 stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
1075
1076 stringPtr->numChars = numChars;
1077 stringPtr->uallocated = uallocated;
1078 stringPtr->hasUnicode = (numChars > 0);
1079 stringPtr->allocated = 0;
1080 memcpy(stringPtr->unicode, unicode, uallocated);
1081 stringPtr->unicode[numChars] = 0;
1082
1083 TclInvalidateStringRep(objPtr);
1084 objPtr->typePtr = &tclStringType;
1085 SET_STRING(objPtr, stringPtr);
1086 }
1087
1088 /*
1089 *----------------------------------------------------------------------
1090 *
1091 * Tcl_AppendLimitedToObj --
1092 *
1093 * This function appends a limited number of bytes from a sequence of
1094 * bytes to an object, marking any limitation with an ellipsis.
1095 *
1096 * Results:
1097 * None.
1098 *
1099 * Side effects:
1100 * The bytes at *bytes are appended to the string representation of
1101 * objPtr.
1102 *
1103 *----------------------------------------------------------------------
1104 */
1105
1106 void
Tcl_AppendLimitedToObj(register Tcl_Obj * objPtr,const char * bytes,register int length,register int limit,const char * ellipsis)1107 Tcl_AppendLimitedToObj(
1108 register Tcl_Obj *objPtr, /* Points to the object to append to. */
1109 const char *bytes, /* Points to the bytes to append to the
1110 * object. */
1111 register int length, /* The number of bytes available to be
1112 * appended from "bytes". If < 0, then all
1113 * bytes up to a NUL byte are available. */
1114 register int limit, /* The maximum number of bytes to append to
1115 * the object. */
1116 const char *ellipsis) /* Ellipsis marker string, appended to the
1117 * object to indicate not all available bytes
1118 * at "bytes" were appended. */
1119 {
1120 String *stringPtr;
1121 int toCopy = 0;
1122
1123 if (Tcl_IsShared(objPtr)) {
1124 Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj");
1125 }
1126
1127 SetStringFromAny(NULL, objPtr);
1128
1129 if (length < 0) {
1130 length = (bytes ? strlen(bytes) : 0);
1131 }
1132 if (length == 0) {
1133 return;
1134 }
1135
1136 if (length <= limit) {
1137 toCopy = length;
1138 } else {
1139 if (ellipsis == NULL) {
1140 ellipsis = "...";
1141 }
1142 toCopy = (bytes == NULL) ? limit
1143 : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
1144 }
1145
1146 /*
1147 * If objPtr has a valid Unicode rep, then append the Unicode conversion
1148 * of "bytes" to the objPtr's Unicode rep, otherwise append "bytes" to
1149 * objPtr's string rep.
1150 */
1151
1152 stringPtr = GET_STRING(objPtr);
1153 if (stringPtr->hasUnicode != 0) {
1154 AppendUtfToUnicodeRep(objPtr, bytes, toCopy);
1155 } else {
1156 AppendUtfToUtfRep(objPtr, bytes, toCopy);
1157 }
1158
1159 if (length <= limit) {
1160 return;
1161 }
1162
1163 stringPtr = GET_STRING(objPtr);
1164 if (stringPtr->hasUnicode != 0) {
1165 AppendUtfToUnicodeRep(objPtr, ellipsis, -1);
1166 } else {
1167 AppendUtfToUtfRep(objPtr, ellipsis, -1);
1168 }
1169 }
1170
1171 /*
1172 *----------------------------------------------------------------------
1173 *
1174 * Tcl_AppendToObj --
1175 *
1176 * This function appends a sequence of bytes to an object.
1177 *
1178 * Results:
1179 * None.
1180 *
1181 * Side effects:
1182 * The bytes at *bytes are appended to the string representation of
1183 * objPtr.
1184 *
1185 *----------------------------------------------------------------------
1186 */
1187
1188 void
Tcl_AppendToObj(register Tcl_Obj * objPtr,const char * bytes,register int length)1189 Tcl_AppendToObj(
1190 register Tcl_Obj *objPtr, /* Points to the object to append to. */
1191 const char *bytes, /* Points to the bytes to append to the
1192 * object. */
1193 register int length) /* The number of bytes to append from "bytes".
1194 * If < 0, then append all bytes up to NUL
1195 * byte. */
1196 {
1197 Tcl_AppendLimitedToObj(objPtr, bytes, length, INT_MAX, NULL);
1198 }
1199
1200 /*
1201 *----------------------------------------------------------------------
1202 *
1203 * Tcl_AppendUnicodeToObj --
1204 *
1205 * This function appends a Unicode string to an object in the most
1206 * efficient manner possible. Length must be >= 0.
1207 *
1208 * Results:
1209 * None.
1210 *
1211 * Side effects:
1212 * Invalidates the string rep and creates a new Unicode string.
1213 *
1214 *----------------------------------------------------------------------
1215 */
1216
1217 void
Tcl_AppendUnicodeToObj(register Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int length)1218 Tcl_AppendUnicodeToObj(
1219 register Tcl_Obj *objPtr, /* Points to the object to append to. */
1220 const Tcl_UniChar *unicode, /* The unicode string to append to the
1221 * object. */
1222 int length) /* Number of chars in "unicode". */
1223 {
1224 String *stringPtr;
1225
1226 if (Tcl_IsShared(objPtr)) {
1227 Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
1228 }
1229
1230 if (length == 0) {
1231 return;
1232 }
1233
1234 SetStringFromAny(NULL, objPtr);
1235 stringPtr = GET_STRING(objPtr);
1236
1237 /*
1238 * If objPtr has a valid Unicode rep, then append the "unicode" to the
1239 * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to
1240 * objPtr's string rep.
1241 */
1242
1243 if (stringPtr->hasUnicode != 0) {
1244 AppendUnicodeToUnicodeRep(objPtr, unicode, length);
1245 } else {
1246 AppendUnicodeToUtfRep(objPtr, unicode, length);
1247 }
1248 }
1249
1250 /*
1251 *----------------------------------------------------------------------
1252 *
1253 * Tcl_AppendObjToObj --
1254 *
1255 * This function appends the string rep of one object to another.
1256 * "objPtr" cannot be a shared object.
1257 *
1258 * Results:
1259 * None.
1260 *
1261 * Side effects:
1262 * The string rep of appendObjPtr is appended to the string
1263 * representation of objPtr.
1264 *
1265 *----------------------------------------------------------------------
1266 */
1267
1268 void
Tcl_AppendObjToObj(Tcl_Obj * objPtr,Tcl_Obj * appendObjPtr)1269 Tcl_AppendObjToObj(
1270 Tcl_Obj *objPtr, /* Points to the object to append to. */
1271 Tcl_Obj *appendObjPtr) /* Object to append. */
1272 {
1273 String *stringPtr;
1274 int length, numChars, allOneByteChars;
1275 char *bytes;
1276
1277 SetStringFromAny(NULL, objPtr);
1278
1279 /*
1280 * If objPtr has a valid Unicode rep, then get a Unicode string from
1281 * appendObjPtr and append it.
1282 */
1283
1284 stringPtr = GET_STRING(objPtr);
1285 if (stringPtr->hasUnicode != 0) {
1286 /*
1287 * If appendObjPtr is not of the "String" type, don't convert it.
1288 */
1289
1290 if (appendObjPtr->typePtr == &tclStringType) {
1291 stringPtr = GET_STRING(appendObjPtr);
1292 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
1293 /*
1294 * If appendObjPtr is a string obj with no valid Unicode rep,
1295 * then fill its unicode rep.
1296 */
1297
1298 FillUnicodeRep(appendObjPtr);
1299 stringPtr = GET_STRING(appendObjPtr);
1300 }
1301 AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode,
1302 stringPtr->numChars);
1303 } else {
1304 bytes = TclGetStringFromObj(appendObjPtr, &length);
1305 AppendUtfToUnicodeRep(objPtr, bytes, length);
1306 }
1307 return;
1308 }
1309
1310 /*
1311 * Append to objPtr's UTF string rep. If we know the number of characters
1312 * in both objects before appending, then set the combined number of
1313 * characters in the final (appended-to) object.
1314 */
1315
1316 bytes = TclGetStringFromObj(appendObjPtr, &length);
1317
1318 allOneByteChars = 0;
1319 numChars = stringPtr->numChars;
1320 if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
1321 stringPtr = GET_STRING(appendObjPtr);
1322 if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) {
1323 numChars += stringPtr->numChars;
1324 allOneByteChars = 1;
1325 }
1326 }
1327
1328 AppendUtfToUtfRep(objPtr, bytes, length);
1329
1330 if (allOneByteChars) {
1331 stringPtr = GET_STRING(objPtr);
1332 stringPtr->numChars = numChars;
1333 }
1334 }
1335
1336 /*
1337 *----------------------------------------------------------------------
1338 *
1339 * AppendUnicodeToUnicodeRep --
1340 *
1341 * This function appends the contents of "unicode" to the Unicode rep of
1342 * "objPtr". objPtr must already have a valid Unicode rep.
1343 *
1344 * Results:
1345 * None.
1346 *
1347 * Side effects:
1348 * objPtr's internal rep is reallocated.
1349 *
1350 *----------------------------------------------------------------------
1351 */
1352
1353 static void
AppendUnicodeToUnicodeRep(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int appendNumChars)1354 AppendUnicodeToUnicodeRep(
1355 Tcl_Obj *objPtr, /* Points to the object to append to. */
1356 const Tcl_UniChar *unicode, /* String to append. */
1357 int appendNumChars) /* Number of chars of "unicode" to append. */
1358 {
1359 String *stringPtr;
1360 int numChars;
1361
1362 if (appendNumChars < 0) {
1363 appendNumChars = UnicodeLength(unicode);
1364 }
1365 if (appendNumChars == 0) {
1366 return;
1367 }
1368
1369 SetStringFromAny(NULL, objPtr);
1370 stringPtr = GET_STRING(objPtr);
1371
1372 /*
1373 * If not enough space has been allocated for the unicode rep, reallocate
1374 * the internal rep object with additional space. First try to double the
1375 * required allocation; if that fails, try a more modest increase. See the
1376 * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an
1377 * explanation of this growth algorithm.
1378 */
1379
1380 numChars = stringPtr->numChars + appendNumChars;
1381 stringCheckLimits(numChars);
1382
1383 if (STRING_UALLOC(numChars) > stringPtr->uallocated) {
1384 /*
1385 * Protect against case where unicode points into the existing
1386 * stringPtr->unicode array. Force it to follow any relocations
1387 * due to the reallocs below.
1388 */
1389 int offset = -1;
1390 if (unicode && unicode >= stringPtr->unicode
1391 && unicode <= stringPtr->unicode
1392 + stringPtr->uallocated / sizeof(Tcl_UniChar)) {
1393 offset = unicode - stringPtr->unicode;
1394 }
1395
1396 GrowUnicodeBuffer(objPtr, numChars);
1397 stringPtr = GET_STRING(objPtr);
1398
1399 /* Relocate unicode if needed; see above. */
1400 if (offset >= 0) {
1401 unicode = stringPtr->unicode + offset;
1402 }
1403 }
1404
1405 /*
1406 * Copy the new string onto the end of the old string, then add the
1407 * trailing null.
1408 */
1409
1410 if (unicode) {
1411 memcpy(stringPtr->unicode + stringPtr->numChars, unicode,
1412 appendNumChars * sizeof(Tcl_UniChar));
1413 }
1414 stringPtr->unicode[numChars] = 0;
1415 stringPtr->numChars = numChars;
1416 stringPtr->allocated = 0;
1417
1418 TclInvalidateStringRep(objPtr);
1419 }
1420
1421 /*
1422 *----------------------------------------------------------------------
1423 *
1424 * AppendUnicodeToUtfRep --
1425 *
1426 * This function converts the contents of "unicode" to UTF and appends
1427 * the UTF to the string rep of "objPtr".
1428 *
1429 * Results:
1430 * None.
1431 *
1432 * Side effects:
1433 * objPtr's internal rep is reallocated.
1434 *
1435 *----------------------------------------------------------------------
1436 */
1437
1438 static void
AppendUnicodeToUtfRep(Tcl_Obj * objPtr,const Tcl_UniChar * unicode,int numChars)1439 AppendUnicodeToUtfRep(
1440 Tcl_Obj *objPtr, /* Points to the object to append to. */
1441 const Tcl_UniChar *unicode, /* String to convert to UTF. */
1442 int numChars) /* Number of chars of "unicode" to convert. */
1443 {
1444 Tcl_DString dsPtr;
1445 const char *bytes;
1446
1447 if (numChars < 0) {
1448 numChars = UnicodeLength(unicode);
1449 }
1450 if (numChars == 0) {
1451 return;
1452 }
1453
1454 Tcl_DStringInit(&dsPtr);
1455 bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr);
1456 AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr));
1457 Tcl_DStringFree(&dsPtr);
1458 }
1459
1460 /*
1461 *----------------------------------------------------------------------
1462 *
1463 * AppendUtfToUnicodeRep --
1464 *
1465 * This function converts the contents of "bytes" to Unicode and appends
1466 * the Unicode to the Unicode rep of "objPtr". objPtr must already have a
1467 * valid Unicode rep.
1468 *
1469 * Results:
1470 * None.
1471 *
1472 * Side effects:
1473 * objPtr's internal rep is reallocated.
1474 *
1475 *----------------------------------------------------------------------
1476 */
1477
1478 static void
AppendUtfToUnicodeRep(Tcl_Obj * objPtr,const char * bytes,int numBytes)1479 AppendUtfToUnicodeRep(
1480 Tcl_Obj *objPtr, /* Points to the object to append to. */
1481 const char *bytes, /* String to convert to Unicode. */
1482 int numBytes) /* Number of bytes of "bytes" to convert. */
1483 {
1484 Tcl_DString dsPtr;
1485 int numChars = numBytes;
1486 Tcl_UniChar *unicode = NULL;
1487
1488 if (numBytes < 0) {
1489 numBytes = (bytes ? strlen(bytes) : 0);
1490 }
1491 if (numBytes == 0) {
1492 return;
1493 }
1494
1495 Tcl_DStringInit(&dsPtr);
1496 if (bytes) {
1497 numChars = Tcl_NumUtfChars(bytes, numBytes);
1498 unicode = (Tcl_UniChar *) Tcl_UtfToUniCharDString(bytes, numBytes,
1499 &dsPtr);
1500 }
1501 AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
1502 Tcl_DStringFree(&dsPtr);
1503 }
1504
1505 /*
1506 *----------------------------------------------------------------------
1507 *
1508 * AppendUtfToUtfRep --
1509 *
1510 * This function appends "numBytes" bytes of "bytes" to the UTF string
1511 * rep of "objPtr". objPtr must already have a valid String rep.
1512 *
1513 * Results:
1514 * None.
1515 *
1516 * Side effects:
1517 * objPtr's internal rep is reallocated.
1518 *
1519 *----------------------------------------------------------------------
1520 */
1521
1522 static void
AppendUtfToUtfRep(Tcl_Obj * objPtr,const char * bytes,int numBytes)1523 AppendUtfToUtfRep(
1524 Tcl_Obj *objPtr, /* Points to the object to append to. */
1525 const char *bytes, /* String to append. */
1526 int numBytes) /* Number of bytes of "bytes" to append. */
1527 {
1528 String *stringPtr;
1529 int newLength, oldLength;
1530
1531 if (numBytes < 0) {
1532 numBytes = (bytes ? strlen(bytes) : 0);
1533 }
1534 if (numBytes == 0) {
1535 return;
1536 }
1537
1538 /*
1539 * Copy the new string onto the end of the old string, then add the
1540 * trailing null.
1541 */
1542
1543 oldLength = objPtr->length;
1544 newLength = numBytes + oldLength;
1545 if (newLength < 0) {
1546 Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
1547 }
1548
1549 stringPtr = GET_STRING(objPtr);
1550 if (newLength > (int) stringPtr->allocated) {
1551 /*
1552 * Protect against case where unicode points into the existing
1553 * stringPtr->unicode array. Force it to follow any relocations
1554 * due to the reallocs below.
1555 */
1556 int offset = -1;
1557 if (bytes && bytes >= objPtr->bytes
1558 && bytes <= objPtr->bytes + objPtr->length) {
1559 offset = bytes - objPtr->bytes;
1560 }
1561
1562 /*
1563 * There isn't currently enough space in the string representation so
1564 * allocate additional space. First, try to double the length
1565 * required. If that fails, try a more modest allocation. See the "TCL
1566 * STRING GROWTH ALGORITHM" comment at the top of this file for an
1567 * explanation of this growth algorithm.
1568 */
1569
1570 if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) {
1571 /*
1572 * Take care computing the amount of modest growth to avoid
1573 * overflow into invalid argument values for Tcl_SetObjLength.
1574 */
1575 unsigned int limit = INT_MAX - newLength;
1576 unsigned int extra = numBytes + TCL_GROWTH_MIN_ALLOC;
1577 int growth = (int) ((extra > limit) ? limit : extra);
1578
1579 Tcl_SetObjLength(objPtr, newLength + growth);
1580 }
1581
1582 /* Relocate bytes if needed; see above. */
1583 if (offset >=0) {
1584 bytes = objPtr->bytes + offset;
1585 }
1586 }
1587
1588 /*
1589 * Invalidate the unicode data.
1590 */
1591
1592 stringPtr->numChars = -1;
1593 stringPtr->hasUnicode = 0;
1594
1595 if (bytes) {
1596 memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes);
1597 }
1598 objPtr->bytes[newLength] = 0;
1599 objPtr->length = newLength;
1600 }
1601
1602 /*
1603 *----------------------------------------------------------------------
1604 *
1605 * Tcl_AppendStringsToObjVA --
1606 *
1607 * This function appends one or more null-terminated strings to an
1608 * object.
1609 *
1610 * Results:
1611 * None.
1612 *
1613 * Side effects:
1614 * The contents of all the string arguments are appended to the string
1615 * representation of objPtr.
1616 *
1617 *----------------------------------------------------------------------
1618 */
1619
1620 void
Tcl_AppendStringsToObjVA(Tcl_Obj * objPtr,va_list argList)1621 Tcl_AppendStringsToObjVA(
1622 Tcl_Obj *objPtr, /* Points to the object to append to. */
1623 va_list argList) /* Variable argument list. */
1624 {
1625 #define STATIC_LIST_SIZE 16
1626 String *stringPtr;
1627 int newLength, oldLength, attemptLength;
1628 register char *string, *dst;
1629 char *static_list[STATIC_LIST_SIZE];
1630 char **args = static_list;
1631 int nargs_space = STATIC_LIST_SIZE;
1632 int nargs, i;
1633
1634 if (Tcl_IsShared(objPtr)) {
1635 Tcl_Panic("%s called with shared object", "Tcl_AppendStringsToObj");
1636 }
1637
1638 SetStringFromAny(NULL, objPtr);
1639
1640 /*
1641 * Force the existence of a string rep. so we avoid crashes operating
1642 * on a pure unicode value. [Bug 2597185]
1643 */
1644
1645 (void) Tcl_GetStringFromObj(objPtr, &oldLength);
1646
1647 /*
1648 * Figure out how much space is needed for all the strings, and expand the
1649 * string representation if it isn't big enough. If no bytes would be
1650 * appended, just return. Note that on some platforms (notably OS/390) the
1651 * argList is an array so we need to use memcpy.
1652 */
1653
1654 nargs = 0;
1655 newLength = 0;
1656 while (1) {
1657 string = va_arg(argList, char *);
1658 if (string == NULL) {
1659 break;
1660 }
1661 if (nargs >= nargs_space) {
1662 /*
1663 * Expand the args buffer.
1664 */
1665
1666 nargs_space += STATIC_LIST_SIZE;
1667 if (args == static_list) {
1668 args = (void *) ckalloc(nargs_space * sizeof(char *));
1669 for (i = 0; i < nargs; ++i) {
1670 args[i] = static_list[i];
1671 }
1672 } else {
1673 args = (void *) ckrealloc((void *) args,
1674 nargs_space * sizeof(char *));
1675 }
1676 }
1677 newLength += strlen(string);
1678 args[nargs++] = string;
1679 }
1680 if (newLength == 0) {
1681 goto done;
1682 }
1683
1684 stringPtr = GET_STRING(objPtr);
1685 if (oldLength + newLength > (int) stringPtr->allocated) {
1686 /*
1687 * There isn't currently enough space in the string representation, so
1688 * allocate additional space. If the current string representation
1689 * isn't empty (i.e. it looks like we're doing a series of appends)
1690 * then try to allocate extra space to accomodate future growth: first
1691 * try to double the required memory; if that fails, try a more modest
1692 * allocation. See the "TCL STRING GROWTH ALGORITHM" comment at the
1693 * top of this file for an explanation of this growth algorithm.
1694 * Otherwise, if the current string representation is empty, exactly
1695 * enough memory is allocated.
1696 */
1697
1698 if (oldLength == 0) {
1699 Tcl_SetObjLength(objPtr, newLength);
1700 } else {
1701 attemptLength = 2 * (oldLength + newLength);
1702 if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) {
1703 attemptLength = oldLength + (2 * newLength) +
1704 TCL_GROWTH_MIN_ALLOC;
1705 Tcl_SetObjLength(objPtr, attemptLength);
1706 }
1707 }
1708 }
1709
1710 /*
1711 * Make a second pass through the arguments, appending all the strings to
1712 * the object.
1713 */
1714
1715 dst = objPtr->bytes + oldLength;
1716 for (i = 0; i < nargs; ++i) {
1717 string = args[i];
1718 if (string == NULL) {
1719 break;
1720 }
1721 while (*string != 0) {
1722 *dst = *string;
1723 dst++;
1724 string++;
1725 }
1726 }
1727
1728 /*
1729 * Add a null byte to terminate the string. However, be careful: it's
1730 * possible that the object is totally empty (if it was empty originally
1731 * and there was nothing to append). In this case dst is NULL; just leave
1732 * everything alone.
1733 */
1734
1735 if (dst != NULL) {
1736 *dst = 0;
1737 }
1738 objPtr->length = oldLength + newLength;
1739
1740 done:
1741 /*
1742 * If we had to allocate a buffer from the heap, free it now.
1743 */
1744
1745 if (args != static_list) {
1746 ckfree((void *) args);
1747 }
1748 #undef STATIC_LIST_SIZE
1749 }
1750
1751 /*
1752 *----------------------------------------------------------------------
1753 *
1754 * Tcl_AppendStringsToObj --
1755 *
1756 * This function appends one or more null-terminated strings to an
1757 * object.
1758 *
1759 * Results:
1760 * None.
1761 *
1762 * Side effects:
1763 * The contents of all the string arguments are appended to the string
1764 * representation of objPtr.
1765 *
1766 *----------------------------------------------------------------------
1767 */
1768
1769 void
Tcl_AppendStringsToObj(Tcl_Obj * objPtr,...)1770 Tcl_AppendStringsToObj(
1771 Tcl_Obj *objPtr,
1772 ...)
1773 {
1774 va_list argList;
1775
1776 va_start(argList, objPtr);
1777 Tcl_AppendStringsToObjVA(objPtr, argList);
1778 va_end(argList);
1779 }
1780
1781 /*
1782 *----------------------------------------------------------------------
1783 *
1784 * Tcl_AppendFormatToObj --
1785 *
1786 * This function appends a list of Tcl_Obj's to a Tcl_Obj according to
1787 * the formatting instructions embedded in the format string. The
1788 * formatting instructions are inspired by sprintf(). Returns TCL_OK when
1789 * successful. If there's an error in the arguments, TCL_ERROR is
1790 * returned, and an error message is written to the interp, if non-NULL.
1791 *
1792 * Results:
1793 * A standard Tcl result.
1794 *
1795 * Side effects:
1796 * None.
1797 *
1798 *----------------------------------------------------------------------
1799 */
1800
1801 int
Tcl_AppendFormatToObj(Tcl_Interp * interp,Tcl_Obj * appendObj,const char * format,int objc,Tcl_Obj * const objv[])1802 Tcl_AppendFormatToObj(
1803 Tcl_Interp *interp,
1804 Tcl_Obj *appendObj,
1805 const char *format,
1806 int objc,
1807 Tcl_Obj *const objv[])
1808 {
1809 const char *span = format, *msg;
1810 int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0;
1811 int originalLength, limit;
1812 static const char *mixedXPG =
1813 "cannot mix \"%\" and \"%n$\" conversion specifiers";
1814 static const char *badIndex[2] = {
1815 "not enough arguments for all format specifiers",
1816 "\"%n$\" argument index out of range"
1817 };
1818 static const char *overflow = "max size for a Tcl value exceeded";
1819
1820 if (Tcl_IsShared(appendObj)) {
1821 Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
1822 }
1823 TclGetStringFromObj(appendObj, &originalLength);
1824 limit = INT_MAX - originalLength;
1825
1826 /*
1827 * Format string is NUL-terminated.
1828 */
1829
1830 while (*format != '\0') {
1831 char *end;
1832 int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag;
1833 int width, gotPrecision, precision, useShort, useWide, useBig;
1834 int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes;
1835 Tcl_Obj *segment;
1836 Tcl_UniChar ch;
1837 int step = Tcl_UtfToUniChar(format, &ch);
1838
1839 format += step;
1840 if (ch != '%') {
1841 numBytes += step;
1842 continue;
1843 }
1844 if (numBytes) {
1845 if (numBytes > limit) {
1846 msg = overflow;
1847 goto errorMsg;
1848 }
1849 Tcl_AppendToObj(appendObj, span, numBytes);
1850 limit -= numBytes;
1851 numBytes = 0;
1852 }
1853
1854 /*
1855 * Saw a % : process the format specifier.
1856 *
1857 * Step 0. Handle special case of escaped format marker (i.e., %%).
1858 */
1859
1860 step = Tcl_UtfToUniChar(format, &ch);
1861 if (ch == '%') {
1862 span = format;
1863 numBytes = step;
1864 format += step;
1865 continue;
1866 }
1867
1868 /*
1869 * Step 1. XPG3 position specifier
1870 */
1871
1872 newXpg = 0;
1873 if (isdigit(UCHAR(ch))) {
1874 int position = strtoul(format, &end, 10);
1875 if (*end == '$') {
1876 newXpg = 1;
1877 objIndex = position - 1;
1878 format = end + 1;
1879 step = Tcl_UtfToUniChar(format, &ch);
1880 }
1881 }
1882 if (newXpg) {
1883 if (gotSequential) {
1884 msg = mixedXPG;
1885 goto errorMsg;
1886 }
1887 gotXpg = 1;
1888 } else {
1889 if (gotXpg) {
1890 msg = mixedXPG;
1891 goto errorMsg;
1892 }
1893 gotSequential = 1;
1894 }
1895 if ((objIndex < 0) || (objIndex >= objc)) {
1896 msg = badIndex[gotXpg];
1897 goto errorMsg;
1898 }
1899
1900 /*
1901 * Step 2. Set of flags.
1902 */
1903
1904 gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0;
1905 sawFlag = 1;
1906 do {
1907 switch (ch) {
1908 case '-':
1909 gotMinus = 1;
1910 break;
1911 case '#':
1912 gotHash = 1;
1913 break;
1914 case '0':
1915 gotZero = 1;
1916 break;
1917 case ' ':
1918 gotSpace = 1;
1919 break;
1920 case '+':
1921 gotPlus = 1;
1922 break;
1923 default:
1924 sawFlag = 0;
1925 }
1926 if (sawFlag) {
1927 format += step;
1928 step = Tcl_UtfToUniChar(format, &ch);
1929 }
1930 } while (sawFlag);
1931
1932 /*
1933 * Step 3. Minimum field width.
1934 */
1935
1936 width = 0;
1937 if (isdigit(UCHAR(ch))) {
1938 width = strtoul(format, &end, 10);
1939 format = end;
1940 step = Tcl_UtfToUniChar(format, &ch);
1941 } else if (ch == '*') {
1942 if (objIndex >= objc - 1) {
1943 msg = badIndex[gotXpg];
1944 goto errorMsg;
1945 }
1946 if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) {
1947 goto error;
1948 }
1949 if (width < 0) {
1950 width = -width;
1951 gotMinus = 1;
1952 }
1953 objIndex++;
1954 format += step;
1955 step = Tcl_UtfToUniChar(format, &ch);
1956 }
1957 if (width > limit) {
1958 msg = overflow;
1959 goto errorMsg;
1960 }
1961
1962 /*
1963 * Step 4. Precision.
1964 */
1965
1966 gotPrecision = precision = 0;
1967 if (ch == '.') {
1968 gotPrecision = 1;
1969 format += step;
1970 step = Tcl_UtfToUniChar(format, &ch);
1971 }
1972 if (isdigit(UCHAR(ch))) {
1973 precision = strtoul(format, &end, 10);
1974 format = end;
1975 step = Tcl_UtfToUniChar(format, &ch);
1976 } else if (ch == '*') {
1977 if (objIndex >= objc - 1) {
1978 msg = badIndex[gotXpg];
1979 goto errorMsg;
1980 }
1981 if (TclGetIntFromObj(interp, objv[objIndex], &precision)
1982 != TCL_OK) {
1983 goto error;
1984 }
1985
1986 /*
1987 * TODO: Check this truncation logic.
1988 */
1989
1990 if (precision < 0) {
1991 precision = 0;
1992 }
1993 objIndex++;
1994 format += step;
1995 step = Tcl_UtfToUniChar(format, &ch);
1996 }
1997
1998 /*
1999 * Step 5. Length modifier.
2000 */
2001
2002 useShort = useWide = useBig = 0;
2003 if (ch == 'h') {
2004 useShort = 1;
2005 format += step;
2006 step = Tcl_UtfToUniChar(format, &ch);
2007 } else if (ch == 'l') {
2008 format += step;
2009 step = Tcl_UtfToUniChar(format, &ch);
2010 if (ch == 'l') {
2011 useBig = 1;
2012 format += step;
2013 step = Tcl_UtfToUniChar(format, &ch);
2014 } else {
2015 #ifndef TCL_WIDE_INT_IS_LONG
2016 useWide = 1;
2017 #endif
2018 }
2019 }
2020
2021 format += step;
2022 span = format;
2023
2024 /*
2025 * Step 6. The actual conversion character.
2026 */
2027
2028 segment = objv[objIndex];
2029 numChars = -1;
2030 if (ch == 'i') {
2031 ch = 'd';
2032 }
2033 switch (ch) {
2034 case '\0':
2035 msg = "format string ended in middle of field specifier";
2036 goto errorMsg;
2037 case 's':
2038 if (gotPrecision) {
2039 numChars = Tcl_GetCharLength(segment);
2040 if (precision < numChars) {
2041 segment = Tcl_GetRange(segment, 0, precision - 1);
2042 numChars = precision;
2043 Tcl_IncrRefCount(segment);
2044 allocSegment = 1;
2045 }
2046 }
2047 break;
2048 case 'c': {
2049 char buf[TCL_UTF_MAX];
2050 int code, length;
2051
2052 if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
2053 goto error;
2054 }
2055 length = Tcl_UniCharToUtf(code, buf);
2056 segment = Tcl_NewStringObj(buf, length);
2057 Tcl_IncrRefCount(segment);
2058 allocSegment = 1;
2059 break;
2060 }
2061
2062 case 'u':
2063 if (useBig) {
2064 msg = "unsigned bignum format is invalid";
2065 goto errorMsg;
2066 }
2067 case 'd':
2068 case 'o':
2069 case 'x':
2070 case 'X': {
2071 short int s = 0; /* Silence compiler warning; only defined and
2072 * used when useShort is true. */
2073 long l;
2074 Tcl_WideInt w;
2075 mp_int big;
2076 int toAppend, isNegative = 0;
2077
2078 if (useBig) {
2079 if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
2080 goto error;
2081 }
2082 isNegative = (mp_cmp_d(&big, 0) == MP_LT);
2083 } else if (useWide) {
2084 if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
2085 Tcl_Obj *objPtr;
2086
2087 if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
2088 goto error;
2089 }
2090 mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big);
2091 objPtr = Tcl_NewBignumObj(&big);
2092 Tcl_IncrRefCount(objPtr);
2093 Tcl_GetWideIntFromObj(NULL, objPtr, &w);
2094 Tcl_DecrRefCount(objPtr);
2095 }
2096 isNegative = (w < (Tcl_WideInt)0);
2097 } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
2098 if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
2099 Tcl_Obj *objPtr;
2100
2101 if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
2102 goto error;
2103 }
2104 mp_mod_2d(&big, (int) CHAR_BIT * sizeof(long), &big);
2105 objPtr = Tcl_NewBignumObj(&big);
2106 Tcl_IncrRefCount(objPtr);
2107 TclGetLongFromObj(NULL, objPtr, &l);
2108 Tcl_DecrRefCount(objPtr);
2109 } else {
2110 l = Tcl_WideAsLong(w);
2111 }
2112 if (useShort) {
2113 s = (short int) l;
2114 isNegative = (s < (short int)0);
2115 } else {
2116 isNegative = (l < (long)0);
2117 }
2118 } else if (useShort) {
2119 s = (short int) l;
2120 isNegative = (s < (short int)0);
2121 } else {
2122 isNegative = (l < (long)0);
2123 }
2124
2125 segment = Tcl_NewObj();
2126 allocSegment = 1;
2127 segmentLimit = INT_MAX;
2128 Tcl_IncrRefCount(segment);
2129
2130 if ((isNegative || gotPlus || gotSpace) && (useBig || (ch == 'd'))) {
2131 Tcl_AppendToObj(segment, (isNegative ? "-" : gotPlus ? "+" : " "), 1);
2132 segmentLimit -= 1;
2133 }
2134
2135 if (gotHash) {
2136 switch (ch) {
2137 case 'o':
2138 Tcl_AppendToObj(segment, "0", 1);
2139 segmentLimit -= 1;
2140 precision--;
2141 break;
2142 case 'x':
2143 case 'X':
2144 Tcl_AppendToObj(segment, "0x", 2);
2145 segmentLimit -= 2;
2146 break;
2147 }
2148 }
2149
2150 switch (ch) {
2151 case 'd': {
2152 int length;
2153 Tcl_Obj *pure;
2154 const char *bytes;
2155
2156 if (useShort) {
2157 pure = Tcl_NewIntObj((int)(s));
2158 } else if (useWide) {
2159 pure = Tcl_NewWideIntObj(w);
2160 } else if (useBig) {
2161 pure = Tcl_NewBignumObj(&big);
2162 } else {
2163 pure = Tcl_NewLongObj(l);
2164 }
2165 Tcl_IncrRefCount(pure);
2166 bytes = TclGetStringFromObj(pure, &length);
2167
2168 /*
2169 * Already did the sign above.
2170 */
2171
2172 if (*bytes == '-') {
2173 length--;
2174 bytes++;
2175 }
2176 toAppend = length;
2177
2178 /*
2179 * Canonical decimal string reps for integers are composed
2180 * entirely of one-byte encoded characters, so "length" is the
2181 * number of chars.
2182 */
2183
2184 if (gotPrecision) {
2185 if (length < precision) {
2186 segmentLimit -= (precision - length);
2187 }
2188 while (length < precision) {
2189 Tcl_AppendToObj(segment, "0", 1);
2190 length++;
2191 }
2192 gotZero = 0;
2193 }
2194 if (gotZero) {
2195 length += Tcl_GetCharLength(segment);
2196 if (length < width) {
2197 segmentLimit -= (width - length);
2198 }
2199 while (length < width) {
2200 Tcl_AppendToObj(segment, "0", 1);
2201 length++;
2202 }
2203 }
2204 if (toAppend > segmentLimit) {
2205 msg = overflow;
2206 goto errorMsg;
2207 }
2208 Tcl_AppendToObj(segment, bytes, toAppend);
2209 Tcl_DecrRefCount(pure);
2210 break;
2211 }
2212
2213 case 'u':
2214 case 'o':
2215 case 'x':
2216 case 'X': {
2217 Tcl_WideUInt bits = (Tcl_WideUInt)0;
2218 Tcl_WideInt numDigits = (Tcl_WideInt)0;
2219 int length, numBits = 4, base = 16;
2220 int index = 0, shift = 0;
2221 Tcl_Obj *pure;
2222 char *bytes;
2223
2224 if (ch == 'u') {
2225 base = 10;
2226 }
2227 if (ch == 'o') {
2228 base = 8;
2229 numBits = 3;
2230 }
2231 if (useShort) {
2232 unsigned short int us = (unsigned short int) s;
2233
2234 bits = (Tcl_WideUInt) us;
2235 while (us) {
2236 numDigits++;
2237 us /= base;
2238 }
2239 } else if (useWide) {
2240 Tcl_WideUInt uw = (Tcl_WideUInt) w;
2241
2242 bits = uw;
2243 while (uw) {
2244 numDigits++;
2245 uw /= base;
2246 }
2247 } else if (useBig && big.used) {
2248 int leftover = (big.used * DIGIT_BIT) % numBits;
2249 mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover);
2250
2251 numDigits = 1 +
2252 (((Tcl_WideInt)big.used * DIGIT_BIT) / numBits);
2253 while ((mask & big.dp[big.used-1]) == 0) {
2254 numDigits--;
2255 mask >>= numBits;
2256 }
2257 if (numDigits > INT_MAX) {
2258 msg = overflow;
2259 goto errorMsg;
2260 }
2261 } else if (!useBig) {
2262 unsigned long int ul = (unsigned long int) l;
2263
2264 bits = (Tcl_WideUInt) ul;
2265 while (ul) {
2266 numDigits++;
2267 ul /= base;
2268 }
2269 }
2270
2271 /*
2272 * Need to be sure zero becomes "0", not "".
2273 */
2274
2275 if ((numDigits == 0) && !((ch == 'o') && gotHash)) {
2276 numDigits = 1;
2277 }
2278 pure = Tcl_NewObj();
2279 Tcl_SetObjLength(pure, (int)numDigits);
2280 bytes = TclGetString(pure);
2281 toAppend = length = (int)numDigits;
2282 while (numDigits--) {
2283 int digitOffset;
2284
2285 if (useBig && big.used) {
2286 if (index < big.used && (size_t) shift <
2287 CHAR_BIT*sizeof(Tcl_WideUInt) - DIGIT_BIT) {
2288 bits |= (((Tcl_WideUInt)big.dp[index++]) <<shift);
2289 shift += DIGIT_BIT;
2290 }
2291 shift -= numBits;
2292 }
2293 digitOffset = (int) (bits % base);
2294 if (digitOffset > 9) {
2295 bytes[numDigits] = 'a' + digitOffset - 10;
2296 } else {
2297 bytes[numDigits] = '0' + digitOffset;
2298 }
2299 bits /= base;
2300 }
2301 if (useBig) {
2302 mp_clear(&big);
2303 }
2304 if (gotPrecision) {
2305 if (length < precision) {
2306 segmentLimit -= (precision - length);
2307 }
2308 while (length < precision) {
2309 Tcl_AppendToObj(segment, "0", 1);
2310 length++;
2311 }
2312 gotZero = 0;
2313 }
2314 if (gotZero) {
2315 length += Tcl_GetCharLength(segment);
2316 if (length < width) {
2317 segmentLimit -= (width - length);
2318 }
2319 while (length < width) {
2320 Tcl_AppendToObj(segment, "0", 1);
2321 length++;
2322 }
2323 }
2324 if (toAppend > segmentLimit) {
2325 msg = overflow;
2326 goto errorMsg;
2327 }
2328 Tcl_AppendObjToObj(segment, pure);
2329 Tcl_DecrRefCount(pure);
2330 break;
2331 }
2332
2333 }
2334 break;
2335 }
2336
2337 case 'e':
2338 case 'E':
2339 case 'f':
2340 case 'g':
2341 case 'G': {
2342 #define MAX_FLOAT_SIZE 320
2343 char spec[2*TCL_INTEGER_SPACE + 9], *p = spec;
2344 double d;
2345 int length = MAX_FLOAT_SIZE;
2346 char *bytes;
2347
2348 if (Tcl_GetDoubleFromObj(interp, segment, &d) != TCL_OK) {
2349 /* TODO: Figure out ACCEPT_NAN here */
2350 goto error;
2351 }
2352 *p++ = '%';
2353 if (gotMinus) {
2354 *p++ = '-';
2355 }
2356 if (gotHash) {
2357 *p++ = '#';
2358 }
2359 if (gotZero) {
2360 *p++ = '0';
2361 }
2362 if (gotSpace) {
2363 *p++ = ' ';
2364 }
2365 if (gotPlus) {
2366 *p++ = '+';
2367 }
2368 if (width) {
2369 p += sprintf(p, "%d", width);
2370 if (width > length) {
2371 length = width;
2372 }
2373 }
2374 if (gotPrecision) {
2375 *p++ = '.';
2376 p += sprintf(p, "%d", precision);
2377 if (precision > INT_MAX - length) {
2378 msg=overflow;
2379 goto errorMsg;
2380 }
2381 length += precision;
2382 }
2383
2384 /*
2385 * Don't pass length modifiers!
2386 */
2387
2388 *p++ = (char) ch;
2389 *p = '\0';
2390
2391 segment = Tcl_NewObj();
2392 allocSegment = 1;
2393 if (!Tcl_AttemptSetObjLength(segment, length)) {
2394 msg = overflow;
2395 goto errorMsg;
2396 }
2397 bytes = TclGetString(segment);
2398 if (!Tcl_AttemptSetObjLength(segment, sprintf(bytes, spec, d))) {
2399 msg = overflow;
2400 goto errorMsg;
2401 }
2402 break;
2403 }
2404 default:
2405 if (interp != NULL) {
2406 Tcl_SetObjResult(interp,
2407 Tcl_ObjPrintf("bad field specifier \"%c\"", ch));
2408 }
2409 goto error;
2410 }
2411
2412 switch (ch) {
2413 case 'E':
2414 case 'G':
2415 case 'X': {
2416 Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment)));
2417 }
2418 }
2419
2420 if (width > 0) {
2421 if (numChars < 0) {
2422 numChars = Tcl_GetCharLength(segment);
2423 }
2424 if (!gotMinus) {
2425 if (numChars < width) {
2426 limit -= (width - numChars);
2427 }
2428 while (numChars < width) {
2429 Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
2430 numChars++;
2431 }
2432 }
2433 }
2434
2435 Tcl_GetStringFromObj(segment, &segmentNumBytes);
2436 if (segmentNumBytes > limit) {
2437 if (allocSegment) {
2438 Tcl_DecrRefCount(segment);
2439 }
2440 msg = overflow;
2441 goto errorMsg;
2442 }
2443 Tcl_AppendObjToObj(appendObj, segment);
2444 limit -= segmentNumBytes;
2445 if (allocSegment) {
2446 Tcl_DecrRefCount(segment);
2447 }
2448 if (width > 0) {
2449 if (numChars < width) {
2450 limit -= (width - numChars);
2451 }
2452 while (numChars < width) {
2453 Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
2454 numChars++;
2455 }
2456 }
2457
2458 objIndex += gotSequential;
2459 }
2460 if (numBytes) {
2461 if (numBytes > limit) {
2462 msg = overflow;
2463 goto errorMsg;
2464 }
2465 Tcl_AppendToObj(appendObj, span, numBytes);
2466 limit -= numBytes;
2467 numBytes = 0;
2468 }
2469
2470 return TCL_OK;
2471
2472 errorMsg:
2473 if (interp != NULL) {
2474 Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1));
2475 }
2476 error:
2477 Tcl_SetObjLength(appendObj, originalLength);
2478 return TCL_ERROR;
2479 }
2480
2481 /*
2482 *---------------------------------------------------------------------------
2483 *
2484 * Tcl_Format--
2485 *
2486 * Results:
2487 * A refcount zero Tcl_Obj.
2488 *
2489 * Side effects:
2490 * None.
2491 *
2492 *---------------------------------------------------------------------------
2493 */
2494
2495 Tcl_Obj *
Tcl_Format(Tcl_Interp * interp,const char * format,int objc,Tcl_Obj * const objv[])2496 Tcl_Format(
2497 Tcl_Interp *interp,
2498 const char *format,
2499 int objc,
2500 Tcl_Obj *const objv[])
2501 {
2502 int result;
2503 Tcl_Obj *objPtr = Tcl_NewObj();
2504 result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv);
2505 if (result != TCL_OK) {
2506 Tcl_DecrRefCount(objPtr);
2507 return NULL;
2508 }
2509 return objPtr;
2510 }
2511
2512 /*
2513 *---------------------------------------------------------------------------
2514 *
2515 * AppendPrintfToObjVA --
2516 *
2517 * Results:
2518 *
2519 * Side effects:
2520 *
2521 *---------------------------------------------------------------------------
2522 */
2523
2524 static void
AppendPrintfToObjVA(Tcl_Obj * objPtr,const char * format,va_list argList)2525 AppendPrintfToObjVA(
2526 Tcl_Obj *objPtr,
2527 const char *format,
2528 va_list argList)
2529 {
2530 int code, objc;
2531 Tcl_Obj **objv, *list = Tcl_NewObj();
2532 const char *p;
2533 char *end;
2534
2535 p = format;
2536 Tcl_IncrRefCount(list);
2537 while (*p != '\0') {
2538 int size = 0, seekingConversion = 1, gotPrecision = 0;
2539 int lastNum = -1;
2540
2541 if (*p++ != '%') {
2542 continue;
2543 }
2544 if (*p == '%') {
2545 p++;
2546 continue;
2547 }
2548 do {
2549 switch (*p) {
2550
2551 case '\0':
2552 seekingConversion = 0;
2553 break;
2554 case 's': {
2555 const char *q, *end, *bytes = va_arg(argList, char *);
2556 seekingConversion = 0;
2557
2558 /*
2559 * The buffer to copy characters from starts at bytes and ends
2560 * at either the first NUL byte, or after lastNum bytes, when
2561 * caller has indicated a limit.
2562 */
2563
2564 end = bytes;
2565 while ((!gotPrecision || lastNum--) && (*end != '\0')) {
2566 end++;
2567 }
2568
2569 /*
2570 * Within that buffer, we trim both ends if needed so that we
2571 * copy only whole characters, and avoid copying any partial
2572 * multi-byte characters.
2573 */
2574
2575 q = Tcl_UtfPrev(end, bytes);
2576 if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
2577 end = q;
2578 }
2579
2580 q = bytes + TCL_UTF_MAX;
2581 while ((bytes < end) && (bytes < q)
2582 && ((*bytes & 0xC0) == 0x80)) {
2583 bytes++;
2584 }
2585
2586 Tcl_ListObjAppendElement(NULL, list,
2587 Tcl_NewStringObj(bytes , (int)(end - bytes)));
2588
2589 break;
2590 }
2591 case 'c':
2592 case 'i':
2593 case 'u':
2594 case 'd':
2595 case 'o':
2596 case 'x':
2597 case 'X':
2598 seekingConversion = 0;
2599 switch (size) {
2600 case -1:
2601 case 0:
2602 Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
2603 (long int)va_arg(argList, int)));
2604 break;
2605 case 1:
2606 Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
2607 va_arg(argList, long int)));
2608 break;
2609 }
2610 break;
2611 case 'e':
2612 case 'E':
2613 case 'f':
2614 case 'g':
2615 case 'G':
2616 Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
2617 va_arg(argList, double)));
2618 seekingConversion = 0;
2619 break;
2620 case '*':
2621 lastNum = (int)va_arg(argList, int);
2622 Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum));
2623 p++;
2624 break;
2625 case '0': case '1': case '2': case '3': case '4':
2626 case '5': case '6': case '7': case '8': case '9':
2627 lastNum = (int) strtoul(p, &end, 10);
2628 p = end;
2629 break;
2630 case '.':
2631 gotPrecision = 1;
2632 p++;
2633 break;
2634 /* TODO: support for wide (and bignum?) arguments */
2635 case 'l':
2636 size = 1;
2637 p++;
2638 break;
2639 case 'h':
2640 size = -1;
2641 default:
2642 p++;
2643 }
2644 } while (seekingConversion);
2645 }
2646 TclListObjGetElements(NULL, list, &objc, &objv);
2647 code = Tcl_AppendFormatToObj(NULL, objPtr, format, objc, objv);
2648 if (code != TCL_OK) {
2649 Tcl_AppendPrintfToObj(objPtr,
2650 "Unable to format \"%s\" with supplied arguments: %s",
2651 format, Tcl_GetString(list));
2652 }
2653 Tcl_DecrRefCount(list);
2654 }
2655
2656 /*
2657 *---------------------------------------------------------------------------
2658 *
2659 * Tcl_AppendPrintfToObj --
2660 *
2661 * Results:
2662 * A standard Tcl result.
2663 *
2664 * Side effects:
2665 * None.
2666 *
2667 *---------------------------------------------------------------------------
2668 */
2669
2670 void
Tcl_AppendPrintfToObj(Tcl_Obj * objPtr,const char * format,...)2671 Tcl_AppendPrintfToObj(
2672 Tcl_Obj *objPtr,
2673 const char *format,
2674 ...)
2675 {
2676 va_list argList;
2677
2678 va_start(argList, format);
2679 AppendPrintfToObjVA(objPtr, format, argList);
2680 va_end(argList);
2681 }
2682
2683 /*
2684 *---------------------------------------------------------------------------
2685 *
2686 * Tcl_ObjPrintf --
2687 *
2688 * Results:
2689 * A refcount zero Tcl_Obj.
2690 *
2691 * Side effects:
2692 * None.
2693 *
2694 *---------------------------------------------------------------------------
2695 */
2696
2697 Tcl_Obj *
Tcl_ObjPrintf(const char * format,...)2698 Tcl_ObjPrintf(
2699 const char *format,
2700 ...)
2701 {
2702 va_list argList;
2703 Tcl_Obj *objPtr = Tcl_NewObj();
2704
2705 va_start(argList, format);
2706 AppendPrintfToObjVA(objPtr, format, argList);
2707 va_end(argList);
2708 return objPtr;
2709 }
2710
2711 /*
2712 *---------------------------------------------------------------------------
2713 *
2714 * TclGetStringStorage --
2715 *
2716 * Returns the string storage space of a Tcl_Obj.
2717 *
2718 * Results:
2719 * The pointer value objPtr->bytes is returned and the number of bytes
2720 * allocated there is written to *sizePtr (if known).
2721 *
2722 * Side effects:
2723 * May set objPtr->bytes.
2724 *
2725 *---------------------------------------------------------------------------
2726 */
2727
2728 char *
TclGetStringStorage(Tcl_Obj * objPtr,unsigned int * sizePtr)2729 TclGetStringStorage(
2730 Tcl_Obj *objPtr,
2731 unsigned int *sizePtr)
2732 {
2733 String *stringPtr;
2734
2735 if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) {
2736 return TclGetStringFromObj(objPtr, (int *)sizePtr);
2737 }
2738
2739 stringPtr = GET_STRING(objPtr);
2740 *sizePtr = stringPtr->allocated;
2741 return objPtr->bytes;
2742 }
2743 /*
2744 *---------------------------------------------------------------------------
2745 *
2746 * TclStringObjReverse --
2747 *
2748 * Implements the [string reverse] operation.
2749 *
2750 * Results:
2751 * An unshared Tcl value which is the [string reverse] of the argument
2752 * supplied. When sharing rules permit, the returned value might be
2753 * the argument with modifications done in place.
2754 *
2755 * Side effects:
2756 * May allocate a new Tcl_Obj.
2757 *
2758 *---------------------------------------------------------------------------
2759 */
2760
2761 Tcl_Obj *
TclStringObjReverse(Tcl_Obj * objPtr)2762 TclStringObjReverse(
2763 Tcl_Obj *objPtr)
2764 {
2765 String *stringPtr;
2766 int numChars = Tcl_GetCharLength(objPtr);
2767 int i = 0, lastCharIdx = numChars - 1;
2768 char *bytes;
2769
2770 if (numChars <= 1) {
2771 return objPtr;
2772 }
2773
2774 stringPtr = GET_STRING(objPtr);
2775 if (stringPtr->hasUnicode) {
2776 Tcl_UniChar *source = stringPtr->unicode;
2777
2778 if (Tcl_IsShared(objPtr)) {
2779 Tcl_UniChar *dest, ch = 0;
2780
2781 /*
2782 * Create a non-empty, pure unicode value, so we can coax
2783 * Tcl_SetObjLength into growing the unicode rep buffer.
2784 */
2785
2786 Tcl_Obj *resultPtr = Tcl_NewUnicodeObj(&ch, 1);
2787 Tcl_SetObjLength(resultPtr, numChars);
2788 dest = Tcl_GetUnicode(resultPtr);
2789
2790 while (i < numChars) {
2791 dest[i++] = source[lastCharIdx--];
2792 }
2793 return resultPtr;
2794 }
2795
2796 while (i < lastCharIdx) {
2797 Tcl_UniChar tmp = source[lastCharIdx];
2798 source[lastCharIdx--] = source[i];
2799 source[i++] = tmp;
2800 }
2801 TclInvalidateStringRep(objPtr);
2802 stringPtr->allocated = 0;
2803 return objPtr;
2804 }
2805
2806 bytes = TclGetString(objPtr);
2807 if (Tcl_IsShared(objPtr)) {
2808 char *dest;
2809 Tcl_Obj *resultPtr = Tcl_NewObj();
2810 Tcl_SetObjLength(resultPtr, numChars);
2811 dest = TclGetString(resultPtr);
2812 while (i < numChars) {
2813 dest[i++] = bytes[lastCharIdx--];
2814 }
2815 return resultPtr;
2816 }
2817
2818 while (i < lastCharIdx) {
2819 char tmp = bytes[lastCharIdx];
2820 bytes[lastCharIdx--] = bytes[i];
2821 bytes[i++] = tmp;
2822 }
2823 return objPtr;
2824 }
2825
2826 /*
2827 *---------------------------------------------------------------------------
2828 *
2829 * FillUnicodeRep --
2830 *
2831 * Populate the Unicode internal rep with the Unicode form of its string
2832 * rep. The object must alread have a "String" internal rep.
2833 *
2834 * Results:
2835 * None.
2836 *
2837 * Side effects:
2838 * Reallocates the String internal rep.
2839 *
2840 *---------------------------------------------------------------------------
2841 */
2842
2843 static void
FillUnicodeRep(Tcl_Obj * objPtr)2844 FillUnicodeRep(
2845 Tcl_Obj *objPtr) /* The object in which to fill the unicode
2846 * rep. */
2847 {
2848 String *stringPtr;
2849 size_t uallocated;
2850 char *srcEnd, *src = objPtr->bytes;
2851 Tcl_UniChar *dst;
2852
2853 stringPtr = GET_STRING(objPtr);
2854 if (stringPtr->numChars == -1) {
2855 stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length);
2856 }
2857 stringPtr->hasUnicode = (stringPtr->numChars > 0);
2858
2859 stringCheckLimits(stringPtr->numChars);
2860 uallocated = STRING_UALLOC(stringPtr->numChars);
2861 if (uallocated > stringPtr->uallocated) {
2862 GrowUnicodeBuffer(objPtr, stringPtr->numChars);
2863 stringPtr = GET_STRING(objPtr);
2864 }
2865
2866 /*
2867 * Convert src to Unicode and store the coverted data in "unicode".
2868 */
2869
2870 srcEnd = src + objPtr->length;
2871 for (dst = stringPtr->unicode; src < srcEnd; dst++) {
2872 src += TclUtfToUniChar(src, dst);
2873 }
2874 *dst = 0;
2875
2876 SET_STRING(objPtr, stringPtr);
2877 }
2878
2879 /*
2880 *----------------------------------------------------------------------
2881 *
2882 * DupStringInternalRep --
2883 *
2884 * Initialize the internal representation of a new Tcl_Obj to a copy of
2885 * the internal representation of an existing string object.
2886 *
2887 * Results:
2888 * None.
2889 *
2890 * Side effects:
2891 * copyPtr's internal rep is set to a copy of srcPtr's internal
2892 * representation.
2893 *
2894 *----------------------------------------------------------------------
2895 */
2896
2897 static void
DupStringInternalRep(register Tcl_Obj * srcPtr,register Tcl_Obj * copyPtr)2898 DupStringInternalRep(
2899 register Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have
2900 * an internal rep of type "String". */
2901 register Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not
2902 * currently have an internal rep.*/
2903 {
2904 String *srcStringPtr = GET_STRING(srcPtr);
2905 String *copyStringPtr = NULL;
2906
2907 /*
2908 * If the src obj is a string of 1-byte Utf chars, then copy the string
2909 * rep of the source object and create an "empty" Unicode internal rep for
2910 * the new object. Otherwise, copy Unicode internal rep, and invalidate
2911 * the string rep of the new object.
2912 */
2913
2914 if (srcStringPtr->hasUnicode == 0) {
2915 copyStringPtr = (String *) ckalloc(sizeof(String));
2916 copyStringPtr->uallocated = 0;
2917 } else {
2918 copyStringPtr = (String *) ckalloc(
2919 STRING_SIZE(srcStringPtr->uallocated));
2920 copyStringPtr->uallocated = srcStringPtr->uallocated;
2921
2922 memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
2923 (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar));
2924 copyStringPtr->unicode[srcStringPtr->numChars] = 0;
2925 }
2926 copyStringPtr->numChars = srcStringPtr->numChars;
2927 copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
2928 copyStringPtr->allocated = srcStringPtr->allocated;
2929
2930 /*
2931 * Tricky point: the string value was copied by generic object management
2932 * code, so it doesn't contain any extra bytes that might exist in the
2933 * source object.
2934 */
2935
2936 copyStringPtr->allocated = copyPtr->length;
2937
2938 SET_STRING(copyPtr, copyStringPtr);
2939 copyPtr->typePtr = &tclStringType;
2940 }
2941
2942 /*
2943 *----------------------------------------------------------------------
2944 *
2945 * SetStringFromAny --
2946 *
2947 * Create an internal representation of type "String" for an object.
2948 *
2949 * Results:
2950 * This operation always succeeds and returns TCL_OK.
2951 *
2952 * Side effects:
2953 * Any old internal reputation for objPtr is freed and the internal
2954 * representation is set to "String".
2955 *
2956 *----------------------------------------------------------------------
2957 */
2958
2959 static int
SetStringFromAny(Tcl_Interp * interp,register Tcl_Obj * objPtr)2960 SetStringFromAny(
2961 Tcl_Interp *interp, /* Used for error reporting if not NULL. */
2962 register Tcl_Obj *objPtr) /* The object to convert. */
2963 {
2964 /*
2965 * The Unicode object is optimized for the case where each UTF char in a
2966 * string is only one byte. In this case, we store the value of numChars,
2967 * but we don't copy the bytes to the unicodeObj->unicode.
2968 */
2969
2970 if (objPtr->typePtr != &tclStringType) {
2971 String *stringPtr;
2972
2973 if (objPtr->typePtr != NULL) {
2974 if (objPtr->bytes == NULL) {
2975 objPtr->typePtr->updateStringProc(objPtr);
2976 }
2977 TclFreeIntRep(objPtr);
2978 }
2979 objPtr->typePtr = &tclStringType;
2980
2981 /*
2982 * Allocate enough space for the basic String structure.
2983 */
2984
2985 stringPtr = (String *) ckalloc(sizeof(String));
2986 stringPtr->numChars = -1;
2987 stringPtr->uallocated = 0;
2988 stringPtr->hasUnicode = 0;
2989
2990 if (objPtr->bytes != NULL) {
2991 stringPtr->allocated = objPtr->length;
2992 if (objPtr->bytes != tclEmptyStringRep) {
2993 objPtr->bytes[objPtr->length] = 0;
2994 }
2995 } else {
2996 objPtr->length = 0;
2997 }
2998 SET_STRING(objPtr, stringPtr);
2999 }
3000 return TCL_OK;
3001 }
3002
3003 /*
3004 *----------------------------------------------------------------------
3005 *
3006 * UpdateStringOfString --
3007 *
3008 * Update the string representation for an object whose internal
3009 * representation is "String".
3010 *
3011 * Results:
3012 * None.
3013 *
3014 * Side effects:
3015 * The object's string may be set by converting its Unicode represention
3016 * to UTF format.
3017 *
3018 *----------------------------------------------------------------------
3019 */
3020
3021 static void
UpdateStringOfString(Tcl_Obj * objPtr)3022 UpdateStringOfString(
3023 Tcl_Obj *objPtr) /* Object with string rep to update. */
3024 {
3025 int i, size;
3026 Tcl_UniChar *unicode;
3027 char dummy[TCL_UTF_MAX];
3028 char *dst;
3029 String *stringPtr;
3030
3031 stringPtr = GET_STRING(objPtr);
3032 if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) {
3033 if (stringPtr->numChars <= 0) {
3034 /*
3035 * If there is no Unicode rep, or the string has 0 chars, then set
3036 * the string rep to an empty string.
3037 */
3038
3039 objPtr->bytes = tclEmptyStringRep;
3040 objPtr->length = 0;
3041 return;
3042 }
3043
3044 unicode = stringPtr->unicode;
3045
3046 /*
3047 * Translate the Unicode string to UTF. "size" will hold the amount of
3048 * space the UTF string needs.
3049 */
3050
3051 if (stringPtr->numChars <= INT_MAX/TCL_UTF_MAX
3052 && stringPtr->allocated >= stringPtr->numChars * (size_t)TCL_UTF_MAX) {
3053 goto copyBytes;
3054 }
3055
3056 size = 0;
3057 for (i = 0; i < stringPtr->numChars && size >= 0; i++) {
3058 size += Tcl_UniCharToUtf((int) unicode[i], dummy);
3059 }
3060 if (size < 0) {
3061 Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
3062 }
3063
3064 objPtr->bytes = (char *) ckalloc((unsigned) (size + 1));
3065 objPtr->length = size;
3066 stringPtr->allocated = size;
3067
3068 copyBytes:
3069 dst = objPtr->bytes;
3070 for (i = 0; i < stringPtr->numChars; i++) {
3071 dst += Tcl_UniCharToUtf(unicode[i], dst);
3072 }
3073 *dst = '\0';
3074 }
3075 return;
3076 }
3077
3078 /*
3079 *----------------------------------------------------------------------
3080 *
3081 * FreeStringInternalRep --
3082 *
3083 * Deallocate the storage associated with a String data object's internal
3084 * representation.
3085 *
3086 * Results:
3087 * None.
3088 *
3089 * Side effects:
3090 * Frees memory.
3091 *
3092 *----------------------------------------------------------------------
3093 */
3094
3095 static void
FreeStringInternalRep(Tcl_Obj * objPtr)3096 FreeStringInternalRep(
3097 Tcl_Obj *objPtr) /* Object with internal rep to free. */
3098 {
3099 ckfree((char *) GET_STRING(objPtr));
3100 objPtr->typePtr = NULL;
3101 }
3102
3103 /*
3104 * Local Variables:
3105 * mode: c
3106 * c-basic-offset: 4
3107 * fill-column: 78
3108 * End:
3109 */
3110