1 /*
2 * tclBinary.c --
3 *
4 * This file contains the implementation of the "binary" Tcl built-in
5 * command and the Tcl binary data object.
6 *
7 * Copyright (c) 1997 by Sun Microsystems, Inc.
8 * Copyright (c) 1998-1999 by Scriptics Corporation.
9 *
10 * See the file "license.terms" for information on usage and redistribution of
11 * this file, and for a DISCLAIMER OF ALL WARRANTIES.
12 */
13
14 #include "tclInt.h"
15 #include "tommath.h"
16
17 #include <math.h>
18
19 /*
20 * The following constants are used by GetFormatSpec to indicate various
21 * special conditions in the parsing of a format specifier.
22 */
23
24 #define BINARY_ALL -1 /* Use all elements in the argument. */
25 #define BINARY_NOCOUNT -2 /* No count was specified in format. */
26
27 /*
28 * The following flags may be ORed together and returned by GetFormatSpec
29 */
30
31 #define BINARY_SIGNED 0 /* Field to be read as signed data */
32 #define BINARY_UNSIGNED 1 /* Field to be read as unsigned data */
33
34 /*
35 * The following defines the maximum number of different (integer) numbers
36 * placed in the object cache by 'binary scan' before it bails out and
37 * switches back to Plan A (creating a new object for each value.)
38 * Theoretically, it would be possible to keep the cache about for the values
39 * that are already in it, but that makes the code slower in practise when
40 * overflow happens, and makes little odds the rest of the time (as measured
41 * on my machine.) It is also slower (on the sample I tried at least) to grow
42 * the cache to hold all items we might want to put in it; presumably the
43 * extra cost of managing the memory for the enlarged table outweighs the
44 * benefit from allocating fewer objects. This is probably because as the
45 * number of objects increases, the likelihood of reuse of any particular one
46 * drops, and there is very little gain from larger maximum cache sizes (the
47 * value below is chosen to allow caching to work in full with conversion of
48 * bytes.) - DKF
49 */
50
51 #define BINARY_SCAN_MAX_CACHE 260
52
53 /*
54 * Prototypes for local procedures defined in this file:
55 */
56
57 static void DupByteArrayInternalRep(Tcl_Obj *srcPtr,
58 Tcl_Obj *copyPtr);
59 static int FormatNumber(Tcl_Interp *interp, int type,
60 Tcl_Obj *src, unsigned char **cursorPtr);
61 static void FreeByteArrayInternalRep(Tcl_Obj *objPtr);
62 static int GetFormatSpec(const char **formatPtr, char *cmdPtr,
63 int *countPtr, int *flagsPtr);
64 static Tcl_Obj * ScanNumber(unsigned char *buffer, int type,
65 int flags, Tcl_HashTable **numberCachePtr);
66 static int SetByteArrayFromAny(Tcl_Interp *interp,
67 Tcl_Obj *objPtr);
68 static void UpdateStringOfByteArray(Tcl_Obj *listPtr);
69 static void DeleteScanNumberCache(Tcl_HashTable *numberCachePtr);
70 static int NeedReversing(int format);
71 static void CopyNumber(const void *from, void *to,
72 unsigned length, int type);
73 /* Binary ensemble commands */
74 static int BinaryFormatCmd(ClientData clientData,
75 Tcl_Interp *interp,
76 int objc, Tcl_Obj *const objv[]);
77 static int BinaryScanCmd(ClientData clientData,
78 Tcl_Interp *interp,
79 int objc, Tcl_Obj *const objv[]);
80 /* Binary encoding sub-ensemble commands */
81 static int BinaryEncodeHex(ClientData clientData,
82 Tcl_Interp *interp,
83 int objc, Tcl_Obj *const objv[]);
84 static int BinaryDecodeHex(ClientData clientData,
85 Tcl_Interp *interp,
86 int objc, Tcl_Obj *const objv[]);
87 static int BinaryEncode64(ClientData clientData,
88 Tcl_Interp *interp,
89 int objc, Tcl_Obj *const objv[]);
90 static int BinaryDecode64(ClientData clientData,
91 Tcl_Interp *interp,
92 int objc, Tcl_Obj *const objv[]);
93 static int BinaryEncodeUu(ClientData clientData,
94 Tcl_Interp *interp, int objc,
95 Tcl_Obj *const objv[]);
96 static int BinaryDecodeUu(ClientData clientData,
97 Tcl_Interp *interp,
98 int objc, Tcl_Obj *const objv[]);
99
100 /*
101 * The following tables are used by the binary encoders
102 */
103
104 static const char HexDigits[16] = {
105 '0', '1', '2', '3', '4', '5', '6', '7',
106 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
107 };
108
109 static const char UueDigits[65] = {
110 '`', '!', '"', '#', '$', '%', '&', '\'',
111 '(', ')', '*', '+', ',', '-', '.', '/',
112 '0', '1', '2', '3', '4', '5', '6', '7',
113 '8', '9', ':', ';', '<', '=', '>', '?',
114 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
115 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
116 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
117 'X', 'Y', 'Z', '[', '\\',']', '^', '_',
118 '`'
119 };
120
121 static const char B64Digits[65] = {
122 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
123 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
124 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
125 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
126 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
127 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
128 'w', 'x', 'y', 'z', '0', '1', '2', '3',
129 '4', '5', '6', '7', '8', '9', '+', '/',
130 '='
131 };
132
133 /*
134 * How to construct the ensembles.
135 */
136
137 static const EnsembleImplMap binaryMap[] = {
138 { "format", BinaryFormatCmd, TclCompileBasicMin1ArgCmd, NULL, NULL, 0 },
139 { "scan", BinaryScanCmd, TclCompileBasicMin2ArgCmd, NULL, NULL, 0 },
140 { "encode", NULL, NULL, NULL, NULL, 0 },
141 { "decode", NULL, NULL, NULL, NULL, 0 },
142 { NULL, NULL, NULL, NULL, NULL, 0 }
143 };
144 static const EnsembleImplMap encodeMap[] = {
145 { "hex", BinaryEncodeHex, TclCompileBasic1ArgCmd, NULL, NULL, 0 },
146 { "uuencode", BinaryEncodeUu, NULL, NULL, NULL, 0 },
147 { "base64", BinaryEncode64, NULL, NULL, NULL, 0 },
148 { NULL, NULL, NULL, NULL, NULL, 0 }
149 };
150 static const EnsembleImplMap decodeMap[] = {
151 { "hex", BinaryDecodeHex, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0 },
152 { "uuencode", BinaryDecodeUu, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0 },
153 { "base64", BinaryDecode64, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0 },
154 { NULL, NULL, NULL, NULL, NULL, 0 }
155 };
156
157 /*
158 * The following object type represents an array of bytes. An array of bytes
159 * is not equivalent to an internationalized string. Conceptually, a string is
160 * an array of 16-bit quantities organized as a sequence of properly formed
161 * UTF-8 characters, while a ByteArray is an array of 8-bit quantities.
162 * Accessor functions are provided to convert a ByteArray to a String or a
163 * String to a ByteArray. Two or more consecutive bytes in an array of bytes
164 * may look like a single UTF-8 character if the array is casually treated as
165 * a string. But obtaining the String from a ByteArray is guaranteed to
166 * produced properly formed UTF-8 sequences so that there is a one-to-one map
167 * between bytes and characters.
168 *
169 * Converting a ByteArray to a String proceeds by casting each byte in the
170 * array to a 16-bit quantity, treating that number as a Unicode character,
171 * and storing the UTF-8 version of that Unicode character in the String. For
172 * ByteArrays consisting entirely of values 1..127, the corresponding String
173 * representation is the same as the ByteArray representation.
174 *
175 * Converting a String to a ByteArray proceeds by getting the Unicode
176 * representation of each character in the String, casting it to a byte by
177 * truncating the upper 8 bits, and then storing the byte in the ByteArray.
178 * Converting from ByteArray to String and back to ByteArray is not lossy, but
179 * converting an arbitrary String to a ByteArray may be.
180 */
181
182 const Tcl_ObjType tclByteArrayType = {
183 "bytearray",
184 FreeByteArrayInternalRep,
185 DupByteArrayInternalRep,
186 UpdateStringOfByteArray,
187 SetByteArrayFromAny
188 };
189
190 /*
191 * The following structure is the internal rep for a ByteArray object. Keeps
192 * track of how much memory has been used and how much has been allocated for
193 * the byte array to enable growing and shrinking of the ByteArray object with
194 * fewer mallocs.
195 */
196
197 typedef struct ByteArray {
198 int used; /* The number of bytes used in the byte
199 * array. */
200 int allocated; /* The amount of space actually allocated
201 * minus 1 byte. */
202 unsigned char bytes[TCLFLEXARRAY]; /* The array of bytes. The actual size of this
203 * field depends on the 'allocated' field
204 * above. */
205 } ByteArray;
206
207 #define BYTEARRAY_SIZE(len) \
208 ((unsigned) (TclOffset(ByteArray, bytes) + (len)))
209 #define GET_BYTEARRAY(objPtr) \
210 ((ByteArray *) (objPtr)->internalRep.twoPtrValue.ptr1)
211 #define SET_BYTEARRAY(objPtr, baPtr) \
212 (objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (baPtr)
213
214
215 /*
216 *----------------------------------------------------------------------
217 *
218 * Tcl_NewByteArrayObj --
219 *
220 * This procedure is creates a new ByteArray object and initializes it
221 * from the given array of bytes.
222 *
223 * Results:
224 * The newly create object is returned. This object will have no initial
225 * string representation. The returned object has a ref count of 0.
226 *
227 * Side effects:
228 * Memory allocated for new object and copy of byte array argument.
229 *
230 *----------------------------------------------------------------------
231 */
232
233 #undef Tcl_NewByteArrayObj
234
235 Tcl_Obj *
Tcl_NewByteArrayObj(const unsigned char * bytes,int length)236 Tcl_NewByteArrayObj(
237 const unsigned char *bytes, /* The array of bytes used to initialize the
238 * new object. */
239 int length) /* Length of the array of bytes, which must be
240 * >= 0. */
241 {
242 #ifdef TCL_MEM_DEBUG
243 return Tcl_DbNewByteArrayObj(bytes, length, "unknown", 0);
244 #else /* if not TCL_MEM_DEBUG */
245 Tcl_Obj *objPtr;
246
247 TclNewObj(objPtr);
248 Tcl_SetByteArrayObj(objPtr, bytes, length);
249 return objPtr;
250 #endif /* TCL_MEM_DEBUG */
251 }
252
253 /*
254 *----------------------------------------------------------------------
255 *
256 * Tcl_DbNewByteArrayObj --
257 *
258 * This procedure is normally called when debugging: i.e., when
259 * TCL_MEM_DEBUG is defined. It is the same as the Tcl_NewByteArrayObj
260 * above except that it calls Tcl_DbCkalloc directly with the file name
261 * and line number from its caller. This simplifies debugging since then
262 * the [memory active] command will report the correct file name and line
263 * number when reporting objects that haven't been freed.
264 *
265 * When TCL_MEM_DEBUG is not defined, this procedure just returns the
266 * result of calling Tcl_NewByteArrayObj.
267 *
268 * Results:
269 * The newly create object is returned. This object will have no initial
270 * string representation. The returned object has a ref count of 0.
271 *
272 * Side effects:
273 * Memory allocated for new object and copy of byte array argument.
274 *
275 *----------------------------------------------------------------------
276 */
277
278 Tcl_Obj *
Tcl_DbNewByteArrayObj(const unsigned char * bytes,int length,const char * file,int line)279 Tcl_DbNewByteArrayObj(
280 const unsigned char *bytes, /* The array of bytes used to initialize the
281 * new object. */
282 int length, /* Length of the array of bytes, which must be
283 * >= 0. */
284 const char *file, /* The name of the source file calling this
285 * procedure; used for debugging. */
286 int line) /* Line number in the source file; used for
287 * debugging. */
288 {
289 #ifdef TCL_MEM_DEBUG
290 Tcl_Obj *objPtr;
291
292 TclDbNewObj(objPtr, file, line);
293 Tcl_SetByteArrayObj(objPtr, bytes, length);
294 return objPtr;
295 #else /* if not TCL_MEM_DEBUG */
296 return Tcl_NewByteArrayObj(bytes, length);
297 #endif /* TCL_MEM_DEBUG */
298 }
299
300 /*
301 *---------------------------------------------------------------------------
302 *
303 * Tcl_SetByteArrayObj --
304 *
305 * Modify an object to be a ByteArray object and to have the specified
306 * array of bytes as its value.
307 *
308 * Results:
309 * None.
310 *
311 * Side effects:
312 * The object's old string rep and internal rep is freed. Memory
313 * allocated for copy of byte array argument.
314 *
315 *----------------------------------------------------------------------
316 */
317
318 void
Tcl_SetByteArrayObj(Tcl_Obj * objPtr,const unsigned char * bytes,int length)319 Tcl_SetByteArrayObj(
320 Tcl_Obj *objPtr, /* Object to initialize as a ByteArray. */
321 const unsigned char *bytes, /* The array of bytes to use as the new value.
322 * May be NULL even if length > 0. */
323 int length) /* Length of the array of bytes, which must
324 * be >= 0. */
325 {
326 ByteArray *byteArrayPtr;
327
328 if (Tcl_IsShared(objPtr)) {
329 Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayObj");
330 }
331 TclFreeIntRep(objPtr);
332 TclInvalidateStringRep(objPtr);
333
334 if (length < 0) {
335 length = 0;
336 }
337 byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
338 byteArrayPtr->used = length;
339 byteArrayPtr->allocated = length;
340
341 if ((bytes != NULL) && (length > 0)) {
342 memcpy(byteArrayPtr->bytes, bytes, length);
343 }
344 objPtr->typePtr = &tclByteArrayType;
345 SET_BYTEARRAY(objPtr, byteArrayPtr);
346 }
347
348 /*
349 *----------------------------------------------------------------------
350 *
351 * Tcl_GetByteArrayFromObj --
352 *
353 * Attempt to get the array of bytes from the Tcl object. If the object
354 * is not already a ByteArray object, an attempt will be made to convert
355 * it to one.
356 *
357 * Results:
358 * Pointer to array of bytes representing the ByteArray object.
359 *
360 * Side effects:
361 * Frees old internal rep. Allocates memory for new internal rep.
362 *
363 *----------------------------------------------------------------------
364 */
365
366 unsigned char *
Tcl_GetByteArrayFromObj(Tcl_Obj * objPtr,int * lengthPtr)367 Tcl_GetByteArrayFromObj(
368 Tcl_Obj *objPtr, /* The ByteArray object. */
369 int *lengthPtr) /* If non-NULL, filled with length of the
370 * array of bytes in the ByteArray object. */
371 {
372 ByteArray *baPtr;
373
374 if (objPtr->typePtr != &tclByteArrayType) {
375 SetByteArrayFromAny(NULL, objPtr);
376 }
377 baPtr = GET_BYTEARRAY(objPtr);
378
379 if (lengthPtr != NULL) {
380 *lengthPtr = baPtr->used;
381 }
382 return (unsigned char *) baPtr->bytes;
383 }
384
385 /*
386 *----------------------------------------------------------------------
387 *
388 * Tcl_SetByteArrayLength --
389 *
390 * This procedure changes the length of the byte array for this object.
391 * Once the caller has set the length of the array, it is acceptable to
392 * directly modify the bytes in the array up until Tcl_GetStringFromObj()
393 * has been called on this object.
394 *
395 * Results:
396 * The new byte array of the specified length.
397 *
398 * Side effects:
399 * Allocates enough memory for an array of bytes of the requested size.
400 * When growing the array, the old array is copied to the new array; new
401 * bytes are undefined. When shrinking, the old array is truncated to the
402 * specified length.
403 *
404 *----------------------------------------------------------------------
405 */
406
407 unsigned char *
Tcl_SetByteArrayLength(Tcl_Obj * objPtr,int length)408 Tcl_SetByteArrayLength(
409 Tcl_Obj *objPtr, /* The ByteArray object. */
410 int length) /* New length for internal byte array. */
411 {
412 ByteArray *byteArrayPtr;
413
414 if (Tcl_IsShared(objPtr)) {
415 Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayLength");
416 }
417 if (objPtr->typePtr != &tclByteArrayType) {
418 SetByteArrayFromAny(NULL, objPtr);
419 }
420
421 byteArrayPtr = GET_BYTEARRAY(objPtr);
422 if (length > byteArrayPtr->allocated) {
423 byteArrayPtr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(length));
424 byteArrayPtr->allocated = length;
425 SET_BYTEARRAY(objPtr, byteArrayPtr);
426 }
427 TclInvalidateStringRep(objPtr);
428 byteArrayPtr->used = length;
429 return byteArrayPtr->bytes;
430 }
431
432 /*
433 *----------------------------------------------------------------------
434 *
435 * SetByteArrayFromAny --
436 *
437 * Generate the ByteArray internal rep from the string rep.
438 *
439 * Results:
440 * The return value is always TCL_OK.
441 *
442 * Side effects:
443 * A ByteArray object is stored as the internal rep of objPtr.
444 *
445 *----------------------------------------------------------------------
446 */
447
448 static int
SetByteArrayFromAny(Tcl_Interp * interp,Tcl_Obj * objPtr)449 SetByteArrayFromAny(
450 Tcl_Interp *interp, /* Not used. */
451 Tcl_Obj *objPtr) /* The object to convert to type ByteArray. */
452 {
453 int length;
454 const char *src, *srcEnd;
455 unsigned char *dst;
456 ByteArray *byteArrayPtr;
457 Tcl_UniChar ch = 0;
458
459 if (objPtr->typePtr != &tclByteArrayType) {
460 src = TclGetStringFromObj(objPtr, &length);
461 srcEnd = src + length;
462
463 byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
464 for (dst = byteArrayPtr->bytes; src < srcEnd; ) {
465 src += TclUtfToUniChar(src, &ch);
466 *dst++ = UCHAR(ch);
467 }
468
469 byteArrayPtr->used = dst - byteArrayPtr->bytes;
470 byteArrayPtr->allocated = length;
471
472 TclFreeIntRep(objPtr);
473 objPtr->typePtr = &tclByteArrayType;
474 SET_BYTEARRAY(objPtr, byteArrayPtr);
475 }
476 return TCL_OK;
477 }
478
479 /*
480 *----------------------------------------------------------------------
481 *
482 * FreeByteArrayInternalRep --
483 *
484 * Deallocate the storage associated with a ByteArray data object's
485 * internal representation.
486 *
487 * Results:
488 * None.
489 *
490 * Side effects:
491 * Frees memory.
492 *
493 *----------------------------------------------------------------------
494 */
495
496 static void
FreeByteArrayInternalRep(Tcl_Obj * objPtr)497 FreeByteArrayInternalRep(
498 Tcl_Obj *objPtr) /* Object with internal rep to free. */
499 {
500 ckfree(GET_BYTEARRAY(objPtr));
501 objPtr->typePtr = NULL;
502 }
503
504 /*
505 *----------------------------------------------------------------------
506 *
507 * DupByteArrayInternalRep --
508 *
509 * Initialize the internal representation of a ByteArray Tcl_Obj to a
510 * copy of the internal representation of an existing ByteArray object.
511 *
512 * Results:
513 * None.
514 *
515 * Side effects:
516 * Allocates memory.
517 *
518 *----------------------------------------------------------------------
519 */
520
521 static void
DupByteArrayInternalRep(Tcl_Obj * srcPtr,Tcl_Obj * copyPtr)522 DupByteArrayInternalRep(
523 Tcl_Obj *srcPtr, /* Object with internal rep to copy. */
524 Tcl_Obj *copyPtr) /* Object with internal rep to set. */
525 {
526 int length;
527 ByteArray *srcArrayPtr, *copyArrayPtr;
528
529 srcArrayPtr = GET_BYTEARRAY(srcPtr);
530 length = srcArrayPtr->used;
531
532 copyArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
533 copyArrayPtr->used = length;
534 copyArrayPtr->allocated = length;
535 memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, length);
536 SET_BYTEARRAY(copyPtr, copyArrayPtr);
537
538 copyPtr->typePtr = &tclByteArrayType;
539 }
540
541 /*
542 *----------------------------------------------------------------------
543 *
544 * UpdateStringOfByteArray --
545 *
546 * Update the string representation for a ByteArray data object. Note:
547 * This procedure does not invalidate an existing old string rep so
548 * storage will be lost if this has not already been done.
549 *
550 * Results:
551 * None.
552 *
553 * Side effects:
554 * The object's string is set to a valid string that results from the
555 * ByteArray-to-string conversion.
556 *
557 * The object becomes a string object -- the internal rep is discarded
558 * and the typePtr becomes NULL.
559 *
560 *----------------------------------------------------------------------
561 */
562
563 static void
UpdateStringOfByteArray(Tcl_Obj * objPtr)564 UpdateStringOfByteArray(
565 Tcl_Obj *objPtr) /* ByteArray object whose string rep to
566 * update. */
567 {
568 int i, length, size;
569 unsigned char *src;
570 char *dst;
571 ByteArray *byteArrayPtr;
572
573 byteArrayPtr = GET_BYTEARRAY(objPtr);
574 src = byteArrayPtr->bytes;
575 length = byteArrayPtr->used;
576
577 /*
578 * How much space will string rep need?
579 */
580
581 size = length;
582 for (i = 0; i < length && size >= 0; i++) {
583 if ((src[i] == 0) || (src[i] > 127)) {
584 size++;
585 }
586 }
587 if (size < 0) {
588 Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
589 }
590
591 dst = (char *)ckalloc(size + 1);
592 objPtr->bytes = dst;
593 objPtr->length = size;
594
595 if (size == length) {
596 memcpy(dst, src, size);
597 dst[size] = '\0';
598 } else {
599 for (i = 0; i < length; i++) {
600 dst += Tcl_UniCharToUtf(src[i], dst);
601 }
602 *dst = '\0';
603 }
604 }
605
606 /*
607 *----------------------------------------------------------------------
608 *
609 * TclAppendBytesToByteArray --
610 *
611 * This function appends an array of bytes to a byte array object. Note
612 * that the object *must* be unshared, and the array of bytes *must not*
613 * refer to the object being appended to.
614 *
615 * Results:
616 * None.
617 *
618 * Side effects:
619 * Allocates enough memory for an array of bytes of the requested total
620 * size, or possibly larger. [Bug 2992970]
621 *
622 *----------------------------------------------------------------------
623 */
624
625 void
TclAppendBytesToByteArray(Tcl_Obj * objPtr,const unsigned char * bytes,int len)626 TclAppendBytesToByteArray(
627 Tcl_Obj *objPtr,
628 const unsigned char *bytes,
629 int len)
630 {
631 ByteArray *byteArrayPtr;
632 int needed;
633
634 if (Tcl_IsShared(objPtr)) {
635 Tcl_Panic("%s called with shared object","TclAppendBytesToByteArray");
636 }
637 if (len < 0) {
638 Tcl_Panic("%s must be called with definite number of bytes to append",
639 "TclAppendBytesToByteArray");
640 }
641 if (len == 0) {
642 /*
643 * Append zero bytes is a no-op.
644 */
645
646 return;
647 }
648 if (objPtr->typePtr != &tclByteArrayType) {
649 SetByteArrayFromAny(NULL, objPtr);
650 }
651 byteArrayPtr = GET_BYTEARRAY(objPtr);
652
653 if (len > INT_MAX - byteArrayPtr->used) {
654 Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
655 }
656
657 needed = byteArrayPtr->used + len;
658 /*
659 * If we need to, resize the allocated space in the byte array.
660 */
661
662 if (needed > byteArrayPtr->allocated) {
663 ByteArray *ptr = NULL;
664 int attempt;
665
666 if (needed <= INT_MAX/2) {
667 /*
668 * Try to allocate double the total space that is needed.
669 */
670
671 attempt = 2 * needed;
672 ptr = (ByteArray *)attemptckrealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt));
673 }
674 if (ptr == NULL) {
675 /*
676 * Try to allocate double the increment that is needed (plus).
677 */
678
679 unsigned int limit = INT_MAX - needed;
680 unsigned int extra = len + TCL_MIN_GROWTH;
681 int growth = (int) ((extra > limit) ? limit : extra);
682
683 attempt = needed + growth;
684 ptr = (ByteArray *)attemptckrealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt));
685 }
686 if (ptr == NULL) {
687 /*
688 * Last chance: Try to allocate exactly what is needed.
689 */
690
691 attempt = needed;
692 ptr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt));
693 }
694 byteArrayPtr = ptr;
695 byteArrayPtr->allocated = attempt;
696 SET_BYTEARRAY(objPtr, byteArrayPtr);
697 }
698
699 if (bytes) {
700 memcpy(byteArrayPtr->bytes + byteArrayPtr->used, bytes, len);
701 }
702 byteArrayPtr->used += len;
703 TclInvalidateStringRep(objPtr);
704 }
705
706 /*
707 *----------------------------------------------------------------------
708 *
709 * TclInitBinaryCmd --
710 *
711 * This function is called to create the "binary" Tcl command. See the
712 * user documentation for details on what it does.
713 *
714 * Results:
715 * A command token for the new command.
716 *
717 * Side effects:
718 * Creates a new binary command as a mapped ensemble.
719 *
720 *----------------------------------------------------------------------
721 */
722
723 Tcl_Command
TclInitBinaryCmd(Tcl_Interp * interp)724 TclInitBinaryCmd(
725 Tcl_Interp *interp)
726 {
727 Tcl_Command binaryEnsemble;
728
729 binaryEnsemble = TclMakeEnsemble(interp, "binary", binaryMap);
730 TclMakeEnsemble(interp, "binary encode", encodeMap);
731 TclMakeEnsemble(interp, "binary decode", decodeMap);
732 return binaryEnsemble;
733 }
734
735 /*
736 *----------------------------------------------------------------------
737 *
738 * BinaryFormatCmd --
739 *
740 * This procedure implements the "binary format" Tcl command.
741 *
742 * Results:
743 * A standard Tcl result.
744 *
745 * Side effects:
746 * See the user documentation.
747 *
748 *----------------------------------------------------------------------
749 */
750
751 static int
BinaryFormatCmd(ClientData dummy,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])752 BinaryFormatCmd(
753 ClientData dummy, /* Not used. */
754 Tcl_Interp *interp, /* Current interpreter. */
755 int objc, /* Number of arguments. */
756 Tcl_Obj *const objv[]) /* Argument objects. */
757 {
758 int arg; /* Index of next argument to consume. */
759 int value = 0; /* Current integer value to be packed.
760 * Initialized to avoid compiler warning. */
761 char cmd; /* Current format character. */
762 int count; /* Count associated with current format
763 * character. */
764 int flags; /* Format field flags */
765 const char *format; /* Pointer to current position in format
766 * string. */
767 Tcl_Obj *resultPtr = NULL; /* Object holding result buffer. */
768 unsigned char *buffer; /* Start of result buffer. */
769 unsigned char *cursor; /* Current position within result buffer. */
770 unsigned char *maxPos; /* Greatest position within result buffer that
771 * cursor has visited.*/
772 const char *errorString;
773 const char *errorValue, *str;
774 int offset, size, length;
775
776 if (objc < 2) {
777 Tcl_WrongNumArgs(interp, 1, objv, "formatString ?arg ...?");
778 return TCL_ERROR;
779 }
780
781 /*
782 * To avoid copying the data, we format the string in two passes. The
783 * first pass computes the size of the output buffer. The second pass
784 * places the formatted data into the buffer.
785 */
786
787 format = TclGetString(objv[1]);
788 arg = 2;
789 offset = 0;
790 length = 0;
791 while (*format != '\0') {
792 str = format;
793 flags = 0;
794 if (!GetFormatSpec(&format, &cmd, &count, &flags)) {
795 break;
796 }
797 switch (cmd) {
798 case 'a':
799 case 'A':
800 case 'b':
801 case 'B':
802 case 'h':
803 case 'H':
804 /*
805 * For string-type specifiers, the count corresponds to the number
806 * of bytes in a single argument.
807 */
808
809 if (arg >= objc) {
810 goto badIndex;
811 }
812 if (count == BINARY_ALL) {
813 Tcl_GetByteArrayFromObj(objv[arg], &count);
814 } else if (count == BINARY_NOCOUNT) {
815 count = 1;
816 }
817 arg++;
818 if (cmd == 'a' || cmd == 'A') {
819 offset += count;
820 } else if (cmd == 'b' || cmd == 'B') {
821 offset += (count + 7) / 8;
822 } else {
823 offset += (count + 1) / 2;
824 }
825 break;
826 case 'c':
827 size = 1;
828 goto doNumbers;
829 case 't':
830 case 's':
831 case 'S':
832 size = 2;
833 goto doNumbers;
834 case 'n':
835 case 'i':
836 case 'I':
837 size = 4;
838 goto doNumbers;
839 case 'm':
840 case 'w':
841 case 'W':
842 size = 8;
843 goto doNumbers;
844 case 'r':
845 case 'R':
846 case 'f':
847 size = sizeof(float);
848 goto doNumbers;
849 case 'q':
850 case 'Q':
851 case 'd':
852 size = sizeof(double);
853
854 doNumbers:
855 if (arg >= objc) {
856 goto badIndex;
857 }
858
859 /*
860 * For number-type specifiers, the count corresponds to the number
861 * of elements in the list stored in a single argument. If no
862 * count is specified, then the argument is taken as a single
863 * non-list value.
864 */
865
866 if (count == BINARY_NOCOUNT) {
867 arg++;
868 count = 1;
869 } else {
870 int listc;
871 Tcl_Obj **listv;
872
873 /*
874 * The macro evals its args more than once: avoid arg++
875 */
876
877 if (TclListObjGetElements(interp, objv[arg], &listc,
878 &listv) != TCL_OK) {
879 return TCL_ERROR;
880 }
881 arg++;
882
883 if (count == BINARY_ALL) {
884 count = listc;
885 } else if (count > listc) {
886 Tcl_SetObjResult(interp, Tcl_NewStringObj(
887 "number of elements in list does not match count",
888 -1));
889 return TCL_ERROR;
890 }
891 }
892 offset += count*size;
893 break;
894
895 case 'x':
896 if (count == BINARY_ALL) {
897 Tcl_SetObjResult(interp, Tcl_NewStringObj(
898 "cannot use \"*\" in format string with \"x\"", -1));
899 return TCL_ERROR;
900 } else if (count == BINARY_NOCOUNT) {
901 count = 1;
902 }
903 offset += count;
904 break;
905 case 'X':
906 if (count == BINARY_NOCOUNT) {
907 count = 1;
908 }
909 if ((count > offset) || (count == BINARY_ALL)) {
910 count = offset;
911 }
912 if (offset > length) {
913 length = offset;
914 }
915 offset -= count;
916 break;
917 case '@':
918 if (offset > length) {
919 length = offset;
920 }
921 if (count == BINARY_ALL) {
922 offset = length;
923 } else if (count == BINARY_NOCOUNT) {
924 goto badCount;
925 } else {
926 offset = count;
927 }
928 break;
929 default:
930 errorString = str;
931 goto badField;
932 }
933 }
934 if (offset > length) {
935 length = offset;
936 }
937 if (length == 0) {
938 return TCL_OK;
939 }
940
941 /*
942 * Prepare the result object by preallocating the caclulated number of
943 * bytes and filling with nulls.
944 */
945
946 resultPtr = Tcl_NewObj();
947 buffer = Tcl_SetByteArrayLength(resultPtr, length);
948 memset(buffer, 0, length);
949
950 /*
951 * Pack the data into the result object. Note that we can skip the error
952 * checking during this pass, since we have already parsed the string
953 * once.
954 */
955
956 arg = 2;
957 format = TclGetString(objv[1]);
958 cursor = buffer;
959 maxPos = cursor;
960 while (*format != 0) {
961 flags = 0;
962 if (!GetFormatSpec(&format, &cmd, &count, &flags)) {
963 break;
964 }
965 if ((count == 0) && (cmd != '@')) {
966 if (cmd != 'x') {
967 arg++;
968 }
969 continue;
970 }
971 switch (cmd) {
972 case 'a':
973 case 'A': {
974 char pad = (char) (cmd == 'a' ? '\0' : ' ');
975 unsigned char *bytes;
976
977 bytes = Tcl_GetByteArrayFromObj(objv[arg++], &length);
978
979 if (count == BINARY_ALL) {
980 count = length;
981 } else if (count == BINARY_NOCOUNT) {
982 count = 1;
983 }
984 if (length >= count) {
985 memcpy(cursor, bytes, count);
986 } else {
987 memcpy(cursor, bytes, length);
988 memset(cursor + length, pad, count - length);
989 }
990 cursor += count;
991 break;
992 }
993 case 'b':
994 case 'B': {
995 unsigned char *last;
996
997 str = TclGetStringFromObj(objv[arg], &length);
998 arg++;
999 if (count == BINARY_ALL) {
1000 count = length;
1001 } else if (count == BINARY_NOCOUNT) {
1002 count = 1;
1003 }
1004 last = cursor + ((count + 7) / 8);
1005 if (count > length) {
1006 count = length;
1007 }
1008 value = 0;
1009 errorString = "binary";
1010 if (cmd == 'B') {
1011 for (offset = 0; offset < count; offset++) {
1012 value <<= 1;
1013 if (str[offset] == '1') {
1014 value |= 1;
1015 } else if (str[offset] != '0') {
1016 errorValue = str;
1017 Tcl_DecrRefCount(resultPtr);
1018 goto badValue;
1019 }
1020 if (((offset + 1) % 8) == 0) {
1021 *cursor++ = UCHAR(value);
1022 value = 0;
1023 }
1024 }
1025 } else {
1026 for (offset = 0; offset < count; offset++) {
1027 value >>= 1;
1028 if (str[offset] == '1') {
1029 value |= 128;
1030 } else if (str[offset] != '0') {
1031 errorValue = str;
1032 Tcl_DecrRefCount(resultPtr);
1033 goto badValue;
1034 }
1035 if (!((offset + 1) % 8)) {
1036 *cursor++ = UCHAR(value);
1037 value = 0;
1038 }
1039 }
1040 }
1041 if ((offset % 8) != 0) {
1042 if (cmd == 'B') {
1043 value <<= 8 - (offset % 8);
1044 } else {
1045 value >>= 8 - (offset % 8);
1046 }
1047 *cursor++ = UCHAR(value);
1048 }
1049 while (cursor < last) {
1050 *cursor++ = '\0';
1051 }
1052 break;
1053 }
1054 case 'h':
1055 case 'H': {
1056 unsigned char *last;
1057 int c;
1058
1059 str = TclGetStringFromObj(objv[arg], &length);
1060 arg++;
1061 if (count == BINARY_ALL) {
1062 count = length;
1063 } else if (count == BINARY_NOCOUNT) {
1064 count = 1;
1065 }
1066 last = cursor + ((count + 1) / 2);
1067 if (count > length) {
1068 count = length;
1069 }
1070 value = 0;
1071 errorString = "hexadecimal";
1072 if (cmd == 'H') {
1073 for (offset = 0; offset < count; offset++) {
1074 value <<= 4;
1075 if (!isxdigit(UCHAR(str[offset]))) { /* INTL: digit */
1076 errorValue = str;
1077 Tcl_DecrRefCount(resultPtr);
1078 goto badValue;
1079 }
1080 c = str[offset] - '0';
1081 if (c > 9) {
1082 c += ('0' - 'A') + 10;
1083 }
1084 if (c > 16) {
1085 c += ('A' - 'a');
1086 }
1087 value |= (c & 0xF);
1088 if (offset % 2) {
1089 *cursor++ = (char) value;
1090 value = 0;
1091 }
1092 }
1093 } else {
1094 for (offset = 0; offset < count; offset++) {
1095 value >>= 4;
1096
1097 if (!isxdigit(UCHAR(str[offset]))) { /* INTL: digit */
1098 errorValue = str;
1099 Tcl_DecrRefCount(resultPtr);
1100 goto badValue;
1101 }
1102 c = str[offset] - '0';
1103 if (c > 9) {
1104 c += ('0' - 'A') + 10;
1105 }
1106 if (c > 16) {
1107 c += ('A' - 'a');
1108 }
1109 value |= ((c << 4) & 0xF0);
1110 if (offset % 2) {
1111 *cursor++ = UCHAR(value & 0xFF);
1112 value = 0;
1113 }
1114 }
1115 }
1116 if (offset % 2) {
1117 if (cmd == 'H') {
1118 value <<= 4;
1119 } else {
1120 value >>= 4;
1121 }
1122 *cursor++ = UCHAR(value);
1123 }
1124
1125 while (cursor < last) {
1126 *cursor++ = '\0';
1127 }
1128 break;
1129 }
1130 case 'c':
1131 case 't':
1132 case 's':
1133 case 'S':
1134 case 'n':
1135 case 'i':
1136 case 'I':
1137 case 'm':
1138 case 'w':
1139 case 'W':
1140 case 'r':
1141 case 'R':
1142 case 'd':
1143 case 'q':
1144 case 'Q':
1145 case 'f': {
1146 int listc, i;
1147 Tcl_Obj **listv;
1148
1149 if (count == BINARY_NOCOUNT) {
1150 /*
1151 * Note that we are casting away the const-ness of objv, but
1152 * this is safe since we aren't going to modify the array.
1153 */
1154
1155 listv = (Tcl_Obj **) (objv + arg);
1156 listc = 1;
1157 count = 1;
1158 } else {
1159 TclListObjGetElements(interp, objv[arg], &listc, &listv);
1160 if (count == BINARY_ALL) {
1161 count = listc;
1162 }
1163 }
1164 arg++;
1165 for (i = 0; i < count; i++) {
1166 if (FormatNumber(interp, cmd, listv[i], &cursor) != TCL_OK) {
1167 Tcl_DecrRefCount(resultPtr);
1168 return TCL_ERROR;
1169 }
1170 }
1171 break;
1172 }
1173 case 'x':
1174 if (count == BINARY_NOCOUNT) {
1175 count = 1;
1176 }
1177 memset(cursor, 0, count);
1178 cursor += count;
1179 break;
1180 case 'X':
1181 if (cursor > maxPos) {
1182 maxPos = cursor;
1183 }
1184 if (count == BINARY_NOCOUNT) {
1185 count = 1;
1186 }
1187 if ((count == BINARY_ALL) || (count > (cursor - buffer))) {
1188 cursor = buffer;
1189 } else {
1190 cursor -= count;
1191 }
1192 break;
1193 case '@':
1194 if (cursor > maxPos) {
1195 maxPos = cursor;
1196 }
1197 if (count == BINARY_ALL) {
1198 cursor = maxPos;
1199 } else {
1200 cursor = buffer + count;
1201 }
1202 break;
1203 }
1204 }
1205 Tcl_SetObjResult(interp, resultPtr);
1206 return TCL_OK;
1207
1208 badValue:
1209 Tcl_ResetResult(interp);
1210 Tcl_SetObjResult(interp, Tcl_ObjPrintf(
1211 "expected %s string but got \"%s\" instead",
1212 errorString, errorValue));
1213 return TCL_ERROR;
1214
1215 badCount:
1216 errorString = "missing count for \"@\" field specifier";
1217 goto error;
1218
1219 badIndex:
1220 errorString = "not enough arguments for all format specifiers";
1221 goto error;
1222
1223 badField:
1224 {
1225 int ch;
1226 char buf[8] = "";
1227
1228 TclUtfToUCS4(errorString, &ch);
1229 buf[TclUCS4ToUtf(ch, buf)] = '\0';
1230 Tcl_SetObjResult(interp, Tcl_ObjPrintf(
1231 "bad field specifier \"%s\"", buf));
1232 return TCL_ERROR;
1233 }
1234
1235 error:
1236 Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, -1));
1237 return TCL_ERROR;
1238 }
1239
1240 /*
1241 *----------------------------------------------------------------------
1242 *
1243 * BinaryScanCmd --
1244 *
1245 * This procedure implements the "binary scan" Tcl command.
1246 *
1247 * Results:
1248 * A standard Tcl result.
1249 *
1250 * Side effects:
1251 * See the user documentation.
1252 *
1253 *----------------------------------------------------------------------
1254 */
1255
1256 int
BinaryScanCmd(ClientData dummy,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])1257 BinaryScanCmd(
1258 ClientData dummy, /* Not used. */
1259 Tcl_Interp *interp, /* Current interpreter. */
1260 int objc, /* Number of arguments. */
1261 Tcl_Obj *const objv[]) /* Argument objects. */
1262 {
1263 int arg; /* Index of next argument to consume. */
1264 int value = 0; /* Current integer value to be packed.
1265 * Initialized to avoid compiler warning. */
1266 char cmd; /* Current format character. */
1267 int count; /* Count associated with current format
1268 * character. */
1269 int flags; /* Format field flags */
1270 const char *format; /* Pointer to current position in format
1271 * string. */
1272 Tcl_Obj *resultPtr = NULL; /* Object holding result buffer. */
1273 unsigned char *buffer; /* Start of result buffer. */
1274 const char *errorString;
1275 const char *str;
1276 int offset, size, length;
1277
1278 int i;
1279 Tcl_Obj *valuePtr, *elementPtr;
1280 Tcl_HashTable numberCacheHash;
1281 Tcl_HashTable *numberCachePtr;
1282
1283 if (objc < 3) {
1284 Tcl_WrongNumArgs(interp, 1, objv,
1285 "value formatString ?varName ...?");
1286 return TCL_ERROR;
1287 }
1288 numberCachePtr = &numberCacheHash;
1289 Tcl_InitHashTable(numberCachePtr, TCL_ONE_WORD_KEYS);
1290 buffer = Tcl_GetByteArrayFromObj(objv[1], &length);
1291 format = TclGetString(objv[2]);
1292 arg = 3;
1293 offset = 0;
1294 while (*format != '\0') {
1295 str = format;
1296 flags = 0;
1297 if (!GetFormatSpec(&format, &cmd, &count, &flags)) {
1298 goto done;
1299 }
1300 switch (cmd) {
1301 case 'a':
1302 case 'A': {
1303 unsigned char *src;
1304
1305 if (arg >= objc) {
1306 DeleteScanNumberCache(numberCachePtr);
1307 goto badIndex;
1308 }
1309 if (count == BINARY_ALL) {
1310 count = length - offset;
1311 } else {
1312 if (count == BINARY_NOCOUNT) {
1313 count = 1;
1314 }
1315 if (count > (length - offset)) {
1316 goto done;
1317 }
1318 }
1319
1320 src = buffer + offset;
1321 size = count;
1322
1323 /*
1324 * Trim trailing nulls and spaces, if necessary.
1325 */
1326
1327 if (cmd == 'A') {
1328 while (size > 0) {
1329 if (src[size - 1] != '\0' && src[size - 1] != ' ') {
1330 break;
1331 }
1332 size--;
1333 }
1334 }
1335
1336 /*
1337 * Have to do this #ifdef-fery because (as part of defining
1338 * Tcl_NewByteArrayObj) we removed the #def that hides this stuff
1339 * normally. If this code ever gets copied to another file, it
1340 * should be changed back to the simpler version.
1341 */
1342
1343 #ifdef TCL_MEM_DEBUG
1344 valuePtr = Tcl_DbNewByteArrayObj(src, size, __FILE__, __LINE__);
1345 #else
1346 valuePtr = Tcl_NewByteArrayObj(src, size);
1347 #endif /* TCL_MEM_DEBUG */
1348
1349 resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1350 TCL_LEAVE_ERR_MSG);
1351 arg++;
1352 if (resultPtr == NULL) {
1353 DeleteScanNumberCache(numberCachePtr);
1354 return TCL_ERROR;
1355 }
1356 offset += count;
1357 break;
1358 }
1359 case 'b':
1360 case 'B': {
1361 unsigned char *src;
1362 char *dest;
1363
1364 if (arg >= objc) {
1365 DeleteScanNumberCache(numberCachePtr);
1366 goto badIndex;
1367 }
1368 if (count == BINARY_ALL) {
1369 count = (length - offset) * 8;
1370 } else {
1371 if (count == BINARY_NOCOUNT) {
1372 count = 1;
1373 }
1374 if (count > (length - offset) * 8) {
1375 goto done;
1376 }
1377 }
1378 src = buffer + offset;
1379 valuePtr = Tcl_NewObj();
1380 Tcl_SetObjLength(valuePtr, count);
1381 dest = TclGetString(valuePtr);
1382
1383 if (cmd == 'b') {
1384 for (i = 0; i < count; i++) {
1385 if (i % 8) {
1386 value >>= 1;
1387 } else {
1388 value = *src++;
1389 }
1390 *dest++ = (char) ((value & 1) ? '1' : '0');
1391 }
1392 } else {
1393 for (i = 0; i < count; i++) {
1394 if (i % 8) {
1395 value <<= 1;
1396 } else {
1397 value = *src++;
1398 }
1399 *dest++ = (char) ((value & 0x80) ? '1' : '0');
1400 }
1401 }
1402
1403 resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1404 TCL_LEAVE_ERR_MSG);
1405 arg++;
1406 if (resultPtr == NULL) {
1407 DeleteScanNumberCache(numberCachePtr);
1408 return TCL_ERROR;
1409 }
1410 offset += (count + 7) / 8;
1411 break;
1412 }
1413 case 'h':
1414 case 'H': {
1415 char *dest;
1416 unsigned char *src;
1417 static const char hexdigit[] = "0123456789abcdef";
1418
1419 if (arg >= objc) {
1420 DeleteScanNumberCache(numberCachePtr);
1421 goto badIndex;
1422 }
1423 if (count == BINARY_ALL) {
1424 count = (length - offset)*2;
1425 } else {
1426 if (count == BINARY_NOCOUNT) {
1427 count = 1;
1428 }
1429 if (count > (length - offset)*2) {
1430 goto done;
1431 }
1432 }
1433 src = buffer + offset;
1434 valuePtr = Tcl_NewObj();
1435 Tcl_SetObjLength(valuePtr, count);
1436 dest = TclGetString(valuePtr);
1437
1438 if (cmd == 'h') {
1439 for (i = 0; i < count; i++) {
1440 if (i % 2) {
1441 value >>= 4;
1442 } else {
1443 value = *src++;
1444 }
1445 *dest++ = hexdigit[value & 0xF];
1446 }
1447 } else {
1448 for (i = 0; i < count; i++) {
1449 if (i % 2) {
1450 value <<= 4;
1451 } else {
1452 value = *src++;
1453 }
1454 *dest++ = hexdigit[(value >> 4) & 0xF];
1455 }
1456 }
1457
1458 resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1459 TCL_LEAVE_ERR_MSG);
1460 arg++;
1461 if (resultPtr == NULL) {
1462 DeleteScanNumberCache(numberCachePtr);
1463 return TCL_ERROR;
1464 }
1465 offset += (count + 1) / 2;
1466 break;
1467 }
1468 case 'c':
1469 size = 1;
1470 goto scanNumber;
1471 case 't':
1472 case 's':
1473 case 'S':
1474 size = 2;
1475 goto scanNumber;
1476 case 'n':
1477 case 'i':
1478 case 'I':
1479 size = 4;
1480 goto scanNumber;
1481 case 'm':
1482 case 'w':
1483 case 'W':
1484 size = 8;
1485 goto scanNumber;
1486 case 'r':
1487 case 'R':
1488 case 'f':
1489 size = sizeof(float);
1490 goto scanNumber;
1491 case 'q':
1492 case 'Q':
1493 case 'd': {
1494 unsigned char *src;
1495
1496 size = sizeof(double);
1497 /* fall through */
1498
1499 scanNumber:
1500 if (arg >= objc) {
1501 DeleteScanNumberCache(numberCachePtr);
1502 goto badIndex;
1503 }
1504 if (count == BINARY_NOCOUNT) {
1505 if ((length - offset) < size) {
1506 goto done;
1507 }
1508 valuePtr = ScanNumber(buffer+offset, cmd, flags,
1509 &numberCachePtr);
1510 offset += size;
1511 } else {
1512 if (count == BINARY_ALL) {
1513 count = (length - offset) / size;
1514 }
1515 if ((length - offset) < (count * size)) {
1516 goto done;
1517 }
1518 valuePtr = Tcl_NewObj();
1519 src = buffer + offset;
1520 for (i = 0; i < count; i++) {
1521 elementPtr = ScanNumber(src, cmd, flags, &numberCachePtr);
1522 src += size;
1523 Tcl_ListObjAppendElement(NULL, valuePtr, elementPtr);
1524 }
1525 offset += count * size;
1526 }
1527
1528 resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1529 TCL_LEAVE_ERR_MSG);
1530 arg++;
1531 if (resultPtr == NULL) {
1532 DeleteScanNumberCache(numberCachePtr);
1533 return TCL_ERROR;
1534 }
1535 break;
1536 }
1537 case 'x':
1538 if (count == BINARY_NOCOUNT) {
1539 count = 1;
1540 }
1541 if ((count == BINARY_ALL) || (count > (length - offset))) {
1542 offset = length;
1543 } else {
1544 offset += count;
1545 }
1546 break;
1547 case 'X':
1548 if (count == BINARY_NOCOUNT) {
1549 count = 1;
1550 }
1551 if ((count == BINARY_ALL) || (count > offset)) {
1552 offset = 0;
1553 } else {
1554 offset -= count;
1555 }
1556 break;
1557 case '@':
1558 if (count == BINARY_NOCOUNT) {
1559 DeleteScanNumberCache(numberCachePtr);
1560 goto badCount;
1561 }
1562 if ((count == BINARY_ALL) || (count > length)) {
1563 offset = length;
1564 } else {
1565 offset = count;
1566 }
1567 break;
1568 default:
1569 DeleteScanNumberCache(numberCachePtr);
1570 errorString = str;
1571 goto badField;
1572 }
1573 }
1574
1575 /*
1576 * Set the result to the last position of the cursor.
1577 */
1578
1579 done:
1580 Tcl_SetObjResult(interp, Tcl_NewLongObj(arg - 3));
1581 DeleteScanNumberCache(numberCachePtr);
1582
1583 return TCL_OK;
1584
1585 badCount:
1586 errorString = "missing count for \"@\" field specifier";
1587 goto error;
1588
1589 badIndex:
1590 errorString = "not enough arguments for all format specifiers";
1591 goto error;
1592
1593 badField:
1594 {
1595 int ch;
1596 char buf[8] = "";
1597
1598 TclUtfToUCS4(errorString, &ch);
1599 buf[TclUCS4ToUtf(ch, buf)] = '\0';
1600 Tcl_SetObjResult(interp, Tcl_ObjPrintf(
1601 "bad field specifier \"%s\"", buf));
1602 return TCL_ERROR;
1603 }
1604
1605 error:
1606 Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, -1));
1607 return TCL_ERROR;
1608 }
1609
1610 /*
1611 *----------------------------------------------------------------------
1612 *
1613 * GetFormatSpec --
1614 *
1615 * This function parses the format strings used in the binary format and
1616 * scan commands.
1617 *
1618 * Results:
1619 * Moves the formatPtr to the start of the next command. Returns the
1620 * current command character and count in cmdPtr and countPtr. The count
1621 * is set to BINARY_ALL if the count character was '*' or BINARY_NOCOUNT
1622 * if no count was specified. Returns 1 on success, or 0 if the string
1623 * did not have a format specifier.
1624 *
1625 * Side effects:
1626 * None.
1627 *
1628 *----------------------------------------------------------------------
1629 */
1630
1631 static int
GetFormatSpec(const char ** formatPtr,char * cmdPtr,int * countPtr,int * flagsPtr)1632 GetFormatSpec(
1633 const char **formatPtr, /* Pointer to format string. */
1634 char *cmdPtr, /* Pointer to location of command char. */
1635 int *countPtr, /* Pointer to repeat count value. */
1636 int *flagsPtr) /* Pointer to field flags */
1637 {
1638 /*
1639 * Skip any leading blanks.
1640 */
1641
1642 while (**formatPtr == ' ') {
1643 (*formatPtr)++;
1644 }
1645
1646 /*
1647 * The string was empty, except for whitespace, so fail.
1648 */
1649
1650 if (!(**formatPtr)) {
1651 return 0;
1652 }
1653
1654 /*
1655 * Extract the command character and any trailing digits or '*'.
1656 */
1657
1658 *cmdPtr = **formatPtr;
1659 (*formatPtr)++;
1660 if (**formatPtr == 'u') {
1661 (*formatPtr)++;
1662 *flagsPtr |= BINARY_UNSIGNED;
1663 }
1664 if (**formatPtr == '*') {
1665 (*formatPtr)++;
1666 *countPtr = BINARY_ALL;
1667 } else if (isdigit(UCHAR(**formatPtr))) { /* INTL: digit */
1668 unsigned long int count;
1669
1670 errno = 0;
1671 count = strtoul(*formatPtr, (char **) formatPtr, 10);
1672 if (errno || (count > (unsigned long) INT_MAX)) {
1673 *countPtr = INT_MAX;
1674 } else {
1675 *countPtr = (int) count;
1676 }
1677 } else {
1678 *countPtr = BINARY_NOCOUNT;
1679 }
1680 return 1;
1681 }
1682
1683 /*
1684 *----------------------------------------------------------------------
1685 *
1686 * NeedReversing --
1687 *
1688 * This routine determines, if bytes of a number need to be re-ordered,
1689 * and returns a numeric code indicating the re-ordering to be done.
1690 * This depends on the endiannes of the machine and the desired format.
1691 * It is in effect a table (whose contents depend on the endianness of
1692 * the system) describing whether a value needs reversing or not. Anyone
1693 * porting the code to a big-endian platform should take care to make
1694 * sure that they define WORDS_BIGENDIAN though this is already done by
1695 * configure for the Unix build; little-endian platforms (including
1696 * Windows) don't need to do anything.
1697 *
1698 * Results:
1699 * 0 No re-ordering needed.
1700 * 1 Reverse the bytes: 01234567 <-> 76543210 (little to big)
1701 * 2 Apply this re-ordering: 01234567 <-> 45670123 (Nokia to little)
1702 * 3 Apply this re-ordering: 01234567 <-> 32107654 (Nokia to big)
1703 *
1704 * Side effects:
1705 * None
1706 *
1707 *----------------------------------------------------------------------
1708 */
1709
1710 static int
NeedReversing(int format)1711 NeedReversing(
1712 int format)
1713 {
1714 switch (format) {
1715 /* native floats and doubles: never reverse */
1716 case 'd':
1717 case 'f':
1718 /* big endian ints: never reverse */
1719 case 'I':
1720 case 'S':
1721 case 'W':
1722 #ifdef WORDS_BIGENDIAN
1723 /* native ints: reverse if we're little-endian */
1724 case 'n':
1725 case 't':
1726 case 'm':
1727 /* f: reverse if we're little-endian */
1728 case 'Q':
1729 case 'R':
1730 #else /* !WORDS_BIGENDIAN */
1731 /* small endian floats: reverse if we're big-endian */
1732 case 'r':
1733 #endif /* WORDS_BIGENDIAN */
1734 return 0;
1735
1736 #ifdef WORDS_BIGENDIAN
1737 /* small endian floats: reverse if we're big-endian */
1738 case 'q':
1739 case 'r':
1740 #else /* !WORDS_BIGENDIAN */
1741 /* native ints: reverse if we're little-endian */
1742 case 'n':
1743 case 't':
1744 case 'm':
1745 /* f: reverse if we're little-endian */
1746 case 'R':
1747 #endif /* WORDS_BIGENDIAN */
1748 /* small endian ints: always reverse */
1749 case 'i':
1750 case 's':
1751 case 'w':
1752 return 1;
1753
1754 #ifndef WORDS_BIGENDIAN
1755 /*
1756 * The Q and q formats need special handling to account for the unusual
1757 * byte ordering of 8-byte floats on Nokia 770 systems, which claim to be
1758 * little-endian, but also reverse word order.
1759 */
1760
1761 case 'Q':
1762 if (TclNokia770Doubles()) {
1763 return 3;
1764 }
1765 return 1;
1766 case 'q':
1767 if (TclNokia770Doubles()) {
1768 return 2;
1769 }
1770 return 0;
1771 #endif
1772 }
1773
1774 Tcl_Panic("unexpected fallthrough");
1775 return 0;
1776 }
1777
1778 /*
1779 *----------------------------------------------------------------------
1780 *
1781 * CopyNumber --
1782 *
1783 * This routine is called by FormatNumber and ScanNumber to copy a
1784 * floating-point number. If required, bytes are reversed while copying.
1785 * The behaviour is only fully defined when used with IEEE float and
1786 * double values (guaranteed to be 4 and 8 bytes long, respectively.)
1787 *
1788 * Results:
1789 * None
1790 *
1791 * Side effects:
1792 * Copies length bytes
1793 *
1794 *----------------------------------------------------------------------
1795 */
1796
1797 static void
CopyNumber(const void * from,void * to,unsigned length,int type)1798 CopyNumber(
1799 const void *from, /* source */
1800 void *to, /* destination */
1801 unsigned length, /* Number of bytes to copy */
1802 int type) /* What type of thing are we copying? */
1803 {
1804 switch (NeedReversing(type)) {
1805 case 0:
1806 memcpy(to, from, length);
1807 break;
1808 case 1: {
1809 const unsigned char *fromPtr = (const unsigned char *)from;
1810 unsigned char *toPtr = (unsigned char *)to;
1811
1812 switch (length) {
1813 case 4:
1814 toPtr[0] = fromPtr[3];
1815 toPtr[1] = fromPtr[2];
1816 toPtr[2] = fromPtr[1];
1817 toPtr[3] = fromPtr[0];
1818 break;
1819 case 8:
1820 toPtr[0] = fromPtr[7];
1821 toPtr[1] = fromPtr[6];
1822 toPtr[2] = fromPtr[5];
1823 toPtr[3] = fromPtr[4];
1824 toPtr[4] = fromPtr[3];
1825 toPtr[5] = fromPtr[2];
1826 toPtr[6] = fromPtr[1];
1827 toPtr[7] = fromPtr[0];
1828 break;
1829 }
1830 break;
1831 }
1832 case 2: {
1833 const unsigned char *fromPtr = (const unsigned char *)from;
1834 unsigned char *toPtr = (unsigned char *)to;
1835
1836 toPtr[0] = fromPtr[4];
1837 toPtr[1] = fromPtr[5];
1838 toPtr[2] = fromPtr[6];
1839 toPtr[3] = fromPtr[7];
1840 toPtr[4] = fromPtr[0];
1841 toPtr[5] = fromPtr[1];
1842 toPtr[6] = fromPtr[2];
1843 toPtr[7] = fromPtr[3];
1844 break;
1845 }
1846 case 3: {
1847 const unsigned char *fromPtr = (const unsigned char *)from;
1848 unsigned char *toPtr = (unsigned char *)to;
1849
1850 toPtr[0] = fromPtr[3];
1851 toPtr[1] = fromPtr[2];
1852 toPtr[2] = fromPtr[1];
1853 toPtr[3] = fromPtr[0];
1854 toPtr[4] = fromPtr[7];
1855 toPtr[5] = fromPtr[6];
1856 toPtr[6] = fromPtr[5];
1857 toPtr[7] = fromPtr[4];
1858 break;
1859 }
1860 }
1861 }
1862
1863 /*
1864 *----------------------------------------------------------------------
1865 *
1866 * FormatNumber --
1867 *
1868 * This routine is called by Tcl_BinaryObjCmd to format a number into a
1869 * location pointed at by cursor.
1870 *
1871 * Results:
1872 * A standard Tcl result.
1873 *
1874 * Side effects:
1875 * Moves the cursor to the next location to be written into.
1876 *
1877 *----------------------------------------------------------------------
1878 */
1879
1880 static int
FormatNumber(Tcl_Interp * interp,int type,Tcl_Obj * src,unsigned char ** cursorPtr)1881 FormatNumber(
1882 Tcl_Interp *interp, /* Current interpreter, used to report
1883 * errors. */
1884 int type, /* Type of number to format. */
1885 Tcl_Obj *src, /* Number to format. */
1886 unsigned char **cursorPtr) /* Pointer to index into destination buffer. */
1887 {
1888 long value;
1889 double dvalue;
1890 Tcl_WideInt wvalue;
1891 float fvalue;
1892
1893 switch (type) {
1894 case 'd':
1895 case 'q':
1896 case 'Q':
1897 /*
1898 * Double-precision floating point values. Tcl_GetDoubleFromObj
1899 * returns TCL_ERROR for NaN, but we can check by comparing the
1900 * object's type pointer.
1901 */
1902
1903 if (Tcl_GetDoubleFromObj(interp, src, &dvalue) != TCL_OK) {
1904 if (src->typePtr != &tclDoubleType) {
1905 return TCL_ERROR;
1906 }
1907 dvalue = src->internalRep.doubleValue;
1908 }
1909 CopyNumber(&dvalue, *cursorPtr, sizeof(double), type);
1910 *cursorPtr += sizeof(double);
1911 return TCL_OK;
1912
1913 case 'f':
1914 case 'r':
1915 case 'R':
1916 /*
1917 * Single-precision floating point values. Tcl_GetDoubleFromObj
1918 * returns TCL_ERROR for NaN, but we can check by comparing the
1919 * object's type pointer.
1920 */
1921
1922 if (Tcl_GetDoubleFromObj(interp, src, &dvalue) != TCL_OK) {
1923 if (src->typePtr != &tclDoubleType) {
1924 return TCL_ERROR;
1925 }
1926 dvalue = src->internalRep.doubleValue;
1927 }
1928
1929 /*
1930 * Because some compilers will generate floating point exceptions on
1931 * an overflow cast (e.g. Borland), we restrict the values to the
1932 * valid range for float.
1933 */
1934
1935 if (fabs(dvalue) > (double) FLT_MAX) {
1936 fvalue = (dvalue >= 0.0) ? FLT_MAX : -FLT_MAX;
1937 } else {
1938 fvalue = (float) dvalue;
1939 }
1940 CopyNumber(&fvalue, *cursorPtr, sizeof(float), type);
1941 *cursorPtr += sizeof(float);
1942 return TCL_OK;
1943
1944 /*
1945 * 64-bit integer values.
1946 */
1947 case 'w':
1948 case 'W':
1949 case 'm':
1950 if (Tcl_GetWideIntFromObj(interp, src, &wvalue) != TCL_OK) {
1951 return TCL_ERROR;
1952 }
1953 if (NeedReversing(type)) {
1954 *(*cursorPtr)++ = UCHAR(wvalue);
1955 *(*cursorPtr)++ = UCHAR(wvalue >> 8);
1956 *(*cursorPtr)++ = UCHAR(wvalue >> 16);
1957 *(*cursorPtr)++ = UCHAR(wvalue >> 24);
1958 *(*cursorPtr)++ = UCHAR(wvalue >> 32);
1959 *(*cursorPtr)++ = UCHAR(wvalue >> 40);
1960 *(*cursorPtr)++ = UCHAR(wvalue >> 48);
1961 *(*cursorPtr)++ = UCHAR(wvalue >> 56);
1962 } else {
1963 *(*cursorPtr)++ = UCHAR(wvalue >> 56);
1964 *(*cursorPtr)++ = UCHAR(wvalue >> 48);
1965 *(*cursorPtr)++ = UCHAR(wvalue >> 40);
1966 *(*cursorPtr)++ = UCHAR(wvalue >> 32);
1967 *(*cursorPtr)++ = UCHAR(wvalue >> 24);
1968 *(*cursorPtr)++ = UCHAR(wvalue >> 16);
1969 *(*cursorPtr)++ = UCHAR(wvalue >> 8);
1970 *(*cursorPtr)++ = UCHAR(wvalue);
1971 }
1972 return TCL_OK;
1973
1974 /*
1975 * 32-bit integer values.
1976 */
1977 case 'i':
1978 case 'I':
1979 case 'n':
1980 if (TclGetLongFromObj(interp, src, &value) != TCL_OK) {
1981 return TCL_ERROR;
1982 }
1983 if (NeedReversing(type)) {
1984 *(*cursorPtr)++ = UCHAR(value);
1985 *(*cursorPtr)++ = UCHAR(value >> 8);
1986 *(*cursorPtr)++ = UCHAR(value >> 16);
1987 *(*cursorPtr)++ = UCHAR(value >> 24);
1988 } else {
1989 *(*cursorPtr)++ = UCHAR(value >> 24);
1990 *(*cursorPtr)++ = UCHAR(value >> 16);
1991 *(*cursorPtr)++ = UCHAR(value >> 8);
1992 *(*cursorPtr)++ = UCHAR(value);
1993 }
1994 return TCL_OK;
1995
1996 /*
1997 * 16-bit integer values.
1998 */
1999 case 's':
2000 case 'S':
2001 case 't':
2002 if (TclGetLongFromObj(interp, src, &value) != TCL_OK) {
2003 return TCL_ERROR;
2004 }
2005 if (NeedReversing(type)) {
2006 *(*cursorPtr)++ = UCHAR(value);
2007 *(*cursorPtr)++ = UCHAR(value >> 8);
2008 } else {
2009 *(*cursorPtr)++ = UCHAR(value >> 8);
2010 *(*cursorPtr)++ = UCHAR(value);
2011 }
2012 return TCL_OK;
2013
2014 /*
2015 * 8-bit integer values.
2016 */
2017 case 'c':
2018 if (TclGetLongFromObj(interp, src, &value) != TCL_OK) {
2019 return TCL_ERROR;
2020 }
2021 *(*cursorPtr)++ = UCHAR(value);
2022 return TCL_OK;
2023
2024 default:
2025 Tcl_Panic("unexpected fallthrough");
2026 return TCL_ERROR;
2027 }
2028 }
2029
2030 /*
2031 *----------------------------------------------------------------------
2032 *
2033 * ScanNumber --
2034 *
2035 * This routine is called by Tcl_BinaryObjCmd to scan a number out of a
2036 * buffer.
2037 *
2038 * Results:
2039 * Returns a newly created object containing the scanned number. This
2040 * object has a ref count of zero.
2041 *
2042 * Side effects:
2043 * Might reuse an object in the number cache, place a new object in the
2044 * cache, or delete the cache and set the reference to it (itself passed
2045 * in by reference) to NULL.
2046 *
2047 *----------------------------------------------------------------------
2048 */
2049
2050 static Tcl_Obj *
ScanNumber(unsigned char * buffer,int type,int flags,Tcl_HashTable ** numberCachePtrPtr)2051 ScanNumber(
2052 unsigned char *buffer, /* Buffer to scan number from. */
2053 int type, /* Format character from "binary scan" */
2054 int flags, /* Format field flags */
2055 Tcl_HashTable **numberCachePtrPtr)
2056 /* Place to look for cache of scanned value
2057 * objects, or NULL if too many different
2058 * numbers have been scanned. */
2059 {
2060 long value;
2061 float fvalue;
2062 double dvalue;
2063 Tcl_WideUInt uwvalue;
2064
2065 /*
2066 * We cannot rely on the compiler to properly sign extend integer values
2067 * when we cast from smaller values to larger values because we don't know
2068 * the exact size of the integer types. So, we have to handle sign
2069 * extension explicitly by checking the high bit and padding with 1's as
2070 * needed. This practice is disabled if the BINARY_UNSIGNED flag is set.
2071 */
2072
2073 switch (type) {
2074 case 'c':
2075 /*
2076 * Characters need special handling. We want to produce a signed
2077 * result, but on some platforms (such as AIX) chars are unsigned. To
2078 * deal with this, check for a value that should be negative but
2079 * isn't.
2080 */
2081
2082 value = buffer[0];
2083 if (!(flags & BINARY_UNSIGNED)) {
2084 if (value & 0x80) {
2085 value |= -0x100;
2086 }
2087 }
2088 goto returnNumericObject;
2089
2090 /*
2091 * 16-bit numeric values. We need the sign extension trick (see above)
2092 * here as well.
2093 */
2094
2095 case 's':
2096 case 'S':
2097 case 't':
2098 if (NeedReversing(type)) {
2099 value = (long) (buffer[0] + (buffer[1] << 8));
2100 } else {
2101 value = (long) (buffer[1] + (buffer[0] << 8));
2102 }
2103 if (!(flags & BINARY_UNSIGNED)) {
2104 if (value & 0x8000) {
2105 value |= -0x10000;
2106 }
2107 }
2108 goto returnNumericObject;
2109
2110 /*
2111 * 32-bit numeric values.
2112 */
2113
2114 case 'i':
2115 case 'I':
2116 case 'n':
2117 if (NeedReversing(type)) {
2118 value = (long) (buffer[0]
2119 + (buffer[1] << 8)
2120 + (buffer[2] << 16)
2121 + (((long)buffer[3]) << 24));
2122 } else {
2123 value = (long) (buffer[3]
2124 + (buffer[2] << 8)
2125 + (buffer[1] << 16)
2126 + (((long) buffer[0]) << 24));
2127 }
2128
2129 /*
2130 * Check to see if the value was sign extended properly on systems
2131 * where an int is more than 32-bits.
2132 *
2133 * We avoid caching unsigned integers as we cannot distinguish between
2134 * 32bit signed and unsigned in the hash (short and char are ok).
2135 */
2136
2137 if (flags & BINARY_UNSIGNED) {
2138 return Tcl_NewWideIntObj((Tcl_WideInt)(unsigned long)value);
2139 }
2140 if ((value & (((unsigned) 1) << 31)) && (value > 0)) {
2141 value -= (((unsigned) 1) << 31);
2142 value -= (((unsigned) 1) << 31);
2143 }
2144
2145 returnNumericObject:
2146 if (*numberCachePtrPtr == NULL) {
2147 return Tcl_NewLongObj(value);
2148 } else {
2149 Tcl_HashTable *tablePtr = *numberCachePtrPtr;
2150 Tcl_HashEntry *hPtr;
2151 int isNew;
2152
2153 hPtr = Tcl_CreateHashEntry(tablePtr, INT2PTR(value), &isNew);
2154 if (!isNew) {
2155 return (Tcl_Obj *)Tcl_GetHashValue(hPtr);
2156 }
2157 if (tablePtr->numEntries <= BINARY_SCAN_MAX_CACHE) {
2158 Tcl_Obj *objPtr = Tcl_NewLongObj(value);
2159
2160 Tcl_IncrRefCount(objPtr);
2161 Tcl_SetHashValue(hPtr, objPtr);
2162 return objPtr;
2163 }
2164
2165 /*
2166 * We've overflowed the cache! Someone's parsing a LOT of varied
2167 * binary data in a single call! Bail out by switching back to the
2168 * old behaviour for the rest of the scan.
2169 *
2170 * Note that anyone just using the 'c' conversion (for bytes)
2171 * cannot trigger this.
2172 */
2173
2174 DeleteScanNumberCache(tablePtr);
2175 *numberCachePtrPtr = NULL;
2176 return Tcl_NewLongObj(value);
2177 }
2178
2179 /*
2180 * Do not cache wide (64-bit) values; they are already too large to
2181 * use as keys.
2182 */
2183
2184 case 'w':
2185 case 'W':
2186 case 'm':
2187 if (NeedReversing(type)) {
2188 uwvalue = ((Tcl_WideUInt) buffer[0])
2189 | (((Tcl_WideUInt) buffer[1]) << 8)
2190 | (((Tcl_WideUInt) buffer[2]) << 16)
2191 | (((Tcl_WideUInt) buffer[3]) << 24)
2192 | (((Tcl_WideUInt) buffer[4]) << 32)
2193 | (((Tcl_WideUInt) buffer[5]) << 40)
2194 | (((Tcl_WideUInt) buffer[6]) << 48)
2195 | (((Tcl_WideUInt) buffer[7]) << 56);
2196 } else {
2197 uwvalue = ((Tcl_WideUInt) buffer[7])
2198 | (((Tcl_WideUInt) buffer[6]) << 8)
2199 | (((Tcl_WideUInt) buffer[5]) << 16)
2200 | (((Tcl_WideUInt) buffer[4]) << 24)
2201 | (((Tcl_WideUInt) buffer[3]) << 32)
2202 | (((Tcl_WideUInt) buffer[2]) << 40)
2203 | (((Tcl_WideUInt) buffer[1]) << 48)
2204 | (((Tcl_WideUInt) buffer[0]) << 56);
2205 }
2206 if (flags & BINARY_UNSIGNED) {
2207 Tcl_Obj *bigObj = NULL;
2208 mp_int big;
2209
2210 TclBNInitBignumFromWideUInt(&big, uwvalue);
2211 bigObj = Tcl_NewBignumObj(&big);
2212 return bigObj;
2213 }
2214 return Tcl_NewWideIntObj((Tcl_WideInt) uwvalue);
2215
2216 /*
2217 * Do not cache double values; they are already too large to use as
2218 * keys and the values stored are utterly incompatible with the
2219 * integer part of the cache.
2220 */
2221
2222 /*
2223 * 32-bit IEEE single-precision floating point.
2224 */
2225
2226 case 'f':
2227 case 'R':
2228 case 'r':
2229 CopyNumber(buffer, &fvalue, sizeof(float), type);
2230 return Tcl_NewDoubleObj(fvalue);
2231
2232 /*
2233 * 64-bit IEEE double-precision floating point.
2234 */
2235
2236 case 'd':
2237 case 'Q':
2238 case 'q':
2239 CopyNumber(buffer, &dvalue, sizeof(double), type);
2240 return Tcl_NewDoubleObj(dvalue);
2241 }
2242 return NULL;
2243 }
2244
2245 /*
2246 *----------------------------------------------------------------------
2247 *
2248 * DeleteScanNumberCache --
2249 *
2250 * Deletes the hash table acting as a scan number cache.
2251 *
2252 * Results:
2253 * None
2254 *
2255 * Side effects:
2256 * Decrements the reference counts of the objects in the cache.
2257 *
2258 *----------------------------------------------------------------------
2259 */
2260
2261 static void
DeleteScanNumberCache(Tcl_HashTable * numberCachePtr)2262 DeleteScanNumberCache(
2263 Tcl_HashTable *numberCachePtr)
2264 /* Pointer to the hash table, or NULL (when
2265 * the cache has already been deleted due to
2266 * overflow.) */
2267 {
2268 Tcl_HashEntry *hEntry;
2269 Tcl_HashSearch search;
2270
2271 if (numberCachePtr == NULL) {
2272 return;
2273 }
2274
2275 hEntry = Tcl_FirstHashEntry(numberCachePtr, &search);
2276 while (hEntry != NULL) {
2277 Tcl_Obj *value = (Tcl_Obj *)Tcl_GetHashValue(hEntry);
2278
2279 if (value != NULL) {
2280 Tcl_DecrRefCount(value);
2281 }
2282 hEntry = Tcl_NextHashEntry(&search);
2283 }
2284 Tcl_DeleteHashTable(numberCachePtr);
2285 }
2286
2287 /*
2288 * ----------------------------------------------------------------------
2289 *
2290 * NOTES --
2291 *
2292 * Some measurements show that it is faster to use a table to to perform
2293 * uuencode and base64 value encoding than to calculate the output (at
2294 * least on intel P4 arch).
2295 *
2296 * Conversely using a lookup table for the decoding is slower than just
2297 * calculating the values. We therefore use the fastest of each method.
2298 *
2299 * Presumably this has to do with the size of the tables. The base64
2300 * decode table is 255 bytes while the encode table is only 65 bytes. The
2301 * choice likely depends on CPU memory cache sizes.
2302 */
2303
2304 /*
2305 *----------------------------------------------------------------------
2306 *
2307 * BinaryEncodeHex --
2308 *
2309 * Implement the [binary encode hex] binary encoding. clientData must be
2310 * a table to convert values to hexadecimal digits.
2311 *
2312 * Results:
2313 * Interp result set to an encoded byte array object
2314 *
2315 * Side effects:
2316 * None
2317 *
2318 *----------------------------------------------------------------------
2319 */
2320
2321 static int
BinaryEncodeHex(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2322 BinaryEncodeHex(
2323 ClientData clientData,
2324 Tcl_Interp *interp,
2325 int objc,
2326 Tcl_Obj *const objv[])
2327 {
2328 Tcl_Obj *resultObj = NULL;
2329 unsigned char *data = NULL;
2330 unsigned char *cursor = NULL;
2331 int offset = 0, count = 0;
2332
2333 if (objc != 2) {
2334 Tcl_WrongNumArgs(interp, 1, objv, "data");
2335 return TCL_ERROR;
2336 }
2337
2338 TclNewObj(resultObj);
2339 data = Tcl_GetByteArrayFromObj(objv[1], &count);
2340 cursor = Tcl_SetByteArrayLength(resultObj, count * 2);
2341 for (offset = 0; offset < count; ++offset) {
2342 *cursor++ = HexDigits[(data[offset] >> 4) & 0x0F];
2343 *cursor++ = HexDigits[data[offset] & 0x0F];
2344 }
2345 Tcl_SetObjResult(interp, resultObj);
2346 return TCL_OK;
2347 }
2348
2349 /*
2350 *----------------------------------------------------------------------
2351 *
2352 * BinaryDecodeHex --
2353 *
2354 * Implement the [binary decode hex] binary encoding.
2355 *
2356 * Results:
2357 * Interp result set to an decoded byte array object
2358 *
2359 * Side effects:
2360 * None
2361 *
2362 *----------------------------------------------------------------------
2363 */
2364
2365 static int
BinaryDecodeHex(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2366 BinaryDecodeHex(
2367 ClientData clientData,
2368 Tcl_Interp *interp,
2369 int objc,
2370 Tcl_Obj *const objv[])
2371 {
2372 Tcl_Obj *resultObj = NULL;
2373 unsigned char *data, *datastart, *dataend;
2374 unsigned char *begin, *cursor, c;
2375 int i, index, value, size, pure, count = 0, cut = 0, strict = 0;
2376 Tcl_UniChar ch = 0;
2377 enum {OPT_STRICT };
2378 static const char *const optStrings[] = { "-strict", NULL };
2379
2380 if (objc < 2 || objc > 3) {
2381 Tcl_WrongNumArgs(interp, 1, objv, "?options? data");
2382 return TCL_ERROR;
2383 }
2384 for (i = 1; i < objc - 1; ++i) {
2385 if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2386 TCL_EXACT, &index) != TCL_OK) {
2387 return TCL_ERROR;
2388 }
2389 switch (index) {
2390 case OPT_STRICT:
2391 strict = 1;
2392 break;
2393 }
2394 }
2395
2396 TclNewObj(resultObj);
2397 pure = TclIsPureByteArray(objv[objc - 1]);
2398 datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
2399 : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
2400 dataend = data + count;
2401 size = (count + 1) / 2;
2402 begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
2403 while (data < dataend) {
2404 value = 0;
2405 for (i = 0 ; i < 2 ; i++) {
2406 if (data >= dataend) {
2407 value <<= 4;
2408 break;
2409 }
2410
2411 c = *data++;
2412 if (!isxdigit((int) c)) {
2413 if (strict || !TclIsSpaceProc(c)) {
2414 goto badChar;
2415 }
2416 i--;
2417 continue;
2418 }
2419
2420 value <<= 4;
2421 c -= '0';
2422 if (c > 9) {
2423 c += ('0' - 'A') + 10;
2424 }
2425 if (c > 16) {
2426 c += ('A' - 'a');
2427 }
2428 value |= c & 0xF;
2429 }
2430 if (i < 2) {
2431 cut++;
2432 }
2433 *cursor++ = UCHAR(value);
2434 value = 0;
2435 }
2436 if (cut > size) {
2437 cut = size;
2438 }
2439 Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
2440 Tcl_SetObjResult(interp, resultObj);
2441 return TCL_OK;
2442
2443 badChar:
2444 if (pure) {
2445 ch = c;
2446 } else {
2447 TclUtfToUniChar((const char *)(data - 1), &ch);
2448 }
2449 TclDecrRefCount(resultObj);
2450 Tcl_SetObjResult(interp, Tcl_ObjPrintf(
2451 "invalid hexadecimal digit \"%c\" at position %d",
2452 ch, (int) (data - datastart - 1)));
2453 Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
2454 return TCL_ERROR;
2455 }
2456
2457 /*
2458 *----------------------------------------------------------------------
2459 *
2460 * BinaryEncode64 --
2461 *
2462 * This procedure implements the "binary encode base64" Tcl command.
2463 *
2464 * Results:
2465 * The base64 encoded value prescribed by the input arguments.
2466 *
2467 *----------------------------------------------------------------------
2468 */
2469
2470 #define OUTPUT(c) \
2471 do { \
2472 *cursor++ = (c); \
2473 outindex++; \
2474 if (maxlen > 0 && cursor != limit) { \
2475 if (outindex == maxlen) { \
2476 memcpy(cursor, wrapchar, wrapcharlen); \
2477 cursor += wrapcharlen; \
2478 outindex = 0; \
2479 } \
2480 } \
2481 if (cursor > limit) { \
2482 Tcl_Panic("limit hit"); \
2483 } \
2484 } while (0)
2485
2486 static int
BinaryEncode64(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2487 BinaryEncode64(
2488 ClientData clientData,
2489 Tcl_Interp *interp,
2490 int objc,
2491 Tcl_Obj *const objv[])
2492 {
2493 Tcl_Obj *resultObj;
2494 unsigned char *data, *limit;
2495 int maxlen = 0;
2496 const char *wrapchar = "\n";
2497 int wrapcharlen = 1;
2498 int offset, i, index, size, outindex = 0, count = 0, purewrap = 1;
2499 enum { OPT_MAXLEN, OPT_WRAPCHAR };
2500 static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL };
2501
2502 if (objc < 2 || objc % 2 != 0) {
2503 Tcl_WrongNumArgs(interp, 1, objv,
2504 "?-maxlen len? ?-wrapchar char? data");
2505 return TCL_ERROR;
2506 }
2507 for (i = 1; i < objc - 1; i += 2) {
2508 if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2509 TCL_EXACT, &index) != TCL_OK) {
2510 return TCL_ERROR;
2511 }
2512 switch (index) {
2513 case OPT_MAXLEN:
2514 if (Tcl_GetIntFromObj(interp, objv[i + 1], &maxlen) != TCL_OK) {
2515 return TCL_ERROR;
2516 }
2517 if (maxlen < 0) {
2518 Tcl_SetObjResult(interp, Tcl_NewStringObj(
2519 "line length out of range", -1));
2520 Tcl_SetErrorCode(interp, "TCL", "BINARY", "ENCODE",
2521 "LINE_LENGTH", NULL);
2522 return TCL_ERROR;
2523 }
2524 break;
2525 case OPT_WRAPCHAR:
2526 purewrap = TclIsPureByteArray(objv[i + 1]);
2527 if (purewrap) {
2528 wrapchar = (const char *) Tcl_GetByteArrayFromObj(
2529 objv[i + 1], &wrapcharlen);
2530 } else {
2531 wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen);
2532 }
2533 break;
2534 }
2535 }
2536 if (wrapcharlen == 0) {
2537 maxlen = 0;
2538 }
2539
2540 resultObj = Tcl_NewObj();
2541 data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count);
2542 if (count > 0) {
2543 unsigned char *cursor = NULL;
2544
2545 size = (((count * 4) / 3) + 3) & ~3; /* ensure 4 byte chunks */
2546 if (maxlen > 0 && size > maxlen) {
2547 int adjusted = size + (wrapcharlen * (size / maxlen));
2548
2549 if (size % maxlen == 0) {
2550 adjusted -= wrapcharlen;
2551 }
2552 size = adjusted;
2553
2554 if (purewrap == 0) {
2555 /* Wrapchar is (possibly) non-byte, so build result as
2556 * general string, not bytearray */
2557 Tcl_SetObjLength(resultObj, size);
2558 cursor = (unsigned char *) TclGetString(resultObj);
2559 }
2560 }
2561 if (cursor == NULL) {
2562 cursor = Tcl_SetByteArrayLength(resultObj, size);
2563 }
2564 limit = cursor + size;
2565 for (offset = 0; offset < count; offset += 3) {
2566 unsigned char d[3] = {0, 0, 0};
2567
2568 for (i = 0; i < 3 && offset + i < count; ++i) {
2569 d[i] = data[offset + i];
2570 }
2571 OUTPUT(B64Digits[d[0] >> 2]);
2572 OUTPUT(B64Digits[((d[0] & 0x03) << 4) | (d[1] >> 4)]);
2573 if (offset + 1 < count) {
2574 OUTPUT(B64Digits[((d[1] & 0x0F) << 2) | (d[2] >> 6)]);
2575 } else {
2576 OUTPUT(B64Digits[64]);
2577 }
2578 if (offset+2 < count) {
2579 OUTPUT(B64Digits[d[2] & 0x3F]);
2580 } else {
2581 OUTPUT(B64Digits[64]);
2582 }
2583 }
2584 }
2585 Tcl_SetObjResult(interp, resultObj);
2586 return TCL_OK;
2587 }
2588 #undef OUTPUT
2589
2590 /*
2591 *----------------------------------------------------------------------
2592 *
2593 * BinaryEncodeUu --
2594 *
2595 * This implements the uuencode binary encoding. Input is broken into 6
2596 * bit chunks and a lookup table is used to turn these values into output
2597 * characters. This differs from the generic code above in that line
2598 * lengths are also encoded.
2599 *
2600 * Results:
2601 * Interp result set to an encoded byte array object
2602 *
2603 * Side effects:
2604 * None
2605 *
2606 *----------------------------------------------------------------------
2607 */
2608
2609 static int
BinaryEncodeUu(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2610 BinaryEncodeUu(
2611 ClientData clientData,
2612 Tcl_Interp *interp,
2613 int objc,
2614 Tcl_Obj *const objv[])
2615 {
2616 Tcl_Obj *resultObj;
2617 unsigned char *data, *start, *cursor;
2618 int offset, count, rawLength, n, i, j, bits, index;
2619 int lineLength = 61;
2620 const unsigned char SingleNewline[] = { UCHAR('\n') };
2621 const unsigned char *wrapchar = SingleNewline;
2622 int wrapcharlen = sizeof(SingleNewline);
2623 enum { OPT_MAXLEN, OPT_WRAPCHAR };
2624 static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL };
2625
2626 if (objc < 2 || objc % 2 != 0) {
2627 Tcl_WrongNumArgs(interp, 1, objv,
2628 "?-maxlen len? ?-wrapchar char? data");
2629 return TCL_ERROR;
2630 }
2631 for (i = 1; i < objc - 1; i += 2) {
2632 if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2633 TCL_EXACT, &index) != TCL_OK) {
2634 return TCL_ERROR;
2635 }
2636 switch (index) {
2637 case OPT_MAXLEN:
2638 if (Tcl_GetIntFromObj(interp, objv[i + 1],
2639 &lineLength) != TCL_OK) {
2640 return TCL_ERROR;
2641 }
2642 if (lineLength < 5 || lineLength > 85) {
2643 Tcl_SetObjResult(interp, Tcl_NewStringObj(
2644 "line length out of range", -1));
2645 Tcl_SetErrorCode(interp, "TCL", "BINARY", "ENCODE",
2646 "LINE_LENGTH", NULL);
2647 return TCL_ERROR;
2648 }
2649 lineLength = ((lineLength - 1) & -4) + 1; /* 5, 9, 13 ... */
2650 break;
2651 case OPT_WRAPCHAR:
2652 wrapchar = (const unsigned char *) TclGetStringFromObj(
2653 objv[i + 1], &wrapcharlen);
2654 {
2655 const unsigned char *p = wrapchar;
2656 int numBytes = wrapcharlen;
2657
2658 while (numBytes) {
2659 switch (*p) {
2660 case '\t':
2661 case '\v':
2662 case '\f':
2663 case '\r':
2664 p++; numBytes--;
2665 continue;
2666 case '\n':
2667 numBytes--;
2668 break;
2669 default:
2670 badwrap:
2671 Tcl_SetObjResult(interp, Tcl_NewStringObj(
2672 "invalid wrapchar; will defeat decoding",
2673 -1));
2674 Tcl_SetErrorCode(interp, "TCL", "BINARY",
2675 "ENCODE", "WRAPCHAR", NULL);
2676 return TCL_ERROR;
2677 }
2678 }
2679 if (numBytes) {
2680 goto badwrap;
2681 }
2682 }
2683 break;
2684 }
2685 }
2686
2687 /*
2688 * Allocate the buffer. This is a little bit too long, but is "good
2689 * enough".
2690 */
2691
2692 TclNewObj(resultObj);
2693 offset = 0;
2694 data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count);
2695 rawLength = (lineLength - 1) * 3 / 4;
2696 start = cursor = Tcl_SetByteArrayLength(resultObj,
2697 (lineLength + wrapcharlen) *
2698 ((count + (rawLength - 1)) / rawLength));
2699 n = bits = 0;
2700
2701 /*
2702 * Encode the data. Each output line first has the length of raw data
2703 * encoded by the output line described in it by one encoded byte, then
2704 * the encoded data follows (encoding each 6 bits as one character).
2705 * Encoded lines are always terminated by a newline.
2706 */
2707
2708 while (offset < count) {
2709 int lineLen = count - offset;
2710
2711 if (lineLen > rawLength) {
2712 lineLen = rawLength;
2713 }
2714 *cursor++ = UueDigits[lineLen];
2715 for (i = 0 ; i < lineLen ; i++) {
2716 n <<= 8;
2717 n |= data[offset++];
2718 for (bits += 8; bits > 6 ; bits -= 6) {
2719 *cursor++ = UueDigits[(n >> (bits - 6)) & 0x3F];
2720 }
2721 }
2722 if (bits > 0) {
2723 n <<= 8;
2724 *cursor++ = UueDigits[(n >> (bits + 2)) & 0x3F];
2725 bits = 0;
2726 }
2727 for (j = 0 ; j < wrapcharlen ; ++j) {
2728 *cursor++ = wrapchar[j];
2729 }
2730 }
2731
2732 /*
2733 * Fix the length of the output bytearray.
2734 */
2735
2736 Tcl_SetByteArrayLength(resultObj, cursor - start);
2737 Tcl_SetObjResult(interp, resultObj);
2738 return TCL_OK;
2739 }
2740
2741 /*
2742 *----------------------------------------------------------------------
2743 *
2744 * BinaryDecodeUu --
2745 *
2746 * Decode a uuencoded string.
2747 *
2748 * Results:
2749 * Interp result set to an byte array object
2750 *
2751 * Side effects:
2752 * None
2753 *
2754 *----------------------------------------------------------------------
2755 */
2756
2757 static int
BinaryDecodeUu(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2758 BinaryDecodeUu(
2759 ClientData clientData,
2760 Tcl_Interp *interp,
2761 int objc,
2762 Tcl_Obj *const objv[])
2763 {
2764 Tcl_Obj *resultObj = NULL;
2765 unsigned char *data, *datastart, *dataend;
2766 unsigned char *begin, *cursor;
2767 int i, index, size, pure, count = 0, strict = 0, lineLen;
2768 unsigned char c;
2769 Tcl_UniChar ch = 0;
2770 enum { OPT_STRICT };
2771 static const char *const optStrings[] = { "-strict", NULL };
2772
2773 if (objc < 2 || objc > 3) {
2774 Tcl_WrongNumArgs(interp, 1, objv, "?options? data");
2775 return TCL_ERROR;
2776 }
2777 for (i = 1; i < objc - 1; ++i) {
2778 if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2779 TCL_EXACT, &index) != TCL_OK) {
2780 return TCL_ERROR;
2781 }
2782 switch (index) {
2783 case OPT_STRICT:
2784 strict = 1;
2785 break;
2786 }
2787 }
2788
2789 TclNewObj(resultObj);
2790 pure = TclIsPureByteArray(objv[objc - 1]);
2791 datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
2792 : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
2793 dataend = data + count;
2794 size = ((count + 3) & ~3) * 3 / 4;
2795 begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
2796 lineLen = -1;
2797
2798 /*
2799 * The decoding loop. First, we get the length of line (strictly, the
2800 * number of data bytes we expect to generate from the line) we're
2801 * processing this time round if it is not already known (i.e., when the
2802 * lineLen variable is set to the magic value, -1).
2803 */
2804
2805 while (data < dataend) {
2806 char d[4] = {0, 0, 0, 0};
2807
2808 if (lineLen < 0) {
2809 c = *data++;
2810 if (c < 32 || c > 96) {
2811 if (strict || !TclIsSpaceProc(c)) {
2812 goto badUu;
2813 }
2814 i--;
2815 continue;
2816 }
2817 lineLen = (c - 32) & 0x3F;
2818 }
2819
2820 /*
2821 * Now we read a four-character grouping.
2822 */
2823
2824 for (i = 0 ; i < 4 ; i++) {
2825 if (data < dataend) {
2826 d[i] = c = *data++;
2827 if (c < 32 || c > 96) {
2828 if (strict) {
2829 if (!TclIsSpaceProc(c)) {
2830 goto badUu;
2831 } else if (c == '\n') {
2832 goto shortUu;
2833 }
2834 }
2835 i--;
2836 continue;
2837 }
2838 }
2839 }
2840
2841 /*
2842 * Translate that grouping into (up to) three binary bytes output.
2843 */
2844
2845 if (lineLen > 0) {
2846 *cursor++ = (((d[0] - 0x20) & 0x3F) << 2)
2847 | (((d[1] - 0x20) & 0x3F) >> 4);
2848 if (--lineLen > 0) {
2849 *cursor++ = (((d[1] - 0x20) & 0x3F) << 4)
2850 | (((d[2] - 0x20) & 0x3F) >> 2);
2851 if (--lineLen > 0) {
2852 *cursor++ = (((d[2] - 0x20) & 0x3F) << 6)
2853 | (((d[3] - 0x20) & 0x3F));
2854 lineLen--;
2855 }
2856 }
2857 }
2858
2859 /*
2860 * If we've reached the end of the line, skip until we process a
2861 * newline.
2862 */
2863
2864 if (lineLen == 0 && data < dataend) {
2865 lineLen = -1;
2866 do {
2867 c = *data++;
2868 if (c == '\n') {
2869 break;
2870 } else if (c >= 32 && c <= 96) {
2871 data--;
2872 break;
2873 } else if (strict || !TclIsSpaceProc(c)) {
2874 goto badUu;
2875 }
2876 } while (data < dataend);
2877 }
2878 }
2879
2880 /*
2881 * Sanity check, clean up and finish.
2882 */
2883
2884 if (lineLen > 0 && strict) {
2885 goto shortUu;
2886 }
2887 Tcl_SetByteArrayLength(resultObj, cursor - begin);
2888 Tcl_SetObjResult(interp, resultObj);
2889 return TCL_OK;
2890
2891 shortUu:
2892 Tcl_SetObjResult(interp, Tcl_ObjPrintf("short uuencode data"));
2893 Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "SHORT", NULL);
2894 TclDecrRefCount(resultObj);
2895 return TCL_ERROR;
2896
2897 badUu:
2898 if (pure) {
2899 ch = c;
2900 } else {
2901 TclUtfToUniChar((const char *)(data - 1), &ch);
2902 }
2903 Tcl_SetObjResult(interp, Tcl_ObjPrintf(
2904 "invalid uuencode character \"%c\" at position %d",
2905 ch, (int) (data - datastart - 1)));
2906 Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
2907 TclDecrRefCount(resultObj);
2908 return TCL_ERROR;
2909 }
2910
2911 /*
2912 *----------------------------------------------------------------------
2913 *
2914 * BinaryDecode64 --
2915 *
2916 * Decode a base64 encoded string.
2917 *
2918 * Results:
2919 * Interp result set to an byte array object
2920 *
2921 * Side effects:
2922 * None
2923 *
2924 *----------------------------------------------------------------------
2925 */
2926
2927 static int
BinaryDecode64(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2928 BinaryDecode64(
2929 ClientData clientData,
2930 Tcl_Interp *interp,
2931 int objc,
2932 Tcl_Obj *const objv[])
2933 {
2934 Tcl_Obj *resultObj = NULL;
2935 unsigned char *data, *datastart, *dataend, c = '\0';
2936 unsigned char *begin = NULL;
2937 unsigned char *cursor = NULL;
2938 int pure, strict = 0;
2939 int i, index, size, cut = 0, count = 0;
2940 Tcl_UniChar ch = 0;
2941 enum { OPT_STRICT };
2942 static const char *const optStrings[] = { "-strict", NULL };
2943
2944 if (objc < 2 || objc > 3) {
2945 Tcl_WrongNumArgs(interp, 1, objv, "?options? data");
2946 return TCL_ERROR;
2947 }
2948 for (i = 1; i < objc - 1; ++i) {
2949 if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2950 TCL_EXACT, &index) != TCL_OK) {
2951 return TCL_ERROR;
2952 }
2953 switch (index) {
2954 case OPT_STRICT:
2955 strict = 1;
2956 break;
2957 }
2958 }
2959
2960 TclNewObj(resultObj);
2961 pure = TclIsPureByteArray(objv[objc - 1]);
2962 datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
2963 : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
2964 dataend = data + count;
2965 size = ((count + 3) & ~3) * 3 / 4;
2966 begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
2967 while (data < dataend) {
2968 unsigned long value = 0;
2969
2970 /*
2971 * Decode the current block. Each base64 block consists of four input
2972 * characters A-Z, a-z, 0-9, +, or /. Each character supplies six bits
2973 * of output data, so each block's output is 24 bits (three bytes) in
2974 * length. The final block can be shorter by one or two bytes, denoted
2975 * by the input ending with one or two ='s, respectively.
2976 */
2977
2978 for (i = 0; i < 4; i++) {
2979 /*
2980 * Get the next input character. At end of input, pad with at most
2981 * two ='s. If more than two ='s would be needed, instead discard
2982 * the block read thus far.
2983 */
2984
2985 if (data < dataend) {
2986 c = *data++;
2987 } else if (i > 1) {
2988 c = '=';
2989 } else {
2990 if (strict && i <= 1) {
2991 /*
2992 * Single resp. unfulfilled char (each 4th next single
2993 * char) is rather bad64 error case in strict mode.
2994 */
2995
2996 goto bad64;
2997 }
2998 cut += 3;
2999 break;
3000 }
3001
3002 /*
3003 * Load the character into the block value. Handle ='s specially
3004 * because they're only valid as the last character or two of the
3005 * final block of input. Unless strict mode is enabled, skip any
3006 * input whitespace characters.
3007 */
3008
3009 if (cut) {
3010 if (c == '=' && i > 1) {
3011 value <<= 6;
3012 cut++;
3013 } else if (!strict) {
3014 i--;
3015 } else {
3016 goto bad64;
3017 }
3018 } else if (c >= 'A' && c <= 'Z') {
3019 value = (value << 6) | ((c - 'A') & 0x3F);
3020 } else if (c >= 'a' && c <= 'z') {
3021 value = (value << 6) | ((c - 'a' + 26) & 0x3F);
3022 } else if (c >= '0' && c <= '9') {
3023 value = (value << 6) | ((c - '0' + 52) & 0x3F);
3024 } else if (c == '+') {
3025 value = (value << 6) | 0x3E;
3026 } else if (c == '/') {
3027 value = (value << 6) | 0x3F;
3028 } else if (c == '=' && (!strict || i > 1)) {
3029 /*
3030 * "=" and "a=" is rather bad64 error case in strict mode.
3031 */
3032
3033 value <<= 6;
3034 if (i) {
3035 cut++;
3036 }
3037 } else if (strict) {
3038 goto bad64;
3039 } else {
3040 i--;
3041 }
3042 }
3043 *cursor++ = UCHAR((value >> 16) & 0xFF);
3044 *cursor++ = UCHAR((value >> 8) & 0xFF);
3045 *cursor++ = UCHAR(value & 0xFF);
3046
3047 /*
3048 * Since = is only valid within the final block, if it was encountered
3049 * but there are still more input characters, confirm that strict mode
3050 * is off and all subsequent characters are whitespace.
3051 */
3052
3053 if (cut && data < dataend) {
3054 if (strict) {
3055 goto bad64;
3056 }
3057 }
3058 }
3059 Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
3060 Tcl_SetObjResult(interp, resultObj);
3061 return TCL_OK;
3062
3063 bad64:
3064 if (pure) {
3065 ch = c;
3066 } else {
3067 /* The decoder is byte-oriented. If we saw a byte that's not a
3068 * valid member of the base64 alphabet, it could be the lead byte
3069 * of a multi-byte character. */
3070
3071 /* Safe because we know data is NUL-terminated */
3072 TclUtfToUniChar((const char *)(data - 1), &ch);
3073 }
3074
3075 Tcl_SetObjResult(interp, Tcl_ObjPrintf(
3076 "invalid base64 character \"%c\" at position %d", ch,
3077 (int) (data - datastart - 1)));
3078 Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
3079 TclDecrRefCount(resultObj);
3080 return TCL_ERROR;
3081 }
3082
3083 /*
3084 * Local Variables:
3085 * mode: c
3086 * c-basic-offset: 4
3087 * fill-column: 78
3088 * End:
3089 */
3090