1 /*
2  * tclBinary.c --
3  *
4  *	This file contains the implementation of the "binary" Tcl built-in
5  *	command and the Tcl binary data object.
6  *
7  * Copyright (c) 1997 by Sun Microsystems, Inc.
8  * Copyright (c) 1998-1999 by Scriptics Corporation.
9  *
10  * See the file "license.terms" for information on usage and redistribution of
11  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
12  */
13 
14 #include "tclInt.h"
15 #include "tommath.h"
16 
17 #include <math.h>
18 
19 /*
20  * The following constants are used by GetFormatSpec to indicate various
21  * special conditions in the parsing of a format specifier.
22  */
23 
24 #define BINARY_ALL -1		/* Use all elements in the argument. */
25 #define BINARY_NOCOUNT -2	/* No count was specified in format. */
26 
27 /*
28  * The following flags may be ORed together and returned by GetFormatSpec
29  */
30 
31 #define BINARY_SIGNED 0		/* Field to be read as signed data */
32 #define BINARY_UNSIGNED 1	/* Field to be read as unsigned data */
33 
34 /*
35  * The following defines the maximum number of different (integer) numbers
36  * placed in the object cache by 'binary scan' before it bails out and
37  * switches back to Plan A (creating a new object for each value.)
38  * Theoretically, it would be possible to keep the cache about for the values
39  * that are already in it, but that makes the code slower in practise when
40  * overflow happens, and makes little odds the rest of the time (as measured
41  * on my machine.) It is also slower (on the sample I tried at least) to grow
42  * the cache to hold all items we might want to put in it; presumably the
43  * extra cost of managing the memory for the enlarged table outweighs the
44  * benefit from allocating fewer objects. This is probably because as the
45  * number of objects increases, the likelihood of reuse of any particular one
46  * drops, and there is very little gain from larger maximum cache sizes (the
47  * value below is chosen to allow caching to work in full with conversion of
48  * bytes.) - DKF
49  */
50 
51 #define BINARY_SCAN_MAX_CACHE	260
52 
53 /*
54  * Prototypes for local procedures defined in this file:
55  */
56 
57 static void		DupByteArrayInternalRep(Tcl_Obj *srcPtr,
58 			    Tcl_Obj *copyPtr);
59 static int		FormatNumber(Tcl_Interp *interp, int type,
60 			    Tcl_Obj *src, unsigned char **cursorPtr);
61 static void		FreeByteArrayInternalRep(Tcl_Obj *objPtr);
62 static int		GetFormatSpec(const char **formatPtr, char *cmdPtr,
63 			    int *countPtr, int *flagsPtr);
64 static Tcl_Obj *	ScanNumber(unsigned char *buffer, int type,
65 			    int flags, Tcl_HashTable **numberCachePtr);
66 static int		SetByteArrayFromAny(Tcl_Interp *interp,
67 			    Tcl_Obj *objPtr);
68 static void		UpdateStringOfByteArray(Tcl_Obj *listPtr);
69 static void		DeleteScanNumberCache(Tcl_HashTable *numberCachePtr);
70 static int		NeedReversing(int format);
71 static void		CopyNumber(const void *from, void *to,
72 			    unsigned length, int type);
73 /* Binary ensemble commands */
74 static int		BinaryFormatCmd(ClientData clientData,
75 			    Tcl_Interp *interp,
76 			    int objc, Tcl_Obj *const objv[]);
77 static int		BinaryScanCmd(ClientData clientData,
78 			    Tcl_Interp *interp,
79 			    int objc, Tcl_Obj *const objv[]);
80 /* Binary encoding sub-ensemble commands */
81 static int		BinaryEncodeHex(ClientData clientData,
82 			    Tcl_Interp *interp,
83 			    int objc, Tcl_Obj *const objv[]);
84 static int		BinaryDecodeHex(ClientData clientData,
85 			    Tcl_Interp *interp,
86 			    int objc, Tcl_Obj *const objv[]);
87 static int		BinaryEncode64(ClientData clientData,
88 			    Tcl_Interp *interp,
89 			    int objc, Tcl_Obj *const objv[]);
90 static int		BinaryDecode64(ClientData clientData,
91 			    Tcl_Interp *interp,
92 			    int objc, Tcl_Obj *const objv[]);
93 static int		BinaryEncodeUu(ClientData clientData,
94 			    Tcl_Interp *interp, int objc,
95 			    Tcl_Obj *const objv[]);
96 static int		BinaryDecodeUu(ClientData clientData,
97 			    Tcl_Interp *interp,
98 			    int objc, Tcl_Obj *const objv[]);
99 
100 /*
101  * The following tables are used by the binary encoders
102  */
103 
104 static const char HexDigits[16] = {
105     '0', '1', '2', '3', '4', '5', '6', '7',
106     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
107 };
108 
109 static const char UueDigits[65] = {
110     '`', '!', '"', '#', '$', '%', '&', '\'',
111     '(', ')', '*', '+', ',', '-', '.', '/',
112     '0', '1', '2', '3', '4', '5', '6', '7',
113     '8', '9', ':', ';', '<', '=', '>', '?',
114     '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
115     'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
116     'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
117     'X', 'Y', 'Z', '[', '\\',']', '^', '_',
118     '`'
119 };
120 
121 static const char B64Digits[65] = {
122     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
123     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
124     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
125     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
126     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
127     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
128     'w', 'x', 'y', 'z', '0', '1', '2', '3',
129     '4', '5', '6', '7', '8', '9', '+', '/',
130     '='
131 };
132 
133 /*
134  * How to construct the ensembles.
135  */
136 
137 static const EnsembleImplMap binaryMap[] = {
138     { "format", BinaryFormatCmd, TclCompileBasicMin1ArgCmd, NULL, NULL, 0 },
139     { "scan",   BinaryScanCmd, TclCompileBasicMin2ArgCmd, NULL, NULL, 0 },
140     { "encode", NULL, NULL, NULL, NULL, 0 },
141     { "decode", NULL, NULL, NULL, NULL, 0 },
142     { NULL, NULL, NULL, NULL, NULL, 0 }
143 };
144 static const EnsembleImplMap encodeMap[] = {
145     { "hex",      BinaryEncodeHex, TclCompileBasic1ArgCmd, NULL, NULL, 0 },
146     { "uuencode", BinaryEncodeUu,  NULL, NULL, NULL, 0 },
147     { "base64",   BinaryEncode64,  NULL, NULL, NULL, 0 },
148     { NULL, NULL, NULL, NULL, NULL, 0 }
149 };
150 static const EnsembleImplMap decodeMap[] = {
151     { "hex",      BinaryDecodeHex, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0 },
152     { "uuencode", BinaryDecodeUu,  TclCompileBasic1Or2ArgCmd, NULL, NULL, 0 },
153     { "base64",   BinaryDecode64,  TclCompileBasic1Or2ArgCmd, NULL, NULL, 0 },
154     { NULL, NULL, NULL, NULL, NULL, 0 }
155 };
156 
157 /*
158  * The following object type represents an array of bytes. An array of bytes
159  * is not equivalent to an internationalized string. Conceptually, a string is
160  * an array of 16-bit quantities organized as a sequence of properly formed
161  * UTF-8 characters, while a ByteArray is an array of 8-bit quantities.
162  * Accessor functions are provided to convert a ByteArray to a String or a
163  * String to a ByteArray. Two or more consecutive bytes in an array of bytes
164  * may look like a single UTF-8 character if the array is casually treated as
165  * a string. But obtaining the String from a ByteArray is guaranteed to
166  * produced properly formed UTF-8 sequences so that there is a one-to-one map
167  * between bytes and characters.
168  *
169  * Converting a ByteArray to a String proceeds by casting each byte in the
170  * array to a 16-bit quantity, treating that number as a Unicode character,
171  * and storing the UTF-8 version of that Unicode character in the String. For
172  * ByteArrays consisting entirely of values 1..127, the corresponding String
173  * representation is the same as the ByteArray representation.
174  *
175  * Converting a String to a ByteArray proceeds by getting the Unicode
176  * representation of each character in the String, casting it to a byte by
177  * truncating the upper 8 bits, and then storing the byte in the ByteArray.
178  * Converting from ByteArray to String and back to ByteArray is not lossy, but
179  * converting an arbitrary String to a ByteArray may be.
180  */
181 
182 const Tcl_ObjType tclByteArrayType = {
183     "bytearray",
184     FreeByteArrayInternalRep,
185     DupByteArrayInternalRep,
186     UpdateStringOfByteArray,
187     SetByteArrayFromAny
188 };
189 
190 /*
191  * The following structure is the internal rep for a ByteArray object. Keeps
192  * track of how much memory has been used and how much has been allocated for
193  * the byte array to enable growing and shrinking of the ByteArray object with
194  * fewer mallocs.
195  */
196 
197 typedef struct ByteArray {
198     int used;			/* The number of bytes used in the byte
199 				 * array. */
200     int allocated;		/* The amount of space actually allocated
201 				 * minus 1 byte. */
202     unsigned char bytes[TCLFLEXARRAY];	/* The array of bytes. The actual size of this
203 				 * field depends on the 'allocated' field
204 				 * above. */
205 } ByteArray;
206 
207 #define BYTEARRAY_SIZE(len) \
208 		((unsigned) (TclOffset(ByteArray, bytes) + (len)))
209 #define GET_BYTEARRAY(objPtr) \
210 		((ByteArray *) (objPtr)->internalRep.twoPtrValue.ptr1)
211 #define SET_BYTEARRAY(objPtr, baPtr) \
212 		(objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (baPtr)
213 
214 
215 /*
216  *----------------------------------------------------------------------
217  *
218  * Tcl_NewByteArrayObj --
219  *
220  *	This procedure is creates a new ByteArray object and initializes it
221  *	from the given array of bytes.
222  *
223  * Results:
224  *	The newly create object is returned. This object will have no initial
225  *	string representation. The returned object has a ref count of 0.
226  *
227  * Side effects:
228  *	Memory allocated for new object and copy of byte array argument.
229  *
230  *----------------------------------------------------------------------
231  */
232 
233 #undef Tcl_NewByteArrayObj
234 
235 Tcl_Obj *
Tcl_NewByteArrayObj(const unsigned char * bytes,int length)236 Tcl_NewByteArrayObj(
237     const unsigned char *bytes,	/* The array of bytes used to initialize the
238 				 * new object. */
239     int length)			/* Length of the array of bytes, which must be
240 				 * >= 0. */
241 {
242 #ifdef TCL_MEM_DEBUG
243     return Tcl_DbNewByteArrayObj(bytes, length, "unknown", 0);
244 #else /* if not TCL_MEM_DEBUG */
245     Tcl_Obj *objPtr;
246 
247     TclNewObj(objPtr);
248     Tcl_SetByteArrayObj(objPtr, bytes, length);
249     return objPtr;
250 #endif /* TCL_MEM_DEBUG */
251 }
252 
253 /*
254  *----------------------------------------------------------------------
255  *
256  * Tcl_DbNewByteArrayObj --
257  *
258  *	This procedure is normally called when debugging: i.e., when
259  *	TCL_MEM_DEBUG is defined. It is the same as the Tcl_NewByteArrayObj
260  *	above except that it calls Tcl_DbCkalloc directly with the file name
261  *	and line number from its caller. This simplifies debugging since then
262  *	the [memory active] command will report the correct file name and line
263  *	number when reporting objects that haven't been freed.
264  *
265  *	When TCL_MEM_DEBUG is not defined, this procedure just returns the
266  *	result of calling Tcl_NewByteArrayObj.
267  *
268  * Results:
269  *	The newly create object is returned. This object will have no initial
270  *	string representation. The returned object has a ref count of 0.
271  *
272  * Side effects:
273  *	Memory allocated for new object and copy of byte array argument.
274  *
275  *----------------------------------------------------------------------
276  */
277 
278 Tcl_Obj *
Tcl_DbNewByteArrayObj(const unsigned char * bytes,int length,const char * file,int line)279 Tcl_DbNewByteArrayObj(
280     const unsigned char *bytes,	/* The array of bytes used to initialize the
281 				 * new object. */
282     int length,			/* Length of the array of bytes, which must be
283 				 * >= 0. */
284     const char *file,		/* The name of the source file calling this
285 				 * procedure; used for debugging. */
286     int line)			/* Line number in the source file; used for
287 				 * debugging. */
288 {
289 #ifdef TCL_MEM_DEBUG
290     Tcl_Obj *objPtr;
291 
292     TclDbNewObj(objPtr, file, line);
293     Tcl_SetByteArrayObj(objPtr, bytes, length);
294     return objPtr;
295 #else /* if not TCL_MEM_DEBUG */
296     return Tcl_NewByteArrayObj(bytes, length);
297 #endif /* TCL_MEM_DEBUG */
298 }
299 
300 /*
301  *---------------------------------------------------------------------------
302  *
303  * Tcl_SetByteArrayObj --
304  *
305  *	Modify an object to be a ByteArray object and to have the specified
306  *	array of bytes as its value.
307  *
308  * Results:
309  *	None.
310  *
311  * Side effects:
312  *	The object's old string rep and internal rep is freed. Memory
313  *	allocated for copy of byte array argument.
314  *
315  *----------------------------------------------------------------------
316  */
317 
318 void
Tcl_SetByteArrayObj(Tcl_Obj * objPtr,const unsigned char * bytes,int length)319 Tcl_SetByteArrayObj(
320     Tcl_Obj *objPtr,		/* Object to initialize as a ByteArray. */
321     const unsigned char *bytes,	/* The array of bytes to use as the new value.
322 				 * May be NULL even if length > 0. */
323     int length)			/* Length of the array of bytes, which must
324 				 * be >= 0. */
325 {
326     ByteArray *byteArrayPtr;
327 
328     if (Tcl_IsShared(objPtr)) {
329 	Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayObj");
330     }
331     TclFreeIntRep(objPtr);
332     TclInvalidateStringRep(objPtr);
333 
334     if (length < 0) {
335 	length = 0;
336     }
337     byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
338     byteArrayPtr->used = length;
339     byteArrayPtr->allocated = length;
340 
341     if ((bytes != NULL) && (length > 0)) {
342 	memcpy(byteArrayPtr->bytes, bytes, length);
343     }
344     objPtr->typePtr = &tclByteArrayType;
345     SET_BYTEARRAY(objPtr, byteArrayPtr);
346 }
347 
348 /*
349  *----------------------------------------------------------------------
350  *
351  * Tcl_GetByteArrayFromObj --
352  *
353  *	Attempt to get the array of bytes from the Tcl object. If the object
354  *	is not already a ByteArray object, an attempt will be made to convert
355  *	it to one.
356  *
357  * Results:
358  *	Pointer to array of bytes representing the ByteArray object.
359  *
360  * Side effects:
361  *	Frees old internal rep. Allocates memory for new internal rep.
362  *
363  *----------------------------------------------------------------------
364  */
365 
366 unsigned char *
Tcl_GetByteArrayFromObj(Tcl_Obj * objPtr,int * lengthPtr)367 Tcl_GetByteArrayFromObj(
368     Tcl_Obj *objPtr,		/* The ByteArray object. */
369     int *lengthPtr)		/* If non-NULL, filled with length of the
370 				 * array of bytes in the ByteArray object. */
371 {
372     ByteArray *baPtr;
373 
374     if (objPtr->typePtr != &tclByteArrayType) {
375 	SetByteArrayFromAny(NULL, objPtr);
376     }
377     baPtr = GET_BYTEARRAY(objPtr);
378 
379     if (lengthPtr != NULL) {
380 	*lengthPtr = baPtr->used;
381     }
382     return (unsigned char *) baPtr->bytes;
383 }
384 
385 /*
386  *----------------------------------------------------------------------
387  *
388  * Tcl_SetByteArrayLength --
389  *
390  *	This procedure changes the length of the byte array for this object.
391  *	Once the caller has set the length of the array, it is acceptable to
392  *	directly modify the bytes in the array up until Tcl_GetStringFromObj()
393  *	has been called on this object.
394  *
395  * Results:
396  *	The new byte array of the specified length.
397  *
398  * Side effects:
399  *	Allocates enough memory for an array of bytes of the requested size.
400  *	When growing the array, the old array is copied to the new array; new
401  *	bytes are undefined. When shrinking, the old array is truncated to the
402  *	specified length.
403  *
404  *----------------------------------------------------------------------
405  */
406 
407 unsigned char *
Tcl_SetByteArrayLength(Tcl_Obj * objPtr,int length)408 Tcl_SetByteArrayLength(
409     Tcl_Obj *objPtr,		/* The ByteArray object. */
410     int length)			/* New length for internal byte array. */
411 {
412     ByteArray *byteArrayPtr;
413 
414     if (Tcl_IsShared(objPtr)) {
415 	Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayLength");
416     }
417     if (objPtr->typePtr != &tclByteArrayType) {
418 	SetByteArrayFromAny(NULL, objPtr);
419     }
420 
421     byteArrayPtr = GET_BYTEARRAY(objPtr);
422     if (length > byteArrayPtr->allocated) {
423 	byteArrayPtr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(length));
424 	byteArrayPtr->allocated = length;
425 	SET_BYTEARRAY(objPtr, byteArrayPtr);
426     }
427     TclInvalidateStringRep(objPtr);
428     byteArrayPtr->used = length;
429     return byteArrayPtr->bytes;
430 }
431 
432 /*
433  *----------------------------------------------------------------------
434  *
435  * SetByteArrayFromAny --
436  *
437  *	Generate the ByteArray internal rep from the string rep.
438  *
439  * Results:
440  *	The return value is always TCL_OK.
441  *
442  * Side effects:
443  *	A ByteArray object is stored as the internal rep of objPtr.
444  *
445  *----------------------------------------------------------------------
446  */
447 
448 static int
SetByteArrayFromAny(Tcl_Interp * interp,Tcl_Obj * objPtr)449 SetByteArrayFromAny(
450     Tcl_Interp *interp,		/* Not used. */
451     Tcl_Obj *objPtr)		/* The object to convert to type ByteArray. */
452 {
453     int length;
454     const char *src, *srcEnd;
455     unsigned char *dst;
456     ByteArray *byteArrayPtr;
457     Tcl_UniChar ch = 0;
458 
459     if (objPtr->typePtr != &tclByteArrayType) {
460 	src = TclGetStringFromObj(objPtr, &length);
461 	srcEnd = src + length;
462 
463 	byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
464 	for (dst = byteArrayPtr->bytes; src < srcEnd; ) {
465 	    src += TclUtfToUniChar(src, &ch);
466 	    *dst++ = UCHAR(ch);
467 	}
468 
469 	byteArrayPtr->used = dst - byteArrayPtr->bytes;
470 	byteArrayPtr->allocated = length;
471 
472 	TclFreeIntRep(objPtr);
473 	objPtr->typePtr = &tclByteArrayType;
474 	SET_BYTEARRAY(objPtr, byteArrayPtr);
475     }
476     return TCL_OK;
477 }
478 
479 /*
480  *----------------------------------------------------------------------
481  *
482  * FreeByteArrayInternalRep --
483  *
484  *	Deallocate the storage associated with a ByteArray data object's
485  *	internal representation.
486  *
487  * Results:
488  *	None.
489  *
490  * Side effects:
491  *	Frees memory.
492  *
493  *----------------------------------------------------------------------
494  */
495 
496 static void
FreeByteArrayInternalRep(Tcl_Obj * objPtr)497 FreeByteArrayInternalRep(
498     Tcl_Obj *objPtr)		/* Object with internal rep to free. */
499 {
500     ckfree(GET_BYTEARRAY(objPtr));
501     objPtr->typePtr = NULL;
502 }
503 
504 /*
505  *----------------------------------------------------------------------
506  *
507  * DupByteArrayInternalRep --
508  *
509  *	Initialize the internal representation of a ByteArray Tcl_Obj to a
510  *	copy of the internal representation of an existing ByteArray object.
511  *
512  * Results:
513  *	None.
514  *
515  * Side effects:
516  *	Allocates memory.
517  *
518  *----------------------------------------------------------------------
519  */
520 
521 static void
DupByteArrayInternalRep(Tcl_Obj * srcPtr,Tcl_Obj * copyPtr)522 DupByteArrayInternalRep(
523     Tcl_Obj *srcPtr,		/* Object with internal rep to copy. */
524     Tcl_Obj *copyPtr)		/* Object with internal rep to set. */
525 {
526     int length;
527     ByteArray *srcArrayPtr, *copyArrayPtr;
528 
529     srcArrayPtr = GET_BYTEARRAY(srcPtr);
530     length = srcArrayPtr->used;
531 
532     copyArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
533     copyArrayPtr->used = length;
534     copyArrayPtr->allocated = length;
535     memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, length);
536     SET_BYTEARRAY(copyPtr, copyArrayPtr);
537 
538     copyPtr->typePtr = &tclByteArrayType;
539 }
540 
541 /*
542  *----------------------------------------------------------------------
543  *
544  * UpdateStringOfByteArray --
545  *
546  *	Update the string representation for a ByteArray data object. Note:
547  *	This procedure does not invalidate an existing old string rep so
548  *	storage will be lost if this has not already been done.
549  *
550  * Results:
551  *	None.
552  *
553  * Side effects:
554  *	The object's string is set to a valid string that results from the
555  *	ByteArray-to-string conversion.
556  *
557  *	The object becomes a string object -- the internal rep is discarded
558  *	and the typePtr becomes NULL.
559  *
560  *----------------------------------------------------------------------
561  */
562 
563 static void
UpdateStringOfByteArray(Tcl_Obj * objPtr)564 UpdateStringOfByteArray(
565     Tcl_Obj *objPtr)		/* ByteArray object whose string rep to
566 				 * update. */
567 {
568     int i, length, size;
569     unsigned char *src;
570     char *dst;
571     ByteArray *byteArrayPtr;
572 
573     byteArrayPtr = GET_BYTEARRAY(objPtr);
574     src = byteArrayPtr->bytes;
575     length = byteArrayPtr->used;
576 
577     /*
578      * How much space will string rep need?
579      */
580 
581     size = length;
582     for (i = 0; i < length && size >= 0; i++) {
583 	if ((src[i] == 0) || (src[i] > 127)) {
584 	    size++;
585 	}
586     }
587     if (size < 0) {
588 	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
589     }
590 
591     dst = (char *)ckalloc(size + 1);
592     objPtr->bytes = dst;
593     objPtr->length = size;
594 
595     if (size == length) {
596 	memcpy(dst, src, size);
597 	dst[size] = '\0';
598     } else {
599 	for (i = 0; i < length; i++) {
600 	    dst += Tcl_UniCharToUtf(src[i], dst);
601 	}
602 	*dst = '\0';
603     }
604 }
605 
606 /*
607  *----------------------------------------------------------------------
608  *
609  * TclAppendBytesToByteArray --
610  *
611  *	This function appends an array of bytes to a byte array object. Note
612  *	that the object *must* be unshared, and the array of bytes *must not*
613  *	refer to the object being appended to.
614  *
615  * Results:
616  *	None.
617  *
618  * Side effects:
619  *	Allocates enough memory for an array of bytes of the requested total
620  *	size, or possibly larger. [Bug 2992970]
621  *
622  *----------------------------------------------------------------------
623  */
624 
625 void
TclAppendBytesToByteArray(Tcl_Obj * objPtr,const unsigned char * bytes,int len)626 TclAppendBytesToByteArray(
627     Tcl_Obj *objPtr,
628     const unsigned char *bytes,
629     int len)
630 {
631     ByteArray *byteArrayPtr;
632     int needed;
633 
634     if (Tcl_IsShared(objPtr)) {
635 	Tcl_Panic("%s called with shared object","TclAppendBytesToByteArray");
636     }
637     if (len < 0) {
638 	Tcl_Panic("%s must be called with definite number of bytes to append",
639 		"TclAppendBytesToByteArray");
640     }
641     if (len == 0) {
642 	/*
643 	 * Append zero bytes is a no-op.
644 	 */
645 
646 	return;
647     }
648     if (objPtr->typePtr != &tclByteArrayType) {
649 	SetByteArrayFromAny(NULL, objPtr);
650     }
651     byteArrayPtr = GET_BYTEARRAY(objPtr);
652 
653     if (len > INT_MAX - byteArrayPtr->used) {
654 	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
655     }
656 
657     needed = byteArrayPtr->used + len;
658     /*
659      * If we need to, resize the allocated space in the byte array.
660      */
661 
662     if (needed > byteArrayPtr->allocated) {
663 	ByteArray *ptr = NULL;
664 	int attempt;
665 
666 	if (needed <= INT_MAX/2) {
667 	    /*
668 	     * Try to allocate double the total space that is needed.
669 	     */
670 
671 	    attempt = 2 * needed;
672 	    ptr = (ByteArray *)attemptckrealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt));
673 	}
674 	if (ptr == NULL) {
675 	    /*
676 	     * Try to allocate double the increment that is needed (plus).
677 	     */
678 
679 	    unsigned int limit = INT_MAX - needed;
680 	    unsigned int extra = len + TCL_MIN_GROWTH;
681 	    int growth = (int) ((extra > limit) ? limit : extra);
682 
683 	    attempt = needed + growth;
684 	    ptr = (ByteArray *)attemptckrealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt));
685 	}
686 	if (ptr == NULL) {
687 	    /*
688 	     * Last chance: Try to allocate exactly what is needed.
689 	     */
690 
691 	    attempt = needed;
692 	    ptr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt));
693 	}
694 	byteArrayPtr = ptr;
695 	byteArrayPtr->allocated = attempt;
696 	SET_BYTEARRAY(objPtr, byteArrayPtr);
697     }
698 
699     if (bytes) {
700 	memcpy(byteArrayPtr->bytes + byteArrayPtr->used, bytes, len);
701     }
702     byteArrayPtr->used += len;
703     TclInvalidateStringRep(objPtr);
704 }
705 
706 /*
707  *----------------------------------------------------------------------
708  *
709  * TclInitBinaryCmd --
710  *
711  *	This function is called to create the "binary" Tcl command. See the
712  *	user documentation for details on what it does.
713  *
714  * Results:
715  *	A command token for the new command.
716  *
717  * Side effects:
718  *	Creates a new binary command as a mapped ensemble.
719  *
720  *----------------------------------------------------------------------
721  */
722 
723 Tcl_Command
TclInitBinaryCmd(Tcl_Interp * interp)724 TclInitBinaryCmd(
725     Tcl_Interp *interp)
726 {
727     Tcl_Command binaryEnsemble;
728 
729     binaryEnsemble = TclMakeEnsemble(interp, "binary", binaryMap);
730     TclMakeEnsemble(interp, "binary encode", encodeMap);
731     TclMakeEnsemble(interp, "binary decode", decodeMap);
732     return binaryEnsemble;
733 }
734 
735 /*
736  *----------------------------------------------------------------------
737  *
738  * BinaryFormatCmd --
739  *
740  *	This procedure implements the "binary format" Tcl command.
741  *
742  * Results:
743  *	A standard Tcl result.
744  *
745  * Side effects:
746  *	See the user documentation.
747  *
748  *----------------------------------------------------------------------
749  */
750 
751 static int
BinaryFormatCmd(ClientData dummy,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])752 BinaryFormatCmd(
753     ClientData dummy,		/* Not used. */
754     Tcl_Interp *interp,		/* Current interpreter. */
755     int objc,			/* Number of arguments. */
756     Tcl_Obj *const objv[])	/* Argument objects. */
757 {
758     int arg;			/* Index of next argument to consume. */
759     int value = 0;		/* Current integer value to be packed.
760 				 * Initialized to avoid compiler warning. */
761     char cmd;			/* Current format character. */
762     int count;			/* Count associated with current format
763 				 * character. */
764     int flags;			/* Format field flags */
765     const char *format;		/* Pointer to current position in format
766 				 * string. */
767     Tcl_Obj *resultPtr = NULL;	/* Object holding result buffer. */
768     unsigned char *buffer;	/* Start of result buffer. */
769     unsigned char *cursor;	/* Current position within result buffer. */
770     unsigned char *maxPos;	/* Greatest position within result buffer that
771 				 * cursor has visited.*/
772     const char *errorString;
773     const char *errorValue, *str;
774     int offset, size, length;
775 
776     if (objc < 2) {
777 	Tcl_WrongNumArgs(interp, 1, objv, "formatString ?arg ...?");
778 	return TCL_ERROR;
779     }
780 
781     /*
782      * To avoid copying the data, we format the string in two passes. The
783      * first pass computes the size of the output buffer. The second pass
784      * places the formatted data into the buffer.
785      */
786 
787     format = TclGetString(objv[1]);
788     arg = 2;
789     offset = 0;
790     length = 0;
791     while (*format != '\0') {
792 	str = format;
793 	flags = 0;
794 	if (!GetFormatSpec(&format, &cmd, &count, &flags)) {
795 	    break;
796 	}
797 	switch (cmd) {
798 	case 'a':
799 	case 'A':
800 	case 'b':
801 	case 'B':
802 	case 'h':
803 	case 'H':
804 	    /*
805 	     * For string-type specifiers, the count corresponds to the number
806 	     * of bytes in a single argument.
807 	     */
808 
809 	    if (arg >= objc) {
810 		goto badIndex;
811 	    }
812 	    if (count == BINARY_ALL) {
813 		Tcl_GetByteArrayFromObj(objv[arg], &count);
814 	    } else if (count == BINARY_NOCOUNT) {
815 		count = 1;
816 	    }
817 	    arg++;
818 	    if (cmd == 'a' || cmd == 'A') {
819 		offset += count;
820 	    } else if (cmd == 'b' || cmd == 'B') {
821 		offset += (count + 7) / 8;
822 	    } else {
823 		offset += (count + 1) / 2;
824 	    }
825 	    break;
826 	case 'c':
827 	    size = 1;
828 	    goto doNumbers;
829 	case 't':
830 	case 's':
831 	case 'S':
832 	    size = 2;
833 	    goto doNumbers;
834 	case 'n':
835 	case 'i':
836 	case 'I':
837 	    size = 4;
838 	    goto doNumbers;
839 	case 'm':
840 	case 'w':
841 	case 'W':
842 	    size = 8;
843 	    goto doNumbers;
844 	case 'r':
845 	case 'R':
846 	case 'f':
847 	    size = sizeof(float);
848 	    goto doNumbers;
849 	case 'q':
850 	case 'Q':
851 	case 'd':
852 	    size = sizeof(double);
853 
854 	doNumbers:
855 	    if (arg >= objc) {
856 		goto badIndex;
857 	    }
858 
859 	    /*
860 	     * For number-type specifiers, the count corresponds to the number
861 	     * of elements in the list stored in a single argument. If no
862 	     * count is specified, then the argument is taken as a single
863 	     * non-list value.
864 	     */
865 
866 	    if (count == BINARY_NOCOUNT) {
867 		arg++;
868 		count = 1;
869 	    } else {
870 		int listc;
871 		Tcl_Obj **listv;
872 
873 		/*
874 		 * The macro evals its args more than once: avoid arg++
875 		 */
876 
877 		if (TclListObjGetElements(interp, objv[arg], &listc,
878 			&listv) != TCL_OK) {
879 		    return TCL_ERROR;
880 		}
881 		arg++;
882 
883 		if (count == BINARY_ALL) {
884 		    count = listc;
885 		} else if (count > listc) {
886 		    Tcl_SetObjResult(interp, Tcl_NewStringObj(
887 			    "number of elements in list does not match count",
888 			    -1));
889 		    return TCL_ERROR;
890 		}
891 	    }
892 	    offset += count*size;
893 	    break;
894 
895 	case 'x':
896 	    if (count == BINARY_ALL) {
897 		Tcl_SetObjResult(interp, Tcl_NewStringObj(
898 			"cannot use \"*\" in format string with \"x\"", -1));
899 		return TCL_ERROR;
900 	    } else if (count == BINARY_NOCOUNT) {
901 		count = 1;
902 	    }
903 	    offset += count;
904 	    break;
905 	case 'X':
906 	    if (count == BINARY_NOCOUNT) {
907 		count = 1;
908 	    }
909 	    if ((count > offset) || (count == BINARY_ALL)) {
910 		count = offset;
911 	    }
912 	    if (offset > length) {
913 		length = offset;
914 	    }
915 	    offset -= count;
916 	    break;
917 	case '@':
918 	    if (offset > length) {
919 		length = offset;
920 	    }
921 	    if (count == BINARY_ALL) {
922 		offset = length;
923 	    } else if (count == BINARY_NOCOUNT) {
924 		goto badCount;
925 	    } else {
926 		offset = count;
927 	    }
928 	    break;
929 	default:
930 	    errorString = str;
931 	    goto badField;
932 	}
933     }
934     if (offset > length) {
935 	length = offset;
936     }
937     if (length == 0) {
938 	return TCL_OK;
939     }
940 
941     /*
942      * Prepare the result object by preallocating the caclulated number of
943      * bytes and filling with nulls.
944      */
945 
946     resultPtr = Tcl_NewObj();
947     buffer = Tcl_SetByteArrayLength(resultPtr, length);
948     memset(buffer, 0, length);
949 
950     /*
951      * Pack the data into the result object. Note that we can skip the error
952      * checking during this pass, since we have already parsed the string
953      * once.
954      */
955 
956     arg = 2;
957     format = TclGetString(objv[1]);
958     cursor = buffer;
959     maxPos = cursor;
960     while (*format != 0) {
961 	flags = 0;
962 	if (!GetFormatSpec(&format, &cmd, &count, &flags)) {
963 	    break;
964 	}
965 	if ((count == 0) && (cmd != '@')) {
966 	    if (cmd != 'x') {
967 		arg++;
968 	    }
969 	    continue;
970 	}
971 	switch (cmd) {
972 	case 'a':
973 	case 'A': {
974 	    char pad = (char) (cmd == 'a' ? '\0' : ' ');
975 	    unsigned char *bytes;
976 
977 	    bytes = Tcl_GetByteArrayFromObj(objv[arg++], &length);
978 
979 	    if (count == BINARY_ALL) {
980 		count = length;
981 	    } else if (count == BINARY_NOCOUNT) {
982 		count = 1;
983 	    }
984 	    if (length >= count) {
985 		memcpy(cursor, bytes, count);
986 	    } else {
987 		memcpy(cursor, bytes, length);
988 		memset(cursor + length, pad, count - length);
989 	    }
990 	    cursor += count;
991 	    break;
992 	}
993 	case 'b':
994 	case 'B': {
995 	    unsigned char *last;
996 
997 	    str = TclGetStringFromObj(objv[arg], &length);
998 	    arg++;
999 	    if (count == BINARY_ALL) {
1000 		count = length;
1001 	    } else if (count == BINARY_NOCOUNT) {
1002 		count = 1;
1003 	    }
1004 	    last = cursor + ((count + 7) / 8);
1005 	    if (count > length) {
1006 		count = length;
1007 	    }
1008 	    value = 0;
1009 	    errorString = "binary";
1010 	    if (cmd == 'B') {
1011 		for (offset = 0; offset < count; offset++) {
1012 		    value <<= 1;
1013 		    if (str[offset] == '1') {
1014 			value |= 1;
1015 		    } else if (str[offset] != '0') {
1016 			errorValue = str;
1017 			Tcl_DecrRefCount(resultPtr);
1018 			goto badValue;
1019 		    }
1020 		    if (((offset + 1) % 8) == 0) {
1021 			*cursor++ = UCHAR(value);
1022 			value = 0;
1023 		    }
1024 		}
1025 	    } else {
1026 		for (offset = 0; offset < count; offset++) {
1027 		    value >>= 1;
1028 		    if (str[offset] == '1') {
1029 			value |= 128;
1030 		    } else if (str[offset] != '0') {
1031 			errorValue = str;
1032 			Tcl_DecrRefCount(resultPtr);
1033 			goto badValue;
1034 		    }
1035 		    if (!((offset + 1) % 8)) {
1036 			*cursor++ = UCHAR(value);
1037 			value = 0;
1038 		    }
1039 		}
1040 	    }
1041 	    if ((offset % 8) != 0) {
1042 		if (cmd == 'B') {
1043 		    value <<= 8 - (offset % 8);
1044 		} else {
1045 		    value >>= 8 - (offset % 8);
1046 		}
1047 		*cursor++ = UCHAR(value);
1048 	    }
1049 	    while (cursor < last) {
1050 		*cursor++ = '\0';
1051 	    }
1052 	    break;
1053 	}
1054 	case 'h':
1055 	case 'H': {
1056 	    unsigned char *last;
1057 	    int c;
1058 
1059 	    str = TclGetStringFromObj(objv[arg], &length);
1060 	    arg++;
1061 	    if (count == BINARY_ALL) {
1062 		count = length;
1063 	    } else if (count == BINARY_NOCOUNT) {
1064 		count = 1;
1065 	    }
1066 	    last = cursor + ((count + 1) / 2);
1067 	    if (count > length) {
1068 		count = length;
1069 	    }
1070 	    value = 0;
1071 	    errorString = "hexadecimal";
1072 	    if (cmd == 'H') {
1073 		for (offset = 0; offset < count; offset++) {
1074 		    value <<= 4;
1075 		    if (!isxdigit(UCHAR(str[offset]))) {     /* INTL: digit */
1076 			errorValue = str;
1077 			Tcl_DecrRefCount(resultPtr);
1078 			goto badValue;
1079 		    }
1080 		    c = str[offset] - '0';
1081 		    if (c > 9) {
1082 			c += ('0' - 'A') + 10;
1083 		    }
1084 		    if (c > 16) {
1085 			c += ('A' - 'a');
1086 		    }
1087 		    value |= (c & 0xF);
1088 		    if (offset % 2) {
1089 			*cursor++ = (char) value;
1090 			value = 0;
1091 		    }
1092 		}
1093 	    } else {
1094 		for (offset = 0; offset < count; offset++) {
1095 		    value >>= 4;
1096 
1097 		    if (!isxdigit(UCHAR(str[offset]))) {     /* INTL: digit */
1098 			errorValue = str;
1099 			Tcl_DecrRefCount(resultPtr);
1100 			goto badValue;
1101 		    }
1102 		    c = str[offset] - '0';
1103 		    if (c > 9) {
1104 			c += ('0' - 'A') + 10;
1105 		    }
1106 		    if (c > 16) {
1107 			c += ('A' - 'a');
1108 		    }
1109 		    value |= ((c << 4) & 0xF0);
1110 		    if (offset % 2) {
1111 			*cursor++ = UCHAR(value & 0xFF);
1112 			value = 0;
1113 		    }
1114 		}
1115 	    }
1116 	    if (offset % 2) {
1117 		if (cmd == 'H') {
1118 		    value <<= 4;
1119 		} else {
1120 		    value >>= 4;
1121 		}
1122 		*cursor++ = UCHAR(value);
1123 	    }
1124 
1125 	    while (cursor < last) {
1126 		*cursor++ = '\0';
1127 	    }
1128 	    break;
1129 	}
1130 	case 'c':
1131 	case 't':
1132 	case 's':
1133 	case 'S':
1134 	case 'n':
1135 	case 'i':
1136 	case 'I':
1137 	case 'm':
1138 	case 'w':
1139 	case 'W':
1140 	case 'r':
1141 	case 'R':
1142 	case 'd':
1143 	case 'q':
1144 	case 'Q':
1145 	case 'f': {
1146 	    int listc, i;
1147 	    Tcl_Obj **listv;
1148 
1149 	    if (count == BINARY_NOCOUNT) {
1150 		/*
1151 		 * Note that we are casting away the const-ness of objv, but
1152 		 * this is safe since we aren't going to modify the array.
1153 		 */
1154 
1155 		listv = (Tcl_Obj **) (objv + arg);
1156 		listc = 1;
1157 		count = 1;
1158 	    } else {
1159 		TclListObjGetElements(interp, objv[arg], &listc, &listv);
1160 		if (count == BINARY_ALL) {
1161 		    count = listc;
1162 		}
1163 	    }
1164 	    arg++;
1165 	    for (i = 0; i < count; i++) {
1166 		if (FormatNumber(interp, cmd, listv[i], &cursor) != TCL_OK) {
1167 		    Tcl_DecrRefCount(resultPtr);
1168 		    return TCL_ERROR;
1169 		}
1170 	    }
1171 	    break;
1172 	}
1173 	case 'x':
1174 	    if (count == BINARY_NOCOUNT) {
1175 		count = 1;
1176 	    }
1177 	    memset(cursor, 0, count);
1178 	    cursor += count;
1179 	    break;
1180 	case 'X':
1181 	    if (cursor > maxPos) {
1182 		maxPos = cursor;
1183 	    }
1184 	    if (count == BINARY_NOCOUNT) {
1185 		count = 1;
1186 	    }
1187 	    if ((count == BINARY_ALL) || (count > (cursor - buffer))) {
1188 		cursor = buffer;
1189 	    } else {
1190 		cursor -= count;
1191 	    }
1192 	    break;
1193 	case '@':
1194 	    if (cursor > maxPos) {
1195 		maxPos = cursor;
1196 	    }
1197 	    if (count == BINARY_ALL) {
1198 		cursor = maxPos;
1199 	    } else {
1200 		cursor = buffer + count;
1201 	    }
1202 	    break;
1203 	}
1204     }
1205     Tcl_SetObjResult(interp, resultPtr);
1206     return TCL_OK;
1207 
1208  badValue:
1209     Tcl_ResetResult(interp);
1210     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
1211 	    "expected %s string but got \"%s\" instead",
1212 	    errorString, errorValue));
1213     return TCL_ERROR;
1214 
1215  badCount:
1216     errorString = "missing count for \"@\" field specifier";
1217     goto error;
1218 
1219  badIndex:
1220     errorString = "not enough arguments for all format specifiers";
1221     goto error;
1222 
1223  badField:
1224     {
1225 	int ch;
1226 	char buf[8] = "";
1227 
1228 	TclUtfToUCS4(errorString, &ch);
1229 	buf[TclUCS4ToUtf(ch, buf)] = '\0';
1230 	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
1231 		"bad field specifier \"%s\"", buf));
1232 	return TCL_ERROR;
1233     }
1234 
1235  error:
1236     Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, -1));
1237     return TCL_ERROR;
1238 }
1239 
1240 /*
1241  *----------------------------------------------------------------------
1242  *
1243  * BinaryScanCmd --
1244  *
1245  *	This procedure implements the "binary scan" Tcl command.
1246  *
1247  * Results:
1248  *	A standard Tcl result.
1249  *
1250  * Side effects:
1251  *	See the user documentation.
1252  *
1253  *----------------------------------------------------------------------
1254  */
1255 
1256 int
BinaryScanCmd(ClientData dummy,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])1257 BinaryScanCmd(
1258     ClientData dummy,		/* Not used. */
1259     Tcl_Interp *interp,		/* Current interpreter. */
1260     int objc,			/* Number of arguments. */
1261     Tcl_Obj *const objv[])	/* Argument objects. */
1262 {
1263     int arg;			/* Index of next argument to consume. */
1264     int value = 0;		/* Current integer value to be packed.
1265 				 * Initialized to avoid compiler warning. */
1266     char cmd;			/* Current format character. */
1267     int count;			/* Count associated with current format
1268 				 * character. */
1269     int flags;			/* Format field flags */
1270     const char *format;		/* Pointer to current position in format
1271 				 * string. */
1272     Tcl_Obj *resultPtr = NULL;	/* Object holding result buffer. */
1273     unsigned char *buffer;	/* Start of result buffer. */
1274     const char *errorString;
1275     const char *str;
1276     int offset, size, length;
1277 
1278     int i;
1279     Tcl_Obj *valuePtr, *elementPtr;
1280     Tcl_HashTable numberCacheHash;
1281     Tcl_HashTable *numberCachePtr;
1282 
1283     if (objc < 3) {
1284 	Tcl_WrongNumArgs(interp, 1, objv,
1285 		"value formatString ?varName ...?");
1286 	return TCL_ERROR;
1287     }
1288     numberCachePtr = &numberCacheHash;
1289     Tcl_InitHashTable(numberCachePtr, TCL_ONE_WORD_KEYS);
1290     buffer = Tcl_GetByteArrayFromObj(objv[1], &length);
1291     format = TclGetString(objv[2]);
1292     arg = 3;
1293     offset = 0;
1294     while (*format != '\0') {
1295 	str = format;
1296 	flags = 0;
1297 	if (!GetFormatSpec(&format, &cmd, &count, &flags)) {
1298 	    goto done;
1299 	}
1300 	switch (cmd) {
1301 	case 'a':
1302 	case 'A': {
1303 	    unsigned char *src;
1304 
1305 	    if (arg >= objc) {
1306 		DeleteScanNumberCache(numberCachePtr);
1307 		goto badIndex;
1308 	    }
1309 	    if (count == BINARY_ALL) {
1310 		count = length - offset;
1311 	    } else {
1312 		if (count == BINARY_NOCOUNT) {
1313 		    count = 1;
1314 		}
1315 		if (count > (length - offset)) {
1316 		    goto done;
1317 		}
1318 	    }
1319 
1320 	    src = buffer + offset;
1321 	    size = count;
1322 
1323 	    /*
1324 	     * Trim trailing nulls and spaces, if necessary.
1325 	     */
1326 
1327 	    if (cmd == 'A') {
1328 		while (size > 0) {
1329 		    if (src[size - 1] != '\0' && src[size - 1] != ' ') {
1330 			break;
1331 		    }
1332 		    size--;
1333 		}
1334 	    }
1335 
1336 	    /*
1337 	     * Have to do this #ifdef-fery because (as part of defining
1338 	     * Tcl_NewByteArrayObj) we removed the #def that hides this stuff
1339 	     * normally. If this code ever gets copied to another file, it
1340 	     * should be changed back to the simpler version.
1341 	     */
1342 
1343 #ifdef TCL_MEM_DEBUG
1344 	    valuePtr = Tcl_DbNewByteArrayObj(src, size, __FILE__, __LINE__);
1345 #else
1346 	    valuePtr = Tcl_NewByteArrayObj(src, size);
1347 #endif /* TCL_MEM_DEBUG */
1348 
1349 	    resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1350 		    TCL_LEAVE_ERR_MSG);
1351 	    arg++;
1352 	    if (resultPtr == NULL) {
1353 		DeleteScanNumberCache(numberCachePtr);
1354 		return TCL_ERROR;
1355 	    }
1356 	    offset += count;
1357 	    break;
1358 	}
1359 	case 'b':
1360 	case 'B': {
1361 	    unsigned char *src;
1362 	    char *dest;
1363 
1364 	    if (arg >= objc) {
1365 		DeleteScanNumberCache(numberCachePtr);
1366 		goto badIndex;
1367 	    }
1368 	    if (count == BINARY_ALL) {
1369 		count = (length - offset) * 8;
1370 	    } else {
1371 		if (count == BINARY_NOCOUNT) {
1372 		    count = 1;
1373 		}
1374 		if (count > (length - offset) * 8) {
1375 		    goto done;
1376 		}
1377 	    }
1378 	    src = buffer + offset;
1379 	    valuePtr = Tcl_NewObj();
1380 	    Tcl_SetObjLength(valuePtr, count);
1381 	    dest = TclGetString(valuePtr);
1382 
1383 	    if (cmd == 'b') {
1384 		for (i = 0; i < count; i++) {
1385 		    if (i % 8) {
1386 			value >>= 1;
1387 		    } else {
1388 			value = *src++;
1389 		    }
1390 		    *dest++ = (char) ((value & 1) ? '1' : '0');
1391 		}
1392 	    } else {
1393 		for (i = 0; i < count; i++) {
1394 		    if (i % 8) {
1395 			value <<= 1;
1396 		    } else {
1397 			value = *src++;
1398 		    }
1399 		    *dest++ = (char) ((value & 0x80) ? '1' : '0');
1400 		}
1401 	    }
1402 
1403 	    resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1404 		    TCL_LEAVE_ERR_MSG);
1405 	    arg++;
1406 	    if (resultPtr == NULL) {
1407 		DeleteScanNumberCache(numberCachePtr);
1408 		return TCL_ERROR;
1409 	    }
1410 	    offset += (count + 7) / 8;
1411 	    break;
1412 	}
1413 	case 'h':
1414 	case 'H': {
1415 	    char *dest;
1416 	    unsigned char *src;
1417 	    static const char hexdigit[] = "0123456789abcdef";
1418 
1419 	    if (arg >= objc) {
1420 		DeleteScanNumberCache(numberCachePtr);
1421 		goto badIndex;
1422 	    }
1423 	    if (count == BINARY_ALL) {
1424 		count = (length - offset)*2;
1425 	    } else {
1426 		if (count == BINARY_NOCOUNT) {
1427 		    count = 1;
1428 		}
1429 		if (count > (length - offset)*2) {
1430 		    goto done;
1431 		}
1432 	    }
1433 	    src = buffer + offset;
1434 	    valuePtr = Tcl_NewObj();
1435 	    Tcl_SetObjLength(valuePtr, count);
1436 	    dest = TclGetString(valuePtr);
1437 
1438 	    if (cmd == 'h') {
1439 		for (i = 0; i < count; i++) {
1440 		    if (i % 2) {
1441 			value >>= 4;
1442 		    } else {
1443 			value = *src++;
1444 		    }
1445 		    *dest++ = hexdigit[value & 0xF];
1446 		}
1447 	    } else {
1448 		for (i = 0; i < count; i++) {
1449 		    if (i % 2) {
1450 			value <<= 4;
1451 		    } else {
1452 			value = *src++;
1453 		    }
1454 		    *dest++ = hexdigit[(value >> 4) & 0xF];
1455 		}
1456 	    }
1457 
1458 	    resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1459 		    TCL_LEAVE_ERR_MSG);
1460 	    arg++;
1461 	    if (resultPtr == NULL) {
1462 		DeleteScanNumberCache(numberCachePtr);
1463 		return TCL_ERROR;
1464 	    }
1465 	    offset += (count + 1) / 2;
1466 	    break;
1467 	}
1468 	case 'c':
1469 	    size = 1;
1470 	    goto scanNumber;
1471 	case 't':
1472 	case 's':
1473 	case 'S':
1474 	    size = 2;
1475 	    goto scanNumber;
1476 	case 'n':
1477 	case 'i':
1478 	case 'I':
1479 	    size = 4;
1480 	    goto scanNumber;
1481 	case 'm':
1482 	case 'w':
1483 	case 'W':
1484 	    size = 8;
1485 	    goto scanNumber;
1486 	case 'r':
1487 	case 'R':
1488 	case 'f':
1489 	    size = sizeof(float);
1490 	    goto scanNumber;
1491 	case 'q':
1492 	case 'Q':
1493 	case 'd': {
1494 	    unsigned char *src;
1495 
1496 	    size = sizeof(double);
1497 	    /* fall through */
1498 
1499 	scanNumber:
1500 	    if (arg >= objc) {
1501 		DeleteScanNumberCache(numberCachePtr);
1502 		goto badIndex;
1503 	    }
1504 	    if (count == BINARY_NOCOUNT) {
1505 		if ((length - offset) < size) {
1506 		    goto done;
1507 		}
1508 		valuePtr = ScanNumber(buffer+offset, cmd, flags,
1509 			&numberCachePtr);
1510 		offset += size;
1511 	    } else {
1512 		if (count == BINARY_ALL) {
1513 		    count = (length - offset) / size;
1514 		}
1515 		if ((length - offset) < (count * size)) {
1516 		    goto done;
1517 		}
1518 		valuePtr = Tcl_NewObj();
1519 		src = buffer + offset;
1520 		for (i = 0; i < count; i++) {
1521 		    elementPtr = ScanNumber(src, cmd, flags, &numberCachePtr);
1522 		    src += size;
1523 		    Tcl_ListObjAppendElement(NULL, valuePtr, elementPtr);
1524 		}
1525 		offset += count * size;
1526 	    }
1527 
1528 	    resultPtr = Tcl_ObjSetVar2(interp, objv[arg], NULL, valuePtr,
1529 		    TCL_LEAVE_ERR_MSG);
1530 	    arg++;
1531 	    if (resultPtr == NULL) {
1532 		DeleteScanNumberCache(numberCachePtr);
1533 		return TCL_ERROR;
1534 	    }
1535 	    break;
1536 	}
1537 	case 'x':
1538 	    if (count == BINARY_NOCOUNT) {
1539 		count = 1;
1540 	    }
1541 	    if ((count == BINARY_ALL) || (count > (length - offset))) {
1542 		offset = length;
1543 	    } else {
1544 		offset += count;
1545 	    }
1546 	    break;
1547 	case 'X':
1548 	    if (count == BINARY_NOCOUNT) {
1549 		count = 1;
1550 	    }
1551 	    if ((count == BINARY_ALL) || (count > offset)) {
1552 		offset = 0;
1553 	    } else {
1554 		offset -= count;
1555 	    }
1556 	    break;
1557 	case '@':
1558 	    if (count == BINARY_NOCOUNT) {
1559 		DeleteScanNumberCache(numberCachePtr);
1560 		goto badCount;
1561 	    }
1562 	    if ((count == BINARY_ALL) || (count > length)) {
1563 		offset = length;
1564 	    } else {
1565 		offset = count;
1566 	    }
1567 	    break;
1568 	default:
1569 	    DeleteScanNumberCache(numberCachePtr);
1570 	    errorString = str;
1571 	    goto badField;
1572 	}
1573     }
1574 
1575     /*
1576      * Set the result to the last position of the cursor.
1577      */
1578 
1579  done:
1580     Tcl_SetObjResult(interp, Tcl_NewLongObj(arg - 3));
1581     DeleteScanNumberCache(numberCachePtr);
1582 
1583     return TCL_OK;
1584 
1585  badCount:
1586     errorString = "missing count for \"@\" field specifier";
1587     goto error;
1588 
1589  badIndex:
1590     errorString = "not enough arguments for all format specifiers";
1591     goto error;
1592 
1593  badField:
1594     {
1595 	int ch;
1596 	char buf[8] = "";
1597 
1598 	TclUtfToUCS4(errorString, &ch);
1599 	buf[TclUCS4ToUtf(ch, buf)] = '\0';
1600 	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
1601 		"bad field specifier \"%s\"", buf));
1602 	return TCL_ERROR;
1603     }
1604 
1605  error:
1606     Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, -1));
1607     return TCL_ERROR;
1608 }
1609 
1610 /*
1611  *----------------------------------------------------------------------
1612  *
1613  * GetFormatSpec --
1614  *
1615  *	This function parses the format strings used in the binary format and
1616  *	scan commands.
1617  *
1618  * Results:
1619  *	Moves the formatPtr to the start of the next command. Returns the
1620  *	current command character and count in cmdPtr and countPtr. The count
1621  *	is set to BINARY_ALL if the count character was '*' or BINARY_NOCOUNT
1622  *	if no count was specified. Returns 1 on success, or 0 if the string
1623  *	did not have a format specifier.
1624  *
1625  * Side effects:
1626  *	None.
1627  *
1628  *----------------------------------------------------------------------
1629  */
1630 
1631 static int
GetFormatSpec(const char ** formatPtr,char * cmdPtr,int * countPtr,int * flagsPtr)1632 GetFormatSpec(
1633     const char **formatPtr,	/* Pointer to format string. */
1634     char *cmdPtr,		/* Pointer to location of command char. */
1635     int *countPtr,		/* Pointer to repeat count value. */
1636     int *flagsPtr)		/* Pointer to field flags */
1637 {
1638     /*
1639      * Skip any leading blanks.
1640      */
1641 
1642     while (**formatPtr == ' ') {
1643 	(*formatPtr)++;
1644     }
1645 
1646     /*
1647      * The string was empty, except for whitespace, so fail.
1648      */
1649 
1650     if (!(**formatPtr)) {
1651 	return 0;
1652     }
1653 
1654     /*
1655      * Extract the command character and any trailing digits or '*'.
1656      */
1657 
1658     *cmdPtr = **formatPtr;
1659     (*formatPtr)++;
1660     if (**formatPtr == 'u') {
1661 	(*formatPtr)++;
1662 	*flagsPtr |= BINARY_UNSIGNED;
1663     }
1664     if (**formatPtr == '*') {
1665 	(*formatPtr)++;
1666 	*countPtr = BINARY_ALL;
1667     } else if (isdigit(UCHAR(**formatPtr))) { /* INTL: digit */
1668 	unsigned long int count;
1669 
1670 	errno = 0;
1671 	count = strtoul(*formatPtr, (char **) formatPtr, 10);
1672 	if (errno || (count > (unsigned long) INT_MAX)) {
1673 	    *countPtr = INT_MAX;
1674 	} else {
1675 	    *countPtr = (int) count;
1676 	}
1677     } else {
1678 	*countPtr = BINARY_NOCOUNT;
1679     }
1680     return 1;
1681 }
1682 
1683 /*
1684  *----------------------------------------------------------------------
1685  *
1686  * NeedReversing --
1687  *
1688  *	This routine determines, if bytes of a number need to be re-ordered,
1689  *	and returns a numeric code indicating the re-ordering to be done.
1690  *	This depends on the endiannes of the machine and the desired format.
1691  *	It is in effect a table (whose contents depend on the endianness of
1692  *	the system) describing whether a value needs reversing or not. Anyone
1693  *	porting the code to a big-endian platform should take care to make
1694  *	sure that they define WORDS_BIGENDIAN though this is already done by
1695  *	configure for the Unix build; little-endian platforms (including
1696  *	Windows) don't need to do anything.
1697  *
1698  * Results:
1699  *	0	No re-ordering needed.
1700  *	1	Reverse the bytes:	01234567 <-> 76543210 (little to big)
1701  *	2	Apply this re-ordering: 01234567 <-> 45670123 (Nokia to little)
1702  *	3	Apply this re-ordering: 01234567 <-> 32107654 (Nokia to big)
1703  *
1704  * Side effects:
1705  *	None
1706  *
1707  *----------------------------------------------------------------------
1708  */
1709 
1710 static int
NeedReversing(int format)1711 NeedReversing(
1712     int format)
1713 {
1714     switch (format) {
1715 	/* native floats and doubles: never reverse */
1716     case 'd':
1717     case 'f':
1718 	/* big endian ints: never reverse */
1719     case 'I':
1720     case 'S':
1721     case 'W':
1722 #ifdef WORDS_BIGENDIAN
1723 	/* native ints: reverse if we're little-endian */
1724     case 'n':
1725     case 't':
1726     case 'm':
1727 	/* f: reverse if we're little-endian */
1728     case 'Q':
1729     case 'R':
1730 #else /* !WORDS_BIGENDIAN */
1731 	/* small endian floats: reverse if we're big-endian */
1732     case 'r':
1733 #endif /* WORDS_BIGENDIAN */
1734 	return 0;
1735 
1736 #ifdef WORDS_BIGENDIAN
1737 	/* small endian floats: reverse if we're big-endian */
1738     case 'q':
1739     case 'r':
1740 #else /* !WORDS_BIGENDIAN */
1741 	/* native ints: reverse if we're little-endian */
1742     case 'n':
1743     case 't':
1744     case 'm':
1745 	/* f: reverse if we're little-endian */
1746     case 'R':
1747 #endif /* WORDS_BIGENDIAN */
1748 	/* small endian ints: always reverse */
1749     case 'i':
1750     case 's':
1751     case 'w':
1752 	return 1;
1753 
1754 #ifndef WORDS_BIGENDIAN
1755     /*
1756      * The Q and q formats need special handling to account for the unusual
1757      * byte ordering of 8-byte floats on Nokia 770 systems, which claim to be
1758      * little-endian, but also reverse word order.
1759      */
1760 
1761     case 'Q':
1762 	if (TclNokia770Doubles()) {
1763 	    return 3;
1764 	}
1765 	return 1;
1766     case 'q':
1767 	if (TclNokia770Doubles()) {
1768 	    return 2;
1769 	}
1770 	return 0;
1771 #endif
1772     }
1773 
1774     Tcl_Panic("unexpected fallthrough");
1775     return 0;
1776 }
1777 
1778 /*
1779  *----------------------------------------------------------------------
1780  *
1781  * CopyNumber --
1782  *
1783  *	This routine is called by FormatNumber and ScanNumber to copy a
1784  *	floating-point number. If required, bytes are reversed while copying.
1785  *	The behaviour is only fully defined when used with IEEE float and
1786  *	double values (guaranteed to be 4 and 8 bytes long, respectively.)
1787  *
1788  * Results:
1789  *	None
1790  *
1791  * Side effects:
1792  *	Copies length bytes
1793  *
1794  *----------------------------------------------------------------------
1795  */
1796 
1797 static void
CopyNumber(const void * from,void * to,unsigned length,int type)1798 CopyNumber(
1799     const void *from,		/* source */
1800     void *to,			/* destination */
1801     unsigned length,		/* Number of bytes to copy */
1802     int type)			/* What type of thing are we copying? */
1803 {
1804     switch (NeedReversing(type)) {
1805     case 0:
1806 	memcpy(to, from, length);
1807 	break;
1808     case 1: {
1809 	const unsigned char *fromPtr = (const unsigned char *)from;
1810 	unsigned char *toPtr = (unsigned char *)to;
1811 
1812 	switch (length) {
1813 	case 4:
1814 	    toPtr[0] = fromPtr[3];
1815 	    toPtr[1] = fromPtr[2];
1816 	    toPtr[2] = fromPtr[1];
1817 	    toPtr[3] = fromPtr[0];
1818 	    break;
1819 	case 8:
1820 	    toPtr[0] = fromPtr[7];
1821 	    toPtr[1] = fromPtr[6];
1822 	    toPtr[2] = fromPtr[5];
1823 	    toPtr[3] = fromPtr[4];
1824 	    toPtr[4] = fromPtr[3];
1825 	    toPtr[5] = fromPtr[2];
1826 	    toPtr[6] = fromPtr[1];
1827 	    toPtr[7] = fromPtr[0];
1828 	    break;
1829 	}
1830 	break;
1831     }
1832     case 2: {
1833 	const unsigned char *fromPtr = (const unsigned char *)from;
1834 	unsigned char *toPtr = (unsigned char *)to;
1835 
1836 	toPtr[0] = fromPtr[4];
1837 	toPtr[1] = fromPtr[5];
1838 	toPtr[2] = fromPtr[6];
1839 	toPtr[3] = fromPtr[7];
1840 	toPtr[4] = fromPtr[0];
1841 	toPtr[5] = fromPtr[1];
1842 	toPtr[6] = fromPtr[2];
1843 	toPtr[7] = fromPtr[3];
1844 	break;
1845     }
1846     case 3: {
1847 	const unsigned char *fromPtr = (const unsigned char *)from;
1848 	unsigned char *toPtr = (unsigned char *)to;
1849 
1850 	toPtr[0] = fromPtr[3];
1851 	toPtr[1] = fromPtr[2];
1852 	toPtr[2] = fromPtr[1];
1853 	toPtr[3] = fromPtr[0];
1854 	toPtr[4] = fromPtr[7];
1855 	toPtr[5] = fromPtr[6];
1856 	toPtr[6] = fromPtr[5];
1857 	toPtr[7] = fromPtr[4];
1858 	break;
1859     }
1860     }
1861 }
1862 
1863 /*
1864  *----------------------------------------------------------------------
1865  *
1866  * FormatNumber --
1867  *
1868  *	This routine is called by Tcl_BinaryObjCmd to format a number into a
1869  *	location pointed at by cursor.
1870  *
1871  * Results:
1872  *	A standard Tcl result.
1873  *
1874  * Side effects:
1875  *	Moves the cursor to the next location to be written into.
1876  *
1877  *----------------------------------------------------------------------
1878  */
1879 
1880 static int
FormatNumber(Tcl_Interp * interp,int type,Tcl_Obj * src,unsigned char ** cursorPtr)1881 FormatNumber(
1882     Tcl_Interp *interp,		/* Current interpreter, used to report
1883 				 * errors. */
1884     int type,			/* Type of number to format. */
1885     Tcl_Obj *src,		/* Number to format. */
1886     unsigned char **cursorPtr)	/* Pointer to index into destination buffer. */
1887 {
1888     long value;
1889     double dvalue;
1890     Tcl_WideInt wvalue;
1891     float fvalue;
1892 
1893     switch (type) {
1894     case 'd':
1895     case 'q':
1896     case 'Q':
1897 	/*
1898 	 * Double-precision floating point values. Tcl_GetDoubleFromObj
1899 	 * returns TCL_ERROR for NaN, but we can check by comparing the
1900 	 * object's type pointer.
1901 	 */
1902 
1903 	if (Tcl_GetDoubleFromObj(interp, src, &dvalue) != TCL_OK) {
1904 	    if (src->typePtr != &tclDoubleType) {
1905 		return TCL_ERROR;
1906 	    }
1907 	    dvalue = src->internalRep.doubleValue;
1908 	}
1909 	CopyNumber(&dvalue, *cursorPtr, sizeof(double), type);
1910 	*cursorPtr += sizeof(double);
1911 	return TCL_OK;
1912 
1913     case 'f':
1914     case 'r':
1915     case 'R':
1916 	/*
1917 	 * Single-precision floating point values. Tcl_GetDoubleFromObj
1918 	 * returns TCL_ERROR for NaN, but we can check by comparing the
1919 	 * object's type pointer.
1920 	 */
1921 
1922 	if (Tcl_GetDoubleFromObj(interp, src, &dvalue) != TCL_OK) {
1923 	    if (src->typePtr != &tclDoubleType) {
1924 		return TCL_ERROR;
1925 	    }
1926 	    dvalue = src->internalRep.doubleValue;
1927 	}
1928 
1929 	/*
1930 	 * Because some compilers will generate floating point exceptions on
1931 	 * an overflow cast (e.g. Borland), we restrict the values to the
1932 	 * valid range for float.
1933 	 */
1934 
1935 	if (fabs(dvalue) > (double) FLT_MAX) {
1936 	    fvalue = (dvalue >= 0.0) ? FLT_MAX : -FLT_MAX;
1937 	} else {
1938 	    fvalue = (float) dvalue;
1939 	}
1940 	CopyNumber(&fvalue, *cursorPtr, sizeof(float), type);
1941 	*cursorPtr += sizeof(float);
1942 	return TCL_OK;
1943 
1944 	/*
1945 	 * 64-bit integer values.
1946 	 */
1947     case 'w':
1948     case 'W':
1949     case 'm':
1950 	if (Tcl_GetWideIntFromObj(interp, src, &wvalue) != TCL_OK) {
1951 	    return TCL_ERROR;
1952 	}
1953 	if (NeedReversing(type)) {
1954 	    *(*cursorPtr)++ = UCHAR(wvalue);
1955 	    *(*cursorPtr)++ = UCHAR(wvalue >> 8);
1956 	    *(*cursorPtr)++ = UCHAR(wvalue >> 16);
1957 	    *(*cursorPtr)++ = UCHAR(wvalue >> 24);
1958 	    *(*cursorPtr)++ = UCHAR(wvalue >> 32);
1959 	    *(*cursorPtr)++ = UCHAR(wvalue >> 40);
1960 	    *(*cursorPtr)++ = UCHAR(wvalue >> 48);
1961 	    *(*cursorPtr)++ = UCHAR(wvalue >> 56);
1962 	} else {
1963 	    *(*cursorPtr)++ = UCHAR(wvalue >> 56);
1964 	    *(*cursorPtr)++ = UCHAR(wvalue >> 48);
1965 	    *(*cursorPtr)++ = UCHAR(wvalue >> 40);
1966 	    *(*cursorPtr)++ = UCHAR(wvalue >> 32);
1967 	    *(*cursorPtr)++ = UCHAR(wvalue >> 24);
1968 	    *(*cursorPtr)++ = UCHAR(wvalue >> 16);
1969 	    *(*cursorPtr)++ = UCHAR(wvalue >> 8);
1970 	    *(*cursorPtr)++ = UCHAR(wvalue);
1971 	}
1972 	return TCL_OK;
1973 
1974 	/*
1975 	 * 32-bit integer values.
1976 	 */
1977     case 'i':
1978     case 'I':
1979     case 'n':
1980 	if (TclGetLongFromObj(interp, src, &value) != TCL_OK) {
1981 	    return TCL_ERROR;
1982 	}
1983 	if (NeedReversing(type)) {
1984 	    *(*cursorPtr)++ = UCHAR(value);
1985 	    *(*cursorPtr)++ = UCHAR(value >> 8);
1986 	    *(*cursorPtr)++ = UCHAR(value >> 16);
1987 	    *(*cursorPtr)++ = UCHAR(value >> 24);
1988 	} else {
1989 	    *(*cursorPtr)++ = UCHAR(value >> 24);
1990 	    *(*cursorPtr)++ = UCHAR(value >> 16);
1991 	    *(*cursorPtr)++ = UCHAR(value >> 8);
1992 	    *(*cursorPtr)++ = UCHAR(value);
1993 	}
1994 	return TCL_OK;
1995 
1996 	/*
1997 	 * 16-bit integer values.
1998 	 */
1999     case 's':
2000     case 'S':
2001     case 't':
2002 	if (TclGetLongFromObj(interp, src, &value) != TCL_OK) {
2003 	    return TCL_ERROR;
2004 	}
2005 	if (NeedReversing(type)) {
2006 	    *(*cursorPtr)++ = UCHAR(value);
2007 	    *(*cursorPtr)++ = UCHAR(value >> 8);
2008 	} else {
2009 	    *(*cursorPtr)++ = UCHAR(value >> 8);
2010 	    *(*cursorPtr)++ = UCHAR(value);
2011 	}
2012 	return TCL_OK;
2013 
2014 	/*
2015 	 * 8-bit integer values.
2016 	 */
2017     case 'c':
2018 	if (TclGetLongFromObj(interp, src, &value) != TCL_OK) {
2019 	    return TCL_ERROR;
2020 	}
2021 	*(*cursorPtr)++ = UCHAR(value);
2022 	return TCL_OK;
2023 
2024     default:
2025 	Tcl_Panic("unexpected fallthrough");
2026 	return TCL_ERROR;
2027     }
2028 }
2029 
2030 /*
2031  *----------------------------------------------------------------------
2032  *
2033  * ScanNumber --
2034  *
2035  *	This routine is called by Tcl_BinaryObjCmd to scan a number out of a
2036  *	buffer.
2037  *
2038  * Results:
2039  *	Returns a newly created object containing the scanned number. This
2040  *	object has a ref count of zero.
2041  *
2042  * Side effects:
2043  *	Might reuse an object in the number cache, place a new object in the
2044  *	cache, or delete the cache and set the reference to it (itself passed
2045  *	in by reference) to NULL.
2046  *
2047  *----------------------------------------------------------------------
2048  */
2049 
2050 static Tcl_Obj *
ScanNumber(unsigned char * buffer,int type,int flags,Tcl_HashTable ** numberCachePtrPtr)2051 ScanNumber(
2052     unsigned char *buffer,	/* Buffer to scan number from. */
2053     int type,			/* Format character from "binary scan" */
2054     int flags,			/* Format field flags */
2055     Tcl_HashTable **numberCachePtrPtr)
2056 				/* Place to look for cache of scanned value
2057 				 * objects, or NULL if too many different
2058 				 * numbers have been scanned. */
2059 {
2060     long value;
2061     float fvalue;
2062     double dvalue;
2063     Tcl_WideUInt uwvalue;
2064 
2065     /*
2066      * We cannot rely on the compiler to properly sign extend integer values
2067      * when we cast from smaller values to larger values because we don't know
2068      * the exact size of the integer types. So, we have to handle sign
2069      * extension explicitly by checking the high bit and padding with 1's as
2070      * needed. This practice is disabled if the BINARY_UNSIGNED flag is set.
2071      */
2072 
2073     switch (type) {
2074     case 'c':
2075 	/*
2076 	 * Characters need special handling. We want to produce a signed
2077 	 * result, but on some platforms (such as AIX) chars are unsigned. To
2078 	 * deal with this, check for a value that should be negative but
2079 	 * isn't.
2080 	 */
2081 
2082 	value = buffer[0];
2083 	if (!(flags & BINARY_UNSIGNED)) {
2084 	    if (value & 0x80) {
2085 		value |= -0x100;
2086 	    }
2087 	}
2088 	goto returnNumericObject;
2089 
2090 	/*
2091 	 * 16-bit numeric values. We need the sign extension trick (see above)
2092 	 * here as well.
2093 	 */
2094 
2095     case 's':
2096     case 'S':
2097     case 't':
2098 	if (NeedReversing(type)) {
2099 	    value = (long) (buffer[0] + (buffer[1] << 8));
2100 	} else {
2101 	    value = (long) (buffer[1] + (buffer[0] << 8));
2102 	}
2103 	if (!(flags & BINARY_UNSIGNED)) {
2104 	    if (value & 0x8000) {
2105 		value |= -0x10000;
2106 	    }
2107 	}
2108 	goto returnNumericObject;
2109 
2110 	/*
2111 	 * 32-bit numeric values.
2112 	 */
2113 
2114     case 'i':
2115     case 'I':
2116     case 'n':
2117 	if (NeedReversing(type)) {
2118 	    value = (long) (buffer[0]
2119 		    + (buffer[1] << 8)
2120 		    + (buffer[2] << 16)
2121 		    + (((long)buffer[3]) << 24));
2122 	} else {
2123 	    value = (long) (buffer[3]
2124 		    + (buffer[2] << 8)
2125 		    + (buffer[1] << 16)
2126 		    + (((long) buffer[0]) << 24));
2127 	}
2128 
2129 	/*
2130 	 * Check to see if the value was sign extended properly on systems
2131 	 * where an int is more than 32-bits.
2132 	 *
2133 	 * We avoid caching unsigned integers as we cannot distinguish between
2134 	 * 32bit signed and unsigned in the hash (short and char are ok).
2135 	 */
2136 
2137 	if (flags & BINARY_UNSIGNED) {
2138 	    return Tcl_NewWideIntObj((Tcl_WideInt)(unsigned long)value);
2139 	}
2140 	if ((value & (((unsigned) 1) << 31)) && (value > 0)) {
2141 	    value -= (((unsigned) 1) << 31);
2142 	    value -= (((unsigned) 1) << 31);
2143 	}
2144 
2145     returnNumericObject:
2146 	if (*numberCachePtrPtr == NULL) {
2147 	    return Tcl_NewLongObj(value);
2148 	} else {
2149 	    Tcl_HashTable *tablePtr = *numberCachePtrPtr;
2150 	    Tcl_HashEntry *hPtr;
2151 	    int isNew;
2152 
2153 	    hPtr = Tcl_CreateHashEntry(tablePtr, INT2PTR(value), &isNew);
2154 	    if (!isNew) {
2155 		return (Tcl_Obj *)Tcl_GetHashValue(hPtr);
2156 	    }
2157 	    if (tablePtr->numEntries <= BINARY_SCAN_MAX_CACHE) {
2158 		Tcl_Obj *objPtr = Tcl_NewLongObj(value);
2159 
2160 		Tcl_IncrRefCount(objPtr);
2161 		Tcl_SetHashValue(hPtr, objPtr);
2162 		return objPtr;
2163 	    }
2164 
2165 	    /*
2166 	     * We've overflowed the cache! Someone's parsing a LOT of varied
2167 	     * binary data in a single call! Bail out by switching back to the
2168 	     * old behaviour for the rest of the scan.
2169 	     *
2170 	     * Note that anyone just using the 'c' conversion (for bytes)
2171 	     * cannot trigger this.
2172 	     */
2173 
2174 	    DeleteScanNumberCache(tablePtr);
2175 	    *numberCachePtrPtr = NULL;
2176 	    return Tcl_NewLongObj(value);
2177 	}
2178 
2179 	/*
2180 	 * Do not cache wide (64-bit) values; they are already too large to
2181 	 * use as keys.
2182 	 */
2183 
2184     case 'w':
2185     case 'W':
2186     case 'm':
2187 	if (NeedReversing(type)) {
2188 	    uwvalue = ((Tcl_WideUInt) buffer[0])
2189 		    | (((Tcl_WideUInt) buffer[1]) << 8)
2190 		    | (((Tcl_WideUInt) buffer[2]) << 16)
2191 		    | (((Tcl_WideUInt) buffer[3]) << 24)
2192 		    | (((Tcl_WideUInt) buffer[4]) << 32)
2193 		    | (((Tcl_WideUInt) buffer[5]) << 40)
2194 		    | (((Tcl_WideUInt) buffer[6]) << 48)
2195 		    | (((Tcl_WideUInt) buffer[7]) << 56);
2196 	} else {
2197 	    uwvalue = ((Tcl_WideUInt) buffer[7])
2198 		    | (((Tcl_WideUInt) buffer[6]) << 8)
2199 		    | (((Tcl_WideUInt) buffer[5]) << 16)
2200 		    | (((Tcl_WideUInt) buffer[4]) << 24)
2201 		    | (((Tcl_WideUInt) buffer[3]) << 32)
2202 		    | (((Tcl_WideUInt) buffer[2]) << 40)
2203 		    | (((Tcl_WideUInt) buffer[1]) << 48)
2204 		    | (((Tcl_WideUInt) buffer[0]) << 56);
2205 	}
2206 	if (flags & BINARY_UNSIGNED) {
2207 	    Tcl_Obj *bigObj = NULL;
2208 	    mp_int big;
2209 
2210 	    TclBNInitBignumFromWideUInt(&big, uwvalue);
2211 	    bigObj = Tcl_NewBignumObj(&big);
2212 	    return bigObj;
2213 	}
2214 	return Tcl_NewWideIntObj((Tcl_WideInt) uwvalue);
2215 
2216 	/*
2217 	 * Do not cache double values; they are already too large to use as
2218 	 * keys and the values stored are utterly incompatible with the
2219 	 * integer part of the cache.
2220 	 */
2221 
2222 	/*
2223 	 * 32-bit IEEE single-precision floating point.
2224 	 */
2225 
2226     case 'f':
2227     case 'R':
2228     case 'r':
2229 	CopyNumber(buffer, &fvalue, sizeof(float), type);
2230 	return Tcl_NewDoubleObj(fvalue);
2231 
2232 	/*
2233 	 * 64-bit IEEE double-precision floating point.
2234 	 */
2235 
2236     case 'd':
2237     case 'Q':
2238     case 'q':
2239 	CopyNumber(buffer, &dvalue, sizeof(double), type);
2240 	return Tcl_NewDoubleObj(dvalue);
2241     }
2242     return NULL;
2243 }
2244 
2245 /*
2246  *----------------------------------------------------------------------
2247  *
2248  * DeleteScanNumberCache --
2249  *
2250  *	Deletes the hash table acting as a scan number cache.
2251  *
2252  * Results:
2253  *	None
2254  *
2255  * Side effects:
2256  *	Decrements the reference counts of the objects in the cache.
2257  *
2258  *----------------------------------------------------------------------
2259  */
2260 
2261 static void
DeleteScanNumberCache(Tcl_HashTable * numberCachePtr)2262 DeleteScanNumberCache(
2263     Tcl_HashTable *numberCachePtr)
2264 				/* Pointer to the hash table, or NULL (when
2265 				 * the cache has already been deleted due to
2266 				 * overflow.) */
2267 {
2268     Tcl_HashEntry *hEntry;
2269     Tcl_HashSearch search;
2270 
2271     if (numberCachePtr == NULL) {
2272 	return;
2273     }
2274 
2275     hEntry = Tcl_FirstHashEntry(numberCachePtr, &search);
2276     while (hEntry != NULL) {
2277 	Tcl_Obj *value = (Tcl_Obj *)Tcl_GetHashValue(hEntry);
2278 
2279 	if (value != NULL) {
2280 	    Tcl_DecrRefCount(value);
2281 	}
2282 	hEntry = Tcl_NextHashEntry(&search);
2283     }
2284     Tcl_DeleteHashTable(numberCachePtr);
2285 }
2286 
2287 /*
2288  * ----------------------------------------------------------------------
2289  *
2290  * NOTES --
2291  *
2292  *	Some measurements show that it is faster to use a table to to perform
2293  *	uuencode and base64 value encoding than to calculate the output (at
2294  *	least on intel P4 arch).
2295  *
2296  *	Conversely using a lookup table for the decoding is slower than just
2297  *	calculating the values. We therefore use the fastest of each method.
2298  *
2299  *	Presumably this has to do with the size of the tables. The base64
2300  *	decode table is 255 bytes while the encode table is only 65 bytes. The
2301  *	choice likely depends on CPU memory cache sizes.
2302  */
2303 
2304 /*
2305  *----------------------------------------------------------------------
2306  *
2307  * BinaryEncodeHex --
2308  *
2309  *	Implement the [binary encode hex] binary encoding. clientData must be
2310  *	a table to convert values to hexadecimal digits.
2311  *
2312  * Results:
2313  *	Interp result set to an encoded byte array object
2314  *
2315  * Side effects:
2316  *	None
2317  *
2318  *----------------------------------------------------------------------
2319  */
2320 
2321 static int
BinaryEncodeHex(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2322 BinaryEncodeHex(
2323     ClientData clientData,
2324     Tcl_Interp *interp,
2325     int objc,
2326     Tcl_Obj *const objv[])
2327 {
2328     Tcl_Obj *resultObj = NULL;
2329     unsigned char *data = NULL;
2330     unsigned char *cursor = NULL;
2331     int offset = 0, count = 0;
2332 
2333     if (objc != 2) {
2334 	Tcl_WrongNumArgs(interp, 1, objv, "data");
2335 	return TCL_ERROR;
2336     }
2337 
2338     TclNewObj(resultObj);
2339     data = Tcl_GetByteArrayFromObj(objv[1], &count);
2340     cursor = Tcl_SetByteArrayLength(resultObj, count * 2);
2341     for (offset = 0; offset < count; ++offset) {
2342 	*cursor++ = HexDigits[(data[offset] >> 4) & 0x0F];
2343 	*cursor++ = HexDigits[data[offset] & 0x0F];
2344     }
2345     Tcl_SetObjResult(interp, resultObj);
2346     return TCL_OK;
2347 }
2348 
2349 /*
2350  *----------------------------------------------------------------------
2351  *
2352  * BinaryDecodeHex --
2353  *
2354  *	Implement the [binary decode hex] binary encoding.
2355  *
2356  * Results:
2357  *	Interp result set to an decoded byte array object
2358  *
2359  * Side effects:
2360  *	None
2361  *
2362  *----------------------------------------------------------------------
2363  */
2364 
2365 static int
BinaryDecodeHex(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2366 BinaryDecodeHex(
2367     ClientData clientData,
2368     Tcl_Interp *interp,
2369     int objc,
2370     Tcl_Obj *const objv[])
2371 {
2372     Tcl_Obj *resultObj = NULL;
2373     unsigned char *data, *datastart, *dataend;
2374     unsigned char *begin, *cursor, c;
2375     int i, index, value, size, pure, count = 0, cut = 0, strict = 0;
2376     Tcl_UniChar ch = 0;
2377     enum {OPT_STRICT };
2378     static const char *const optStrings[] = { "-strict", NULL };
2379 
2380     if (objc < 2 || objc > 3) {
2381 	Tcl_WrongNumArgs(interp, 1, objv, "?options? data");
2382 	return TCL_ERROR;
2383     }
2384     for (i = 1; i < objc - 1; ++i) {
2385 	if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2386 		TCL_EXACT, &index) != TCL_OK) {
2387 	    return TCL_ERROR;
2388 	}
2389 	switch (index) {
2390 	case OPT_STRICT:
2391 	    strict = 1;
2392 	    break;
2393 	}
2394     }
2395 
2396     TclNewObj(resultObj);
2397     pure = TclIsPureByteArray(objv[objc - 1]);
2398     datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
2399 	    : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
2400     dataend = data + count;
2401     size = (count + 1) / 2;
2402     begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
2403     while (data < dataend) {
2404 	value = 0;
2405 	for (i = 0 ; i < 2 ; i++) {
2406 	    if (data >= dataend) {
2407 		value <<= 4;
2408 		break;
2409 	    }
2410 
2411 	    c = *data++;
2412 	    if (!isxdigit((int) c)) {
2413 		if (strict || !TclIsSpaceProc(c)) {
2414 		    goto badChar;
2415 		}
2416 		i--;
2417 		continue;
2418 	    }
2419 
2420 	    value <<= 4;
2421 	    c -= '0';
2422 	    if (c > 9) {
2423 		c += ('0' - 'A') + 10;
2424 	    }
2425 	    if (c > 16) {
2426 		c += ('A' - 'a');
2427 	    }
2428 	    value |= c & 0xF;
2429 	}
2430 	if (i < 2) {
2431 	    cut++;
2432 	}
2433 	*cursor++ = UCHAR(value);
2434 	value = 0;
2435     }
2436     if (cut > size) {
2437 	cut = size;
2438     }
2439     Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
2440     Tcl_SetObjResult(interp, resultObj);
2441     return TCL_OK;
2442 
2443   badChar:
2444     if (pure) {
2445 	ch = c;
2446     } else {
2447 	TclUtfToUniChar((const char *)(data - 1), &ch);
2448     }
2449     TclDecrRefCount(resultObj);
2450     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
2451 	    "invalid hexadecimal digit \"%c\" at position %d",
2452 	    ch, (int) (data - datastart - 1)));
2453     Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
2454     return TCL_ERROR;
2455 }
2456 
2457 /*
2458  *----------------------------------------------------------------------
2459  *
2460  * BinaryEncode64 --
2461  *
2462  *	This procedure implements the "binary encode base64" Tcl command.
2463  *
2464  * Results:
2465  *	The base64 encoded value prescribed by the input arguments.
2466  *
2467  *----------------------------------------------------------------------
2468  */
2469 
2470 #define OUTPUT(c) \
2471     do {						\
2472 	*cursor++ = (c);				\
2473 	outindex++;					\
2474 	if (maxlen > 0 && cursor != limit) {		\
2475 	    if (outindex == maxlen) {			\
2476 		memcpy(cursor, wrapchar, wrapcharlen);	\
2477 		cursor += wrapcharlen;			\
2478 		outindex = 0;				\
2479 	    }						\
2480 	}						\
2481 	if (cursor > limit) {				\
2482 	    Tcl_Panic("limit hit");			\
2483 	}						\
2484     } while (0)
2485 
2486 static int
BinaryEncode64(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2487 BinaryEncode64(
2488     ClientData clientData,
2489     Tcl_Interp *interp,
2490     int objc,
2491     Tcl_Obj *const objv[])
2492 {
2493     Tcl_Obj *resultObj;
2494     unsigned char *data, *limit;
2495     int maxlen = 0;
2496     const char *wrapchar = "\n";
2497     int wrapcharlen = 1;
2498     int offset, i, index, size, outindex = 0, count = 0, purewrap = 1;
2499     enum { OPT_MAXLEN, OPT_WRAPCHAR };
2500     static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL };
2501 
2502     if (objc < 2 || objc % 2 != 0) {
2503 	Tcl_WrongNumArgs(interp, 1, objv,
2504 		"?-maxlen len? ?-wrapchar char? data");
2505 	return TCL_ERROR;
2506     }
2507     for (i = 1; i < objc - 1; i += 2) {
2508 	if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2509 		TCL_EXACT, &index) != TCL_OK) {
2510 	    return TCL_ERROR;
2511 	}
2512 	switch (index) {
2513 	case OPT_MAXLEN:
2514 	    if (Tcl_GetIntFromObj(interp, objv[i + 1], &maxlen) != TCL_OK) {
2515 		return TCL_ERROR;
2516 	    }
2517 	    if (maxlen < 0) {
2518 		Tcl_SetObjResult(interp, Tcl_NewStringObj(
2519 			"line length out of range", -1));
2520 		Tcl_SetErrorCode(interp, "TCL", "BINARY", "ENCODE",
2521 			"LINE_LENGTH", NULL);
2522 		return TCL_ERROR;
2523 	    }
2524 	    break;
2525 	case OPT_WRAPCHAR:
2526 	    purewrap = TclIsPureByteArray(objv[i + 1]);
2527 	    if (purewrap) {
2528 		wrapchar = (const char *) Tcl_GetByteArrayFromObj(
2529 			objv[i + 1], &wrapcharlen);
2530 	    } else {
2531 		wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen);
2532 	    }
2533 	    break;
2534 	}
2535     }
2536     if (wrapcharlen == 0) {
2537 	maxlen = 0;
2538     }
2539 
2540     resultObj = Tcl_NewObj();
2541     data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count);
2542     if (count > 0) {
2543 	unsigned char *cursor = NULL;
2544 
2545 	size = (((count * 4) / 3) + 3) & ~3;	/* ensure 4 byte chunks */
2546 	if (maxlen > 0 && size > maxlen) {
2547 	    int adjusted = size + (wrapcharlen * (size / maxlen));
2548 
2549 	    if (size % maxlen == 0) {
2550 		adjusted -= wrapcharlen;
2551 	    }
2552 	    size = adjusted;
2553 
2554 	    if (purewrap == 0) {
2555 		/* Wrapchar is (possibly) non-byte, so build result as
2556 		 * general string, not bytearray */
2557 		Tcl_SetObjLength(resultObj, size);
2558 		cursor = (unsigned char *) TclGetString(resultObj);
2559 	    }
2560 	}
2561 	if (cursor == NULL) {
2562 	    cursor = Tcl_SetByteArrayLength(resultObj, size);
2563 	}
2564 	limit = cursor + size;
2565 	for (offset = 0; offset < count; offset += 3) {
2566 	    unsigned char d[3] = {0, 0, 0};
2567 
2568 	    for (i = 0; i < 3 && offset + i < count; ++i) {
2569 		d[i] = data[offset + i];
2570 	    }
2571 	    OUTPUT(B64Digits[d[0] >> 2]);
2572 	    OUTPUT(B64Digits[((d[0] & 0x03) << 4) | (d[1] >> 4)]);
2573 	    if (offset + 1 < count) {
2574 		OUTPUT(B64Digits[((d[1] & 0x0F) << 2) | (d[2] >> 6)]);
2575 	    } else {
2576 		OUTPUT(B64Digits[64]);
2577 	    }
2578 	    if (offset+2 < count) {
2579 		OUTPUT(B64Digits[d[2] & 0x3F]);
2580 	    } else {
2581 		OUTPUT(B64Digits[64]);
2582 	    }
2583 	}
2584     }
2585     Tcl_SetObjResult(interp, resultObj);
2586     return TCL_OK;
2587 }
2588 #undef OUTPUT
2589 
2590 /*
2591  *----------------------------------------------------------------------
2592  *
2593  * BinaryEncodeUu --
2594  *
2595  *	This implements the uuencode binary encoding. Input is broken into 6
2596  *	bit chunks and a lookup table is used to turn these values into output
2597  *	characters. This differs from the generic code above in that line
2598  *	lengths are also encoded.
2599  *
2600  * Results:
2601  *	Interp result set to an encoded byte array object
2602  *
2603  * Side effects:
2604  *	None
2605  *
2606  *----------------------------------------------------------------------
2607  */
2608 
2609 static int
BinaryEncodeUu(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2610 BinaryEncodeUu(
2611     ClientData clientData,
2612     Tcl_Interp *interp,
2613     int objc,
2614     Tcl_Obj *const objv[])
2615 {
2616     Tcl_Obj *resultObj;
2617     unsigned char *data, *start, *cursor;
2618     int offset, count, rawLength, n, i, j, bits, index;
2619     int lineLength = 61;
2620     const unsigned char SingleNewline[] = { UCHAR('\n') };
2621     const unsigned char *wrapchar = SingleNewline;
2622     int wrapcharlen = sizeof(SingleNewline);
2623     enum { OPT_MAXLEN, OPT_WRAPCHAR };
2624     static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL };
2625 
2626     if (objc < 2 || objc % 2 != 0) {
2627 	Tcl_WrongNumArgs(interp, 1, objv,
2628 		"?-maxlen len? ?-wrapchar char? data");
2629 	return TCL_ERROR;
2630     }
2631     for (i = 1; i < objc - 1; i += 2) {
2632 	if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2633 		TCL_EXACT, &index) != TCL_OK) {
2634 	    return TCL_ERROR;
2635 	}
2636 	switch (index) {
2637 	case OPT_MAXLEN:
2638 	    if (Tcl_GetIntFromObj(interp, objv[i + 1],
2639 		    &lineLength) != TCL_OK) {
2640 		return TCL_ERROR;
2641 	    }
2642 	    if (lineLength < 5 || lineLength > 85) {
2643 		Tcl_SetObjResult(interp, Tcl_NewStringObj(
2644 			"line length out of range", -1));
2645 		Tcl_SetErrorCode(interp, "TCL", "BINARY", "ENCODE",
2646 			"LINE_LENGTH", NULL);
2647 		return TCL_ERROR;
2648 	    }
2649 	    lineLength = ((lineLength - 1) & -4) + 1; /* 5, 9, 13 ... */
2650 	    break;
2651 	case OPT_WRAPCHAR:
2652 	    wrapchar = (const unsigned char *) TclGetStringFromObj(
2653 		    objv[i + 1], &wrapcharlen);
2654 	    {
2655 		const unsigned char *p = wrapchar;
2656 		int numBytes = wrapcharlen;
2657 
2658 		while (numBytes) {
2659 		    switch (*p) {
2660 			case '\t':
2661 			case '\v':
2662 			case '\f':
2663 			case '\r':
2664 			    p++; numBytes--;
2665 			    continue;
2666 			case '\n':
2667 			    numBytes--;
2668 			    break;
2669 			default:
2670 			badwrap:
2671 			    Tcl_SetObjResult(interp, Tcl_NewStringObj(
2672 				    "invalid wrapchar; will defeat decoding",
2673 				    -1));
2674 			    Tcl_SetErrorCode(interp, "TCL", "BINARY",
2675 				    "ENCODE", "WRAPCHAR", NULL);
2676 			    return TCL_ERROR;
2677 		    }
2678 		}
2679 		if (numBytes) {
2680 		    goto badwrap;
2681 		}
2682 	    }
2683 	    break;
2684 	}
2685     }
2686 
2687     /*
2688      * Allocate the buffer. This is a little bit too long, but is "good
2689      * enough".
2690      */
2691 
2692     TclNewObj(resultObj);
2693     offset = 0;
2694     data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count);
2695     rawLength = (lineLength - 1) * 3 / 4;
2696     start = cursor = Tcl_SetByteArrayLength(resultObj,
2697 	    (lineLength + wrapcharlen) *
2698 	    ((count + (rawLength - 1)) / rawLength));
2699     n = bits = 0;
2700 
2701     /*
2702      * Encode the data. Each output line first has the length of raw data
2703      * encoded by the output line described in it by one encoded byte, then
2704      * the encoded data follows (encoding each 6 bits as one character).
2705      * Encoded lines are always terminated by a newline.
2706      */
2707 
2708     while (offset < count) {
2709 	int lineLen = count - offset;
2710 
2711 	if (lineLen > rawLength) {
2712 	    lineLen = rawLength;
2713 	}
2714 	*cursor++ = UueDigits[lineLen];
2715 	for (i = 0 ; i < lineLen ; i++) {
2716 	    n <<= 8;
2717 	    n |= data[offset++];
2718 	    for (bits += 8; bits > 6 ; bits -= 6) {
2719 		*cursor++ = UueDigits[(n >> (bits - 6)) & 0x3F];
2720 	    }
2721 	}
2722 	if (bits > 0) {
2723 	    n <<= 8;
2724 	    *cursor++ = UueDigits[(n >> (bits + 2)) & 0x3F];
2725 	    bits = 0;
2726 	}
2727 	for (j = 0 ; j < wrapcharlen ; ++j) {
2728 	    *cursor++ = wrapchar[j];
2729 	}
2730     }
2731 
2732     /*
2733      * Fix the length of the output bytearray.
2734      */
2735 
2736     Tcl_SetByteArrayLength(resultObj, cursor - start);
2737     Tcl_SetObjResult(interp, resultObj);
2738     return TCL_OK;
2739 }
2740 
2741 /*
2742  *----------------------------------------------------------------------
2743  *
2744  * BinaryDecodeUu --
2745  *
2746  *	Decode a uuencoded string.
2747  *
2748  * Results:
2749  *	Interp result set to an byte array object
2750  *
2751  * Side effects:
2752  *	None
2753  *
2754  *----------------------------------------------------------------------
2755  */
2756 
2757 static int
BinaryDecodeUu(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2758 BinaryDecodeUu(
2759     ClientData clientData,
2760     Tcl_Interp *interp,
2761     int objc,
2762     Tcl_Obj *const objv[])
2763 {
2764     Tcl_Obj *resultObj = NULL;
2765     unsigned char *data, *datastart, *dataend;
2766     unsigned char *begin, *cursor;
2767     int i, index, size, pure, count = 0, strict = 0, lineLen;
2768     unsigned char c;
2769     Tcl_UniChar ch = 0;
2770     enum { OPT_STRICT };
2771     static const char *const optStrings[] = { "-strict", NULL };
2772 
2773     if (objc < 2 || objc > 3) {
2774 	Tcl_WrongNumArgs(interp, 1, objv, "?options? data");
2775 	return TCL_ERROR;
2776     }
2777     for (i = 1; i < objc - 1; ++i) {
2778 	if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2779 		TCL_EXACT, &index) != TCL_OK) {
2780 	    return TCL_ERROR;
2781 	}
2782 	switch (index) {
2783 	case OPT_STRICT:
2784 	    strict = 1;
2785 	    break;
2786 	}
2787     }
2788 
2789     TclNewObj(resultObj);
2790     pure = TclIsPureByteArray(objv[objc - 1]);
2791     datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
2792 	    : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
2793     dataend = data + count;
2794     size = ((count + 3) & ~3) * 3 / 4;
2795     begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
2796     lineLen = -1;
2797 
2798     /*
2799      * The decoding loop. First, we get the length of line (strictly, the
2800      * number of data bytes we expect to generate from the line) we're
2801      * processing this time round if it is not already known (i.e., when the
2802      * lineLen variable is set to the magic value, -1).
2803      */
2804 
2805     while (data < dataend) {
2806 	char d[4] = {0, 0, 0, 0};
2807 
2808 	if (lineLen < 0) {
2809 	    c = *data++;
2810 	    if (c < 32 || c > 96) {
2811 		if (strict || !TclIsSpaceProc(c)) {
2812 		    goto badUu;
2813 		}
2814 		i--;
2815 		continue;
2816 	    }
2817 	    lineLen = (c - 32) & 0x3F;
2818 	}
2819 
2820 	/*
2821 	 * Now we read a four-character grouping.
2822 	 */
2823 
2824 	for (i = 0 ; i < 4 ; i++) {
2825 	    if (data < dataend) {
2826 		d[i] = c = *data++;
2827 		if (c < 32 || c > 96) {
2828 		    if (strict) {
2829 			if (!TclIsSpaceProc(c)) {
2830 			    goto badUu;
2831 			} else if (c == '\n') {
2832 			    goto shortUu;
2833 			}
2834 		    }
2835 		    i--;
2836 		    continue;
2837 		}
2838 	    }
2839 	}
2840 
2841 	/*
2842 	 * Translate that grouping into (up to) three binary bytes output.
2843 	 */
2844 
2845 	if (lineLen > 0) {
2846 	    *cursor++ = (((d[0] - 0x20) & 0x3F) << 2)
2847 		    | (((d[1] - 0x20) & 0x3F) >> 4);
2848 	    if (--lineLen > 0) {
2849 		*cursor++ = (((d[1] - 0x20) & 0x3F) << 4)
2850 			| (((d[2] - 0x20) & 0x3F) >> 2);
2851 		if (--lineLen > 0) {
2852 		    *cursor++ = (((d[2] - 0x20) & 0x3F) << 6)
2853 			    | (((d[3] - 0x20) & 0x3F));
2854 		    lineLen--;
2855 		}
2856 	    }
2857 	}
2858 
2859 	/*
2860 	 * If we've reached the end of the line, skip until we process a
2861 	 * newline.
2862 	 */
2863 
2864 	if (lineLen == 0 && data < dataend) {
2865 	    lineLen = -1;
2866 	    do {
2867 		c = *data++;
2868 		if (c == '\n') {
2869 		    break;
2870 		} else if (c >= 32 && c <= 96) {
2871 		    data--;
2872 		    break;
2873 		} else if (strict || !TclIsSpaceProc(c)) {
2874 		    goto badUu;
2875 		}
2876 	    } while (data < dataend);
2877 	}
2878     }
2879 
2880     /*
2881      * Sanity check, clean up and finish.
2882      */
2883 
2884     if (lineLen > 0 && strict) {
2885 	goto shortUu;
2886     }
2887     Tcl_SetByteArrayLength(resultObj, cursor - begin);
2888     Tcl_SetObjResult(interp, resultObj);
2889     return TCL_OK;
2890 
2891   shortUu:
2892     Tcl_SetObjResult(interp, Tcl_ObjPrintf("short uuencode data"));
2893     Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "SHORT", NULL);
2894     TclDecrRefCount(resultObj);
2895     return TCL_ERROR;
2896 
2897   badUu:
2898     if (pure) {
2899 	ch = c;
2900     } else {
2901 	TclUtfToUniChar((const char *)(data - 1), &ch);
2902     }
2903     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
2904 	    "invalid uuencode character \"%c\" at position %d",
2905 	    ch, (int) (data - datastart - 1)));
2906     Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
2907     TclDecrRefCount(resultObj);
2908     return TCL_ERROR;
2909 }
2910 
2911 /*
2912  *----------------------------------------------------------------------
2913  *
2914  * BinaryDecode64 --
2915  *
2916  *	Decode a base64 encoded string.
2917  *
2918  * Results:
2919  *	Interp result set to an byte array object
2920  *
2921  * Side effects:
2922  *	None
2923  *
2924  *----------------------------------------------------------------------
2925  */
2926 
2927 static int
BinaryDecode64(ClientData clientData,Tcl_Interp * interp,int objc,Tcl_Obj * const objv[])2928 BinaryDecode64(
2929     ClientData clientData,
2930     Tcl_Interp *interp,
2931     int objc,
2932     Tcl_Obj *const objv[])
2933 {
2934     Tcl_Obj *resultObj = NULL;
2935     unsigned char *data, *datastart, *dataend, c = '\0';
2936     unsigned char *begin = NULL;
2937     unsigned char *cursor = NULL;
2938     int pure, strict = 0;
2939     int i, index, size, cut = 0, count = 0;
2940     Tcl_UniChar ch = 0;
2941     enum { OPT_STRICT };
2942     static const char *const optStrings[] = { "-strict", NULL };
2943 
2944     if (objc < 2 || objc > 3) {
2945 	Tcl_WrongNumArgs(interp, 1, objv, "?options? data");
2946 	return TCL_ERROR;
2947     }
2948     for (i = 1; i < objc - 1; ++i) {
2949 	if (Tcl_GetIndexFromObj(interp, objv[i], optStrings, "option",
2950 		TCL_EXACT, &index) != TCL_OK) {
2951 	    return TCL_ERROR;
2952 	}
2953 	switch (index) {
2954 	case OPT_STRICT:
2955 	    strict = 1;
2956 	    break;
2957 	}
2958     }
2959 
2960     TclNewObj(resultObj);
2961     pure = TclIsPureByteArray(objv[objc - 1]);
2962     datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
2963 	    : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
2964     dataend = data + count;
2965     size = ((count + 3) & ~3) * 3 / 4;
2966     begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
2967     while (data < dataend) {
2968 	unsigned long value = 0;
2969 
2970 	/*
2971 	 * Decode the current block. Each base64 block consists of four input
2972 	 * characters A-Z, a-z, 0-9, +, or /. Each character supplies six bits
2973 	 * of output data, so each block's output is 24 bits (three bytes) in
2974 	 * length. The final block can be shorter by one or two bytes, denoted
2975 	 * by the input ending with one or two ='s, respectively.
2976 	 */
2977 
2978 	for (i = 0; i < 4; i++) {
2979 	    /*
2980 	     * Get the next input character. At end of input, pad with at most
2981 	     * two ='s. If more than two ='s would be needed, instead discard
2982 	     * the block read thus far.
2983 	     */
2984 
2985 	    if (data < dataend) {
2986 		c = *data++;
2987 	    } else if (i > 1) {
2988 		c = '=';
2989 	    } else {
2990 		if (strict && i <= 1) {
2991 		    /*
2992 		     * Single resp. unfulfilled char (each 4th next single
2993 		     * char) is rather bad64 error case in strict mode.
2994 		     */
2995 
2996 		    goto bad64;
2997 		}
2998 		cut += 3;
2999 		break;
3000 	    }
3001 
3002 	    /*
3003 	     * Load the character into the block value. Handle ='s specially
3004 	     * because they're only valid as the last character or two of the
3005 	     * final block of input. Unless strict mode is enabled, skip any
3006 	     * input whitespace characters.
3007 	     */
3008 
3009 	    if (cut) {
3010 		if (c == '=' && i > 1) {
3011 		    value <<= 6;
3012 		    cut++;
3013 		} else if (!strict) {
3014 		    i--;
3015 		} else {
3016 		    goto bad64;
3017 		}
3018 	    } else if (c >= 'A' && c <= 'Z') {
3019 		value = (value << 6) | ((c - 'A') & 0x3F);
3020 	    } else if (c >= 'a' && c <= 'z') {
3021 		value = (value << 6) | ((c - 'a' + 26) & 0x3F);
3022 	    } else if (c >= '0' && c <= '9') {
3023 		value = (value << 6) | ((c - '0' + 52) & 0x3F);
3024 	    } else if (c == '+') {
3025 		value = (value << 6) | 0x3E;
3026 	    } else if (c == '/') {
3027 		value = (value << 6) | 0x3F;
3028 	    } else if (c == '=' && (!strict || i > 1)) {
3029 		/*
3030 		 * "=" and "a=" is rather bad64 error case in strict mode.
3031 		 */
3032 
3033 		value <<= 6;
3034 		if (i) {
3035 		    cut++;
3036 		}
3037 	    } else if (strict) {
3038 		goto bad64;
3039 	    } else {
3040 		i--;
3041 	    }
3042 	}
3043 	*cursor++ = UCHAR((value >> 16) & 0xFF);
3044 	*cursor++ = UCHAR((value >> 8) & 0xFF);
3045 	*cursor++ = UCHAR(value & 0xFF);
3046 
3047 	/*
3048 	 * Since = is only valid within the final block, if it was encountered
3049 	 * but there are still more input characters, confirm that strict mode
3050 	 * is off and all subsequent characters are whitespace.
3051 	 */
3052 
3053 	if (cut && data < dataend) {
3054 	    if (strict) {
3055 		goto bad64;
3056 	    }
3057 	}
3058     }
3059     Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
3060     Tcl_SetObjResult(interp, resultObj);
3061     return TCL_OK;
3062 
3063   bad64:
3064     if (pure) {
3065 	ch = c;
3066     } else {
3067 	/* The decoder is byte-oriented. If we saw a byte that's not a
3068 	 * valid member of the base64 alphabet, it could be the lead byte
3069 	 * of a multi-byte character. */
3070 
3071 	/* Safe because we know data is NUL-terminated */
3072 	TclUtfToUniChar((const char *)(data - 1), &ch);
3073     }
3074 
3075     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
3076 	    "invalid base64 character \"%c\" at position %d", ch,
3077 	    (int) (data - datastart - 1)));
3078     Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
3079     TclDecrRefCount(resultObj);
3080     return TCL_ERROR;
3081 }
3082 
3083 /*
3084  * Local Variables:
3085  * mode: c
3086  * c-basic-offset: 4
3087  * fill-column: 78
3088  * End:
3089  */
3090