1 /*
2 ** Copyright (c) 2006 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 **
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 ** drh@hwaci.com
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** A Blob is a variable-length container for arbitrary string
19 ** or binary data.
20 */
21 #include "config.h"
22 #if defined(FOSSIL_ENABLE_MINIZ)
23 # define MINIZ_HEADER_FILE_ONLY
24 # include "miniz.c"
25 #else
26 # include <zlib.h>
27 #endif
28 #include "blob.h"
29 #if defined(_WIN32)
30 #include <fcntl.h>
31 #include <io.h>
32 #endif
33
34 #if INTERFACE
35 /*
36 ** A Blob can hold a string or a binary object of arbitrary size. The
37 ** size changes as necessary.
**
** The order of the fields matters: BLOB_INITIALIZER fills them in
** positionally.
38 */
39 struct Blob {
40 unsigned int nUsed; /* Number of bytes of aData[] currently in use */
41 unsigned int nAlloc; /* Number of bytes allocated for aData[] */
42 unsigned int iCursor; /* Offset of the next byte of input to parse */
43 unsigned int blobFlags; /* One or more BLOBFLAG_* bits */
44 char *aData; /* Where the information is stored. 0 after a reset */
45 void (*xRealloc)(Blob*, unsigned int); /* Resize aData[]; a size of 0 releases the blob */
46 };
47
48 /*
49 ** Allowed values for Blob.blobFlags
50 */
51 #define BLOBFLAG_NotSQL 0x0001 /* Set by blob_appendf(); makes blob_sql_text() panic */
52
53 /*
54 ** The current size of a Blob in bytes (the unsigned nUsed field).
55 */
56 #define blob_size(X) ((X)->nUsed)
57
58 /*
59 ** The buffer holding the blob data. This is 0 for a blob that has
** been reset by blobReallocMalloc().
60 */
61 #define blob_buffer(X) ((X)->aData)
62
63 /*
64 ** Values for the "whence" parameter of blob_seek():
**
**    BLOB_SEEK_SET   The offset is absolute
**    BLOB_SEEK_CUR   The offset is relative to the current cursor
65 */
66 #define BLOB_SEEK_SET 1
67 #define BLOB_SEEK_CUR 2
68
69 #endif /* INTERFACE */
70
71 /*
72 ** Assert that a blob is initialized, which here means that its xRealloc
** pointer is one of the two reallocators defined in this file.
73 */
74 #define blob_is_init(x) \
75 assert((x)->xRealloc==blobReallocMalloc || (x)->xRealloc==blobReallocStatic)
76
77 /*
78 ** Assert that a blob does not contain malloced memory.
79 **
80 ** This might fail if we are unlucky and x is uninitialized. For that
81 ** reason it should only be used locally for debugging. Leave it turned
82 ** off for production, in which case the macro expands to nothing.
83 */
84 #if 0 /* Enable for debugging only */
85 #define assert_blob_is_reset(x) assert(blob_is_reset(x))
86 #else
87 #define assert_blob_is_reset(x)
88 #endif
89
90
91
/*
** Locale-independent replacement for isspace().  The built-in routine
** does not work for some international character sets, so recognize
** exactly the six ASCII whitespace characters here.
*/
int fossil_isspace(char c){
  if( c==' ' ) return 1;
  /* Everything from '\t' through '\r': tab, newline, vertical tab,
  ** form feed and carriage return */
  return c>='\t' && c<='\r';
}
99
/*
** Other replacements for ctype.h functions.  They compare against
** ASCII ranges only.
**
** Return true if c is a lower-case ASCII letter.
*/
int fossil_islower(char c){
  return 'a'<=c && c<='z';
}
/* Return true if c is an upper-case ASCII letter. */
int fossil_isupper(char c){
  return 'A'<=c && c<='Z';
}
/* Return true if c is an ASCII decimal digit. */
int fossil_isdigit(char c){
  return '0'<=c && c<='9';
}
/*
** Convert an upper-case ASCII letter to lower case.  Any other
** character is returned unchanged.
*/
int fossil_tolower(char c){
  if( c>='A' && c<='Z' ){
    return c + ('a' - 'A');
  }
  return c;
}
/*
** Convert a lower-case ASCII letter to upper case.  Any other
** character is returned unchanged.
*/
int fossil_toupper(char c){
  if( c>='a' && c<='z' ){
    return c - ('a' - 'A');
  }
  return c;
}
/* Return true if c is an ASCII letter of either case. */
int fossil_isalpha(char c){
  if( c>='A' && c<='Z' ) return 1;
  return c>='a' && c<='z';
}
/* Return true if c is an ASCII letter or an ASCII decimal digit. */
int fossil_isalnum(char c){
  if( c>='0' && c<='9' ) return 1;
  if( c>='A' && c<='Z' ) return 1;
  return c>='a' && c<='z';
}
118
/* Return true if and only if every character of the nul-terminated
** string z is an ASCII letter, an ASCII digit, '_' or '-'.  The empty
** string is accepted.
**
** A NULL pointer is not a string and is rejected.  (The previous
** implementation dereferenced NULL in that case.)
*/
int fossil_no_strange_characters(const char *z){
  if( z==0 ) return 0;
  while( fossil_isalnum(z[0]) || z[0]=='_' || z[0]=='-' ) z++;
  /* The scan stopped either at the terminator or at a strange character */
  return z[0]==0;
}
126
127
128 /*
129 ** COMMAND: test-isspace
130 **
131 ** Verify that the fossil_isspace() routine is working correctly by
132 ** testing it on all possible inputs.
133 */
void isspace_cmd(void){
  int ch;
  for(ch=0; ch<256; ch++){
    int expectSpace;   /* True if ch is one of the six whitespace codes */
    switch( ch ){
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
        expectSpace = 1;
        break;
      default:
        expectSpace = 0;
        break;
    }
    if( expectSpace ){
      assert( fossil_isspace((char)ch) );
    }else{
      assert( !fossil_isspace((char)ch) );
    }
  }
  fossil_print("All 256 characters OK\n");
}
146
/*
** Out-of-memory handler for blob operations.  Write a message to
** stderr and then call fossil_exit(1).
*/
static void blob_panic(void){
  fputs("out of memory\n", stderr);
  fossil_exit(1);
}
156
157 /*
158 ** A reallocation function that assumes that aData came from malloc().
159 ** This function attempts to resize the buffer of the blob to hold
160 ** newSize bytes.
161 **
162 ** No attempt is made to recover from an out-of-memory error.
163 ** If an OOM error occurs, an error message is printed on stderr
164 ** and the program exits.
165 */
blobReallocMalloc(Blob * pBlob,unsigned int newSize)166 void blobReallocMalloc(Blob *pBlob, unsigned int newSize){
167 if( newSize==0 ){
168 free(pBlob->aData);
169 pBlob->aData = 0;
170 pBlob->nAlloc = 0;
171 pBlob->nUsed = 0;
172 pBlob->iCursor = 0;
173 pBlob->blobFlags = 0;
174 }else if( newSize>pBlob->nAlloc || newSize<pBlob->nAlloc-4000 ){
175 char *pNew = fossil_realloc(pBlob->aData, newSize);
176 pBlob->aData = pNew;
177 pBlob->nAlloc = newSize;
178 if( pBlob->nUsed>pBlob->nAlloc ){
179 pBlob->nUsed = pBlob->nAlloc;
180 }
181 }
182 }
183
184 /*
185 ** An initializer for Blobs: no content, no buffer, and the malloc-based
** reallocator. The values are positional, so they must follow the field
** order of struct Blob. empty_blob is a ready-made constant holding
** this value.
186 */
187 #if INTERFACE
188 #define BLOB_INITIALIZER {0,0,0,0,0,blobReallocMalloc}
189 #endif
190 const Blob empty_blob = BLOB_INITIALIZER;
191
192 /*
193 ** A reallocation function for when the initial string is in unmanaged
194 ** space. Copy the string to memory obtained from malloc().
195 */
blobReallocStatic(Blob * pBlob,unsigned int newSize)196 static void blobReallocStatic(Blob *pBlob, unsigned int newSize){
197 if( newSize==0 ){
198 *pBlob = empty_blob;
199 }else{
200 char *pNew = fossil_malloc( newSize );
201 if( pBlob->nUsed>newSize ) pBlob->nUsed = newSize;
202 memcpy(pNew, pBlob->aData, pBlob->nUsed);
203 pBlob->aData = pNew;
204 pBlob->xRealloc = blobReallocMalloc;
205 pBlob->nAlloc = newSize;
206 }
207 }
208
209 /*
210 ** Reset a blob to be an empty container.
211 */
blob_reset(Blob * pBlob)212 void blob_reset(Blob *pBlob){
213 blob_is_init(pBlob);
214 pBlob->xRealloc(pBlob, 0);
215 }
216
217
218 /*
219 ** Return true if the blob has been zeroed - in other words if it contains
220 ** no malloced memory. This only works reliably if the blob has been
221 ** initialized - it can return a false negative on an uninitialized blob.
222 */
blob_is_reset(Blob * pBlob)223 int blob_is_reset(Blob *pBlob){
224 if( pBlob==0 ) return 1;
225 if( pBlob->nUsed ) return 0;
226 if( pBlob->xRealloc==blobReallocMalloc && pBlob->nAlloc ) return 0;
227 return 1;
228 }
229
230 /*
231 ** Initialize a blob to a string or byte-array constant of a specified length.
232 ** Any prior data in the blob is discarded.
233 */
blob_init(Blob * pBlob,const char * zData,int size)234 void blob_init(Blob *pBlob, const char *zData, int size){
235 assert_blob_is_reset(pBlob);
236 if( zData==0 ){
237 *pBlob = empty_blob;
238 }else{
239 if( size<=0 ) size = strlen(zData);
240 pBlob->nUsed = pBlob->nAlloc = size;
241 pBlob->aData = (char*)zData;
242 pBlob->iCursor = 0;
243 pBlob->blobFlags = 0;
244 pBlob->xRealloc = blobReallocStatic;
245 }
246 }
247
248 /*
249 ** Initialize a blob to a nul-terminated string.
250 ** Any prior data in the blob is discarded.
251 */
blob_set(Blob * pBlob,const char * zStr)252 void blob_set(Blob *pBlob, const char *zStr){
253 blob_init(pBlob, zStr, -1);
254 }
255
256 /*
257 ** Initialize a blob to a nul-terminated string obtained from fossil_malloc().
258 ** The blob will take responsibility for freeing the string.
259 */
blob_set_dynamic(Blob * pBlob,char * zStr)260 void blob_set_dynamic(Blob *pBlob, char *zStr){
261 blob_init(pBlob, zStr, -1);
262 pBlob->xRealloc = blobReallocMalloc;
263 }
264
265 /*
266 ** Initialize a blob to an empty string.
267 */
blob_zero(Blob * pBlob)268 void blob_zero(Blob *pBlob){
269 static const char zEmpty[] = "";
270 assert_blob_is_reset(pBlob);
271 pBlob->nUsed = 0;
272 pBlob->nAlloc = 1;
273 pBlob->aData = (char*)zEmpty;
274 pBlob->iCursor = 0;
275 pBlob->blobFlags = 0;
276 pBlob->xRealloc = blobReallocStatic;
277 }
278
279 /*
280 ** Append text or data to the end of a blob. Or, if pBlob==NULL, send
281 ** the text to standard output in terminal mode, or to standard CGI output
282 ** in CGI mode.
283 **
284 ** If nData<0 then output all of aData up to the first 0x00 byte.
285 **
286 ** Use the blob_append() routine in all application code. The blob_append()
287 ** routine is faster, but blob_append_full() handles all the corner cases.
288 ** The blob_append() routine automatically calls blob_append_full() if
289 ** necessary.
290 */
blob_append_full(Blob * pBlob,const char * aData,int nData)291 static void blob_append_full(Blob *pBlob, const char *aData, int nData){
292 sqlite3_int64 nNew;
293 /* assert( aData!=0 || nData==0 ); // omitted for speed */
294 /* blob_is_init(pBlob); // omitted for speed */
295 if( nData<0 ) nData = strlen(aData);
296 if( nData==0 ) return;
297 if( pBlob==0 ){
298 if( g.cgiOutput ){
299 pBlob = cgi_output_blob();
300 }else{
301 fossil_puts(aData, 0, nData);
302 return;
303 }
304 }
305 nNew = pBlob->nUsed;
306 nNew += nData;
307 if( nNew >= pBlob->nAlloc ){
308 nNew += pBlob->nAlloc;
309 nNew += 100;
310 if( nNew>=0x7fff0000 ){
311 blob_panic();
312 }
313 pBlob->xRealloc(pBlob, (int)nNew);
314 if( pBlob->nUsed + nData >= pBlob->nAlloc ){
315 blob_panic();
316 }
317 }
318 memcpy(&pBlob->aData[pBlob->nUsed], aData, nData);
319 pBlob->nUsed += nData;
320 pBlob->aData[pBlob->nUsed] = 0; /* Blobs are always nul-terminated */
321 }
/*
** Append nData bytes of aData to pBlob.  This is the fast path: when
** the data plus a terminator fits in the existing allocation it is
** copied in place.  Every other case (nData<=0, pBlob==0, or not
** enough room) is handed to blob_append_full().
*/
void blob_append(Blob *pBlob, const char *aData, int nData){
  unsigned int iStart;      /* Offset at which the new data begins */
  if( nData<=0 || pBlob==0 || pBlob->nUsed + nData >= pBlob->nAlloc ){
    blob_append_full(pBlob, aData, nData);
    return;
  }
  iStart = pBlob->nUsed;
  pBlob->nUsed = iStart + nData;
  pBlob->aData[pBlob->nUsed] = 0;
  memcpy(pBlob->aData + iStart, aData, nData);
}
334
335 /*
336 ** Append a string literal to a blob. The length comes from sizeof(),
** so STR must be a string literal or character array and not a
** character pointer.
337 */
338 #if INTERFACE
339 #define blob_append_string(BLOB,STR) blob_append(BLOB,STR,sizeof(STR)-1)
340 #endif
341
342 /*
343 ** Append a single character to the blob. If pBlob is zero then the
344 ** character is written directly to stdout.
345 */
blob_append_char(Blob * pBlob,char c)346 void blob_append_char(Blob *pBlob, char c){
347 if( pBlob==0 || pBlob->nUsed+1 >= pBlob->nAlloc ){
348 blob_append_full(pBlob, &c, 1);
349 }else{
350 pBlob->aData[pBlob->nUsed++] = c;
351 }
352 }
353
354 /*
355 ** Copy a blob. pTo is reinitialized to be a copy of pFrom.
356 */
blob_copy(Blob * pTo,Blob * pFrom)357 void blob_copy(Blob *pTo, Blob *pFrom){
358 blob_is_init(pFrom);
359 blob_zero(pTo);
360 blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
361 }
362
363 /*
364 ** Append the second blob onto the end of the first blob and reset the
365 ** second blob. If the first blob (pTo) is NULL, then the content
366 ** of the second blob is written to stdout or to CGI depending on if the
367 ** Fossil is running in terminal or CGI mode.
368 */
blob_append_xfer(Blob * pTo,Blob * pFrom)369 void blob_append_xfer(Blob *pTo, Blob *pFrom){
370 blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
371 blob_reset(pFrom);
372 }
373
374 /*
375 ** Write into pOut, a string literal representation for the first n bytes
376 ** of z[]. The string literal representation is compatible with C, TCL,
377 ** and JSON. Double-quotes are added to both ends. Double-quote and
378 ** backslash characters are escaped.
379 */
blob_append_tcl_literal(Blob * pOut,const char * z,int n)380 void blob_append_tcl_literal(Blob *pOut, const char *z, int n){
381 int i;
382 blob_append_char(pOut, '"');
383 for(i=0; i<n; i++){
384 char c = z[i];
385 switch( c ){
386 case '\r': c = 'r';
387 case '[':
388 case ']':
389 case '$':
390 case '"':
391 case '\\':
392 blob_append_char(pOut, '\\');
393 default:
394 blob_append_char(pOut, c);
395 }
396 }
397 blob_append_char(pOut, '"');
398 }
/*
** Write into pOut a double-quoted JSON string literal for the first n
** bytes of z[].
**
** Backspace, tab, newline, form feed and carriage return become the
** two-character escapes \b \t \n \f \r.  Double-quote and backslash
** are preceded by a backslash.  All remaining bytes below 0x20 become
** \u00XX with lower-case hex digits.  Everything else is copied
** unchanged.
*/
void blob_append_json_literal(Blob *pOut, const char *z, int n){
  static const char zHex[] = "0123456789abcdef";
  int i;
  blob_append_char(pOut, '"');
  for(i=0; i<n; i++){
    char c = z[i];
    char cEsc = 0;      /* Second character of a backslash escape, or 0 */
    switch( c ){
      case '\b':  cEsc = 'b';  break;
      case '\t':  cEsc = 't';  break;
      case '\n':  cEsc = 'n';  break;
      case '\f':  cEsc = 'f';  break;
      case '\r':  cEsc = 'r';  break;
      case '"':
      case '\\':  cEsc = c;    break;
      default:                 break;
    }
    if( cEsc ){
      blob_append_char(pOut, '\\');
      blob_append_char(pOut, cEsc);
    }else if( (unsigned char)c<0x20 ){
      /* Control character without a short escape form */
      blob_append(pOut, c<0x10 ? "\\u000" : "\\u001", 5);
      blob_append_char(pOut, zHex[c & 0x0f]);
    }else{
      blob_append_char(pOut, c);
    }
  }
  blob_append_char(pOut, '"');
}
445
446
447 /*
448 ** Return a pointer to a null-terminated string for a blob.
449 */
blob_str(Blob * p)450 char *blob_str(Blob *p){
451 blob_is_init(p);
452 if( p->nUsed==0 ){
453 blob_append_char(p, 0); /* NOTE: Changes nUsed. */
454 p->nUsed = 0;
455 }
456 if( p->nUsed<p->nAlloc ){
457 p->aData[p->nUsed] = 0;
458 }else{
459 blob_materialize(p);
460 }
461 return p->aData;
462 }
463
464 /*
465 ** Compute the string length of a Blob. If there are embedded
466 ** nul characters, truncate the to blob at the first nul.
467 */
blob_strlen(Blob * p)468 int blob_strlen(Blob *p){
469 char *z = blob_str(p);
470 if( z==0 ) return 0;
471 p->nUsed = (int)strlen(p->aData);
472 return p->nUsed;
473 }
474
475 /*
476 ** Return a pointer to a null-terminated string for a blob that has
477 ** been created using blob_append_sql() and not blob_appendf(). If
478 ** text was ever added using blob_appendf() then throw an error.
479 */
blob_sql_text(Blob * p)480 char *blob_sql_text(Blob *p){
481 blob_is_init(p);
482 if( (p->blobFlags & BLOBFLAG_NotSQL) ){
483 fossil_panic("use of blob_appendf() to construct SQL text");
484 }
485 return blob_str(p);
486 }
487
488
489 /*
490 ** Return a pointer to a null-terminated string for a blob.
491 **
492 ** WARNING: If the blob is ephemeral, it might cause a '\000'
493 ** character to be inserted into the middle of the parent blob.
494 ** Example: Suppose p is a token extracted from some larger
495 ** blob pBig using blob_token(). If you call this routine on p,
496 ** then a '\000' character will be inserted in the middle of
497 ** pBig in order to cause p to be nul-terminated. If pBig
498 ** should not be modified, then use blob_str() instead of this
499 ** routine. blob_str() will make a copy of the p if necessary
500 ** to avoid modifying pBig.
501 */
blob_terminate(Blob * p)502 char *blob_terminate(Blob *p){
503 blob_is_init(p);
504 if( p->nUsed==0 ) return "";
505 p->aData[p->nUsed] = 0;
506 return p->aData;
507 }
508
509 /*
510 ** Compare two blobs. Return negative, zero, or positive if the first
511 ** blob is less then, equal to, or greater than the second.
512 */
blob_compare(Blob * pA,Blob * pB)513 int blob_compare(Blob *pA, Blob *pB){
514 int szA, szB, sz, rc;
515 blob_is_init(pA);
516 blob_is_init(pB);
517 szA = blob_size(pA);
518 szB = blob_size(pB);
519 sz = szA<szB ? szA : szB;
520 rc = memcmp(blob_buffer(pA), blob_buffer(pB), sz);
521 if( rc==0 ){
522 rc = szA - szB;
523 }
524 return rc;
525 }
526
527 /*
528 ** Compare two blobs in constant time and return zero if they are equal.
529 ** Constant time comparison only applies for blobs of the same length.
530 ** If lengths are different, immediately returns 1.
531 */
blob_constant_time_cmp(Blob * pA,Blob * pB)532 int blob_constant_time_cmp(Blob *pA, Blob *pB){
533 int szA, szB, i;
534 unsigned char *buf1, *buf2;
535 unsigned char rc = 0;
536
537 blob_is_init(pA);
538 blob_is_init(pB);
539 szA = blob_size(pA);
540 szB = blob_size(pB);
541 if( szA!=szB || szA==0 ) return 1;
542
543 buf1 = (unsigned char*)blob_buffer(pA);
544 buf2 = (unsigned char*)blob_buffer(pB);
545
546 for( i=0; i<szA; i++ ){
547 rc = rc | (buf1[i] ^ buf2[i]);
548 }
549
550 return rc;
551 }
552
553 /*
554 ** Compare a blob to a string. Return TRUE if they are equal.
555 */
blob_eq_str(Blob * pBlob,const char * z,int n)556 int blob_eq_str(Blob *pBlob, const char *z, int n){
557 Blob t;
558 blob_is_init(pBlob);
559 if( n<=0 ) n = (int)strlen(z);
560 t.aData = (char*)z;
561 t.nUsed = n;
562 t.xRealloc = blobReallocStatic;
563 return blob_compare(pBlob, &t)==0;
564 }
565
566 /*
567 ** This macro compares a blob against a string constant. We use the sizeof()
568 ** operator on the string constant twice, so it really does need to be a
569 ** string literal - not a character pointer. The S"" concatenation in the
** first sizeof() only compiles when S is a string literal.
570 */
571 #if INTERFACE
572 # define blob_eq(B,S) \
573 ((B)->nUsed==sizeof(S"")-1 && memcmp((B)->aData,S,sizeof(S)-1)==0)
574 #endif
575
576
577 /*
578 ** Attempt to resize a blob so that its internal buffer is
579 ** nByte in size. The blob is truncated if necessary.
580 */
blob_resize(Blob * pBlob,unsigned int newSize)581 void blob_resize(Blob *pBlob, unsigned int newSize){
582 pBlob->xRealloc(pBlob, newSize+1);
583 pBlob->nUsed = newSize;
584 pBlob->aData[newSize] = 0;
585 }
586
587 /*
588 ** Ensures that the given blob has at least the given amount of memory
589 ** allocated to it. Does not modify pBlob->nUsed nor will it reduce
590 ** the currently-allocated amount of memory.
591 **
592 ** For semantic compatibility with blob_append_full(), if newSize is
593 ** >=0x7fff000 (~2GB) then this function will trigger blob_panic(). If
594 ** it didn't, it would be possible to bypass that hard-coded limit via
595 ** this function.
596 **
597 ** We've had at least one report:
598 ** https://fossil-scm.org/forum/forumpost/b7bbd28db4
599 ** which implies that this is unconditionally failing on mingw 32-bit
600 ** builds.
601 */
blob_reserve(Blob * pBlob,unsigned int newSize)602 void blob_reserve(Blob *pBlob, unsigned int newSize){
603 if(newSize>=0x7fff0000 ){
604 blob_panic();
605 }else if(newSize>pBlob->nUsed){
606 pBlob->xRealloc(pBlob, newSize);
607 pBlob->aData[newSize] = 0;
608 }
609 }
610
611 /*
612 ** Make sure a blob is nul-terminated and is not a pointer to unmanaged
613 ** space. Return a pointer to the data.
614 */
blob_materialize(Blob * pBlob)615 char *blob_materialize(Blob *pBlob){
616 blob_resize(pBlob, pBlob->nUsed);
617 return pBlob->aData;
618 }
619
620
621 /*
622 ** Call dehttpize on a blob. This causes an ephemeral blob to be
623 ** materialized.
624 */
blob_dehttpize(Blob * pBlob)625 void blob_dehttpize(Blob *pBlob){
626 blob_materialize(pBlob);
627 pBlob->nUsed = dehttpize(pBlob->aData);
628 }
629
630 /*
631 ** Extract N bytes from blob pFrom and use it to initialize blob pTo.
632 ** Return the actual number of bytes extracted.
633 **
634 ** After this call completes, pTo will be an ephemeral blob.
635 */
blob_extract(Blob * pFrom,int N,Blob * pTo)636 int blob_extract(Blob *pFrom, int N, Blob *pTo){
637 blob_is_init(pFrom);
638 assert_blob_is_reset(pTo);
639 if( pFrom->iCursor + N > pFrom->nUsed ){
640 N = pFrom->nUsed - pFrom->iCursor;
641 if( N<=0 ){
642 blob_zero(pTo);
643 return 0;
644 }
645 }
646 pTo->nUsed = N;
647 pTo->nAlloc = N;
648 pTo->aData = &pFrom->aData[pFrom->iCursor];
649 pTo->iCursor = 0;
650 pTo->xRealloc = blobReallocStatic;
651 pFrom->iCursor += N;
652 return N;
653 }
654
655 /*
656 ** Rewind the cursor on a blob back to the beginning.
657 */
blob_rewind(Blob * p)658 void blob_rewind(Blob *p){
659 p->iCursor = 0;
660 }
661
662 /*
663 ** Truncate a blob back to zero length
664 */
blob_truncate(Blob * p,int sz)665 void blob_truncate(Blob *p, int sz){
666 if( sz>=0 && sz<p->nUsed ) p->nUsed = sz;
667 }
668
669 /*
670 ** Seek the cursor in a blob to the indicated offset.
671 */
blob_seek(Blob * p,int offset,int whence)672 int blob_seek(Blob *p, int offset, int whence){
673 if( whence==BLOB_SEEK_SET ){
674 p->iCursor = offset;
675 }else if( whence==BLOB_SEEK_CUR ){
676 p->iCursor += offset;
677 }
678 if( p->iCursor>p->nUsed ){
679 p->iCursor = p->nUsed;
680 }
681 return p->iCursor;
682 }
683
684 /*
685 ** Return the current offset into the blob
686 */
blob_tell(Blob * p)687 int blob_tell(Blob *p){
688 return p->iCursor;
689 }
690
691 /*
692 ** Extract a single line of text from pFrom beginning at the current
693 ** cursor location and use that line of text to initialize pTo.
694 ** pTo will include the terminating \n. Return the number of bytes
695 ** in the line including the \n at the end. 0 is returned at
696 ** end-of-file.
697 **
698 ** The cursor of pFrom is left pointing at the first byte past the
699 ** \n that terminated the line.
700 **
701 ** pTo will be an ephermeral blob. If pFrom changes, it might alter
702 ** pTo as well.
703 */
blob_line(Blob * pFrom,Blob * pTo)704 int blob_line(Blob *pFrom, Blob *pTo){
705 char *aData = pFrom->aData;
706 int n = pFrom->nUsed;
707 int i = pFrom->iCursor;
708
709 while( i<n && aData[i]!='\n' ){ i++; }
710 if( i<n ){
711 assert( aData[i]=='\n' );
712 i++;
713 }
714 blob_extract(pFrom, i-pFrom->iCursor, pTo);
715 return pTo->nUsed;
716 }
717
718 /*
719 ** Trim whitespace off of the end of a blob. Return the number
720 ** of characters remaining.
721 **
722 ** All this does is reduce the length counter. This routine does
723 ** not insert a new zero terminator.
724 */
blob_trim(Blob * p)725 int blob_trim(Blob *p){
726 char *z = p->aData;
727 int n = p->nUsed;
728 while( n>0 && fossil_isspace(z[n-1]) ){ n--; }
729 p->nUsed = n;
730 return n;
731 }
732
733 /*
734 ** Extract a single token from pFrom and use it to initialize pTo.
735 ** Return the number of bytes in the token. If no token is found,
736 ** return 0.
737 **
738 ** A token consists of one or more non-space characters. Leading
739 ** whitespace is ignored.
740 **
741 ** The cursor of pFrom is left pointing at the first character past
742 ** the end of the token.
743 **
744 ** pTo will be an ephermeral blob. If pFrom changes, it might alter
745 ** pTo as well.
746 */
blob_token(Blob * pFrom,Blob * pTo)747 int blob_token(Blob *pFrom, Blob *pTo){
748 char *aData = pFrom->aData;
749 int n = pFrom->nUsed;
750 int i = pFrom->iCursor;
751 while( i<n && fossil_isspace(aData[i]) ){ i++; }
752 pFrom->iCursor = i;
753 while( i<n && !fossil_isspace(aData[i]) ){ i++; }
754 blob_extract(pFrom, i-pFrom->iCursor, pTo);
755 while( i<n && fossil_isspace(aData[i]) ){ i++; }
756 pFrom->iCursor = i;
757 return pTo->nUsed;
758 }
759
760 /*
761 ** Extract a single SQL token from pFrom and use it to initialize pTo.
762 ** Return the number of bytes in the token. If no token is found,
763 ** return 0.
764 **
765 ** An SQL token consists of one or more non-space characters. If the
766 ** first character is ' then the token is terminated by a matching '
767 ** (ignoring double '') or by the end of the string
768 **
769 ** The cursor of pFrom is left pointing at the first character past
770 ** the end of the token.
771 **
772 ** pTo will be an ephermeral blob. If pFrom changes, it might alter
773 ** pTo as well.
774 */
blob_sqltoken(Blob * pFrom,Blob * pTo)775 int blob_sqltoken(Blob *pFrom, Blob *pTo){
776 char *aData = pFrom->aData;
777 int n = pFrom->nUsed;
778 int i = pFrom->iCursor;
779 while( i<n && fossil_isspace(aData[i]) ){ i++; }
780 pFrom->iCursor = i;
781 if( aData[i]=='\'' ){
782 i++;
783 while( i<n ){
784 if( aData[i]=='\'' ){
785 if( aData[++i]!='\'' ) break;
786 }
787 i++;
788 }
789 }else{
790 while( i<n && !fossil_isspace(aData[i]) ){ i++; }
791 }
792 blob_extract(pFrom, i-pFrom->iCursor, pTo);
793 while( i<n && fossil_isspace(aData[i]) ){ i++; }
794 pFrom->iCursor = i;
795 return pTo->nUsed;
796 }
797
798 /*
799 ** Extract everything from the current cursor to the end of the blob
800 ** into a new blob. The new blob is an ephemerial reference to the
801 ** original blob. The cursor of the original blob is unchanged.
802 */
blob_tail(Blob * pFrom,Blob * pTo)803 int blob_tail(Blob *pFrom, Blob *pTo){
804 int iCursor = pFrom->iCursor;
805 blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
806 pFrom->iCursor = iCursor;
807 return pTo->nUsed;
808 }
809
810 /*
811 ** Copy N lines of text from pFrom into pTo. The copy begins at the
812 ** current cursor position of pIn. The pIn cursor is left pointing
813 ** at the first character past the last \n copied.
814 **
815 ** If pTo==NULL then this routine simply skips over N lines.
816 */
blob_copy_lines(Blob * pTo,Blob * pFrom,int N)817 void blob_copy_lines(Blob *pTo, Blob *pFrom, int N){
818 char *z = pFrom->aData;
819 int i = pFrom->iCursor;
820 int n = pFrom->nUsed;
821 int cnt = 0;
822
823 if( N==0 ) return;
824 while( i<n ){
825 if( z[i]=='\n' ){
826 cnt++;
827 if( cnt==N ){
828 i++;
829 break;
830 }
831 }
832 i++;
833 }
834 if( pTo ){
835 blob_append(pTo, &pFrom->aData[pFrom->iCursor], i - pFrom->iCursor);
836 }
837 pFrom->iCursor = i;
838 }
839
840 /*
841 ** Ensure that the text in pBlob ends with '\n'
842 */
blob_add_final_newline(Blob * pBlob)843 void blob_add_final_newline(Blob *pBlob){
844 if( pBlob->nUsed<=0 ) return;
845 if( pBlob->aData[pBlob->nUsed-1]!='\n' ){
846 blob_append_char(pBlob, '\n');
847 }
848 }
849
850 /*
851 ** Return true if the blob contains a valid base16 identifier artifact hash.
852 **
853 ** The value returned is actually one of HNAME_SHA1 OR HNAME_K256 if the
854 ** hash is valid. Both of these are non-zero and therefore "true".
855 ** If the hash is not valid, then HNAME_ERROR is returned, which is zero or
856 ** false.
857 */
blob_is_hname(Blob * pBlob)858 int blob_is_hname(Blob *pBlob){
859 return hname_validate(blob_buffer(pBlob), blob_size(pBlob));
860 }
861
862 /*
863 ** Return true if the blob contains a valid filename
864 */
blob_is_filename(Blob * pBlob)865 int blob_is_filename(Blob *pBlob){
866 return file_is_simple_pathname(blob_str(pBlob), 1);
867 }
868
869 /*
870 ** Return true if the blob contains a valid 32-bit integer. Store
871 ** the integer value in *pValue.
872 */
blob_is_int(Blob * pBlob,int * pValue)873 int blob_is_int(Blob *pBlob, int *pValue){
874 const char *z = blob_buffer(pBlob);
875 int i, n, c, v;
876 n = blob_size(pBlob);
877 v = 0;
878 for(i=0; i<n && (c = z[i])!=0 && c>='0' && c<='9'; i++){
879 v = v*10 + c - '0';
880 }
881 if( i==n ){
882 *pValue = v;
883 return 1;
884 }else{
885 return 0;
886 }
887 }
888
889 /*
890 ** Return true if the blob contains a valid 64-bit integer. Store
891 ** the integer value in *pValue.
892 */
blob_is_int64(Blob * pBlob,sqlite3_int64 * pValue)893 int blob_is_int64(Blob *pBlob, sqlite3_int64 *pValue){
894 const char *z = blob_buffer(pBlob);
895 int i, n, c;
896 sqlite3_int64 v;
897 n = blob_size(pBlob);
898 v = 0;
899 for(i=0; i<n && (c = z[i])!=0 && c>='0' && c<='9'; i++){
900 v = v*10 + c - '0';
901 }
902 if( i==n ){
903 *pValue = v;
904 return 1;
905 }else{
906 return 0;
907 }
908 }
909
910 /*
911 ** Zero or reset an array of Blobs.
912 */
blobarray_zero(Blob * aBlob,int n)913 void blobarray_zero(Blob *aBlob, int n){
914 int i;
915 for(i=0; i<n; i++) blob_zero(&aBlob[i]);
916 }
/*
** Apply blob_reset() to each of the n elements of the array aBlob[].
*/
void blobarray_reset(Blob *aBlob, int n){
  Blob *p = aBlob;
  while( n-- > 0 ){
    blob_reset(p++);
  }
}
921
922 /*
923 ** Parse a blob into space-separated tokens. Store each token in
924 ** an element of the blobarray aToken[]. aToken[] is nToken elements in
925 ** size. Return the number of tokens seen.
926 */
blob_tokenize(Blob * pIn,Blob * aToken,int nToken)927 int blob_tokenize(Blob *pIn, Blob *aToken, int nToken){
928 int i;
929 for(i=0; i<nToken && blob_token(pIn, &aToken[i]); i++){}
930 return i;
931 }
932
933 /*
934 ** Do printf-style string rendering and append the results to a blob. Or
935 ** if pBlob==0, do printf-style string rendering directly to stdout.
936 **
937 ** The blob_appendf() version sets the BLOBFLAG_NotSQL bit in Blob.blobFlags
938 ** whereas blob_append_sql() does not.
939 */
blob_appendf(Blob * pBlob,const char * zFormat,...)940 void blob_appendf(Blob *pBlob, const char *zFormat, ...){
941 va_list ap;
942 va_start(ap, zFormat);
943 vxprintf(pBlob, zFormat, ap);
944 va_end(ap);
945 if( pBlob ) pBlob->blobFlags |= BLOBFLAG_NotSQL;
946 }
/*
** Do printf-style string rendering and append the results to a blob.
** Unlike blob_appendf(), this routine does not set BLOBFLAG_NotSQL.
*/
void blob_append_sql(Blob *pBlob, const char *zFormat, ...){
  va_list args;
  va_start(args, zFormat);
  vxprintf(pBlob, zFormat, args);
  va_end(args);
}
/*
** The va_list form of the printf-style append.  The arguments are
** forwarded to vxprintf() and blobFlags is not touched.
*/
void blob_vappendf(Blob *pBlob, const char *zFormat, va_list ap){
  vxprintf(pBlob, zFormat, ap);
}
956
957 /*
958 ** Initialize a blob to the data on an input channel. Return
959 ** the number of bytes read into the blob. Any prior content
960 ** of the blob is discarded, not freed.
961 */
blob_read_from_channel(Blob * pBlob,FILE * in,int nToRead)962 int blob_read_from_channel(Blob *pBlob, FILE *in, int nToRead){
963 size_t n;
964 blob_zero(pBlob);
965 if( nToRead<0 ){
966 char zBuf[10000];
967 while( !feof(in) ){
968 n = fread(zBuf, 1, sizeof(zBuf), in);
969 if( n>0 ){
970 blob_append(pBlob, zBuf, n);
971 }
972 }
973 }else{
974 blob_resize(pBlob, nToRead);
975 n = fread(blob_buffer(pBlob), 1, nToRead, in);
976 blob_resize(pBlob, n);
977 }
978 return blob_size(pBlob);
979 }
980
981 /*
982 ** Initialize a blob to be the content of a file. If the filename
983 ** is blank or "-" then read from standard input.
984 **
985 ** If zFilename is a symbolic link, behavior depends on the eFType
986 ** parameter:
987 **
988 ** * If eFType is ExtFILE or allow-symlinks is OFF, then the
989 ** pBlob is initialized to the *content* of the object to which
990 ** the zFilename symlink points.
991 **
992 ** * If eFType is RepoFILE and allow-symlinks is ON, then the
993 ** pBlob is initialized to the *name* of the object to which
994 ** the zFilename symlink points.
995 **
996 ** Any prior content of the blob is discarded, not freed.
997 **
998 ** Return the number of bytes read. Calls fossil_fatal() on error (i.e.
999 ** it exit()s and does not return).
1000 */
blob_read_from_file(Blob * pBlob,const char * zFilename,int eFType)1001 sqlite3_int64 blob_read_from_file(
1002 Blob *pBlob, /* The blob to be initialized */
1003 const char *zFilename, /* Extract content from this file */
1004 int eFType /* ExtFILE or RepoFILE - see above */
1005 ){
1006 sqlite3_int64 size, got;
1007 FILE *in;
1008 if( zFilename==0 || zFilename[0]==0
1009 || (zFilename[0]=='-' && zFilename[1]==0) ){
1010 return blob_read_from_channel(pBlob, stdin, -1);
1011 }
1012 if( file_islink(zFilename) ){
1013 return blob_read_link(pBlob, zFilename);
1014 }
1015 size = file_size(zFilename, eFType);
1016 blob_zero(pBlob);
1017 if( size<0 ){
1018 fossil_fatal("no such file: %s", zFilename);
1019 }
1020 if( size==0 ){
1021 return 0;
1022 }
1023 blob_resize(pBlob, size);
1024 in = fossil_fopen(zFilename, "rb");
1025 if( in==0 ){
1026 fossil_fatal("cannot open %s for reading", zFilename);
1027 }
1028 got = fread(blob_buffer(pBlob), 1, size, in);
1029 fclose(in);
1030 if( got<size ){
1031 blob_resize(pBlob, got);
1032 }
1033 return got;
1034 }
1035
1036 /*
1037 ** Reads symlink destination path and puts int into blob.
1038 ** Any prior content of the blob is discarded, not freed.
1039 **
1040 ** Returns length of destination path.
1041 **
1042 ** On windows, zeros blob and returns 0.
1043 */
blob_read_link(Blob * pBlob,const char * zFilename)1044 int blob_read_link(Blob *pBlob, const char *zFilename){
1045 #if !defined(_WIN32)
1046 char zBuf[1024];
1047 ssize_t len = readlink(zFilename, zBuf, 1023);
1048 if( len < 0 ){
1049 fossil_fatal("cannot read symbolic link %s", zFilename);
1050 }
1051 zBuf[len] = 0; /* null-terminate */
1052 blob_zero(pBlob);
1053 blob_appendf(pBlob, "%s", zBuf);
1054 return len;
1055 #else
1056 blob_zero(pBlob);
1057 return 0;
1058 #endif
1059 }
1060
1061 /*
1062 ** Write the content of a blob into a file.
1063 **
1064 ** If the filename is blank or "-" then write to standard output.
1065 **
1066 ** This routine always assumes ExtFILE. If zFilename is a symbolic link
1067 ** then the content is written into the object that symbolic link points
1068 ** to, not into the symbolic link itself. This is true regardless of
1069 ** the allow-symlinks setting.
1070 **
1071 ** Return the number of bytes written.
1072 */
blob_write_to_file(Blob * pBlob,const char * zFilename)1073 int blob_write_to_file(Blob *pBlob, const char *zFilename){
1074 FILE *out;
1075 int nWrote;
1076
1077 if( zFilename[0]==0 || (zFilename[0]=='-' && zFilename[1]==0) ){
1078 blob_is_init(pBlob);
1079 #if defined(_WIN32)
1080 nWrote = fossil_utf8_to_console(blob_buffer(pBlob), blob_size(pBlob), 0);
1081 if( nWrote>=0 ) return nWrote;
1082 fflush(stdout);
1083 _setmode(_fileno(stdout), _O_BINARY);
1084 #endif
1085 nWrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), stdout);
1086 #if defined(_WIN32)
1087 fflush(stdout);
1088 _setmode(_fileno(stdout), _O_TEXT);
1089 #endif
1090 }else{
1091 file_mkfolder(zFilename, ExtFILE, 1, 0);
1092 out = fossil_fopen(zFilename, "wb");
1093 if( out==0 ){
1094 #if defined(_WIN32)
1095 const char *zReserved = file_is_win_reserved(zFilename);
1096 if( zReserved ){
1097 fossil_fatal("cannot open \"%s\" because \"%s\" is "
1098 "a reserved name on Windows", zFilename, zReserved);
1099 }
1100 #endif
1101 fossil_fatal_recursive("unable to open file \"%s\" for writing",
1102 zFilename);
1103 return 0;
1104 }
1105 blob_is_init(pBlob);
1106 nWrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), out);
1107 fclose(out);
1108 if( nWrote!=blob_size(pBlob) ){
1109 fossil_fatal_recursive("short write: %d of %d bytes to %s", nWrote,
1110 blob_size(pBlob), zFilename);
1111 }
1112 }
1113 return nWrote;
1114 }
1115
1116 /*
1117 ** Compress a blob pIn. Store the result in pOut. It is ok for pIn and
1118 ** pOut to be the same blob.
1119 **
1120 ** pOut must either be the same as pIn or else uninitialized.
1121 */
blob_compress(Blob * pIn,Blob * pOut)1122 void blob_compress(Blob *pIn, Blob *pOut){
1123 unsigned int nIn = blob_size(pIn);
1124 unsigned int nOut = 13 + nIn + (nIn+999)/1000;
1125 unsigned long int nOut2;
1126 unsigned char *outBuf;
1127 Blob temp;
1128 blob_zero(&temp);
1129 blob_resize(&temp, nOut+4);
1130 outBuf = (unsigned char*)blob_buffer(&temp);
1131 outBuf[0] = nIn>>24 & 0xff;
1132 outBuf[1] = nIn>>16 & 0xff;
1133 outBuf[2] = nIn>>8 & 0xff;
1134 outBuf[3] = nIn & 0xff;
1135 nOut2 = (long int)nOut;
1136 compress(&outBuf[4], &nOut2,
1137 (unsigned char*)blob_buffer(pIn), blob_size(pIn));
1138 if( pOut==pIn ) blob_reset(pOut);
1139 assert_blob_is_reset(pOut);
1140 *pOut = temp;
1141 blob_resize(pOut, nOut2+4);
1142 }
1143
1144 /*
1145 ** COMMAND: test-compress
1146 **
1147 ** Usage: %fossil test-compress INPUTFILE OUTPUTFILE
1148 **
1149 ** Run compression on INPUTFILE and write the result into OUTPUTFILE.
1150 **
1151 ** This is used to test and debug the blob_compress() routine.
1152 */
compress_cmd(void)1153 void compress_cmd(void){
1154 Blob f;
1155 if( g.argc!=4 ) usage("INPUTFILE OUTPUTFILE");
1156 blob_read_from_file(&f, g.argv[2], ExtFILE);
1157 blob_compress(&f, &f);
1158 blob_write_to_file(&f, g.argv[3]);
1159 }
1160
1161 /*
1162 ** Compress the concatenation of a blobs pIn1 and pIn2. Store the result
1163 ** in pOut.
1164 **
1165 ** pOut must be either uninitialized or must be the same as either pIn1 or
1166 ** pIn2.
1167 */
blob_compress2(Blob * pIn1,Blob * pIn2,Blob * pOut)1168 void blob_compress2(Blob *pIn1, Blob *pIn2, Blob *pOut){
1169 unsigned int nIn = blob_size(pIn1) + blob_size(pIn2);
1170 unsigned int nOut = 13 + nIn + (nIn+999)/1000;
1171 unsigned char *outBuf;
1172 z_stream stream;
1173 Blob temp;
1174 blob_zero(&temp);
1175 blob_resize(&temp, nOut+4);
1176 outBuf = (unsigned char*)blob_buffer(&temp);
1177 outBuf[0] = nIn>>24 & 0xff;
1178 outBuf[1] = nIn>>16 & 0xff;
1179 outBuf[2] = nIn>>8 & 0xff;
1180 outBuf[3] = nIn & 0xff;
1181 stream.zalloc = (alloc_func)0;
1182 stream.zfree = (free_func)0;
1183 stream.opaque = 0;
1184 stream.avail_out = nOut;
1185 stream.next_out = &outBuf[4];
1186 deflateInit(&stream, 9);
1187 stream.avail_in = blob_size(pIn1);
1188 stream.next_in = (unsigned char*)blob_buffer(pIn1);
1189 deflate(&stream, 0);
1190 stream.avail_in = blob_size(pIn2);
1191 stream.next_in = (unsigned char*)blob_buffer(pIn2);
1192 deflate(&stream, 0);
1193 deflate(&stream, Z_FINISH);
1194 blob_resize(&temp, stream.total_out + 4);
1195 deflateEnd(&stream);
1196 if( pOut==pIn1 ) blob_reset(pOut);
1197 if( pOut==pIn2 ) blob_reset(pOut);
1198 assert_blob_is_reset(pOut);
1199 *pOut = temp;
1200 }
1201
1202 /*
1203 ** COMMAND: test-compress-2
1204 **
1205 ** Usage: %fossil test-compress-2 IN1 IN2 OUT
1206 **
1207 ** Read files IN1 and IN2, concatenate the content, compress the
1208 ** content, then write results into OUT.
1209 **
1210 ** This is used to test and debug the blob_compress2() routine.
1211 */
compress2_cmd(void)1212 void compress2_cmd(void){
1213 Blob f1, f2;
1214 if( g.argc!=5 ) usage("INPUTFILE1 INPUTFILE2 OUTPUTFILE");
1215 blob_read_from_file(&f1, g.argv[2], ExtFILE);
1216 blob_read_from_file(&f2, g.argv[3], ExtFILE);
1217 blob_compress2(&f1, &f2, &f1);
1218 blob_write_to_file(&f1, g.argv[4]);
1219 }
1220
1221 /*
1222 ** Uncompress blob pIn and store the result in pOut. It is ok for pIn and
1223 ** pOut to be the same blob.
1224 **
1225 ** pOut must be either uninitialized or the same as pIn.
1226 */
blob_uncompress(Blob * pIn,Blob * pOut)1227 int blob_uncompress(Blob *pIn, Blob *pOut){
1228 unsigned int nOut;
1229 unsigned char *inBuf;
1230 unsigned int nIn = blob_size(pIn);
1231 Blob temp;
1232 int rc;
1233 unsigned long int nOut2;
1234 if( nIn<=4 ){
1235 return 0;
1236 }
1237 inBuf = (unsigned char*)blob_buffer(pIn);
1238 nOut = (inBuf[0]<<24) + (inBuf[1]<<16) + (inBuf[2]<<8) + inBuf[3];
1239 blob_zero(&temp);
1240 blob_resize(&temp, nOut+1);
1241 nOut2 = (long int)nOut;
1242 rc = uncompress((unsigned char*)blob_buffer(&temp), &nOut2,
1243 &inBuf[4], nIn - 4);
1244 if( rc!=Z_OK ){
1245 blob_reset(&temp);
1246 return 1;
1247 }
1248 blob_resize(&temp, nOut2);
1249 if( pOut==pIn ) blob_reset(pOut);
1250 assert_blob_is_reset(pOut);
1251 *pOut = temp;
1252 return 0;
1253 }
1254
1255 /*
1256 ** COMMAND: test-uncompress
1257 **
1258 ** Usage: %fossil test-uncompress IN OUT
1259 **
1260 ** Read the content of file IN, uncompress that content, and write the
** result into OUT.  This command is intended for testing of the
** blob_uncompress() function.
1263 */
uncompress_cmd(void)1264 void uncompress_cmd(void){
1265 Blob f;
1266 if( g.argc!=4 ) usage("INPUTFILE OUTPUTFILE");
1267 blob_read_from_file(&f, g.argv[2], ExtFILE);
1268 blob_uncompress(&f, &f);
1269 blob_write_to_file(&f, g.argv[3]);
1270 }
1271
1272 /*
1273 ** COMMAND: test-cycle-compress
1274 **
1275 ** Compress and uncompress each file named on the command line.
1276 ** Verify that the original content is recovered.
1277 */
test_cycle_compress(void)1278 void test_cycle_compress(void){
1279 int i;
1280 Blob b1, b2, b3;
1281 for(i=2; i<g.argc; i++){
1282 blob_read_from_file(&b1, g.argv[i], ExtFILE);
1283 blob_compress(&b1, &b2);
1284 blob_uncompress(&b2, &b3);
1285 if( blob_compare(&b1, &b3) ){
1286 fossil_fatal("compress/uncompress cycle failed for %s", g.argv[i]);
1287 }
1288 blob_reset(&b1);
1289 blob_reset(&b2);
1290 blob_reset(&b3);
1291 }
1292 fossil_print("ok\n");
1293 }
1294
1295 /*
1296 ** Convert every \n character in the given blob into \r\n.
1297 */
blob_add_cr(Blob * p)1298 void blob_add_cr(Blob *p){
1299 char *z = p->aData;
1300 int j = p->nUsed;
1301 int i, n;
1302 for(i=n=0; i<j; i++){
1303 if( z[i]=='\n' ) n++;
1304 }
1305 j += n;
1306 if( j>=p->nAlloc ){
1307 blob_resize(p, j);
1308 z = p->aData;
1309 }
1310 p->nUsed = j;
1311 z[j] = 0;
1312 while( j>i ){
1313 if( (z[--j] = z[--i]) =='\n' ){
1314 z[--j] = '\r';
1315 }
1316 }
1317 }
1318
1319 /*
1320 ** Remove every \r character from the given blob, replacing each one with
1321 ** a \n character if it was not already part of a \r\n pair.
1322 */
blob_to_lf_only(Blob * p)1323 void blob_to_lf_only(Blob *p){
1324 int i, j;
1325 char *z = blob_materialize(p);
1326 for(i=j=0; z[i]; i++){
1327 if( z[i]!='\r' ) z[j++] = z[i];
1328 else if( z[i+1]!='\n' ) z[j++] = '\n';
1329 }
1330 z[j] = 0;
1331 p->nUsed = j;
1332 }
1333
/*
** Support for converting a blob from cp1252 to UTF-8.  As cp1252 is a
** superset of iso8859-1, this is useful on UNIX as well.
**
** This table holds the character translations for the 32 bytes
** 0x80 through 0x9F.  It is indexed by the low five bits of the byte.
*/

static const unsigned short cp1252[32] = {
  /* 0x80 .. 0x87 */
  0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
  /* 0x88 .. 0x8F */
  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
  /* 0x90 .. 0x97 */
  0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
  /* 0x98 .. 0x9F */
  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
};
1347
/*
** Convert the content of blob p from cp1252 to UTF-8, in place.
**
** Bytes below 0x80 are kept as they are.  A byte from 0x80 through
** 0x9f is translated through the cp1252[] table.  A byte of 0xa0 or
** more is used directly as the code point.  Code points below 0x800
** become two bytes of UTF-8, all others become three bytes.
*/
void blob_cp1252_to_utf8(Blob *p){
  unsigned char *aByte = (unsigned char *)p->aData;
  int nIn = p->nUsed;        /* Size before the conversion */
  int nExtra = 0;            /* Number of bytes that will be added */
  int iSrc;                  /* Read position */
  int iDst;                  /* Write position */

  /* First pass: work out how much the content will grow */
  for(iSrc=0; iSrc<nIn; iSrc++){
    unsigned char c = aByte[iSrc];
    if( c<0x80 ) continue;
    if( c<0xa0 && cp1252[c&0x1f]>=0x800 ){
      nExtra += 2;
    }else{
      nExtra += 1;
    }
  }

  iDst = nIn + nExtra;
  if( iDst>=p->nAlloc ){
    blob_resize(p, iDst);
    aByte = (unsigned char *)p->aData;   /* The buffer may have moved */
  }
  p->nUsed = iDst;
  aByte[iDst] = 0;

  /* Second pass: expand from back to front so that no unread byte is
  ** overwritten.  Each input byte is fetched before its replacement
  ** is stored. */
  while( iDst>iSrc ){
    unsigned char c = aByte[--iSrc];
    unsigned short sym;
    if( c<0x80 ){
      aByte[--iDst] = c;
      continue;
    }
    sym = c<0xa0 ? cp1252[c&0x1f] : c;
    aByte[--iDst] = 0x80 | (sym&0x3f);
    if( sym>=0x800 ){
      aByte[--iDst] = 0x80 | ((sym>>6)&0x3f);
      aByte[--iDst] = 0xe0 | (sym>>12);
    }else{
      aByte[--iDst] = 0xc0 | (sym>>6);
    }
  }
}
1388
1389 /*
1390 ** ASCII (for reference):
1391 ** x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf
1392 ** 0x ^` ^a ^b ^c ^d ^e ^f ^g \b \t \n () \f \r ^n ^o
1393 ** 1x ^p ^q ^r ^s ^t ^u ^v ^w ^x ^y ^z ^{ ^| ^} ^~ ^
1394 ** 2x () ! " # $ % & ' ( ) * + , - . /
1395 ** 3x 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1396 ** 4x @ A B C D E F G H I J K L M N O
1397 ** 5x P Q R S T U V W X Y Z [ \ ] ^ _
1398 ** 6x ` a b c d e f g h i j k l m n o
1399 ** 7x p q r s t u v w x y z { | } ~ ^_
1400 */
1401
/*
** Meanings for bytes in a filename.  This table is indexed by the byte
** value and is consulted by blob_append_escaped_arg():
**
**    0   Ordinary character.  No encoding required
**    1   Needs to be escaped
**    2   Illegal character.  Do not allow in a filename
**    3   First byte of a 2-byte UTF-8
**    4   First byte of a 3-byte UTF-8
**    5   First byte of a 4-byte UTF-8
**
** The entries for 0x00 through 0x7f are selected by the _WIN32
** conditional.  The entries for 0x80 through 0xff are common to both.
*/
static const char aSafeChar[256] = {
#ifdef _WIN32
/* Windows
** Prohibit:  all control characters, including tab, \r and \n
** Escape:    (space) " # $ % & ' ( ) * ; < > ? [ ] ^ ` { | }
**
** The table also marks @ (0x40) and DEL (0x7f) as needing an escape.
*/
/*  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf  */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 0x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 1x */
     1,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0, /* 2x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1, /* 3x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 4x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  1,  0, /* 5x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 6x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  0,  1, /* 7x */
#else
/* Unix
** Prohibit:  all control characters, including tab, \r and \n
** Escape:    (space) ! " # $ % & ' ( ) * ; < > ? [ \ ] ^ ` { | }
**
** The table also marks @ (0x40) and DEL (0x7f) as needing an escape.
*/
/*  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf  */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 0x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 1x */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0, /* 2x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1, /* 3x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 4x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  0, /* 5x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 6x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  0,  1, /* 7x */
#endif
    /* all bytes 0x80 through 0xbf are unescaped, being secondary
    ** bytes to UTF8 characters.  Bytes 0xc0 through 0xff are the
    ** first byte of a UTF8 character and do get escaped.
    **
    ** The secondary bytes carry the value 2: the scan in
    ** blob_append_escaped_arg() steps over the secondary bytes that
    ** follow a valid first byte, so it only looks one up here when the
    ** byte is out of place, and then it is rejected as illegal. */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 8x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 9x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* ax */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* bx */
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, /* cx */
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, /* dx */
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, /* ex */
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5  /* fx */
};
1454
/*
** pBlob is a shell command under construction.  This routine safely
** appends filename argument zIn.
**
** The argument is escaped if it contains white space or other characters
** that need to be escaped for the shell.  If zIn contains characters
** that cannot be safely escaped, then throw a fatal error.
**
** If the isFilename argument is true, then the argument is expected
** to be a filename.  As shell commands commonly have command-line
** options that begin with "-" and since we do not want an attacker
** to be able to invoke these switches using filenames that begin
** with "-", if zIn begins with "-", prepend an additional "./"
** (or ".\\" on Windows).
**
** A space is inserted first if pBlob is non-empty and does not already
** end in whitespace.
*/
void blob_append_escaped_arg(Blob *pBlob, const char *zIn, int isFilename){
  int i;
  unsigned char c;
  int needEscape = 0;             /* True if any byte of zIn is not class 0 */
  int n = blob_size(pBlob);       /* Size of the command before appending */
  char *z = blob_buffer(pBlob);   /* Command text before appending */

  /* Look for illegal byte-sequences and byte-sequences that require
  ** escaping.  No control-characters are allowed.  All spaces and
  ** non-ASCII unicode characters and some punctuation characters require
  ** escaping. */
  for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
    if( aSafeChar[c] ){
      unsigned char x = aSafeChar[c];
      needEscape = 1;
      if( x==2 ){
        /* Illegal byte.  The token taken from pBlob appears in the
        ** message as the name of the command. */
        Blob bad;
        blob_token(pBlob, &bad);
        fossil_fatal("the [%s] argument to the \"%s\" command contains "
                     "a character (ascii 0x%02x) that is not allowed in "
                     "filename arguments",
                     zIn, blob_str(&bad), c);
      }else if( x>2 ){
        /* First byte of a multi-byte UTF-8 character: x-2 continuation
        ** bytes of the form 10xxxxxx must follow.  The || chain stops at
        ** the first mismatch, so a terminating NUL ends the check before
        ** anything past the end of zIn is read. */
        if( (zIn[i+1]&0xc0)!=0x80
         || (x>=4 && (zIn[i+2]&0xc0)!=0x80)
         || (x==5 && (zIn[i+3]&0xc0)!=0x80)
        ){
          Blob bad;
          blob_token(pBlob, &bad);
          fossil_fatal("the [%s] argument to the \"%s\" command contains "
                       "an illegal UTF-8 character",
                       zIn, blob_str(&bad));
        }
        /* Step over the continuation bytes that were just verified */
        i += x-2;
      }
    }
  }

  /* Separate from the previous argument by a space */
  if( n>0 && !fossil_isspace(z[n-1]) ){
    blob_append_char(pBlob, ' ');
  }

  /* Check for characters that need quoting */
  if( !needEscape ){
    /* Nothing to quote.  Only the leading "-" of a filename needs
    ** to be neutralized. */
    if( isFilename && zIn[0]=='-' ){
      blob_append_char(pBlob, '.');
#if defined(_WIN32)
      blob_append_char(pBlob, '\\');
#else
      blob_append_char(pBlob, '/');
#endif
    }
    blob_append(pBlob, zIn, -1);
  }else{
#if defined(_WIN32)
    /* Quoting strategy for windows:
    ** Put the entire name inside of "...".  Any " characters within
    ** the name get doubled.
    */
    blob_append_char(pBlob, '"');
    if( isFilename && zIn[0]=='-' ){
      blob_append_char(pBlob, '.');
      blob_append_char(pBlob, '\\');
    }else if( zIn[0]=='/' ){
      /* A "." is placed in front of a leading "/" */
      blob_append_char(pBlob, '.');
    }
    for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
      blob_append_char(pBlob, (char)c);
      if( c=='"' ) blob_append_char(pBlob, '"');
    }
    blob_append_char(pBlob, '"');
#else
    /* Quoting strategy for unix:
    ** If the name does not contain ', then surround the whole thing
    ** with '...'.   If there is one or more ' characters within the
    ** name, then put \ before each special character.
    */
    if( strchr(zIn,'\'') ){
      if( isFilename && zIn[0]=='-' ){
        blob_append_char(pBlob, '.');
        blob_append_char(pBlob, '/');
      }
      for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
        /* Class 2 bytes that reach this point can only be UTF-8
        ** continuation bytes, which are copied without a backslash */
        if( aSafeChar[c] && aSafeChar[c]!=2 ) blob_append_char(pBlob, '\\');
        blob_append_char(pBlob, (char)c);
      }
    }else{
      blob_append_char(pBlob, '\'');
      if( isFilename && zIn[0]=='-' ){
        blob_append_char(pBlob, '.');
        blob_append_char(pBlob, '/');
      }
      blob_append(pBlob, zIn, -1);
      blob_append_char(pBlob, '\'');
    }
#endif
  }
}
1569
1570 /*
1571 ** COMMAND: test-escaped-arg
1572 **
** Usage: %fossil test-escaped-arg ARGS ...
1574 **
1575 ** Run each argument through blob_append_escaped_arg() and show the
1576 ** result. Append each argument to "fossil test-echo" and run that
1577 ** using fossil_system() to verify that it really does get escaped
1578 ** correctly.
1579 **
1580 ** Other options:
1581 **
1582 ** --filename-args BOOL Subsequent arguments are assumed to be
1583 ** filenames if BOOL is true, or not if BOOL
1584 ** is false. Defaults on.
1585 **
1586 ** --hex HEX Skip the --hex flag and instead decode HEX
1587 ** into ascii. This provides a way to insert
1588 ** unusual characters as an argument for testing.
1589 **
**    --compare HEX ASCII       Verify that argument ASCII is identical
**                              to decoded HEX.
1592 **
**    --fuzz N                  Run N fuzz cases.  Each case is a call
**                              to "fossil test-escaped-arg --compare HEX ARG"
1595 ** where HEX and ARG are the same argument.
1596 ** The argument is chosen at random.
1597 */
void test_escaped_arg_command(void){
  int i;
  Blob x;                 /* Command line under construction */
  const char *zArg;       /* Argument currently being processed */
  int isFilename = 1;     /* Treat arguments as filenames (--filename-args) */
  char zBuf[100];         /* Decoded or encoded hex text */
  blob_init(&x, 0, 0);
  for(i=2; i<g.argc; i++){
    zArg = g.argv[i];
    if( fossil_strcmp(zArg, "--hex")==0 && i+1<g.argc ){
      /* --hex HEX: the decoded bytes take the place of the argument and
      ** are then processed at the bottom of the loop */
      size_t n = strlen(g.argv[++i]);
      if( n>=(sizeof(zBuf)-1)*2 ){
        fossil_fatal("Argument to --hex is too big");
      }
      /* Zero-fill first so that the decoded text is terminated */
      memset(zBuf, 0, sizeof(zBuf));
      decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, (int)n);
      zArg = zBuf;
    }else if( fossil_strcmp(zArg, "--compare")==0 && i+2<g.argc ){
      /* --compare HEX ASCII: decode HEX and require it to equal ASCII */
      size_t n = strlen(g.argv[++i]);
      if( n>=(sizeof(zBuf)-1)*2 ){
        fossil_fatal("HEX argument to --compare is too big");
      }
      memset(zBuf, 0, sizeof(zBuf));
      if( decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf,
                   (int)n) ){
        fossil_fatal("HEX decode of %s failed", g.argv[i]);
      }
      zArg = g.argv[++i];
      /* A leading "-" must have been neutralized by the escaping */
      if( zArg[0]=='-' ){
        fossil_fatal("filename argument \"%s\" begins with \"-\"", zArg);
      }
      /* If the decoded text begins with "-", skip over a "./" (or ".\\"
      ** on Windows) prefix on the received argument before comparing */
#ifdef _WIN32
      if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='\\' ) zArg += 2;
#else
      if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='/' ) zArg += 2;
#endif
      if( strcmp(zBuf, zArg)!=0 ){
        fossil_fatal("argument disagree: \"%s\" (%s) versus \"%s\"",
                     zBuf, g.argv[i-1], zArg);
      }
      continue;
    }else if( fossil_strcmp(zArg, "--fuzz")==0 && i+1<g.argc ){
      /* --fuzz N: build N random words and run each one through a
      ** separate "test-escaped-arg --compare" command */
      int n = atoi(g.argv[++i]);
      int j;
      for(j=0; j<n; j++){
        unsigned char m, k;
        int rc;
        unsigned char zWord[100];
        sqlite3_randomness(sizeof(m), &m);
        m = (m%40)+5;
        sqlite3_randomness(m, zWord); /* Between 5 and 44 bytes of randomness */
        for(k=0; k<m; k++){
          unsigned char cx = zWord[k];
          if( cx<0x20 || cx>=0x7f ){
            /* Translate illegal bytes into various non-ASCII unicode
            ** characters in order to exercise those code paths */
            unsigned int u;
            if( cx>=0x7f ){
              u = cx;
            }else if( cx>=0x08 ){
              u = 0x800 + cx;
            }else{
              u = 0x10000 + cx;
            }
            /* Store u as UTF-8, in place.  A multi-byte sequence
            ** overwrites the random bytes that follow, and k is moved
            ** past them, so k may end up to 3 beyond m. */
            if( u<0x00080 ){
              zWord[k] = u & 0xFF;
            }else if( u<0x00800 ){
              zWord[k++] = 0xC0 + (u8)((u>>6)&0x1F);
              zWord[k] = 0x80 + (u8)(u & 0x3F);
            }else if( u<0x10000 ){
              zWord[k++] = 0xE0 + (u8)((u>>12)&0x0F);
              zWord[k++] = 0x80 + (u8)((u>>6) & 0x3F);
              zWord[k] = 0x80 + (u8)(u & 0x3F);
            }else{
              zWord[k++] = 0xF0 + (u8)((u>>18) & 0x07);
              zWord[k++] = 0x80 + (u8)((u>>12) & 0x3F);
              zWord[k++] = 0x80 + (u8)((u>>6) & 0x3F);
              zWord[k] = 0x80 + (u8)(u & 0x3F);
            }
          }
        }
        zWord[k] = 0;
        /* The same word is passed as hex and as an escaped argument */
        encode16(zWord, (unsigned char*)zBuf, (int)k);
        blob_appendf(&x, "%$ test-escaped-arg --compare %s %$",
                         g.nameOfExe, zBuf,zWord);
        rc = fossil_system(blob_str(&x));
        if( rc ) fossil_fatal("failed test (%d): %s\n", rc, blob_str(&x));
        blob_reset(&x);
      }
      continue;
    }else if( fossil_strcmp(zArg, "--filename-args")==0 ){
      /* --filename-args BOOL: without a following BOOL the flag is
      ** consumed and the setting is left unchanged */
      if( i+1<g.argc ){
        i++;
        isFilename = is_truth(g.argv[i]);
      }
      continue;
    }
    /* Show the escaped form of zArg, then run it through "test-echo" */
    fossil_print("%3d [%s]: ", i, zArg);
    if( isFilename ){
      blob_appendf(&x, "%$ test-echo %$", g.nameOfExe, zArg);
    }else{
      blob_appendf(&x, "%$ test-echo %!$", g.nameOfExe, zArg);
    }
    fossil_print("%s\n", blob_str(&x));
    fossil_system(blob_str(&x));
    blob_reset(&x);
  }
}
1706
1707 /*
1708 ** A read(2)-like impl for the Blob class. Reads (copies) up to nLen
1709 ** bytes from pIn, starting at position pIn->iCursor, and copies them
1710 ** to pDest (which must be valid memory at least nLen bytes long).
1711 **
1712 ** Returns the number of bytes read/copied, which may be less than
1713 ** nLen (if end-of-blob is encountered).
1714 **
1715 ** Updates pIn's cursor.
1716 **
1717 ** Returns 0 if pIn contains no data.
1718 */
blob_read(Blob * pIn,void * pDest,unsigned int nLen)1719 unsigned int blob_read(Blob *pIn, void * pDest, unsigned int nLen ){
1720 if( !pIn->aData || (pIn->iCursor >= pIn->nUsed) ){
1721 return 0;
1722 } else if( (pIn->iCursor + nLen) > (unsigned int)pIn->nUsed ){
1723 nLen = (unsigned int) (pIn->nUsed - pIn->iCursor);
1724 }
1725 assert( pIn->nUsed > pIn->iCursor );
1726 assert( (pIn->iCursor+nLen) <= pIn->nUsed );
1727 if( nLen ){
1728 memcpy( pDest, pIn->aData, nLen );
1729 pIn->iCursor += nLen;
1730 }
1731 return nLen;
1732 }
1733
1734 /*
1735 ** Swaps the contents of the given blobs. Results
1736 ** are unspecified if either value is NULL or both
1737 ** point to the same blob.
1738 */
blob_swap(Blob * pLeft,Blob * pRight)1739 void blob_swap( Blob *pLeft, Blob *pRight ){
1740 Blob swap = *pLeft;
1741 *pLeft = *pRight;
1742 *pRight = swap;
1743 }
1744
1745 /*
1746 ** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1747 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1748 ** done. If useMbcs is false and there is no BOM, the input string is assumed
1749 ** to be UTF-8 already, so no conversion is done.
1750 */
blob_to_utf8_no_bom(Blob * pBlob,int useMbcs)1751 void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1752 char *zUtf8;
1753 int bomSize = 0;
1754 int bomReverse = 0;
1755 if( starts_with_utf8_bom(pBlob, &bomSize) ){
1756 struct Blob temp;
1757 zUtf8 = blob_str(pBlob) + bomSize;
1758 blob_zero(&temp);
1759 blob_append(&temp, zUtf8, -1);
1760 blob_swap(pBlob, &temp);
1761 blob_reset(&temp);
1762 }else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
1763 zUtf8 = blob_buffer(pBlob);
1764 if( bomReverse ){
1765 /* Found BOM, but with reversed bytes */
1766 unsigned int i = blob_size(pBlob);
1767 while( i>1 ){
1768 /* swap bytes of unicode representation */
1769 char zTemp = zUtf8[--i];
1770 zUtf8[i] = zUtf8[i-1];
1771 zUtf8[--i] = zTemp;
1772 }
1773 }
1774 /* Make sure the blob contains two terminating 0-bytes */
1775 blob_append(pBlob, "\000\000", 3);
1776 zUtf8 = blob_str(pBlob) + bomSize;
1777 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1778 blob_reset(pBlob);
1779 blob_set_dynamic(pBlob, zUtf8);
1780 }else if( useMbcs && invalid_utf8(pBlob) ){
1781 #if defined(_WIN32) || defined(__CYGWIN__)
1782 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1783 blob_reset(pBlob);
1784 blob_append(pBlob, zUtf8, -1);
1785 fossil_mbcs_free(zUtf8);
1786 #else
1787 blob_cp1252_to_utf8(pBlob);
1788 #endif /* _WIN32 */
1789 }
1790 }
1791