1 /*
2 ** Copyright (c) 2006 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 **
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 **   drh@hwaci.com
14 **   http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
** A Blob is a variable-length container for arbitrary string
** or binary data.
20 */
21 #include "config.h"
22 #if defined(FOSSIL_ENABLE_MINIZ)
23 #  define MINIZ_HEADER_FILE_ONLY
24 #  include "miniz.c"
25 #else
26 #  include <zlib.h>
27 #endif
28 #include "blob.h"
29 #if defined(_WIN32)
30 #include <fcntl.h>
31 #include <io.h>
32 #endif
33 
34 #if INTERFACE
/*
** A Blob can hold a string or a binary object of arbitrary size.  The
** size changes as necessary.
**
** All resizing goes through the xRealloc callback, which is invoked as
** xRealloc(pBlob, newSize).  A newSize of zero resets the blob (see
** blob_reset()).
*/
struct Blob {
  unsigned int nUsed;            /* Number of bytes used in aData[] */
  unsigned int nAlloc;           /* Number of bytes allocated for aData[] */
  unsigned int iCursor;          /* Next character of input to parse */
  unsigned int blobFlags;        /* One or more BLOBFLAG_* bits */
  char *aData;                   /* Where the information is stored */
  void (*xRealloc)(Blob*, unsigned int); /* Function to reallocate the buffer */
};
47 
/*
** Allowed values for Blob.blobFlags
**
** BLOBFLAG_NotSQL is set by blob_appendf() and checked by
** blob_sql_text().
*/
#define BLOBFLAG_NotSQL  0x0001      /* Non-SQL text */

/*
** The current size of a Blob: the number of bytes in use, not the
** number of bytes allocated.
*/
#define blob_size(X)  ((X)->nUsed)

/*
** The buffer holding the blob data.  This is the raw aData pointer;
** no nul-terminator is added by this macro.
*/
#define blob_buffer(X)  ((X)->aData)

/*
** Seek whence parameter values, for use with blob_seek():
** BLOB_SEEK_SET positions the cursor at an absolute offset and
** BLOB_SEEK_CUR moves it relative to its current position.
*/
#define BLOB_SEEK_SET 1
#define BLOB_SEEK_CUR 2
68 
69 #endif /* INTERFACE */
70 
/*
** Make sure a blob is initialized.  A blob counts as initialized when
** its xRealloc callback is one of the two reallocators defined in this
** file.  The check is an assert() and so vanishes in NDEBUG builds.
*/
#define blob_is_init(x) \
  assert((x)->xRealloc==blobReallocMalloc || (x)->xRealloc==blobReallocStatic)

/*
** Make sure a blob does not contain malloced memory.
**
** This might fail if we are unlucky and x is uninitialized.  For that
** reason it should only be used locally for debugging.  Leave it turned
** off for production.  When turned off the macro expands to nothing.
*/
#if 0  /* Enable for debugging only */
#define assert_blob_is_reset(x) assert(blob_is_reset(x))
#else
#define assert_blob_is_reset(x)
#endif
89 
90 
91 
/*
** Substitute for the built-in isspace(), which does not work for some
** international character sets.  Returns 1 when c is a space or lies
** in the range '\t' through '\r' (tab, newline, vertical tab, form
** feed, carriage return), and 0 for everything else.
*/
int fossil_isspace(char c){
  if( c==' ' ) return 1;
  return c>='\t' && c<='\r';
}
99 
/*
** Other replacements for ctype.h functions.  Each one recognizes only
** the unaccented letters a-z and A-Z and the digits 0-9.  The case
** converters return characters outside those ranges unchanged.
*/
int fossil_islower(char c){ return 'a'<=c && c<='z'; }
int fossil_isupper(char c){ return 'A'<=c && c<='Z'; }
int fossil_isdigit(char c){ return '0'<=c && c<='9'; }
int fossil_tolower(char c){
  if( fossil_isupper(c) ) return c + ('a' - 'A');
  return c;
}
int fossil_toupper(char c){
  if( fossil_islower(c) ) return c + ('A' - 'a');
  return c;
}
int fossil_isalpha(char c){
  return fossil_islower(c) || fossil_isupper(c);
}
int fossil_isalnum(char c){
  return fossil_isalpha(c) || fossil_isdigit(c);
}
118 
/* Return true if and only if every character of the nul-terminated
** string z is alphanumeric, an underscore, or a hyphen.  The empty
** string yields true.
**
** A NULL pointer yields false: there is no string to vouch for, and
** returning early avoids dereferencing the NULL pointer.
*/
int fossil_no_strange_characters(const char *z){
  if( z==0 ) return 0;
  while( fossil_isalnum(z[0]) || z[0]=='_' || z[0]=='-' ) z++;
  return z[0]==0;
}
126 
127 
128 /*
129 ** COMMAND: test-isspace
130 **
131 ** Verify that the fossil_isspace() routine is working correctly by
132 ** testing it on all possible inputs.
133 */
void isspace_cmd(void){
  int i;
  /* Walk every value from 0 through 255.  The six whitespace codes must
  ** be accepted by fossil_isspace() and everything else rejected. */
  for(i=0; i<256; i++){
    switch( i ){
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
        assert( fossil_isspace((char)i) );
        break;
      default:
        assert( !fossil_isspace((char)i) );
        break;
    }
  }
  fossil_print("All 256 characters OK\n");
}
146 
/*
** Out-of-memory handler for blob operations.  Writes a short message
** to stderr and then calls fossil_exit(1).
*/
static void blob_panic(void){
  fputs("out of memory\n", stderr);
  fossil_exit(1);
}
156 
157 /*
158 ** A reallocation function that assumes that aData came from malloc().
159 ** This function attempts to resize the buffer of the blob to hold
160 ** newSize bytes.
161 **
162 ** No attempt is made to recover from an out-of-memory error.
163 ** If an OOM error occurs, an error message is printed on stderr
164 ** and the program exits.
165 */
blobReallocMalloc(Blob * pBlob,unsigned int newSize)166 void blobReallocMalloc(Blob *pBlob, unsigned int newSize){
167   if( newSize==0 ){
168     free(pBlob->aData);
169     pBlob->aData = 0;
170     pBlob->nAlloc = 0;
171     pBlob->nUsed = 0;
172     pBlob->iCursor = 0;
173     pBlob->blobFlags = 0;
174   }else if( newSize>pBlob->nAlloc || newSize<pBlob->nAlloc-4000 ){
175     char *pNew = fossil_realloc(pBlob->aData, newSize);
176     pBlob->aData = pNew;
177     pBlob->nAlloc = newSize;
178     if( pBlob->nUsed>pBlob->nAlloc ){
179       pBlob->nUsed = pBlob->nAlloc;
180     }
181   }
182 }
183 
/*
** An initializer for Blobs.  It produces an empty blob (all counters
** zero, no buffer) whose reallocator is blobReallocMalloc.
**
** empty_blob is a constant instance of that value, used in this file
** to return a blob to the empty state by structure assignment.
*/
#if INTERFACE
#define BLOB_INITIALIZER  {0,0,0,0,0,blobReallocMalloc}
#endif
const Blob empty_blob = BLOB_INITIALIZER;
191 
192 /*
193 ** A reallocation function for when the initial string is in unmanaged
194 ** space.  Copy the string to memory obtained from malloc().
195 */
blobReallocStatic(Blob * pBlob,unsigned int newSize)196 static void blobReallocStatic(Blob *pBlob, unsigned int newSize){
197   if( newSize==0 ){
198     *pBlob = empty_blob;
199   }else{
200     char *pNew = fossil_malloc( newSize );
201     if( pBlob->nUsed>newSize ) pBlob->nUsed = newSize;
202     memcpy(pNew, pBlob->aData, pBlob->nUsed);
203     pBlob->aData = pNew;
204     pBlob->xRealloc = blobReallocMalloc;
205     pBlob->nAlloc = newSize;
206   }
207 }
208 
209 /*
210 ** Reset a blob to be an empty container.
211 */
blob_reset(Blob * pBlob)212 void blob_reset(Blob *pBlob){
213   blob_is_init(pBlob);
214   pBlob->xRealloc(pBlob, 0);
215 }
216 
217 
218 /*
219 ** Return true if the blob has been zeroed - in other words if it contains
220 ** no malloced memory.  This only works reliably if the blob has been
221 ** initialized - it can return a false negative on an uninitialized blob.
222 */
blob_is_reset(Blob * pBlob)223 int blob_is_reset(Blob *pBlob){
224   if( pBlob==0 ) return 1;
225   if( pBlob->nUsed ) return 0;
226   if( pBlob->xRealloc==blobReallocMalloc && pBlob->nAlloc ) return 0;
227   return 1;
228 }
229 
230 /*
231 ** Initialize a blob to a string or byte-array constant of a specified length.
232 ** Any prior data in the blob is discarded.
233 */
blob_init(Blob * pBlob,const char * zData,int size)234 void blob_init(Blob *pBlob, const char *zData, int size){
235   assert_blob_is_reset(pBlob);
236   if( zData==0 ){
237     *pBlob = empty_blob;
238   }else{
239     if( size<=0 ) size = strlen(zData);
240     pBlob->nUsed = pBlob->nAlloc = size;
241     pBlob->aData = (char*)zData;
242     pBlob->iCursor = 0;
243     pBlob->blobFlags = 0;
244     pBlob->xRealloc = blobReallocStatic;
245   }
246 }
247 
/*
** Initialize a blob to a nul-terminated string.
** Any prior data in the blob is discarded.
**
** The length is computed by blob_init(), which calls strlen() when
** given a non-positive size.  The blob references zStr directly.
*/
void blob_set(Blob *pBlob, const char *zStr){
  blob_init(pBlob, zStr, -1);
}
255 
/*
** Initialize a blob to a nul-terminated string obtained from fossil_malloc().
** The blob will take responsibility for freeing the string.
*/
void blob_set_dynamic(Blob *pBlob, char *zStr){
  blob_init(pBlob, zStr, -1);
  /* blob_init() installs the static reallocator; replace it so that the
  ** buffer is treated as malloc()ed memory and freed on reset. */
  pBlob->xRealloc = blobReallocMalloc;
}
264 
265 /*
266 ** Initialize a blob to an empty string.
267 */
blob_zero(Blob * pBlob)268 void blob_zero(Blob *pBlob){
269   static const char zEmpty[] = "";
270   assert_blob_is_reset(pBlob);
271   pBlob->nUsed = 0;
272   pBlob->nAlloc = 1;
273   pBlob->aData = (char*)zEmpty;
274   pBlob->iCursor = 0;
275   pBlob->blobFlags = 0;
276   pBlob->xRealloc = blobReallocStatic;
277 }
278 
/*
** Append text or data to the end of a blob.  Or, if pBlob==NULL, send
** the text to standard output in terminal mode, or to standard CGI output
** in CGI mode.
**
** If nData<0 then output all of aData up to the first 0x00 byte.
**
** Use the blob_append() routine in all application code.  The blob_append()
** routine is faster, but blob_append_full() handles all the corner cases.
** The blob_append() routine automatically calls blob_append_full() if
** necessary.
**
** When the buffer has to grow, the size requested is the new content
** size plus the old allocation plus 100 bytes.  A request of 0x7fff0000
** bytes or more triggers blob_panic().
*/
static void blob_append_full(Blob *pBlob, const char *aData, int nData){
  sqlite3_int64 nNew;   /* 64-bit so the sums below cannot wrap */
  /* assert( aData!=0 || nData==0 ); // omitted for speed */
  /* blob_is_init(pBlob); // omitted for speed */
  if( nData<0 ) nData = strlen(aData);
  if( nData==0 ) return;
  if( pBlob==0 ){
    /* No destination blob: route to the CGI reply or to the terminal */
    if( g.cgiOutput ){
      pBlob = cgi_output_blob();
    }else{
      fossil_puts(aData, 0, nData);
      return;
    }
  }
  nNew = pBlob->nUsed;
  nNew += nData;
  /* ">=" rather than ">" leaves one byte spare for the nul terminator */
  if( nNew >= pBlob->nAlloc ){
    nNew += pBlob->nAlloc;
    nNew += 100;
    if( nNew>=0x7fff0000 ){
      blob_panic();
    }
    pBlob->xRealloc(pBlob, (int)nNew);
    /* Double-check that the reallocator really made enough room */
    if( pBlob->nUsed + nData >= pBlob->nAlloc ){
      blob_panic();
    }
  }
  memcpy(&pBlob->aData[pBlob->nUsed], aData, nData);
  pBlob->nUsed += nData;
  pBlob->aData[pBlob->nUsed] = 0;   /* Blobs are always nul-terminated */
}
/*
** Fast-path front end to blob_append_full().  When pBlob is non-NULL,
** nData is positive, and the new content plus a nul terminator fits
** in the current allocation, the bytes are copied directly.  Every
** other case is handed to blob_append_full().
*/
void blob_append(Blob *pBlob, const char *aData, int nData){
  unsigned int iStart;
  /* assert( aData!=0 || nData==0 ); // omitted for speed */
  if( nData>0 && pBlob!=0 && pBlob->nUsed + nData < pBlob->nAlloc ){
    iStart = pBlob->nUsed;
    pBlob->nUsed += nData;
    pBlob->aData[pBlob->nUsed] = 0;
    memcpy(&pBlob->aData[iStart], aData, nData);
  }else{
    blob_append_full(pBlob, aData, nData);
  }
}
334 
/*
** Append a string literal to a blob.  The length is taken from
** sizeof(STR)-1, so STR must be a string literal or a character
** array - not a character pointer.
*/
#if INTERFACE
#define blob_append_string(BLOB,STR) blob_append(BLOB,STR,sizeof(STR)-1)
#endif
341 
342 /*
343 ** Append a single character to the blob.  If pBlob is zero then the
344 ** character is written directly to stdout.
345 */
blob_append_char(Blob * pBlob,char c)346 void blob_append_char(Blob *pBlob, char c){
347   if( pBlob==0 || pBlob->nUsed+1 >= pBlob->nAlloc ){
348     blob_append_full(pBlob, &c, 1);
349   }else{
350     pBlob->aData[pBlob->nUsed++] = c;
351   }
352 }
353 
354 /*
355 ** Copy a blob.  pTo is reinitialized to be a copy of pFrom.
356 */
blob_copy(Blob * pTo,Blob * pFrom)357 void blob_copy(Blob *pTo, Blob *pFrom){
358   blob_is_init(pFrom);
359   blob_zero(pTo);
360   blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
361 }
362 
363 /*
364 ** Append the second blob onto the end of the first blob and reset the
365 ** second blob.  If the first blob (pTo) is NULL, then the content
366 ** of the second blob is written to stdout or to CGI depending on if the
367 ** Fossil is running in terminal or CGI mode.
368 */
blob_append_xfer(Blob * pTo,Blob * pFrom)369 void blob_append_xfer(Blob *pTo, Blob *pFrom){
370   blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
371   blob_reset(pFrom);
372 }
373 
374 /*
375 ** Write into pOut, a string literal representation for the first n bytes
376 ** of z[].  The string literal representation is compatible with C, TCL,
377 ** and JSON.  Double-quotes are added to both ends.  Double-quote and
378 ** backslash characters are escaped.
379 */
blob_append_tcl_literal(Blob * pOut,const char * z,int n)380 void blob_append_tcl_literal(Blob *pOut, const char *z, int n){
381   int i;
382   blob_append_char(pOut, '"');
383   for(i=0; i<n; i++){
384     char c = z[i];
385     switch( c ){
386       case '\r':  c = 'r';
387       case '[':
388       case ']':
389       case '$':
390       case '"':
391       case '\\':
392         blob_append_char(pOut, '\\');
393       default:
394         blob_append_char(pOut, c);
395     }
396   }
397   blob_append_char(pOut, '"');
398 }
/*
** Write into pOut a double-quoted JSON string literal for the first
** n bytes of z[].
**
**   *  \b \t \n \f \r are written as their two-character escapes.
**   *  Double-quote and backslash get a backslash prefix.
**   *  Any other byte in the range 0x00 through 0x1f is written as
**      \u00XX using lower-case hex digits.
**   *  All remaining bytes are copied through unchanged.
*/
void blob_append_json_literal(Blob *pOut, const char *z, int n){
  static const char zHex[] = "0123456789abcdef";
  int k;
  blob_append_char(pOut, '"');
  for(k=0; k<n; k++){
    char ch = z[k];
    if( ch=='\b' ){
      blob_append_char(pOut, '\\');
      ch = 'b';
    }else if( ch=='\t' ){
      blob_append_char(pOut, '\\');
      ch = 't';
    }else if( ch=='\n' ){
      blob_append_char(pOut, '\\');
      ch = 'n';
    }else if( ch=='\f' ){
      blob_append_char(pOut, '\\');
      ch = 'f';
    }else if( ch=='\r' ){
      blob_append_char(pOut, '\\');
      ch = 'r';
    }else if( ch=='"' || ch=='\\' ){
      blob_append_char(pOut, '\\');
    }else if( ch>=0x00 && ch<=0x0f ){
      /* Remaining control codes 0x00-0x0f: emit \u000X */
      blob_append(pOut, "\\u000", 5);
      ch = zHex[(int)ch];
    }else if( ch>=0x10 && ch<=0x1f ){
      /* Control codes 0x10-0x1f: emit \u001X */
      blob_append(pOut, "\\u001", 5);
      ch = zHex[ch - 0x10];
    }
    /* ch now holds the final character of the (possibly escaped) output */
    blob_append_char(pOut, ch);
  }
  blob_append_char(pOut, '"');
}
445 
446 
447 /*
448 ** Return a pointer to a null-terminated string for a blob.
449 */
blob_str(Blob * p)450 char *blob_str(Blob *p){
451   blob_is_init(p);
452   if( p->nUsed==0 ){
453     blob_append_char(p, 0); /* NOTE: Changes nUsed. */
454     p->nUsed = 0;
455   }
456   if( p->nUsed<p->nAlloc ){
457     p->aData[p->nUsed] = 0;
458   }else{
459     blob_materialize(p);
460   }
461   return p->aData;
462 }
463 
464 /*
465 ** Compute the string length of a Blob.  If there are embedded
466 ** nul characters, truncate the to blob at the first nul.
467 */
blob_strlen(Blob * p)468 int blob_strlen(Blob *p){
469   char *z = blob_str(p);
470   if( z==0 ) return 0;
471   p->nUsed = (int)strlen(p->aData);
472   return p->nUsed;
473 }
474 
475 /*
476 ** Return a pointer to a null-terminated string for a blob that has
477 ** been created using blob_append_sql() and not blob_appendf().  If
478 ** text was ever added using blob_appendf() then throw an error.
479 */
blob_sql_text(Blob * p)480 char *blob_sql_text(Blob *p){
481   blob_is_init(p);
482   if( (p->blobFlags & BLOBFLAG_NotSQL) ){
483     fossil_panic("use of blob_appendf() to construct SQL text");
484   }
485   return blob_str(p);
486 }
487 
488 
489 /*
490 ** Return a pointer to a null-terminated string for a blob.
491 **
492 ** WARNING:  If the blob is ephemeral, it might cause a '\000'
493 ** character to be inserted into the middle of the parent blob.
494 ** Example:  Suppose p is a token extracted from some larger
495 ** blob pBig using blob_token().  If you call this routine on p,
496 ** then a '\000' character will be inserted in the middle of
497 ** pBig in order to cause p to be nul-terminated.  If pBig
498 ** should not be modified, then use blob_str() instead of this
499 ** routine.  blob_str() will make a copy of the p if necessary
500 ** to avoid modifying pBig.
501 */
blob_terminate(Blob * p)502 char *blob_terminate(Blob *p){
503   blob_is_init(p);
504   if( p->nUsed==0 ) return "";
505   p->aData[p->nUsed] = 0;
506   return p->aData;
507 }
508 
509 /*
510 ** Compare two blobs.  Return negative, zero, or positive if the first
511 ** blob is less then, equal to, or greater than the second.
512 */
blob_compare(Blob * pA,Blob * pB)513 int blob_compare(Blob *pA, Blob *pB){
514   int szA, szB, sz, rc;
515   blob_is_init(pA);
516   blob_is_init(pB);
517   szA = blob_size(pA);
518   szB = blob_size(pB);
519   sz = szA<szB ? szA : szB;
520   rc = memcmp(blob_buffer(pA), blob_buffer(pB), sz);
521   if( rc==0 ){
522     rc = szA - szB;
523   }
524   return rc;
525 }
526 
527 /*
528 ** Compare two blobs in constant time and return zero if they are equal.
529 ** Constant time comparison only applies for blobs of the same length.
530 ** If lengths are different, immediately returns 1.
531 */
blob_constant_time_cmp(Blob * pA,Blob * pB)532 int blob_constant_time_cmp(Blob *pA, Blob *pB){
533   int szA, szB, i;
534   unsigned char *buf1, *buf2;
535   unsigned char rc = 0;
536 
537   blob_is_init(pA);
538   blob_is_init(pB);
539   szA = blob_size(pA);
540   szB = blob_size(pB);
541   if( szA!=szB || szA==0 ) return 1;
542 
543   buf1 = (unsigned char*)blob_buffer(pA);
544   buf2 = (unsigned char*)blob_buffer(pB);
545 
546   for( i=0; i<szA; i++ ){
547     rc = rc | (buf1[i] ^ buf2[i]);
548   }
549 
550   return rc;
551 }
552 
553 /*
554 ** Compare a blob to a string.  Return TRUE if they are equal.
555 */
blob_eq_str(Blob * pBlob,const char * z,int n)556 int blob_eq_str(Blob *pBlob, const char *z, int n){
557   Blob t;
558   blob_is_init(pBlob);
559   if( n<=0 ) n = (int)strlen(z);
560   t.aData = (char*)z;
561   t.nUsed = n;
562   t.xRealloc = blobReallocStatic;
563   return blob_compare(pBlob, &t)==0;
564 }
565 
/*
** This macro compares a blob against a string constant.  We use the sizeof()
** operator on the string constant twice, so it really does need to be a
** string literal or character array - not a character pointer.
**
** The S"" concatenation in the first sizeof() only compiles when S is
** a string literal.
*/
#if INTERFACE
# define blob_eq(B,S) \
     ((B)->nUsed==sizeof(S"")-1 && memcmp((B)->aData,S,sizeof(S)-1)==0)
#endif
575 
576 
577 /*
578 ** Attempt to resize a blob so that its internal buffer is
579 ** nByte in size.  The blob is truncated if necessary.
580 */
blob_resize(Blob * pBlob,unsigned int newSize)581 void blob_resize(Blob *pBlob, unsigned int newSize){
582   pBlob->xRealloc(pBlob, newSize+1);
583   pBlob->nUsed = newSize;
584   pBlob->aData[newSize] = 0;
585 }
586 
587 /*
588 ** Ensures that the given blob has at least the given amount of memory
589 ** allocated to it. Does not modify pBlob->nUsed nor will it reduce
590 ** the currently-allocated amount of memory.
591 **
592 ** For semantic compatibility with blob_append_full(), if newSize is
593 ** >=0x7fff000 (~2GB) then this function will trigger blob_panic(). If
594 ** it didn't, it would be possible to bypass that hard-coded limit via
595 ** this function.
596 **
597 ** We've had at least one report:
598 **   https://fossil-scm.org/forum/forumpost/b7bbd28db4
599 ** which implies that this is unconditionally failing on mingw 32-bit
600 ** builds.
601 */
blob_reserve(Blob * pBlob,unsigned int newSize)602 void blob_reserve(Blob *pBlob, unsigned int newSize){
603   if(newSize>=0x7fff0000 ){
604     blob_panic();
605   }else if(newSize>pBlob->nUsed){
606     pBlob->xRealloc(pBlob, newSize);
607     pBlob->aData[newSize] = 0;
608   }
609 }
610 
611 /*
612 ** Make sure a blob is nul-terminated and is not a pointer to unmanaged
613 ** space.  Return a pointer to the data.
614 */
blob_materialize(Blob * pBlob)615 char *blob_materialize(Blob *pBlob){
616   blob_resize(pBlob, pBlob->nUsed);
617   return pBlob->aData;
618 }
619 
620 
621 /*
622 ** Call dehttpize on a blob.  This causes an ephemeral blob to be
623 ** materialized.
624 */
blob_dehttpize(Blob * pBlob)625 void blob_dehttpize(Blob *pBlob){
626   blob_materialize(pBlob);
627   pBlob->nUsed = dehttpize(pBlob->aData);
628 }
629 
630 /*
631 ** Extract N bytes from blob pFrom and use it to initialize blob pTo.
632 ** Return the actual number of bytes extracted.
633 **
634 ** After this call completes, pTo will be an ephemeral blob.
635 */
blob_extract(Blob * pFrom,int N,Blob * pTo)636 int blob_extract(Blob *pFrom, int N, Blob *pTo){
637   blob_is_init(pFrom);
638   assert_blob_is_reset(pTo);
639   if( pFrom->iCursor + N > pFrom->nUsed ){
640     N = pFrom->nUsed - pFrom->iCursor;
641     if( N<=0 ){
642       blob_zero(pTo);
643       return 0;
644     }
645   }
646   pTo->nUsed = N;
647   pTo->nAlloc = N;
648   pTo->aData = &pFrom->aData[pFrom->iCursor];
649   pTo->iCursor = 0;
650   pTo->xRealloc = blobReallocStatic;
651   pFrom->iCursor += N;
652   return N;
653 }
654 
/*
** Rewind the cursor on a blob back to the beginning.  Only the parse
** cursor changes; the content and size of the blob are untouched.
*/
void blob_rewind(Blob *p){
  p->iCursor = 0;
}
661 
662 /*
663 ** Truncate a blob back to zero length
664 */
blob_truncate(Blob * p,int sz)665 void blob_truncate(Blob *p, int sz){
666   if( sz>=0 && sz<p->nUsed ) p->nUsed = sz;
667 }
668 
669 /*
670 ** Seek the cursor in a blob to the indicated offset.
671 */
blob_seek(Blob * p,int offset,int whence)672 int blob_seek(Blob *p, int offset, int whence){
673   if( whence==BLOB_SEEK_SET ){
674     p->iCursor = offset;
675   }else if( whence==BLOB_SEEK_CUR ){
676     p->iCursor += offset;
677   }
678   if( p->iCursor>p->nUsed ){
679     p->iCursor = p->nUsed;
680   }
681   return p->iCursor;
682 }
683 
684 /*
685 ** Return the current offset into the blob
686 */
blob_tell(Blob * p)687 int blob_tell(Blob *p){
688   return p->iCursor;
689 }
690 
691 /*
692 ** Extract a single line of text from pFrom beginning at the current
693 ** cursor location and use that line of text to initialize pTo.
694 ** pTo will include the terminating \n.  Return the number of bytes
695 ** in the line including the \n at the end.  0 is returned at
696 ** end-of-file.
697 **
698 ** The cursor of pFrom is left pointing at the first byte past the
699 ** \n that terminated the line.
700 **
701 ** pTo will be an ephermeral blob.  If pFrom changes, it might alter
702 ** pTo as well.
703 */
blob_line(Blob * pFrom,Blob * pTo)704 int blob_line(Blob *pFrom, Blob *pTo){
705   char *aData = pFrom->aData;
706   int n = pFrom->nUsed;
707   int i = pFrom->iCursor;
708 
709   while( i<n && aData[i]!='\n' ){ i++; }
710   if( i<n ){
711     assert( aData[i]=='\n' );
712     i++;
713   }
714   blob_extract(pFrom, i-pFrom->iCursor, pTo);
715   return pTo->nUsed;
716 }
717 
718 /*
719 ** Trim whitespace off of the end of a blob.  Return the number
720 ** of characters remaining.
721 **
722 ** All this does is reduce the length counter.  This routine does
723 ** not insert a new zero terminator.
724 */
blob_trim(Blob * p)725 int blob_trim(Blob *p){
726   char *z = p->aData;
727   int n = p->nUsed;
728   while( n>0 && fossil_isspace(z[n-1]) ){ n--; }
729   p->nUsed = n;
730   return n;
731 }
732 
733 /*
734 ** Extract a single token from pFrom and use it to initialize pTo.
735 ** Return the number of bytes in the token.  If no token is found,
736 ** return 0.
737 **
738 ** A token consists of one or more non-space characters.  Leading
739 ** whitespace is ignored.
740 **
741 ** The cursor of pFrom is left pointing at the first character past
742 ** the end of the token.
743 **
744 ** pTo will be an ephermeral blob.  If pFrom changes, it might alter
745 ** pTo as well.
746 */
blob_token(Blob * pFrom,Blob * pTo)747 int blob_token(Blob *pFrom, Blob *pTo){
748   char *aData = pFrom->aData;
749   int n = pFrom->nUsed;
750   int i = pFrom->iCursor;
751   while( i<n && fossil_isspace(aData[i]) ){ i++; }
752   pFrom->iCursor = i;
753   while( i<n && !fossil_isspace(aData[i]) ){ i++; }
754   blob_extract(pFrom, i-pFrom->iCursor, pTo);
755   while( i<n && fossil_isspace(aData[i]) ){ i++; }
756   pFrom->iCursor = i;
757   return pTo->nUsed;
758 }
759 
760 /*
761 ** Extract a single SQL token from pFrom and use it to initialize pTo.
762 ** Return the number of bytes in the token.  If no token is found,
763 ** return 0.
764 **
765 ** An SQL token consists of one or more non-space characters.  If the
766 ** first character is ' then the token is terminated by a matching '
767 ** (ignoring double '') or by the end of the string
768 **
769 ** The cursor of pFrom is left pointing at the first character past
770 ** the end of the token.
771 **
772 ** pTo will be an ephermeral blob.  If pFrom changes, it might alter
773 ** pTo as well.
774 */
blob_sqltoken(Blob * pFrom,Blob * pTo)775 int blob_sqltoken(Blob *pFrom, Blob *pTo){
776   char *aData = pFrom->aData;
777   int n = pFrom->nUsed;
778   int i = pFrom->iCursor;
779   while( i<n && fossil_isspace(aData[i]) ){ i++; }
780   pFrom->iCursor = i;
781   if( aData[i]=='\'' ){
782     i++;
783     while( i<n ){
784       if( aData[i]=='\'' ){
785         if( aData[++i]!='\'' ) break;
786       }
787       i++;
788     }
789   }else{
790     while( i<n && !fossil_isspace(aData[i]) ){ i++; }
791   }
792   blob_extract(pFrom, i-pFrom->iCursor, pTo);
793   while( i<n && fossil_isspace(aData[i]) ){ i++; }
794   pFrom->iCursor = i;
795   return pTo->nUsed;
796 }
797 
798 /*
799 ** Extract everything from the current cursor to the end of the blob
800 ** into a new blob.  The new blob is an ephemerial reference to the
801 ** original blob.  The cursor of the original blob is unchanged.
802 */
blob_tail(Blob * pFrom,Blob * pTo)803 int blob_tail(Blob *pFrom, Blob *pTo){
804   int iCursor = pFrom->iCursor;
805   blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
806   pFrom->iCursor = iCursor;
807   return pTo->nUsed;
808 }
809 
810 /*
811 ** Copy N lines of text from pFrom into pTo.  The copy begins at the
812 ** current cursor position of pIn.  The pIn cursor is left pointing
813 ** at the first character past the last \n copied.
814 **
815 ** If pTo==NULL then this routine simply skips over N lines.
816 */
blob_copy_lines(Blob * pTo,Blob * pFrom,int N)817 void blob_copy_lines(Blob *pTo, Blob *pFrom, int N){
818   char *z = pFrom->aData;
819   int i = pFrom->iCursor;
820   int n = pFrom->nUsed;
821   int cnt = 0;
822 
823   if( N==0 ) return;
824   while( i<n ){
825     if( z[i]=='\n' ){
826       cnt++;
827       if( cnt==N ){
828         i++;
829         break;
830       }
831     }
832     i++;
833   }
834   if( pTo ){
835     blob_append(pTo, &pFrom->aData[pFrom->iCursor], i - pFrom->iCursor);
836   }
837   pFrom->iCursor = i;
838 }
839 
840 /*
841 ** Ensure that the text in pBlob ends with '\n'
842 */
blob_add_final_newline(Blob * pBlob)843 void blob_add_final_newline(Blob *pBlob){
844   if( pBlob->nUsed<=0 ) return;
845   if( pBlob->aData[pBlob->nUsed-1]!='\n' ){
846     blob_append_char(pBlob, '\n');
847   }
848 }
849 
850 /*
851 ** Return true if the blob contains a valid base16 identifier artifact hash.
852 **
853 ** The value returned is actually one of HNAME_SHA1 OR HNAME_K256 if the
854 ** hash is valid.  Both of these are non-zero and therefore "true".
855 ** If the hash is not valid, then HNAME_ERROR is returned, which is zero or
856 ** false.
857 */
blob_is_hname(Blob * pBlob)858 int blob_is_hname(Blob *pBlob){
859   return hname_validate(blob_buffer(pBlob), blob_size(pBlob));
860 }
861 
862 /*
863 ** Return true if the blob contains a valid filename
864 */
blob_is_filename(Blob * pBlob)865 int blob_is_filename(Blob *pBlob){
866   return file_is_simple_pathname(blob_str(pBlob), 1);
867 }
868 
869 /*
870 ** Return true if the blob contains a valid 32-bit integer.  Store
871 ** the integer value in *pValue.
872 */
blob_is_int(Blob * pBlob,int * pValue)873 int blob_is_int(Blob *pBlob, int *pValue){
874   const char *z = blob_buffer(pBlob);
875   int i, n, c, v;
876   n = blob_size(pBlob);
877   v = 0;
878   for(i=0; i<n && (c = z[i])!=0 && c>='0' && c<='9'; i++){
879     v = v*10 + c - '0';
880   }
881   if( i==n ){
882     *pValue = v;
883     return 1;
884   }else{
885     return 0;
886   }
887 }
888 
889 /*
890 ** Return true if the blob contains a valid 64-bit integer.  Store
891 ** the integer value in *pValue.
892 */
blob_is_int64(Blob * pBlob,sqlite3_int64 * pValue)893 int blob_is_int64(Blob *pBlob, sqlite3_int64 *pValue){
894   const char *z = blob_buffer(pBlob);
895   int i, n, c;
896   sqlite3_int64 v;
897   n = blob_size(pBlob);
898   v = 0;
899   for(i=0; i<n && (c = z[i])!=0 && c>='0' && c<='9'; i++){
900     v = v*10 + c - '0';
901   }
902   if( i==n ){
903     *pValue = v;
904     return 1;
905   }else{
906     return 0;
907   }
908 }
909 
910 /*
911 ** Zero or reset an array of Blobs.
912 */
blobarray_zero(Blob * aBlob,int n)913 void blobarray_zero(Blob *aBlob, int n){
914   int i;
915   for(i=0; i<n; i++) blob_zero(&aBlob[i]);
916 }
/* Apply blob_reset() to each of the n elements of aBlob[], in order. */
void blobarray_reset(Blob *aBlob, int n){
  Blob *p = aBlob;
  while( n-- > 0 ){
    blob_reset(p++);
  }
}
921 
922 /*
923 ** Parse a blob into space-separated tokens.  Store each token in
924 ** an element of the blobarray aToken[].  aToken[] is nToken elements in
925 ** size.  Return the number of tokens seen.
926 */
blob_tokenize(Blob * pIn,Blob * aToken,int nToken)927 int blob_tokenize(Blob *pIn, Blob *aToken, int nToken){
928   int i;
929   for(i=0; i<nToken && blob_token(pIn, &aToken[i]); i++){}
930   return i;
931 }
932 
933 /*
934 ** Do printf-style string rendering and append the results to a blob.  Or
935 ** if pBlob==0, do printf-style string rendering directly to stdout.
936 **
937 ** The blob_appendf() version sets the BLOBFLAG_NotSQL bit in Blob.blobFlags
938 ** whereas blob_append_sql() does not.
939 */
blob_appendf(Blob * pBlob,const char * zFormat,...)940 void blob_appendf(Blob *pBlob, const char *zFormat, ...){
941   va_list ap;
942   va_start(ap, zFormat);
943   vxprintf(pBlob, zFormat, ap);
944   va_end(ap);
945   if( pBlob ) pBlob->blobFlags |= BLOBFLAG_NotSQL;
946 }
/* Printf-style append for SQL text.  Unlike blob_appendf(), this does
** not set BLOBFLAG_NotSQL on the blob. */
void blob_append_sql(Blob *pBlob, const char *zFormat, ...){
  va_list args;
  va_start(args, zFormat);
  vxprintf(pBlob, zFormat, args);
  va_end(args);
}
/* va_list flavor of the printf-style append.  The arguments are passed
** straight through to vxprintf(); blobFlags is not modified. */
void blob_vappendf(Blob *pBlob, const char *zFormat, va_list ap){
  vxprintf(pBlob, zFormat, ap);
}
956 
957 /*
958 ** Initialize a blob to the data on an input channel.  Return
959 ** the number of bytes read into the blob.  Any prior content
960 ** of the blob is discarded, not freed.
961 */
blob_read_from_channel(Blob * pBlob,FILE * in,int nToRead)962 int blob_read_from_channel(Blob *pBlob, FILE *in, int nToRead){
963   size_t n;
964   blob_zero(pBlob);
965   if( nToRead<0 ){
966     char zBuf[10000];
967     while( !feof(in) ){
968       n = fread(zBuf, 1, sizeof(zBuf), in);
969       if( n>0 ){
970         blob_append(pBlob, zBuf, n);
971       }
972     }
973   }else{
974     blob_resize(pBlob, nToRead);
975     n = fread(blob_buffer(pBlob), 1, nToRead, in);
976     blob_resize(pBlob, n);
977   }
978   return blob_size(pBlob);
979 }
980 
981 /*
982 ** Initialize a blob to be the content of a file.  If the filename
983 ** is blank or "-" then read from standard input.
984 **
985 ** If zFilename is a symbolic link, behavior depends on the eFType
986 ** parameter:
987 **
988 **    *  If eFType is ExtFILE or allow-symlinks is OFF, then the
989 **       pBlob is initialized to the *content* of the object to which
990 **       the zFilename symlink points.
991 **
992 **    *  If eFType is RepoFILE and allow-symlinks is ON, then the
993 **       pBlob is initialized to the *name* of the object to which
994 **       the zFilename symlink points.
995 **
996 ** Any prior content of the blob is discarded, not freed.
997 **
998 ** Return the number of bytes read. Calls fossil_fatal() on error (i.e.
999 ** it exit()s and does not return).
1000 */
blob_read_from_file(Blob * pBlob,const char * zFilename,int eFType)1001 sqlite3_int64 blob_read_from_file(
1002   Blob *pBlob,               /* The blob to be initialized */
1003   const char *zFilename,     /* Extract content from this file */
1004   int eFType                 /* ExtFILE or RepoFILE - see above */
1005 ){
1006   sqlite3_int64 size, got;
1007   FILE *in;
1008   if( zFilename==0 || zFilename[0]==0
1009         || (zFilename[0]=='-' && zFilename[1]==0) ){
1010     return blob_read_from_channel(pBlob, stdin, -1);
1011   }
1012   if( file_islink(zFilename) ){
1013     return blob_read_link(pBlob, zFilename);
1014   }
1015   size = file_size(zFilename, eFType);
1016   blob_zero(pBlob);
1017   if( size<0 ){
1018     fossil_fatal("no such file: %s", zFilename);
1019   }
1020   if( size==0 ){
1021     return 0;
1022   }
1023   blob_resize(pBlob, size);
1024   in = fossil_fopen(zFilename, "rb");
1025   if( in==0 ){
1026     fossil_fatal("cannot open %s for reading", zFilename);
1027   }
1028   got = fread(blob_buffer(pBlob), 1, size, in);
1029   fclose(in);
1030   if( got<size ){
1031     blob_resize(pBlob, got);
1032   }
1033   return got;
1034 }
1035 
1036 /*
1037 ** Reads symlink destination path and puts int into blob.
1038 ** Any prior content of the blob is discarded, not freed.
1039 **
1040 ** Returns length of destination path.
1041 **
1042 ** On windows, zeros blob and returns 0.
1043 */
blob_read_link(Blob * pBlob,const char * zFilename)1044 int blob_read_link(Blob *pBlob, const char *zFilename){
1045 #if !defined(_WIN32)
1046   char zBuf[1024];
1047   ssize_t len = readlink(zFilename, zBuf, 1023);
1048   if( len < 0 ){
1049     fossil_fatal("cannot read symbolic link %s", zFilename);
1050   }
1051   zBuf[len] = 0;   /* null-terminate */
1052   blob_zero(pBlob);
1053   blob_appendf(pBlob, "%s", zBuf);
1054   return len;
1055 #else
1056   blob_zero(pBlob);
1057   return 0;
1058 #endif
1059 }
1060 
1061 /*
1062 ** Write the content of a blob into a file.
1063 **
1064 ** If the filename is blank or "-" then write to standard output.
1065 **
1066 ** This routine always assumes ExtFILE.  If zFilename is a symbolic link
1067 ** then the content is written into the object that symbolic link points
1068 ** to, not into the symbolic link itself.  This is true regardless of
1069 ** the allow-symlinks setting.
1070 **
1071 ** Return the number of bytes written.
1072 */
blob_write_to_file(Blob * pBlob,const char * zFilename)1073 int blob_write_to_file(Blob *pBlob, const char *zFilename){
1074   FILE *out;
1075   int nWrote;
1076 
1077   if( zFilename[0]==0 || (zFilename[0]=='-' && zFilename[1]==0) ){
1078     blob_is_init(pBlob);
1079 #if defined(_WIN32)
1080     nWrote = fossil_utf8_to_console(blob_buffer(pBlob), blob_size(pBlob), 0);
1081     if( nWrote>=0 ) return nWrote;
1082     fflush(stdout);
1083     _setmode(_fileno(stdout), _O_BINARY);
1084 #endif
1085     nWrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), stdout);
1086 #if defined(_WIN32)
1087     fflush(stdout);
1088     _setmode(_fileno(stdout), _O_TEXT);
1089 #endif
1090   }else{
1091     file_mkfolder(zFilename, ExtFILE, 1, 0);
1092     out = fossil_fopen(zFilename, "wb");
1093     if( out==0 ){
1094 #if defined(_WIN32)
1095       const char *zReserved = file_is_win_reserved(zFilename);
1096       if( zReserved ){
1097         fossil_fatal("cannot open \"%s\" because \"%s\" is "
1098              "a reserved name on Windows", zFilename, zReserved);
1099       }
1100 #endif
1101       fossil_fatal_recursive("unable to open file \"%s\" for writing",
1102                              zFilename);
1103       return 0;
1104     }
1105     blob_is_init(pBlob);
1106     nWrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), out);
1107     fclose(out);
1108     if( nWrote!=blob_size(pBlob) ){
1109       fossil_fatal_recursive("short write: %d of %d bytes to %s", nWrote,
1110          blob_size(pBlob), zFilename);
1111     }
1112   }
1113   return nWrote;
1114 }
1115 
1116 /*
1117 ** Compress a blob pIn.  Store the result in pOut.  It is ok for pIn and
1118 ** pOut to be the same blob.
1119 **
1120 ** pOut must either be the same as pIn or else uninitialized.
1121 */
blob_compress(Blob * pIn,Blob * pOut)1122 void blob_compress(Blob *pIn, Blob *pOut){
1123   unsigned int nIn = blob_size(pIn);
1124   unsigned int nOut = 13 + nIn + (nIn+999)/1000;
1125   unsigned long int nOut2;
1126   unsigned char *outBuf;
1127   Blob temp;
1128   blob_zero(&temp);
1129   blob_resize(&temp, nOut+4);
1130   outBuf = (unsigned char*)blob_buffer(&temp);
1131   outBuf[0] = nIn>>24 & 0xff;
1132   outBuf[1] = nIn>>16 & 0xff;
1133   outBuf[2] = nIn>>8 & 0xff;
1134   outBuf[3] = nIn & 0xff;
1135   nOut2 = (long int)nOut;
1136   compress(&outBuf[4], &nOut2,
1137            (unsigned char*)blob_buffer(pIn), blob_size(pIn));
1138   if( pOut==pIn ) blob_reset(pOut);
1139   assert_blob_is_reset(pOut);
1140   *pOut = temp;
1141   blob_resize(pOut, nOut2+4);
1142 }
1143 
1144 /*
1145 ** COMMAND: test-compress
1146 **
1147 ** Usage: %fossil test-compress INPUTFILE OUTPUTFILE
1148 **
1149 ** Run compression on INPUTFILE and write the result into OUTPUTFILE.
1150 **
1151 ** This is used to test and debug the blob_compress() routine.
1152 */
compress_cmd(void)1153 void compress_cmd(void){
1154   Blob f;
1155   if( g.argc!=4 ) usage("INPUTFILE OUTPUTFILE");
1156   blob_read_from_file(&f, g.argv[2], ExtFILE);
1157   blob_compress(&f, &f);
1158   blob_write_to_file(&f, g.argv[3]);
1159 }
1160 
1161 /*
1162 ** Compress the concatenation of a blobs pIn1 and pIn2.  Store the result
1163 ** in pOut.
1164 **
1165 ** pOut must be either uninitialized or must be the same as either pIn1 or
1166 ** pIn2.
1167 */
blob_compress2(Blob * pIn1,Blob * pIn2,Blob * pOut)1168 void blob_compress2(Blob *pIn1, Blob *pIn2, Blob *pOut){
1169   unsigned int nIn = blob_size(pIn1) + blob_size(pIn2);
1170   unsigned int nOut = 13 + nIn + (nIn+999)/1000;
1171   unsigned char *outBuf;
1172   z_stream stream;
1173   Blob temp;
1174   blob_zero(&temp);
1175   blob_resize(&temp, nOut+4);
1176   outBuf = (unsigned char*)blob_buffer(&temp);
1177   outBuf[0] = nIn>>24 & 0xff;
1178   outBuf[1] = nIn>>16 & 0xff;
1179   outBuf[2] = nIn>>8 & 0xff;
1180   outBuf[3] = nIn & 0xff;
1181   stream.zalloc = (alloc_func)0;
1182   stream.zfree = (free_func)0;
1183   stream.opaque = 0;
1184   stream.avail_out = nOut;
1185   stream.next_out = &outBuf[4];
1186   deflateInit(&stream, 9);
1187   stream.avail_in = blob_size(pIn1);
1188   stream.next_in = (unsigned char*)blob_buffer(pIn1);
1189   deflate(&stream, 0);
1190   stream.avail_in = blob_size(pIn2);
1191   stream.next_in = (unsigned char*)blob_buffer(pIn2);
1192   deflate(&stream, 0);
1193   deflate(&stream, Z_FINISH);
1194   blob_resize(&temp, stream.total_out + 4);
1195   deflateEnd(&stream);
1196   if( pOut==pIn1 ) blob_reset(pOut);
1197   if( pOut==pIn2 ) blob_reset(pOut);
1198   assert_blob_is_reset(pOut);
1199   *pOut = temp;
1200 }
1201 
1202 /*
1203 ** COMMAND: test-compress-2
1204 **
1205 ** Usage: %fossil test-compress-2 IN1 IN2 OUT
1206 **
1207 ** Read files IN1 and IN2, concatenate the content, compress the
1208 ** content, then write results into OUT.
1209 **
1210 ** This is used to test and debug the blob_compress2() routine.
1211 */
compress2_cmd(void)1212 void compress2_cmd(void){
1213   Blob f1, f2;
1214   if( g.argc!=5 ) usage("INPUTFILE1 INPUTFILE2 OUTPUTFILE");
1215   blob_read_from_file(&f1, g.argv[2], ExtFILE);
1216   blob_read_from_file(&f2, g.argv[3], ExtFILE);
1217   blob_compress2(&f1, &f2, &f1);
1218   blob_write_to_file(&f1, g.argv[4]);
1219 }
1220 
1221 /*
1222 ** Uncompress blob pIn and store the result in pOut.  It is ok for pIn and
1223 ** pOut to be the same blob.
1224 **
1225 ** pOut must be either uninitialized or the same as pIn.
1226 */
blob_uncompress(Blob * pIn,Blob * pOut)1227 int blob_uncompress(Blob *pIn, Blob *pOut){
1228   unsigned int nOut;
1229   unsigned char *inBuf;
1230   unsigned int nIn = blob_size(pIn);
1231   Blob temp;
1232   int rc;
1233   unsigned long int nOut2;
1234   if( nIn<=4 ){
1235     return 0;
1236   }
1237   inBuf = (unsigned char*)blob_buffer(pIn);
1238   nOut = (inBuf[0]<<24) + (inBuf[1]<<16) + (inBuf[2]<<8) + inBuf[3];
1239   blob_zero(&temp);
1240   blob_resize(&temp, nOut+1);
1241   nOut2 = (long int)nOut;
1242   rc = uncompress((unsigned char*)blob_buffer(&temp), &nOut2,
1243                   &inBuf[4], nIn - 4);
1244   if( rc!=Z_OK ){
1245     blob_reset(&temp);
1246     return 1;
1247   }
1248   blob_resize(&temp, nOut2);
1249   if( pOut==pIn ) blob_reset(pOut);
1250   assert_blob_is_reset(pOut);
1251   *pOut = temp;
1252   return 0;
1253 }
1254 
1255 /*
1256 ** COMMAND: test-uncompress
1257 **
1258 ** Usage: %fossil test-uncompress IN OUT
1259 **
1260 ** Read the content of file IN, uncompress that content, and write the
** result into OUT.  This command is intended for testing of the
** blob_uncompress() function.
1263 */
uncompress_cmd(void)1264 void uncompress_cmd(void){
1265   Blob f;
1266   if( g.argc!=4 ) usage("INPUTFILE OUTPUTFILE");
1267   blob_read_from_file(&f, g.argv[2], ExtFILE);
1268   blob_uncompress(&f, &f);
1269   blob_write_to_file(&f, g.argv[3]);
1270 }
1271 
1272 /*
1273 ** COMMAND: test-cycle-compress
1274 **
1275 ** Compress and uncompress each file named on the command line.
1276 ** Verify that the original content is recovered.
1277 */
test_cycle_compress(void)1278 void test_cycle_compress(void){
1279   int i;
1280   Blob b1, b2, b3;
1281   for(i=2; i<g.argc; i++){
1282     blob_read_from_file(&b1, g.argv[i], ExtFILE);
1283     blob_compress(&b1, &b2);
1284     blob_uncompress(&b2, &b3);
1285     if( blob_compare(&b1, &b3) ){
1286       fossil_fatal("compress/uncompress cycle failed for %s", g.argv[i]);
1287     }
1288     blob_reset(&b1);
1289     blob_reset(&b2);
1290     blob_reset(&b3);
1291   }
1292   fossil_print("ok\n");
1293 }
1294 
1295 /*
1296 ** Convert every \n character in the given blob into \r\n.
1297 */
blob_add_cr(Blob * p)1298 void blob_add_cr(Blob *p){
1299   char *z = p->aData;
1300   int j   = p->nUsed;
1301   int i, n;
1302   for(i=n=0; i<j; i++){
1303     if( z[i]=='\n' ) n++;
1304   }
1305   j += n;
1306   if( j>=p->nAlloc ){
1307     blob_resize(p, j);
1308     z = p->aData;
1309   }
1310   p->nUsed = j;
1311   z[j] = 0;
1312   while( j>i ){
1313     if( (z[--j] = z[--i]) =='\n' ){
1314       z[--j] = '\r';
1315     }
1316   }
1317 }
1318 
1319 /*
1320 ** Remove every \r character from the given blob, replacing each one with
1321 ** a \n character if it was not already part of a \r\n pair.
1322 */
blob_to_lf_only(Blob * p)1323 void blob_to_lf_only(Blob *p){
1324   int i, j;
1325   char *z = blob_materialize(p);
1326   for(i=j=0; z[i]; i++){
1327     if( z[i]!='\r' ) z[j++] = z[i];
1328     else if( z[i+1]!='\n' ) z[j++] = '\n';
1329   }
1330   z[j] = 0;
1331   p->nUsed = j;
1332 }
1333 
/*
** Convert blob from cp1252 to UTF-8. As cp1252 is a superset
** of iso8859-1, this is useful on UNIX as well.
**
** This table holds the Unicode code points for the 32 cp1252 bytes
** 0x80..0x9F.  It is indexed by (byte & 0x1f).
*/

static const unsigned short cp1252[32] = {
  0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 80-87 */
  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, /* 88-8F */
  0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 90-97 */
  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  /* 98-9F */
};
1347 
/*
** Re-encode the content of blob p from cp1252 to UTF-8, in place.
**
** Bytes below 0x80 are copied unchanged.  Bytes 0x80..0x9F are mapped
** through the cp1252[] table and emitted as 2 or 3 UTF-8 bytes.  Bytes
** 0xA0..0xFF are emitted as the 2-byte UTF-8 form of the same value.
**
** A first pass computes how many extra bytes are needed.  The buffer is
** then grown if necessary and filled from the back toward the front.
*/
void blob_cp1252_to_utf8(Blob *p){
  unsigned char *zData = (unsigned char *)p->aData;
  int iDst = p->nUsed;       /* Becomes the write index for the back-fill */
  int iSrc;                  /* Read index; ends pass one at the old size */
  int nExtra = 0;            /* Additional bytes required by the encoding */

  for(iSrc=0; iSrc<iDst; iSrc++){
    unsigned char c = zData[iSrc];
    if( c<0x80 ) continue;
    nExtra += ( c<0xa0 && cp1252[c&0x1f]>=0x800 ) ? 2 : 1;
  }
  iDst += nExtra;
  if( iDst>=p->nAlloc ){
    blob_resize(p, iDst);
    zData = (unsigned char *)p->aData;
  }
  p->nUsed = iDst;
  zData[iDst] = 0;

  /* Once the two indexes meet, the remaining prefix is pure ASCII */
  while( iDst>iSrc ){
    unsigned char c = zData[--iSrc];
    unsigned int cp;         /* Code point to encode for a non-ASCII byte */
    if( c<0x80 ){
      zData[--iDst] = c;
      continue;
    }
    cp = c<0xa0 ? cp1252[c&0x1f] : c;
    zData[--iDst] = 0x80 | (cp&0x3f);
    if( cp>=0x800 ){
      zData[--iDst] = 0x80 | ((cp>>6)&0x3f);
      zData[--iDst] = 0xe0 | (cp>>12);
    }else{
      zData[--iDst] = 0xc0 | (cp>>6);
    }
  }
}
1388 
1389 /*
1390 ** ASCII (for reference):
1391 **    x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf
1392 ** 0x ^`  ^a  ^b  ^c  ^d  ^e  ^f  ^g  \b  \t  \n  ()  \f  \r  ^n  ^o
1393 ** 1x ^p  ^q  ^r  ^s  ^t  ^u  ^v  ^w  ^x  ^y  ^z  ^{  ^|  ^}  ^~  ^
1394 ** 2x ()  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /
1395 ** 3x 0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
1396 ** 4x @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
1397 ** 5x P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
1398 ** 6x `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
1399 ** 7x p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~   ^_
1400 */
1401 
/*
** Classification of each possible byte of a filename argument.  The table
** is indexed by the byte value (0..255) and is consulted by
** blob_append_escaped_arg().
**
** Meanings for bytes in a filename:
**
**    0      Ordinary character.  No encoding required
**    1      Needs to be escaped
**    2      Illegal character.  Do not allow in a filename
**    3      First byte of a 2-byte UTF-8
**    4      First byte of a 3-byte UTF-8
**    5      First byte of a 4-byte UTF-8
*/
static const char aSafeChar[256] = {
#ifdef _WIN32
/* Windows
** Prohibit:  all control characters, including tab, \r and \n
** Escape:    (space) " # $ % & ' ( ) * ; < > ? [ ] ^ ` { | }
**            The table below also marks @ and 0x7f as needing escape.
*/
/*  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf  */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 0x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 1x */
     1,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0, /* 2x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1, /* 3x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 4x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  1,  0, /* 5x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 6x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  0,  1, /* 7x */
#else
/* Unix
** Prohibit:  all control characters, including tab, \r and \n
** Escape:    (space) ! " # $ % & ' ( ) * ; < > ? [ \ ] ^ ` { | }
**            The table below also marks @ and 0x7f as needing escape.
*/
/*  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf  */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 0x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 1x */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0, /* 2x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1, /* 3x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 4x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  0, /* 5x */
     1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 6x */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  0,  1, /* 7x */
#endif
    /* Bytes 0x80 through 0xbf are UTF-8 continuation bytes.  They are
    ** class 2 (illegal) here: blob_append_escaped_arg() steps over the
    ** continuation bytes that follow a valid lead byte, so one that is
    ** looked up in this table is a stray and gets rejected.  Bytes 0xc0
    ** through 0xff are classified by the length of the UTF-8 sequence
    ** they introduce. */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 8x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 9x */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* ax */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* bx */
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, /* cx */
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, /* dx */
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, /* ex */
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5  /* fx */
};
1454 
1455 /*
1456 ** pBlob is a shell command under construction.  This routine safely
1457 ** appends filename argument zIn.
1458 **
1459 ** The argument is escaped if it contains white space or other characters
1460 ** that need to be escaped for the shell.  If zIn contains characters
1461 ** that cannot be safely escaped, then throw a fatal error.
1462 **
1463 ** If the isFilename argument is true, then the argument is expected
1464 ** to be a filename.  As shell commands commonly have command-line
1465 ** options that begin with "-" and since we do not want an attacker
1466 ** to be able to invoke these switches using filenames that begin
1467 ** with "-", if zIn begins with "-", prepend an additional "./"
1468 ** (or ".\\" on Windows).
1469 */
blob_append_escaped_arg(Blob * pBlob,const char * zIn,int isFilename)1470 void blob_append_escaped_arg(Blob *pBlob, const char *zIn, int isFilename){
1471   int i;
1472   unsigned char c;
1473   int needEscape = 0;
1474   int n = blob_size(pBlob);
1475   char *z = blob_buffer(pBlob);
1476 
1477   /* Look for illegal byte-sequences and byte-sequences that require
1478   ** escaping.  No control-characters are allowed.  All spaces and
1479   ** non-ASCII unicode characters and some punctuation characters require
1480   ** escaping. */
1481   for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1482     if( aSafeChar[c] ){
1483       unsigned char x = aSafeChar[c];
1484       needEscape = 1;
1485       if( x==2 ){
1486         Blob bad;
1487         blob_token(pBlob, &bad);
1488         fossil_fatal("the [%s] argument to the \"%s\" command contains "
1489                      "a character (ascii 0x%02x) that is not allowed in "
1490                      "filename arguments",
1491                      zIn, blob_str(&bad), c);
1492       }else if( x>2 ){
1493         if( (zIn[i+1]&0xc0)!=0x80
1494          || (x>=4 && (zIn[i+2]&0xc0)!=0x80)
1495          || (x==5 && (zIn[i+3]&0xc0)!=0x80)
1496         ){
1497           Blob bad;
1498           blob_token(pBlob, &bad);
1499           fossil_fatal("the [%s] argument to the \"%s\" command contains "
1500                        "an illegal UTF-8 character",
1501                        zIn, blob_str(&bad));
1502         }
1503         i += x-2;
1504       }
1505     }
1506   }
1507 
1508   /* Separate from the previous argument by a space */
1509   if( n>0 && !fossil_isspace(z[n-1]) ){
1510     blob_append_char(pBlob, ' ');
1511   }
1512 
1513   /* Check for characters that need quoting */
1514   if( !needEscape ){
1515     if( isFilename && zIn[0]=='-' ){
1516       blob_append_char(pBlob, '.');
1517 #if defined(_WIN32)
1518       blob_append_char(pBlob, '\\');
1519 #else
1520       blob_append_char(pBlob, '/');
1521 #endif
1522     }
1523     blob_append(pBlob, zIn, -1);
1524   }else{
1525 #if defined(_WIN32)
1526     /* Quoting strategy for windows:
1527     ** Put the entire name inside of "...".  Any " characters within
1528     ** the name get doubled.
1529     */
1530     blob_append_char(pBlob, '"');
1531     if( isFilename && zIn[0]=='-' ){
1532       blob_append_char(pBlob, '.');
1533       blob_append_char(pBlob, '\\');
1534     }else if( zIn[0]=='/' ){
1535       blob_append_char(pBlob, '.');
1536     }
1537     for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1538       blob_append_char(pBlob, (char)c);
1539       if( c=='"' ) blob_append_char(pBlob, '"');
1540     }
1541     blob_append_char(pBlob, '"');
1542 #else
1543     /* Quoting strategy for unix:
1544     ** If the name does not contain ', then surround the whole thing
1545     ** with '...'.   If there is one or more ' characters within the
1546     ** name, then put \ before each special character.
1547     */
1548     if( strchr(zIn,'\'') ){
1549       if( isFilename && zIn[0]=='-' ){
1550         blob_append_char(pBlob, '.');
1551         blob_append_char(pBlob, '/');
1552       }
1553       for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1554         if( aSafeChar[c] && aSafeChar[c]!=2 ) blob_append_char(pBlob, '\\');
1555         blob_append_char(pBlob, (char)c);
1556       }
1557     }else{
1558       blob_append_char(pBlob, '\'');
1559       if( isFilename && zIn[0]=='-' ){
1560         blob_append_char(pBlob, '.');
1561         blob_append_char(pBlob, '/');
1562       }
1563       blob_append(pBlob, zIn, -1);
1564       blob_append_char(pBlob, '\'');
1565     }
1566 #endif
1567   }
1568 }
1569 
1570 /*
1571 ** COMMAND: test-escaped-arg
1572 **
** Usage: %fossil test-escaped-arg ARGS ...
1574 **
1575 ** Run each argument through blob_append_escaped_arg() and show the
1576 ** result.  Append each argument to "fossil test-echo" and run that
1577 ** using fossil_system() to verify that it really does get escaped
1578 ** correctly.
1579 **
1580 ** Other options:
1581 **
1582 **    --filename-args BOOL      Subsequent arguments are assumed to be
1583 **                              filenames if BOOL is true, or not if BOOL
1584 **                              is false.  Defaults on.
1585 **
1586 **    --hex HEX                 Skip the --hex flag and instead decode HEX
1587 **                              into ascii.  This provides a way to insert
1588 **                              unusual characters as an argument for testing.
1589 **
**    --compare HEX ASCII       Verify that argument ASCII is identical
**                              to decoded HEX.
1592 **
**    --fuzz N                  Run N fuzz cases.  Each case is a call
1594 **                              to "fossil test-escaped-arg --compare HEX ARG"
1595 **                              where HEX and ARG are the same argument.
1596 **                              The argument is chosen at random.
1597 */
void test_escaped_arg_command(void){
  int i;
  Blob x;                 /* Command line under construction */
  const char *zArg;       /* Argument currently being processed */
  int isFilename = 1;     /* True to escape arguments as filenames */
  char zBuf[100];         /* Scratch space for hex encoding/decoding */
  blob_init(&x, 0, 0);
  for(i=2; i<g.argc; i++){
    zArg = g.argv[i];
    if( fossil_strcmp(zArg, "--hex")==0 && i+1<g.argc ){
      /* --hex HEX: decode HEX into zBuf and treat that as the argument */
      size_t n = strlen(g.argv[++i]);
      if( n>=(sizeof(zBuf)-1)*2 ){
        fossil_fatal("Argument to --hex is too big");
      }
      memset(zBuf, 0, sizeof(zBuf));
      decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, (int)n);
      zArg = zBuf;
    }else if( fossil_strcmp(zArg, "--compare")==0 && i+2<g.argc ){
      /* --compare HEX ASCII: decode HEX and require it to equal ASCII */
      size_t n = strlen(g.argv[++i]);
      if( n>=(sizeof(zBuf)-1)*2 ){
        fossil_fatal("HEX argument to --compare is too big");
      }
      memset(zBuf, 0, sizeof(zBuf));
      if( decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf,
                   (int)n) ){
        fossil_fatal("HEX decode of %s failed", g.argv[i]);
      }
      zArg = g.argv[++i];
      if( zArg[0]=='-' ){
        fossil_fatal("filename argument \"%s\" begins with \"-\"", zArg);
      }
      /* If the decoded text begins with "-", skip over a leading "./"
      ** (".\" on Windows) on the received argument before comparing. */
#ifdef _WIN32
      if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='\\' ) zArg += 2;
#else
      if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='/' ) zArg += 2;
#endif
      if( strcmp(zBuf, zArg)!=0 ){
        fossil_fatal("argument disagree: \"%s\" (%s) versus \"%s\"",
                     zBuf, g.argv[i-1], zArg);
      }
      continue;
    }else if( fossil_strcmp(zArg, "--fuzz")==0 && i+1<g.argc ){
      /* --fuzz N: build N random arguments and run each through a child
      ** "test-escaped-arg --compare" invocation. */
      int n = atoi(g.argv[++i]);
      int j;
      for(j=0; j<n; j++){
        unsigned char m, k;
        int rc;
        unsigned char zWord[100];
        sqlite3_randomness(sizeof(m), &m);
        m = (m%40)+5;
        sqlite3_randomness(m, zWord); /* Between 5 and 44 bytes of randomness */
        /* The rewrite below is done in place.  A multi-byte replacement
        ** overwrites the random bytes that follow it and advances k past
        ** them, so the finished word can extend a few bytes beyond m. */
        for(k=0; k<m; k++){
          unsigned char cx = zWord[k];
          if( cx<0x20 || cx>=0x7f ){
            /* Translate illegal bytes into various non-ASCII unicode
            ** characters in order to exercise those code paths */
            unsigned int u;
            if( cx>=0x7f ){
              u = cx;
            }else if( cx>=0x08 ){
              u = 0x800 + cx;
            }else{
              u = 0x10000 + cx;
            }
            /* Encode u as UTF-8 using 1, 2, 3 or 4 bytes */
            if( u<0x00080 ){
              zWord[k] = u & 0xFF;
            }else if( u<0x00800 ){
              zWord[k++] = 0xC0 + (u8)((u>>6)&0x1F);
              zWord[k] =   0x80 + (u8)(u & 0x3F);
            }else if( u<0x10000 ){
              zWord[k++] = 0xE0 + (u8)((u>>12)&0x0F);
              zWord[k++] = 0x80 + (u8)((u>>6) & 0x3F);
              zWord[k] =   0x80 + (u8)(u & 0x3F);
            }else{
              zWord[k++] = 0xF0 + (u8)((u>>18) & 0x07);
              zWord[k++] = 0x80 + (u8)((u>>12) & 0x3F);
              zWord[k++] = 0x80 + (u8)((u>>6) & 0x3F);
              zWord[k]   = 0x80 + (u8)(u & 0x3F);
            }
          }
        }
        zWord[k] = 0;
        /* Pass the word twice: hex-encoded as the reference, and escaped
        ** with %$ as the value under test. */
        encode16(zWord, (unsigned char*)zBuf, (int)k);
        blob_appendf(&x, "%$ test-escaped-arg --compare %s %$",
                         g.nameOfExe, zBuf,zWord);
        rc = fossil_system(blob_str(&x));
        if( rc ) fossil_fatal("failed test (%d): %s\n", rc, blob_str(&x));
        blob_reset(&x);
      }
      continue;
    }else if( fossil_strcmp(zArg, "--filename-args")==0 ){
       /* --filename-args BOOL: a missing BOOL leaves the mode unchanged */
       if( i+1<g.argc ){
         i++;
         isFilename = is_truth(g.argv[i]);
       }
       continue;
    }
    /* Ordinary argument (or decoded --hex value): show the escaped
    ** command line and then run it. */
    fossil_print("%3d [%s]: ", i, zArg);
    if( isFilename ){
      blob_appendf(&x, "%$ test-echo %$", g.nameOfExe, zArg);
    }else{
      blob_appendf(&x, "%$ test-echo %!$", g.nameOfExe, zArg);
    }
    fossil_print("%s\n", blob_str(&x));
    fossil_system(blob_str(&x));
    blob_reset(&x);
  }
}
1706 
1707 /*
1708 ** A read(2)-like impl for the Blob class. Reads (copies) up to nLen
1709 ** bytes from pIn, starting at position pIn->iCursor, and copies them
1710 ** to pDest (which must be valid memory at least nLen bytes long).
1711 **
1712 ** Returns the number of bytes read/copied, which may be less than
1713 ** nLen (if end-of-blob is encountered).
1714 **
1715 ** Updates pIn's cursor.
1716 **
1717 ** Returns 0 if pIn contains no data.
1718 */
blob_read(Blob * pIn,void * pDest,unsigned int nLen)1719 unsigned int blob_read(Blob *pIn, void * pDest, unsigned int nLen ){
1720   if( !pIn->aData || (pIn->iCursor >= pIn->nUsed) ){
1721     return 0;
1722   } else if( (pIn->iCursor + nLen) > (unsigned int)pIn->nUsed ){
1723     nLen = (unsigned int) (pIn->nUsed - pIn->iCursor);
1724   }
1725   assert( pIn->nUsed > pIn->iCursor );
1726   assert( (pIn->iCursor+nLen)  <= pIn->nUsed );
1727   if( nLen ){
1728     memcpy( pDest, pIn->aData, nLen );
1729     pIn->iCursor += nLen;
1730   }
1731   return nLen;
1732 }
1733 
1734 /*
1735 ** Swaps the contents of the given blobs. Results
1736 ** are unspecified if either value is NULL or both
1737 ** point to the same blob.
1738 */
blob_swap(Blob * pLeft,Blob * pRight)1739 void blob_swap( Blob *pLeft, Blob *pRight ){
1740   Blob swap = *pLeft;
1741   *pLeft = *pRight;
1742   *pRight = swap;
1743 }
1744 
1745 /*
1746 ** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1747 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1748 ** done.  If useMbcs is false and there is no BOM, the input string is assumed
1749 ** to be UTF-8 already, so no conversion is done.
1750 */
blob_to_utf8_no_bom(Blob * pBlob,int useMbcs)1751 void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1752   char *zUtf8;
1753   int bomSize = 0;
1754   int bomReverse = 0;
1755   if( starts_with_utf8_bom(pBlob, &bomSize) ){
1756     struct Blob temp;
1757     zUtf8 = blob_str(pBlob) + bomSize;
1758     blob_zero(&temp);
1759     blob_append(&temp, zUtf8, -1);
1760     blob_swap(pBlob, &temp);
1761     blob_reset(&temp);
1762   }else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
1763     zUtf8 = blob_buffer(pBlob);
1764     if( bomReverse ){
1765       /* Found BOM, but with reversed bytes */
1766       unsigned int i = blob_size(pBlob);
1767       while( i>1 ){
1768         /* swap bytes of unicode representation */
1769         char zTemp = zUtf8[--i];
1770         zUtf8[i] = zUtf8[i-1];
1771         zUtf8[--i] = zTemp;
1772       }
1773     }
1774     /* Make sure the blob contains two terminating 0-bytes */
1775     blob_append(pBlob, "\000\000", 3);
1776     zUtf8 = blob_str(pBlob) + bomSize;
1777     zUtf8 = fossil_unicode_to_utf8(zUtf8);
1778     blob_reset(pBlob);
1779     blob_set_dynamic(pBlob, zUtf8);
1780   }else if( useMbcs && invalid_utf8(pBlob) ){
1781 #if defined(_WIN32) || defined(__CYGWIN__)
1782     zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1783     blob_reset(pBlob);
1784     blob_append(pBlob, zUtf8, -1);
1785     fossil_mbcs_free(zUtf8);
1786 #else
1787     blob_cp1252_to_utf8(pBlob);
1788 #endif /* _WIN32 */
1789   }
1790 }
1791