1 /*
2 ** This program is a debugging and analysis utility that displays
3 ** information about an FTS3 or FTS4 index.
4 **
5 ** Link this program against the SQLite3 amalgamation with the
6 ** SQLITE_ENABLE_FTS4 compile-time option.  Then run it as:
7 **
8 **    fts3view DATABASE
9 **
10 ** to get a list of all FTS3/4 tables in DATABASE, or do
11 **
12 **    fts3view DATABASE TABLE COMMAND ....
13 **
14 ** to see various aspects of the TABLE table.  Type fts3view with no
15 ** arguments for a list of available COMMANDs.
16 */
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include "sqlite3.h"
23 
/*
** Extra command-line arguments: azExtra[0..nExtra-1] are the arguments
** that have not been consumed yet.  findOption() removes the entries it
** recognizes, so nExtra shrinks as options are processed.
*/
int nExtra;
char **azExtra;

/*
** Look for a command-line option named zName among the extra arguments.
**
** Any number of leading '-' characters (including none) is skipped on each
** argument before it is compared with zName, so "-top" and "--top" both
** match zName "top".
**
** Return value:
**   *  If the option is absent, zDefault.
**   *  If hasArg is zero, the matching argument itself (useful only as a
**      non-NULL "present" flag).
**   *  If hasArg is non-zero, the argument that follows the option.  When
**      the option is the very last argument there is no value, and the
**      option text itself is returned.
**
** The option, and its value when one was consumed, is removed from
** azExtra[] by shifting the later entries down, and nExtra is reduced to
** match.  Only entries inside azExtra[0..nExtra-1] are ever read, so the
** array does not need a trailing sentinel, and a later call never sees a
** stale or NULL slot.
*/
const char *findOption(const char *zName, int hasArg, const char *zDefault){
  int i;
  for(i=0; i<nExtra; i++){
    const char *z = azExtra[i];
    const char *zResult;
    int nRemove;              /* Number of azExtra[] slots to delete */
    int k;

    while( z[0]=='-' ) z++;
    if( strcmp(z, zName)!=0 ) continue;

    /* Consume the following argument too, but only if there is one. */
    nRemove = (hasArg!=0 && i<nExtra-1) ? 2 : 1;
    zResult = azExtra[i+nRemove-1];

    /* Close the gap without touching anything at or beyond nExtra. */
    for(k=i; k+nRemove<nExtra; k++){
      azExtra[k] = azExtra[k+nRemove];
    }
    nExtra -= nRemove;
    return zResult;
  }
  return zDefault;
}
52 
53 
54 /*
55 ** Prepare an SQL query
56 */
prepare(sqlite3 * db,const char * zFormat,...)57 static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
58   va_list ap;
59   char *zSql;
60   sqlite3_stmt *pStmt;
61   int rc;
62 
63   va_start(ap, zFormat);
64   zSql = sqlite3_vmprintf(zFormat, ap);
65   va_end(ap);
66   rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
67   if( rc ){
68     fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql);
69     exit(1);
70   }
71   sqlite3_free(zSql);
72   return pStmt;
73 }
74 
75 /*
76 ** Run an SQL statement
77 */
runSql(sqlite3 * db,const char * zFormat,...)78 static int runSql(sqlite3 *db, const char *zFormat, ...){
79   va_list ap;
80   char *zSql;
81   int rc;
82 
83   va_start(ap, zFormat);
84   zSql = sqlite3_vmprintf(zFormat, ap);
85   rc = sqlite3_exec(db, zSql, 0, 0, 0);
86   va_end(ap);
87   return rc;
88 }
89 
90 /*
91 ** Show the table schema
92 */
showSchema(sqlite3 * db,const char * zTab)93 static void showSchema(sqlite3 *db, const char *zTab){
94   sqlite3_stmt *pStmt;
95   pStmt = prepare(db,
96             "SELECT sql FROM sqlite_schema"
97             " WHERE name LIKE '%q%%'"
98             " ORDER BY 1",
99             zTab);
100   while( sqlite3_step(pStmt)==SQLITE_ROW ){
101     printf("%s;\n", sqlite3_column_text(pStmt, 0));
102   }
103   sqlite3_finalize(pStmt);
104   pStmt = prepare(db, "PRAGMA page_size");
105   while( sqlite3_step(pStmt)==SQLITE_ROW ){
106     printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
107   }
108   sqlite3_finalize(pStmt);
109   pStmt = prepare(db, "PRAGMA journal_mode");
110   while( sqlite3_step(pStmt)==SQLITE_ROW ){
111     printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
112   }
113   sqlite3_finalize(pStmt);
114   pStmt = prepare(db, "PRAGMA auto_vacuum");
115   while( sqlite3_step(pStmt)==SQLITE_ROW ){
116     const char *zType = "???";
117     switch( sqlite3_column_int(pStmt, 0) ){
118       case 0:  zType = "OFF";         break;
119       case 1:  zType = "FULL";        break;
120       case 2:  zType = "INCREMENTAL"; break;
121     }
122     printf("PRAGMA auto_vacuum=%s;\n", zType);
123   }
124   sqlite3_finalize(pStmt);
125   pStmt = prepare(db, "PRAGMA encoding");
126   while( sqlite3_step(pStmt)==SQLITE_ROW ){
127     printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
128   }
129   sqlite3_finalize(pStmt);
130 }
131 
132 /*
133 ** Read a 64-bit variable-length integer from memory starting at p[0].
134 ** Return the number of bytes read, or 0 on error.
135 ** The value is stored in *v.
136 */
getVarint(const unsigned char * p,sqlite_int64 * v)137 int getVarint(const unsigned char *p, sqlite_int64 *v){
138   const unsigned char *q = p;
139   sqlite_uint64 x = 0, y = 1;
140   while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
141     x += y * (*q++ & 0x7f);
142     y <<= 7;
143   }
144   x += y * (*q++);
145   *v = (sqlite_int64) x;
146   return (int) (q - (unsigned char *)p);
147 }
148 
149 
150 /* Show the content of the %_stat table
151 */
showStat(sqlite3 * db,const char * zTab)152 static void showStat(sqlite3 *db, const char *zTab){
153   sqlite3_stmt *pStmt;
154   pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
155   while( sqlite3_step(pStmt)==SQLITE_ROW ){
156     printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
157     switch( sqlite3_column_type(pStmt, 1) ){
158       case SQLITE_INTEGER: {
159         printf(" %d\n", sqlite3_column_int(pStmt, 1));
160         break;
161       }
162       case SQLITE_BLOB: {
163         unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
164         int len = sqlite3_column_bytes(pStmt, 1);
165         int i = 0;
166         sqlite3_int64 v;
167         while( i<len ){
168           i += getVarint(x, &v);
169           printf(" %lld", v);
170         }
171         printf("\n");
172         break;
173       }
174     }
175   }
176   sqlite3_finalize(pStmt);
177 }
178 
179 /*
180 ** Report on the vocabulary.  This creates an fts4aux table with a random
181 ** name, but deletes it in the end.
182 */
showVocabulary(sqlite3 * db,const char * zTab)183 static void showVocabulary(sqlite3 *db, const char *zTab){
184   char *zAux;
185   sqlite3_uint64 r;
186   sqlite3_stmt *pStmt;
187   int nDoc = 0;
188   int nToken = 0;
189   int nOccurrence = 0;
190   int nTop;
191   int n, i;
192 
193   sqlite3_randomness(sizeof(r), &r);
194   zAux = sqlite3_mprintf("viewer_%llx", zTab, r);
195   runSql(db, "BEGIN");
196   pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
197   while( sqlite3_step(pStmt)==SQLITE_ROW ){
198     nDoc = sqlite3_column_int(pStmt, 0);
199   }
200   sqlite3_finalize(pStmt);
201   printf("Number of documents...................... %9d\n", nDoc);
202 
203   runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
204   pStmt = prepare(db,
205              "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
206              zAux);
207   while( sqlite3_step(pStmt)==SQLITE_ROW ){
208     nToken = sqlite3_column_int(pStmt, 0);
209     nOccurrence = sqlite3_column_int(pStmt, 1);
210   }
211   sqlite3_finalize(pStmt);
212   printf("Total tokens in all documents............ %9d\n", nOccurrence);
213   printf("Total number of distinct tokens.......... %9d\n", nToken);
214   if( nToken==0 ) goto end_vocab;
215 
216   n = 0;
217   pStmt = prepare(db, "SELECT count(*) FROM %s"
218                       " WHERE col='*' AND occurrences==1", zAux);
219   while( sqlite3_step(pStmt)==SQLITE_ROW ){
220     n = sqlite3_column_int(pStmt, 0);
221   }
222   sqlite3_finalize(pStmt);
223   printf("Tokens used exactly once................. %9d %5.2f%%\n",
224           n, n*100.0/nToken);
225 
226   n = 0;
227   pStmt = prepare(db, "SELECT count(*) FROM %s"
228                       " WHERE col='*' AND documents==1", zAux);
229   while( sqlite3_step(pStmt)==SQLITE_ROW ){
230     n = sqlite3_column_int(pStmt, 0);
231   }
232   sqlite3_finalize(pStmt);
233   printf("Tokens used in only one document......... %9d %5.2f%%\n",
234           n, n*100.0/nToken);
235 
236   if( nDoc>=2000 ){
237     n = 0;
238     pStmt = prepare(db, "SELECT count(*) FROM %s"
239                         " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
240     while( sqlite3_step(pStmt)==SQLITE_ROW ){
241       n = sqlite3_column_int(pStmt, 0);
242     }
243     sqlite3_finalize(pStmt);
244     printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
245             n, n*100.0/nToken);
246   }
247 
248   if( nDoc>=200 ){
249     n = 0;
250     pStmt = prepare(db, "SELECT count(*) FROM %s"
251                         " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
252     while( sqlite3_step(pStmt)==SQLITE_ROW ){
253       n = sqlite3_column_int(pStmt, 0);
254     }
255     sqlite3_finalize(pStmt);
256     printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
257             n, n*100.0/nToken);
258   }
259 
260   nTop = atoi(findOption("top", 1, "25"));
261   printf("The %d most common tokens:\n", nTop);
262   pStmt = prepare(db,
263             "SELECT term, documents FROM %s"
264             " WHERE col='*'"
265             " ORDER BY documents DESC, term"
266             " LIMIT %d", zAux, nTop);
267   i = 0;
268   while( sqlite3_step(pStmt)==SQLITE_ROW ){
269     i++;
270     n = sqlite3_column_int(pStmt, 1);
271     printf("  %2d. %-30s %9d docs %5.2f%%\n", i,
272       sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
273   }
274   sqlite3_finalize(pStmt);
275 
276 end_vocab:
277   runSql(db, "ROLLBACK");
278   sqlite3_free(zAux);
279 }
280 
281 /*
282 ** Report on the number and sizes of segments
283 */
showSegmentStats(sqlite3 * db,const char * zTab)284 static void showSegmentStats(sqlite3 *db, const char *zTab){
285   sqlite3_stmt *pStmt;
286   int nSeg = 0;
287   sqlite3_int64 szSeg = 0, mxSeg = 0;
288   int nIdx = 0;
289   sqlite3_int64 szIdx = 0, mxIdx = 0;
290   int nRoot = 0;
291   sqlite3_int64 szRoot = 0, mxRoot = 0;
292   sqlite3_int64 mx;
293   int nLeaf;
294   int n;
295   int pgsz;
296   int mxLevel;
297   int i;
298 
299   pStmt = prepare(db,
300                   "SELECT count(*), sum(length(block)), max(length(block))"
301                   " FROM '%q_segments'",
302                   zTab);
303   while( sqlite3_step(pStmt)==SQLITE_ROW ){
304     nSeg = sqlite3_column_int(pStmt, 0);
305     szSeg = sqlite3_column_int64(pStmt, 1);
306     mxSeg = sqlite3_column_int64(pStmt, 2);
307   }
308   sqlite3_finalize(pStmt);
309   pStmt = prepare(db,
310             "SELECT count(*), sum(length(block)), max(length(block))"
311             "  FROM '%q_segments' a JOIN '%q_segdir' b"
312             " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
313             zTab, zTab);
314   while( sqlite3_step(pStmt)==SQLITE_ROW ){
315     nIdx = sqlite3_column_int(pStmt, 0);
316     szIdx = sqlite3_column_int64(pStmt, 1);
317     mxIdx = sqlite3_column_int64(pStmt, 2);
318   }
319   sqlite3_finalize(pStmt);
320   pStmt = prepare(db,
321             "SELECT count(*), sum(length(root)), max(length(root))"
322             "  FROM '%q_segdir'",
323             zTab);
324   while( sqlite3_step(pStmt)==SQLITE_ROW ){
325     nRoot = sqlite3_column_int(pStmt, 0);
326     szRoot = sqlite3_column_int64(pStmt, 1);
327     mxRoot = sqlite3_column_int64(pStmt, 2);
328   }
329   sqlite3_finalize(pStmt);
330 
331   printf("Number of segments....................... %9d\n", nSeg+nRoot);
332   printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
333   printf("Number of index segments................. %9d\n", nIdx);
334   printf("Number of root segments.................. %9d\n", nRoot);
335   printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
336   printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
337   printf("Total size of all index segments......... %9lld\n", szIdx);
338   printf("Total size of all root segments.......... %9lld\n", szRoot);
339   if( nSeg>0 ){
340     printf("Average size of all segments............. %11.1f\n",
341             (double)(szSeg+szRoot)/(double)(nSeg+nRoot));
342     printf("Average size of leaf segments............ %11.1f\n",
343             (double)(szSeg-szIdx)/(double)(nSeg-nIdx));
344   }
345   if( nIdx>0 ){
346     printf("Average size of index segments........... %11.1f\n",
347             (double)szIdx/(double)nIdx);
348   }
349   if( nRoot>0 ){
350     printf("Average size of root segments............ %11.1f\n",
351             (double)szRoot/(double)nRoot);
352   }
353   mx = mxSeg;
354   if( mx<mxRoot ) mx = mxRoot;
355   printf("Maximum segment size..................... %9lld\n", mx);
356   printf("Maximum index segment size............... %9lld\n", mxIdx);
357   printf("Maximum root segment size................ %9lld\n", mxRoot);
358 
359   pStmt = prepare(db, "PRAGMA page_size");
360   pgsz = 1024;
361   while( sqlite3_step(pStmt)==SQLITE_ROW ){
362     pgsz = sqlite3_column_int(pStmt, 0);
363   }
364   sqlite3_finalize(pStmt);
365   printf("Database page size....................... %9d\n", pgsz);
366   pStmt = prepare(db,
367             "SELECT count(*)"
368             "  FROM '%q_segments' a JOIN '%q_segdir' b"
369             " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
370             "   AND length(a.block)>%d",
371             zTab, zTab, pgsz-45);
372   n = 0;
373   while( sqlite3_step(pStmt)==SQLITE_ROW ){
374     n = sqlite3_column_int(pStmt, 0);
375   }
376   sqlite3_finalize(pStmt);
377   nLeaf = nSeg - nIdx;
378   printf("Leaf segments larger than %5d bytes.... %9d   %5.2f%%\n",
379          pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
380 
381   pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
382   mxLevel = 0;
383   while( sqlite3_step(pStmt)==SQLITE_ROW ){
384     mxLevel = sqlite3_column_int(pStmt, 0);
385   }
386   sqlite3_finalize(pStmt);
387 
388   for(i=0; i<=mxLevel; i++){
389     pStmt = prepare(db,
390            "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
391            "       count(distinct idx)"
392            "  FROM (SELECT length(a.block) AS len, idx"
393            "          FROM '%q_segments' a JOIN '%q_segdir' b"
394            "         WHERE (a.blockid BETWEEN b.start_block"
395                                        " AND b.leaves_end_block)"
396            "           AND (b.level%%1024)==%d)",
397            pgsz-45, zTab, zTab, i);
398     if( sqlite3_step(pStmt)==SQLITE_ROW
399      && (nLeaf = sqlite3_column_int(pStmt, 0))>0
400     ){
401       sqlite3_int64 sz;
402       nIdx = sqlite3_column_int(pStmt, 5);
403       printf("For level %d:\n", i);
404       printf("  Number of indexes...................... %9d\n", nIdx);
405       printf("  Number of leaf segments................ %9d\n", nLeaf);
406       if( nIdx>1 ){
407         printf("  Average leaf segments per index........ %11.1f\n",
408                (double)nLeaf/(double)nIdx);
409       }
410       printf("  Total size of all leaf segments........ %9lld\n",
411              (sz = sqlite3_column_int64(pStmt, 1)));
412       printf("  Average size of leaf segments.......... %11.1f\n",
413              sqlite3_column_double(pStmt, 2));
414       if( nIdx>1 ){
415         printf("  Average leaf segment size per index.... %11.1f\n",
416                (double)sz/(double)nIdx);
417       }
418       printf("  Maximum leaf segment size.............. %9lld\n",
419              sqlite3_column_int64(pStmt, 3));
420       n = sqlite3_column_int(pStmt, 4);
421       printf("  Leaf segments larger than %5d bytes.. %9d   %5.2f%%\n",
422              pgsz-45, n, n*100.0/nLeaf);
423     }
424     sqlite3_finalize(pStmt);
425   }
426 }
427 
428 /*
429 ** Print a single "tree" line of the segdir map output.
430 */
printTreeLine(sqlite3_int64 iLower,sqlite3_int64 iUpper)431 static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
432   printf("                 tree   %9lld", iLower);
433   if( iUpper>iLower ){
434     printf(" thru %9lld  (%lld blocks)", iUpper, iUpper-iLower+1);
435   }
436   printf("\n");
437 }
438 
439 /*
440 ** Check to see if the block of a %_segments entry is NULL.
441 */
isNullSegment(sqlite3 * db,const char * zTab,sqlite3_int64 iBlockId)442 static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
443   sqlite3_stmt *pStmt;
444   int rc = 1;
445 
446   pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
447                       " WHERE blockid=%lld", zTab, iBlockId);
448   if( sqlite3_step(pStmt)==SQLITE_ROW ){
449     rc = sqlite3_column_int(pStmt, 0);
450   }
451   sqlite3_finalize(pStmt);
452   return rc;
453 }
454 
455 /*
456 ** Show a map of segments derived from the %_segdir table.
457 */
showSegdirMap(sqlite3 * db,const char * zTab)458 static void showSegdirMap(sqlite3 *db, const char *zTab){
459   int mxIndex, iIndex;
460   sqlite3_stmt *pStmt = 0;
461   sqlite3_stmt *pStmt2 = 0;
462   int prevLevel;
463 
464   pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
465   if( sqlite3_step(pStmt)==SQLITE_ROW ){
466     mxIndex = sqlite3_column_int(pStmt, 0);
467   }else{
468     mxIndex = 0;
469   }
470   sqlite3_finalize(pStmt);
471 
472   printf("Number of inverted indices............... %3d\n", mxIndex+1);
473   pStmt = prepare(db,
474     "SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
475     "  FROM '%q_segdir'"
476     " WHERE level/1024==?"
477     " ORDER BY level DESC, idx",
478     zTab);
479   pStmt2 = prepare(db,
480     "SELECT blockid FROM '%q_segments'"
481     " WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
482     zTab);
483   for(iIndex=0; iIndex<=mxIndex; iIndex++){
484     if( mxIndex>0 ){
485       printf("**************************** Index %d "
486              "****************************\n", iIndex);
487     }
488     sqlite3_bind_int(pStmt, 1, iIndex);
489     prevLevel = -1;
490     while( sqlite3_step(pStmt)==SQLITE_ROW ){
491       int iLevel = sqlite3_column_int(pStmt, 0)%1024;
492       int iIdx = sqlite3_column_int(pStmt, 1);
493       sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
494       sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
495       sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
496       char rtag[20];
497       if( iLevel!=prevLevel ){
498         printf("level %2d idx %2d", iLevel, iIdx);
499         prevLevel = iLevel;
500       }else{
501         printf("         idx %2d", iIdx);
502       }
503       sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
504                        sqlite3_column_int64(pStmt,5));
505       printf("  root   %9s\n", rtag);
506       if( iLEnd>iStart ){
507         sqlite3_int64 iLower, iPrev = 0, iX;
508         if( iLEnd+1<=iEnd ){
509           sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
510           sqlite3_bind_int64(pStmt2, 2, iEnd);
511           iLower = -1;
512           while( sqlite3_step(pStmt2)==SQLITE_ROW ){
513             iX = sqlite3_column_int64(pStmt2, 0);
514             if( iLower<0 ){
515               iLower = iPrev = iX;
516             }else if( iX==iPrev+1 ){
517               iPrev = iX;
518             }else{
519               printTreeLine(iLower, iPrev);
520               iLower = iPrev = iX;
521             }
522           }
523           sqlite3_reset(pStmt2);
524           if( iLower>=0 ){
525             if( iLower==iPrev && iLower==iEnd
526              && isNullSegment(db,zTab,iLower)
527             ){
528               printf("                 null   %9lld\n", iLower);
529             }else{
530               printTreeLine(iLower, iPrev);
531             }
532           }
533         }
534         printf("                 leaves %9lld thru %9lld  (%lld blocks)\n",
535                iStart, iLEnd, iLEnd - iStart + 1);
536       }
537     }
538     sqlite3_reset(pStmt);
539   }
540   sqlite3_finalize(pStmt);
541   sqlite3_finalize(pStmt2);
542 }
543 
544 /*
545 ** Decode a single segment block and display the results on stdout.
546 */
decodeSegment(const unsigned char * aData,int nData)547 static void decodeSegment(
548   const unsigned char *aData,   /* Content to print */
549   int nData                     /* Number of bytes of content */
550 ){
551   sqlite3_int64 iChild = 0;
552   sqlite3_int64 iPrefix;
553   sqlite3_int64 nTerm;
554   sqlite3_int64 n;
555   sqlite3_int64 iDocsz;
556   int iHeight;
557   sqlite3_int64 i = 0;
558   int cnt = 0;
559   char zTerm[1000];
560 
561   i += getVarint(aData, &n);
562   iHeight = (int)n;
563   printf("height: %d\n", iHeight);
564   if( iHeight>0 ){
565     i += getVarint(aData+i, &iChild);
566     printf("left-child: %lld\n", iChild);
567   }
568   while( i<nData ){
569     if( (cnt++)>0 ){
570       i += getVarint(aData+i, &iPrefix);
571     }else{
572       iPrefix = 0;
573     }
574     i += getVarint(aData+i, &nTerm);
575     if( iPrefix+nTerm+1 >= sizeof(zTerm) ){
576       fprintf(stderr, "term to long\n");
577       exit(1);
578     }
579     memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
580     zTerm[iPrefix+nTerm] = 0;
581     i += nTerm;
582     if( iHeight==0 ){
583       i += getVarint(aData+i, &iDocsz);
584       printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
585       i += iDocsz;
586     }else{
587       printf("term: %-25s child %lld\n", zTerm, ++iChild);
588     }
589   }
590 }
591 
592 
/*
** Print a blob as hex and ascii.
**
** Output goes to stdout, 16 bytes per line.  Each line starts with the
** offset of its first byte in hex; the width of that field grows with
** nData.  The hex column follows, then the same bytes as characters with
** '.' standing in for anything isprint() rejects.  On the last line the
** positions at and beyond nData are filled with spaces so the columns
** stay aligned.
*/
static void printBlob(
  const unsigned char *aData,   /* Content to print */
  int nData                     /* Number of bytes of content */
){
  int i, j;
  const char *zOfstFmt;
  const int perLine = 16;

  if( (nData&~0xfff)==0 ){
    zOfstFmt = " %03x: ";
  }else if( (nData&~0xffff)==0 ){
    zOfstFmt = " %04x: ";
  }else if( (nData&~0xfffff)==0 ){
    zOfstFmt = " %05x: ";
  }else if( (nData&~0xffffff)==0 ){
    zOfstFmt = " %06x: ";
  }else{
    zOfstFmt = " %08x: ";
  }

  for(i=0; i<nData; i += perLine){
    fprintf(stdout, zOfstFmt, i);
    for(j=0; j<perLine; j++){
      /* Valid indexes are 0..nData-1, so index nData is padding too. */
      if( i+j>=nData ){
        fprintf(stdout, "   ");
      }else{
        fprintf(stdout,"%02x ", aData[i+j]);
      }
    }
    for(j=0; j<perLine; j++){
      if( i+j>=nData ){
        fprintf(stdout, " ");
      }else{
        fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
      }
    }
    fprintf(stdout,"\n");
  }
}
635 
636 /*
637 ** Convert text to a 64-bit integer
638 */
atoi64(const char * z)639 static sqlite3_int64 atoi64(const char *z){
640   sqlite3_int64 v = 0;
641   while( z[0]>='0' && z[0]<='9' ){
642      v = v*10 + z[0] - '0';
643      z++;
644   }
645   return v;
646 }
647 
648 /*
649 ** Return a prepared statement which, when stepped, will return in its
650 ** first column the blob associated with segment zId.  If zId begins with
651 ** 'r' then it is a rowid of a %_segdir entry.  Otherwise it is a
652 ** %_segment entry.
653 */
prepareToGetSegment(sqlite3 * db,const char * zTab,const char * zId)654 static sqlite3_stmt *prepareToGetSegment(
655   sqlite3 *db,         /* The database */
656   const char *zTab,    /* The FTS3/4 table name */
657   const char *zId      /* ID of the segment to open */
658 ){
659   sqlite3_stmt *pStmt;
660   if( zId[0]=='r' ){
661     pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
662                     zTab, atoi64(zId+1));
663   }else{
664     pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
665                     zTab, atoi64(zId));
666   }
667   return pStmt;
668 }
669 
670 /*
671 ** Print the content of a segment or of the root of a segdir.  The segment
672 ** or root is identified by azExtra[0].  If the first character of azExtra[0]
673 ** is 'r' then the remainder is the integer rowid of the %_segdir entry.
674 ** If the first character of azExtra[0] is not 'r' then, then all of
675 ** azExtra[0] is an integer which is the block number.
676 **
677 ** If the --raw option is present in azExtra, then a hex dump is provided.
678 ** Otherwise a decoding is shown.
679 */
showSegment(sqlite3 * db,const char * zTab)680 static void showSegment(sqlite3 *db, const char *zTab){
681   const unsigned char *aData;
682   int nData;
683   sqlite3_stmt *pStmt;
684 
685   pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
686   if( sqlite3_step(pStmt)!=SQLITE_ROW ){
687     sqlite3_finalize(pStmt);
688     return;
689   }
690   nData = sqlite3_column_bytes(pStmt, 0);
691   aData = sqlite3_column_blob(pStmt, 0);
692   printf("Segment %s of size %d bytes:\n", azExtra[0], nData);
693   if( findOption("raw", 0, 0)!=0 ){
694     printBlob(aData, nData);
695   }else{
696     decodeSegment(aData, nData);
697   }
698   sqlite3_finalize(pStmt);
699 }
700 
701 /*
702 ** Decode a single doclist and display the results on stdout.
703 */
decodeDoclist(const unsigned char * aData,int nData)704 static void decodeDoclist(
705   const unsigned char *aData,   /* Content to print */
706   int nData                     /* Number of bytes of content */
707 ){
708   sqlite3_int64 iPrevDocid = 0;
709   sqlite3_int64 iDocid;
710   sqlite3_int64 iPos;
711   sqlite3_int64 iPrevPos = 0;
712   sqlite3_int64 iCol;
713   int i = 0;
714 
715   while( i<nData ){
716     i += getVarint(aData+i, &iDocid);
717     printf("docid %lld col0", iDocid+iPrevDocid);
718     iPrevDocid += iDocid;
719     iPrevPos = 0;
720     while( 1 ){
721       i += getVarint(aData+i, &iPos);
722       if( iPos==1 ){
723         i += getVarint(aData+i, &iCol);
724         printf(" col%lld", iCol);
725         iPrevPos = 0;
726       }else if( iPos==0 ){
727         printf("\n");
728         break;
729       }else{
730         iPrevPos += iPos - 2;
731         printf(" %lld", iPrevPos);
732       }
733     }
734   }
735 }
736 
737 
738 /*
739 ** Print the content of a doclist.  The segment or segdir-root is
740 ** identified by azExtra[0].  If the first character of azExtra[0]
741 ** is 'r' then the remainder is the integer rowid of the %_segdir entry.
742 ** If the first character of azExtra[0] is not 'r' then, then all of
743 ** azExtra[0] is an integer which is the block number.  The offset
744 ** into the segment is identified by azExtra[1].  The size of the doclist
745 ** is azExtra[2].
746 **
747 ** If the --raw option is present in azExtra, then a hex dump is provided.
748 ** Otherwise a decoding is shown.
749 */
showDoclist(sqlite3 * db,const char * zTab)750 static void showDoclist(sqlite3 *db, const char *zTab){
751   const unsigned char *aData;
752   sqlite3_int64 offset;
753   int nData;
754   sqlite3_stmt *pStmt;
755 
756   offset = atoi64(azExtra[1]);
757   nData = atoi(azExtra[2]);
758   pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
759   if( sqlite3_step(pStmt)!=SQLITE_ROW ){
760     sqlite3_finalize(pStmt);
761     return;
762   }
763   aData = sqlite3_column_blob(pStmt, 0);
764   printf("Doclist at %s offset %lld of size %d bytes:\n",
765          azExtra[0], offset, nData);
766   if( findOption("raw", 0, 0)!=0 ){
767     printBlob(aData+offset, nData);
768   }else{
769     decodeDoclist(aData+offset, nData);
770   }
771   sqlite3_finalize(pStmt);
772 }
773 
774 /*
775 ** Show the top N largest segments
776 */
listBigSegments(sqlite3 * db,const char * zTab)777 static void listBigSegments(sqlite3 *db, const char *zTab){
778   int nTop, i;
779   sqlite3_stmt *pStmt;
780   sqlite3_int64 sz;
781   sqlite3_int64 id;
782 
783   nTop = atoi(findOption("top", 1, "25"));
784   printf("The %d largest segments:\n", nTop);
785   pStmt = prepare(db,
786             "SELECT blockid, length(block) AS len FROM '%q_segments'"
787             " ORDER BY 2 DESC, 1"
788             " LIMIT %d", zTab, nTop);
789   i = 0;
790   while( sqlite3_step(pStmt)==SQLITE_ROW ){
791     i++;
792     id = sqlite3_column_int64(pStmt, 0);
793     sz = sqlite3_column_int64(pStmt, 1);
794     printf("  %2d. %9lld size %lld\n", i, id, sz);
795   }
796   sqlite3_finalize(pStmt);
797 }
798 
799 
800 
/*
** Write the usage summary to stderr and terminate the process with exit
** status 1.  argv0 is the program name shown in the two synopsis lines.
*/
static void usage(const char *argv0){
  FILE *out = stderr;

  fprintf(out, "Usage: %s DATABASE\n"
                  "   or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);
  fprintf(out,
    "ARGS:\n"
    "  big-segments [--top N]                    show the largest segments\n"
    "  doclist BLOCKID OFFSET SIZE [--raw]       Decode a doclist\n"
    "  schema                                    FTS table schema\n"
    "  segdir                                    directory of segments\n"
    "  segment BLOCKID [--raw]                   content of a segment\n"
    "  segment-stats                             info on segment sizes\n"
    "  stat                                      the %%_stat table\n"
    "  vocabulary [--top N]                      document vocabulary\n"
  );
  exit(1);
}
817 
main(int argc,char ** argv)818 int main(int argc, char **argv){
819   sqlite3 *db;
820   int rc;
821   const char *zTab;
822   const char *zCmd;
823 
824   if( argc<2 ) usage(argv[0]);
825   rc = sqlite3_open(argv[1], &db);
826   if( rc ){
827     fprintf(stderr, "Cannot open %s\n", argv[1]);
828     exit(1);
829   }
830   if( argc==2 ){
831     sqlite3_stmt *pStmt;
832     int cnt = 0;
833     pStmt = prepare(db, "SELECT b.sql"
834                         "  FROM sqlite_schema a, sqlite_schema b"
835                         " WHERE a.name GLOB '*_segdir'"
836                         "   AND b.name=substr(a.name,1,length(a.name)-7)"
837                         " ORDER BY 1");
838     while( sqlite3_step(pStmt)==SQLITE_ROW ){
839       cnt++;
840       printf("%s;\n", sqlite3_column_text(pStmt, 0));
841     }
842     sqlite3_finalize(pStmt);
843     if( cnt==0 ){
844       printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
845     }
846     return 0;
847   }
848   if( argc<4 ) usage(argv[0]);
849   zTab = argv[2];
850   zCmd = argv[3];
851   nExtra = argc-4;
852   azExtra = argv+4;
853   if( strcmp(zCmd,"big-segments")==0 ){
854     listBigSegments(db, zTab);
855   }else if( strcmp(zCmd,"doclist")==0 ){
856     if( argc<7 ) usage(argv[0]);
857     showDoclist(db, zTab);
858   }else if( strcmp(zCmd,"schema")==0 ){
859     showSchema(db, zTab);
860   }else if( strcmp(zCmd,"segdir")==0 ){
861     showSegdirMap(db, zTab);
862   }else if( strcmp(zCmd,"segment")==0 ){
863     if( argc<5 ) usage(argv[0]);
864     showSegment(db, zTab);
865   }else if( strcmp(zCmd,"segment-stats")==0 ){
866     showSegmentStats(db, zTab);
867   }else if( strcmp(zCmd,"stat")==0 ){
868     showStat(db, zTab);
869   }else if( strcmp(zCmd,"vocabulary")==0 ){
870     showVocabulary(db, zTab);
871   }else{
872     usage(argv[0]);
873   }
874   return 0;
875 }
876