1# Run this TCL script using "testfixture" in order get a report that shows
2# how much disk space is used by a particular data to actually store data
3# versus how much space is unused.
4#
5
6if {[catch {
7
8# Argument $tname is the name of a table within the database opened by
9# database handle [db]. Return true if it is a WITHOUT ROWID table, or
10# false otherwise.
11#
12proc is_without_rowid {tname} {
13  set t [string map {' ''} $tname]
14  db eval "PRAGMA index_list = '$t'" o {
15    if {$o(origin) == "pk"} {
16      set n $o(name)
17      if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} {
18        return 1
19      }
20    }
21  }
22  return 0
23}
24
25# Read and run TCL commands from standard input.  Used to implement
26# the --tclsh option.
27#
28proc tclsh {} {
29  set line {}
30  while {![eof stdin]} {
31    if {$line!=""} {
32      puts -nonewline "> "
33    } else {
34      puts -nonewline "% "
35    }
36    flush stdout
37    append line [gets stdin]
38    if {[info complete $line]} {
39      if {[catch {uplevel #0 $line} result]} {
40        puts stderr "Error: $result"
41      } elseif {$result!=""} {
42        puts $result
43      }
44      set line {}
45    } else {
46      append line \n
47    }
48  }
49}
50
51
52# Get the name of the database to analyze
53#
54proc usage {} {
55  set argv0 [file rootname [file tail [info nameofexecutable]]]
56  puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename"
57  puts stderr {
58Analyze the SQLite3 database file specified by the "database-filename"
59argument and output a report detailing size and storage efficiency
60information for the database and its constituent tables and indexes.
61
62Options:
63
64   --pageinfo   Show how each page of the database-file is used
65
66   --stats      Output SQL text that creates a new database containing
67                statistics about the database that was analyzed
68
69   --tclsh      Run the built-in TCL interpreter interactively (for debugging)
70
71   --version    Show the version number of SQLite
72}
73  exit 1
74}
75set file_to_analyze {}
76set flags(-pageinfo) 0
77set flags(-stats) 0
78set flags(-debug) 0
79append argv {}
80foreach arg $argv {
81  if {[regexp {^-+pageinfo$} $arg]} {
82    set flags(-pageinfo) 1
83  } elseif {[regexp {^-+stats$} $arg]} {
84    set flags(-stats) 1
85  } elseif {[regexp {^-+debug$} $arg]} {
86    set flags(-debug) 1
87  } elseif {[regexp {^-+tclsh$} $arg]} {
88    tclsh
89    exit 0
90  } elseif {[regexp {^-+version$} $arg]} {
91    sqlite3 mem :memory:
92    puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}]
93    mem close
94    exit 0
95  } elseif {[regexp {^-} $arg]} {
96    puts stderr "Unknown option: $arg"
97    usage
98  } elseif {$file_to_analyze!=""} {
99    usage
100  } else {
101    set file_to_analyze $arg
102  }
103}
104if {$file_to_analyze==""} usage
105set root_filename $file_to_analyze
106regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
107if {![file exists $root_filename]} {
108  puts stderr "No such file: $root_filename"
109  exit 1
110}
111if {![file readable $root_filename]} {
112  puts stderr "File is not readable: $root_filename"
113  exit 1
114}
115set true_file_size [file size $root_filename]
116if {$true_file_size<512} {
117  puts stderr "Empty or malformed database: $root_filename"
118  exit 1
119}
120
121# Compute the total file size assuming test_multiplexor is being used.
122# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
123#
124set extension [file extension $root_filename]
125set pattern $root_filename
126append pattern {[0-3][0-9][0-9]}
127foreach f [glob -nocomplain $pattern] {
128  incr true_file_size [file size $f]
129  set extension {}
130}
131if {[string length $extension]>=2 && [string length $extension]<=4} {
132  set pattern [file rootname $root_filename]
133  append pattern {.[0-3][0-9][0-9]}
134  foreach f [glob -nocomplain $pattern] {
135    incr true_file_size [file size $f]
136  }
137}
138
139# Open the database
140#
141if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} {
142  puts stderr "error trying to open $file_to_analyze: $msg"
143  exit 1
144}
145if {$flags(-debug)} {
146  proc dbtrace {txt} {puts $txt; flush stdout;}
147  db trace ::dbtrace
148}
149
150db eval {SELECT count(*) FROM sqlite_master}
151set pageSize [expr {wide([db one {PRAGMA page_size}])}]
152
153if {$flags(-pageinfo)} {
154  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
155  db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
156    puts "$pageno $name $path"
157  }
158  exit 0
159}
160if {$flags(-stats)} {
161  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
162  puts "BEGIN;"
163  puts "CREATE TABLE stats("
164  puts "  name       STRING,           /* Name of table or index */"
165  puts "  path       INTEGER,          /* Path to page from root */"
166  puts "  pageno     INTEGER,          /* Page number */"
167  puts "  pagetype   STRING,           /* 'internal', 'leaf' or 'overflow' */"
168  puts "  ncell      INTEGER,          /* Cells on page (0 for overflow) */"
169  puts "  payload    INTEGER,          /* Bytes of payload on this page */"
170  puts "  unused     INTEGER,          /* Bytes of unused space on this page */"
171  puts "  mx_payload INTEGER,          /* Largest payload size of all cells */"
172  puts "  pgoffset   INTEGER,          /* Offset of page in file */"
173  puts "  pgsize     INTEGER           /* Size of the page */"
174  puts ");"
175  db eval {SELECT quote(name) || ',' ||
176                  quote(path) || ',' ||
177                  quote(pageno) || ',' ||
178                  quote(pagetype) || ',' ||
179                  quote(ncell) || ',' ||
180                  quote(payload) || ',' ||
181                  quote(unused) || ',' ||
182                  quote(mx_payload) || ',' ||
183                  quote(pgoffset) || ',' ||
184                  quote(pgsize) AS x FROM stat} {
185    puts "INSERT INTO stats VALUES($x);"
186  }
187  puts "COMMIT;"
188  exit 0
189}
190
191
192# In-memory database for collecting statistics. This script loops through
193# the tables and indices in the database being analyzed, adding a row for each
194# to an in-memory database (for which the schema is shown below). It then
195# queries the in-memory db to produce the space-analysis report.
196#
197sqlite3 mem :memory:
198if {$flags(-debug)} {
199  proc dbtrace {txt} {puts $txt; flush stdout;}
200  mem trace ::dbtrace
201}
202set tabledef {CREATE TABLE space_used(
203   name clob,        -- Name of a table or index in the database file
204   tblname clob,     -- Name of associated table
205   is_index boolean, -- TRUE if it is an index, false for a table
206   is_without_rowid boolean, -- TRUE if WITHOUT ROWID table
207   nentry int,       -- Number of entries in the BTree
208   leaf_entries int, -- Number of leaf entries
209   depth int,        -- Depth of the b-tree
210   payload int,      -- Total amount of data stored in this table or index
211   ovfl_payload int, -- Total amount of data stored on overflow pages
212   ovfl_cnt int,     -- Number of entries that use overflow
213   mx_payload int,   -- Maximum payload size
214   int_pages int,    -- Number of interior pages used
215   leaf_pages int,   -- Number of leaf pages used
216   ovfl_pages int,   -- Number of overflow pages used
217   int_unused int,   -- Number of unused bytes on interior pages
218   leaf_unused int,  -- Number of unused bytes on primary pages
219   ovfl_unused int,  -- Number of unused bytes on overflow pages
220   gap_cnt int,      -- Number of gaps in the page layout
221   compressed_size int  -- Total bytes stored on disk
222);}
223mem eval $tabledef
224
225# Create a temporary "dbstat" virtual table.
226#
227db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
228db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
229         ORDER BY name, path}
230db eval {DROP TABLE temp.stat}
231
232set isCompressed 0
233set compressOverhead 0
234set depth 0
235set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
236foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {
237
238  set is_index [expr {$name!=$tblname}]
239  set is_without_rowid [is_without_rowid $name]
240  db eval {
241    SELECT
242      sum(ncell) AS nentry,
243      sum((pagetype=='leaf')*ncell) AS leaf_entries,
244      sum(payload) AS payload,
245      sum((pagetype=='overflow') * payload) AS ovfl_payload,
246      sum(path LIKE '%+000000') AS ovfl_cnt,
247      max(mx_payload) AS mx_payload,
248      sum(pagetype=='internal') AS int_pages,
249      sum(pagetype=='leaf') AS leaf_pages,
250      sum(pagetype=='overflow') AS ovfl_pages,
251      sum((pagetype=='internal') * unused) AS int_unused,
252      sum((pagetype=='leaf') * unused) AS leaf_unused,
253      sum((pagetype=='overflow') * unused) AS ovfl_unused,
254      sum(pgsize) AS compressed_size,
255      max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4)
256        AS depth
257    FROM temp.dbstat WHERE name = $name
258  } break
259
260  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
261  set storage [expr {$total_pages*$pageSize}]
262  if {!$isCompressed && $storage>$compressed_size} {
263    set isCompressed 1
264    set compressOverhead 14
265  }
266
267  # Column 'gap_cnt' is set to the number of non-contiguous entries in the
268  # list of pages visited if the b-tree structure is traversed in a top-down
269  # fashion (each node visited before its child-tree is passed). Any overflow
270  # chains present are traversed from start to finish before any child-tree
271  # is.
272  #
273  set gap_cnt 0
274  set prev 0
275  db eval {
276    SELECT pageno, pagetype FROM temp.dbstat
277     WHERE name=$name
278     ORDER BY pageno
279  } {
280    if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
281      incr gap_cnt
282    }
283    set prev $pageno
284  }
285  mem eval {
286    INSERT INTO space_used VALUES(
287      $name,
288      $tblname,
289      $is_index,
290      $is_without_rowid,
291      $nentry,
292      $leaf_entries,
293      $depth,
294      $payload,
295      $ovfl_payload,
296      $ovfl_cnt,
297      $mx_payload,
298      $int_pages,
299      $leaf_pages,
300      $ovfl_pages,
301      $int_unused,
302      $leaf_unused,
303      $ovfl_unused,
304      $gap_cnt,
305      $compressed_size
306    );
307  }
308}
309
310proc integerify {real} {
311  if {[string is double -strict $real]} {
312    return [expr {wide($real)}]
313  } else {
314    return 0
315  }
316}
317mem function int integerify
318
319# Quote a string for use in an SQL query. Examples:
320#
321# [quote {hello world}]   == {'hello world'}
322# [quote {hello world's}] == {'hello world''s'}
323#
324proc quote {txt} {
325  return [string map {' ''} $txt]
326}
327
328# Output a title line
329#
330proc titleline {title} {
331  if {$title==""} {
332    puts [string repeat * 79]
333  } else {
334    set len [string length $title]
335    set stars [string repeat * [expr 79-$len-5]]
336    puts "*** $title $stars"
337  }
338}
339
340# Generate a single line of output in the statistics section of the
341# report.
342#
343proc statline {title value {extra {}}} {
344  set len [string length $title]
345  set dots [string repeat . [expr 50-$len]]
346  set len [string length $value]
347  set sp2 [string range {          } $len end]
348  if {$extra ne ""} {
349    set extra " $extra"
350  }
351  puts "$title$dots $value$sp2$extra"
352}
353
354# Generate a formatted percentage value for $num/$denom
355#
356proc percent {num denom {of {}}} {
357  if {$denom==0.0} {return ""}
358  set v [expr {$num*100.0/$denom}]
359  set of {}
360  if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
361    return [format {%5.1f%% %s} $v $of]
362  } elseif {$v<0.1 || $v>99.9} {
363    return [format {%7.3f%% %s} $v $of]
364  } else {
365    return [format {%6.2f%% %s} $v $of]
366  }
367}
368
369proc divide {num denom} {
370  if {$denom==0} {return 0.0}
371  return [format %.2f [expr double($num)/double($denom)]]
372}
373
374# Generate a subreport that covers some subset of the database.
375# the $where clause determines which subset to analyze.
376#
377proc subreport {title where showFrag} {
378  global pageSize file_pgcnt compressOverhead
379
380  # Query the in-memory database for the sum of various statistics
381  # for the subset of tables/indices identified by the WHERE clause in
382  # $where. Note that even if the WHERE clause matches no rows, the
383  # following query returns exactly one row (because it is an aggregate).
384  #
385  # The results of the query are stored directly by SQLite into local
386  # variables (i.e. $nentry, $payload etc.).
387  #
388  mem eval "
389    SELECT
390      int(sum(
391        CASE WHEN (is_without_rowid OR is_index) THEN nentry
392             ELSE leaf_entries
393        END
394      )) AS nentry,
395      int(sum(payload)) AS payload,
396      int(sum(ovfl_payload)) AS ovfl_payload,
397      max(mx_payload) AS mx_payload,
398      int(sum(ovfl_cnt)) as ovfl_cnt,
399      int(sum(leaf_pages)) AS leaf_pages,
400      int(sum(int_pages)) AS int_pages,
401      int(sum(ovfl_pages)) AS ovfl_pages,
402      int(sum(leaf_unused)) AS leaf_unused,
403      int(sum(int_unused)) AS int_unused,
404      int(sum(ovfl_unused)) AS ovfl_unused,
405      int(sum(gap_cnt)) AS gap_cnt,
406      int(sum(compressed_size)) AS compressed_size,
407      int(max(depth)) AS depth,
408      count(*) AS cnt
409    FROM space_used WHERE $where" {} {}
410
411  # Output the sub-report title, nicely decorated with * characters.
412  #
413  puts ""
414  titleline $title
415  puts ""
416
417  # Calculate statistics and store the results in TCL variables, as follows:
418  #
419  # total_pages: Database pages consumed.
420  # total_pages_percent: Pages consumed as a percentage of the file.
421  # storage: Bytes consumed.
422  # payload_percent: Payload bytes used as a percentage of $storage.
423  # total_unused: Unused bytes on pages.
424  # avg_payload: Average payload per btree entry.
425  # avg_fanout: Average fanout for internal pages.
426  # avg_unused: Average unused bytes per btree entry.
427  # avg_meta: Average metadata overhead per entry.
428  # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
429  #
430  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
431  set total_pages_percent [percent $total_pages $file_pgcnt]
432  set storage [expr {$total_pages*$pageSize}]
433  set payload_percent [percent $payload $storage {of storage consumed}]
434  set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
435  set avg_payload [divide $payload $nentry]
436  set avg_unused [divide $total_unused $nentry]
437  set total_meta [expr {$storage - $payload - $total_unused}]
438  set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}]
439  set meta_percent [percent $total_meta $storage {of metadata}]
440  set avg_meta [divide $total_meta $nentry]
441  if {$int_pages>0} {
442    # TODO: Is this formula correct?
443    set nTab [mem eval "
444      SELECT count(*) FROM (
445          SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
446      )
447    "]
448    set avg_fanout [mem eval "
449      SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
450          WHERE $where
451    "]
452    set avg_fanout [format %.2f $avg_fanout]
453  }
454  set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}]
455
456  # Print out the sub-report statistics.
457  #
458  statline {Percentage of total database} $total_pages_percent
459  statline {Number of entries} $nentry
460  statline {Bytes of storage consumed} $storage
461  if {$compressed_size!=$storage} {
462    set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
463    set pct [expr {$compressed_size*100.0/$storage}]
464    set pct [format {%5.1f%%} $pct]
465    statline {Bytes used after compression} $compressed_size $pct
466  }
467  statline {Bytes of payload} $payload $payload_percent
468  statline {Bytes of metadata} $total_meta $meta_percent
469  if {$cnt==1} {statline {B-tree depth} $depth}
470  statline {Average payload per entry} $avg_payload
471  statline {Average unused bytes per entry} $avg_unused
472  statline {Average metadata per entry} $avg_meta
473  if {[info exists avg_fanout]} {
474    statline {Average fanout} $avg_fanout
475  }
476  if {$showFrag && $total_pages>1} {
477    set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
478    statline {Non-sequential pages} $gap_cnt $fragmentation
479  }
480  statline {Maximum payload per entry} $mx_payload
481  statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
482  if {$int_pages>0} {
483    statline {Index pages used} $int_pages
484  }
485  statline {Primary pages used} $leaf_pages
486  statline {Overflow pages used} $ovfl_pages
487  statline {Total pages used} $total_pages
488  if {$int_unused>0} {
489    set int_unused_percent [
490         percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
491    statline "Unused bytes on index pages" $int_unused $int_unused_percent
492  }
493  statline "Unused bytes on primary pages" $leaf_unused [
494     percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
495  statline "Unused bytes on overflow pages" $ovfl_unused [
496     percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
497  statline "Unused bytes on all pages" $total_unused [
498               percent $total_unused $storage {of all space}]
499  return 1
500}
501
502# Calculate the overhead in pages caused by auto-vacuum.
503#
504# This procedure calculates and returns the number of pages used by the
505# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
506# then 0 is returned. The two arguments are the size of the database file in
507# pages and the page size used by the database (in bytes).
508proc autovacuum_overhead {filePages pageSize} {
509
510  # Set $autovacuum to non-zero for databases that support auto-vacuum.
511  set autovacuum [db one {PRAGMA auto_vacuum}]
512
513  # If the database is not an auto-vacuum database or the file consists
514  # of one page only then there is no overhead for auto-vacuum. Return zero.
515  if {0==$autovacuum || $filePages==1} {
516    return 0
517  }
518
519  # The number of entries on each pointer map page. The layout of the
520  # database file is one pointer-map page, followed by $ptrsPerPage other
521  # pages, followed by a pointer-map page etc. The first pointer-map page
522  # is the second page of the file overall.
523  set ptrsPerPage [expr double($pageSize/5)]
524
525  # Return the number of pointer map pages in the database.
526  return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
527}
528
529
530# Calculate the summary statistics for the database and store the results
531# in TCL variables. They are output below. Variables are as follows:
532#
533# pageSize:      Size of each page in bytes.
534# file_bytes:    File size in bytes.
535# file_pgcnt:    Number of pages in the file.
536# file_pgcnt2:   Number of pages in the file (calculated).
537# av_pgcnt:      Pages consumed by the auto-vacuum pointer-map.
538# av_percent:    Percentage of the file consumed by auto-vacuum pointer-map.
539# inuse_pgcnt:   Data pages in the file.
540# inuse_percent: Percentage of pages used to store data.
541# free_pgcnt:    Free pages calculated as (<total pages> - <in-use pages>)
542# free_pgcnt2:   Free pages in the file according to the file header.
543# free_percent:  Percentage of file consumed by free pages (calculated).
544# free_percent2: Percentage of file consumed by free pages (header).
545# ntable:        Number of tables in the db.
546# nindex:        Number of indices in the db.
547# nautoindex:    Number of indices created automatically.
548# nmanindex:     Number of indices created manually.
549# user_payload:  Number of bytes of payload in table btrees
550#                (not including sqlite_master)
551# user_percent:  $user_payload as a percentage of total file size.
552
553### The following, setting $file_bytes based on the actual size of the file
554### on disk, causes this tool to choke on zipvfs databases. So set it based
555### on the return of [PRAGMA page_count] instead.
556if 0 {
557  set file_bytes  [file size $file_to_analyze]
558  set file_pgcnt  [expr {$file_bytes/$pageSize}]
559}
560set file_pgcnt  [db one {PRAGMA page_count}]
561set file_bytes  [expr {$file_pgcnt * $pageSize}]
562
563set av_pgcnt    [autovacuum_overhead $file_pgcnt $pageSize]
564set av_percent  [percent $av_pgcnt $file_pgcnt]
565
566set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
567set inuse_pgcnt   [expr wide([mem eval $sql])]
568set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
569
570set free_pgcnt    [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
571set free_percent  [percent $free_pgcnt $file_pgcnt]
572set free_pgcnt2   [db one {PRAGMA freelist_count}]
573set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
574
575set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
576
577set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
578set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
579set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
580set nautoindex [db eval $sql]
581set nmanindex [expr {$nindex-$nautoindex}]
582
583# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
584set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
585     WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
586set user_percent [percent $user_payload $file_bytes]
587
588# Output the summary statistics calculated above.
589#
590puts "/** Disk-Space Utilization Report For $root_filename"
591puts ""
592statline {Page size in bytes} $pageSize
593statline {Pages in the whole file (measured)} $file_pgcnt
594statline {Pages in the whole file (calculated)} $file_pgcnt2
595statline {Pages that store data} $inuse_pgcnt $inuse_percent
596statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
597statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
598statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
599statline {Number of tables in the database} $ntable
600statline {Number of indices} $nindex
601statline {Number of defined indices} $nmanindex
602statline {Number of implied indices} $nautoindex
603if {$isCompressed} {
604  statline {Size of uncompressed content in bytes} $file_bytes
605  set efficiency [percent $true_file_size $file_bytes]
606  statline {Size of compressed file on disk} $true_file_size $efficiency
607} else {
608  statline {Size of the file in bytes} $file_bytes
609}
610statline {Bytes of user payload stored} $user_payload $user_percent
611
612# Output table rankings
613#
614puts ""
615titleline "Page counts for all tables with their indices"
616puts ""
617mem eval {SELECT tblname, count(*) AS cnt,
618              int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
619          FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
620  statline [string toupper $tblname] $size [percent $size $file_pgcnt]
621}
622puts ""
623titleline "Page counts for all tables and indices separately"
624puts ""
625mem eval {
626  SELECT
627       upper(name) AS nm,
628       int(int_pages+leaf_pages+ovfl_pages) AS size
629    FROM space_used
630   ORDER BY size+0 DESC, name} {} {
631  statline $nm $size [percent $size $file_pgcnt]
632}
633if {$isCompressed} {
634  puts ""
635  titleline "Bytes of disk space used after compression"
636  puts ""
637  set csum 0
638  mem eval {SELECT tblname,
639                  int(sum(compressed_size)) +
640                         $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
641                        AS csize
642          FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
643    incr csum $csize
644    statline [string toupper $tblname] $csize [percent $csize $true_file_size]
645  }
646  set overhead [expr {$true_file_size - $csum}]
647  if {$overhead>0} {
648    statline {Header and free space} $overhead [percent $overhead $true_file_size]
649  }
650}
651
652# Output subreports
653#
654if {$nindex>0} {
655  subreport {All tables and indices} 1 0
656}
657subreport {All tables} {NOT is_index} 0
658if {$nindex>0} {
659  subreport {All indices} {is_index} 0
660}
661foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used
662                       ORDER BY name}] {
663  set qn [quote $tbl]
664  set name [string toupper $tbl]
665  set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
666  if {$n>1} {
667    set idxlist [mem eval "SELECT name FROM space_used
668                            WHERE tblname='$qn' AND is_index
669                            ORDER BY 1"]
670    subreport "Table $name and all its indices" "tblname='$qn'" 0
671    subreport "Table $name w/o any indices" "name='$qn'" 1
672    if {[llength $idxlist]>1} {
673      subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
674    }
675    foreach idx $idxlist {
676      set qidx [quote $idx]
677      subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
678    }
679  } else {
680    subreport "Table $name" "name='$qn'" 1
681  }
682}
683
684# Output instructions on what the numbers above mean.
685#
686puts ""
687titleline Definitions
688puts {
689Page size in bytes
690
691    The number of bytes in a single page of the database file.
692    Usually 1024.
693
694Number of pages in the whole file
695}
696puts "    The number of $pageSize-byte pages that go into forming the complete
697    database"
698puts {
699Pages that store data
700
701    The number of pages that store data, either as primary B*Tree pages or
702    as overflow pages.  The number at the right is the data pages divided by
703    the total number of pages in the file.
704
705Pages on the freelist
706
707    The number of pages that are not currently in use but are reserved for
708    future use.  The percentage at the right is the number of freelist pages
709    divided by the total number of pages in the file.
710
711Pages of auto-vacuum overhead
712
713    The number of pages that store data used by the database to facilitate
714    auto-vacuum. This is zero for databases that do not support auto-vacuum.
715
716Number of tables in the database
717
718    The number of tables in the database, including the SQLITE_MASTER table
719    used to store schema information.
720
721Number of indices
722
723    The total number of indices in the database.
724
725Number of defined indices
726
727    The number of indices created using an explicit CREATE INDEX statement.
728
729Number of implied indices
730
731    The number of indices used to implement PRIMARY KEY or UNIQUE constraints
732    on tables.
733
734Size of the file in bytes
735
736    The total amount of disk space used by the entire database files.
737
738Bytes of user payload stored
739
740    The total number of bytes of user payload stored in the database. The
741    schema information in the SQLITE_MASTER table is not counted when
742    computing this number.  The percentage at the right shows the payload
743    divided by the total file size.
744
745Percentage of total database
746
747    The amount of the complete database file that is devoted to storing
748    information described by this category.
749
750Number of entries
751
752    The total number of B-Tree key/value pairs stored under this category.
753
754Bytes of storage consumed
755
756    The total amount of disk space required to store all B-Tree entries
757    under this category.  The is the total number of pages used times
758    the pages size.
759
760Bytes of payload
761
762    The amount of payload stored under this category.  Payload is the data
763    part of table entries and the key part of index entries.  The percentage
764    at the right is the bytes of payload divided by the bytes of storage
765    consumed.
766
767Bytes of metadata
768
769    The amount of formatting and structural information stored in the
770    table or index.  Metadata includes the btree page header, the cell pointer
771    array, the size field for each cell, the left child pointer or non-leaf
772    cells, the overflow pointers for overflow cells, and the rowid value for
773    rowid table cells.  In other words, metadata is everything that is neither
774    unused space nor content.  The record header in the payload is counted as
775    content, not metadata.
776
777Average payload per entry
778
779    The average amount of payload on each entry.  This is just the bytes of
780    payload divided by the number of entries.
781
782Average unused bytes per entry
783
784    The average amount of free space remaining on all pages under this
785    category on a per-entry basis.  This is the number of unused bytes on
786    all pages divided by the number of entries.
787
788Non-sequential pages
789
790    The number of pages in the table or index that are out of sequence.
791    Many filesystems are optimized for sequential file access so a small
792    number of non-sequential pages might result in faster queries,
793    especially for larger database files that do not fit in the disk cache.
794    Note that after running VACUUM, the root page of each table or index is
795    at the beginning of the database file and all other pages are in a
796    separate part of the database file, resulting in a single non-
797    sequential page.
798
799Maximum payload per entry
800
801    The largest payload size of any entry.
802
803Entries that use overflow
804
805    The number of entries that user one or more overflow pages.
806
807Total pages used
808
809    This is the number of pages used to hold all information in the current
810    category.  This is the sum of index, primary, and overflow pages.
811
812Index pages used
813
814    This is the number of pages in a table B-tree that hold only key (rowid)
815    information and no data.
816
817Primary pages used
818
819    This is the number of B-tree pages that hold both key and data.
820
821Overflow pages used
822
823    The total number of overflow pages used for this category.
824
825Unused bytes on index pages
826
827    The total number of bytes of unused space on all index pages.  The
828    percentage at the right is the number of unused bytes divided by the
829    total number of bytes on index pages.
830
831Unused bytes on primary pages
832
833    The total number of bytes of unused space on all primary pages.  The
834    percentage at the right is the number of unused bytes divided by the
835    total number of bytes on primary pages.
836
837Unused bytes on overflow pages
838
839    The total number of bytes of unused space on all overflow pages.  The
840    percentage at the right is the number of unused bytes divided by the
841    total number of bytes on overflow pages.
842
843Unused bytes on all pages
844
845    The total number of bytes of unused space on all primary and overflow
846    pages.  The percentage at the right is the number of unused bytes
847    divided by the total number of bytes.
848}
849
850# Output a dump of the in-memory database. This can be used for more
851# complex offline analysis.
852#
853titleline {}
854puts "The entire text of this report can be sourced into any SQL database"
855puts "engine for further analysis.  All of the text above is an SQL comment."
856puts "The data used to generate this report follows:"
857puts "*/"
858puts "BEGIN;"
859puts $tabledef
860unset -nocomplain x
861mem eval {SELECT * FROM space_used} x {
862  puts -nonewline "INSERT INTO space_used VALUES"
863  set sep (
864  foreach col $x(*) {
865    set v $x($col)
866    if {$v=="" || ![string is double $v]} {set v '[quote $v]'}
867    puts -nonewline $sep$v
868    set sep ,
869  }
870  puts ");"
871}
872puts "COMMIT;"
873
874} err]} {
875  puts "ERROR: $err"
876  puts $errorInfo
877  exit 1
878}
879