1package bolt
2
3import (
4	"errors"
5	"fmt"
6	"hash/fnv"
7	"log"
8	"os"
9	"runtime"
10	"runtime/debug"
11	"strings"
12	"sync"
13	"time"
14	"unsafe"
15)
16
// Internal file-format and mapping constants.
const (
	// maxMmapStep is the largest step that can be taken when remapping the mmap.
	maxMmapStep = 1 << 30 // 1GB

	// version is the data file format version.
	version = 2

	// magic is a marker value identifying a file as a Bolt DB.
	magic uint32 = 0xED0CDAED
)

// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"

// Defaults copied into a DB instance by Open.
const (
	DefaultMaxBatchSize  int = 1000
	DefaultMaxBatchDelay     = 10 * time.Millisecond
	DefaultAllocSize         = 16 * 1024 * 1024
)

// defaultPageSize is the OS page size, captured at package initialization.
var defaultPageSize = os.Getpagesize()
41
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return an ErrDatabaseNotOpen if accessed before Open() is called.
type DB struct {
	// When enabled, the database will perform a Check() after every commit.
	// A panic is issued if the database is in an inconsistent state. This
	// flag has a large performance impact so it should only be used for
	// debugging purposes.
	StrictMode bool

	// Setting the NoSync flag will cause the database to skip fsync()
	// calls after each commit. This can be useful when bulk loading data
	// into a database and you can restart the bulk load in the event of
	// a system failure or database corruption. Do not set this flag for
	// normal use.
	//
	// If the package global IgnoreNoSync constant is true, this value is
	// ignored.  See the comment on that constant for more details.
	//
	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
	NoSync bool

	// When true, skips the truncate call when growing the database.
	// Setting this to true is only safe on non-ext3/ext4 systems.
	// Skipping truncation avoids preallocation of hard drive space and
	// bypasses a truncate() and fsync() syscall on remapping.
	//
	// https://github.com/boltdb/bolt/issues/284
	NoGrowSync bool

	// If you want to read the entire database fast, you can set MmapFlags to
	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
	MmapFlags int

	// MaxBatchSize is the maximum size of a batch. Default value is
	// copied from DefaultMaxBatchSize in Open.
	//
	// If <=0, disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchSize int

	// MaxBatchDelay is the maximum delay before a batch starts.
	// Default value is copied from DefaultMaxBatchDelay in Open.
	//
	// If <=0, effectively disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchDelay time.Duration

	// AllocSize is the amount of space allocated when the database
	// needs to create new pages. This is done to amortize the cost
	// of truncate() and fsync() when growing the data file.
	// Default value is copied from DefaultAllocSize in Open.
	AllocSize int

	// Internal state. rwtx is the current read-write transaction (set in
	// beginRWTx); txs holds the read-only transactions registered by
	// beginTx and dropped by removeTx.
	path     string
	file     *os.File
	lockfile *os.File // windows only
	dataref  []byte   // mmap'ed readonly, write throws SEGV
	data     *[maxMapSize]byte
	datasz   int
	filesz   int // current on disk file size
	meta0    *meta
	meta1    *meta
	pageSize int
	opened   bool
	rwtx     *Tx
	txs      []*Tx
	freelist *freelist
	stats    Stats

	// pagePool hands out single-page buffers (see allocate).
	pagePool sync.Pool

	// batchMu guards batch, the batch currently accepting calls from Batch.
	batchMu sync.Mutex
	batch   *batch

	rwlock   sync.Mutex   // Allows only one writer at a time.
	metalock sync.Mutex   // Protects meta page access.
	mmaplock sync.RWMutex // Protects mmap access during remapping.
	statlock sync.RWMutex // Protects stats access.

	// ops holds overridable I/O hooks; Open points writeAt at the data
	// file's WriteAt ("test hooks").
	ops struct {
		writeAt func(b []byte, off int64) (n int, err error)
	}

	// Read only mode.
	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
	readOnly bool
}
131
132// Path returns the path to currently open database file.
133func (db *DB) Path() string {
134	return db.path
135}
136
137// GoString returns the Go string representation of the database.
138func (db *DB) GoString() string {
139	return fmt.Sprintf("bolt.DB{path:%q}", db.path)
140}
141
142// String returns the string representation of the database.
143func (db *DB) String() string {
144	return fmt.Sprintf("DB<%q>", db.path)
145}
146
147// Open creates and opens a database at the given path.
148// If the file does not exist then it will be created automatically.
149// Passing in nil options will cause Bolt to open the database with the default options.
150func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
151	var db = &DB{opened: true}
152
153	// Set default options if no options are provided.
154	if options == nil {
155		options = DefaultOptions
156	}
157	db.NoGrowSync = options.NoGrowSync
158	db.MmapFlags = options.MmapFlags
159
160	// Set default values for later DB operations.
161	db.MaxBatchSize = DefaultMaxBatchSize
162	db.MaxBatchDelay = DefaultMaxBatchDelay
163	db.AllocSize = DefaultAllocSize
164
165	flag := os.O_RDWR
166	if options.ReadOnly {
167		flag = os.O_RDONLY
168		db.readOnly = true
169	}
170
171	// Open data file and separate sync handler for metadata writes.
172	db.path = path
173	var err error
174	if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
175		_ = db.close()
176		return nil, err
177	}
178
179	// Lock file so that other processes using Bolt in read-write mode cannot
180	// use the database  at the same time. This would cause corruption since
181	// the two processes would write meta pages and free pages separately.
182	// The database file is locked exclusively (only one process can grab the lock)
183	// if !options.ReadOnly.
184	// The database file is locked using the shared lock (more than one process may
185	// hold a lock at the same time) otherwise (options.ReadOnly is set).
186	if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
187		_ = db.close()
188		return nil, err
189	}
190
191	// Default values for test hooks
192	db.ops.writeAt = db.file.WriteAt
193
194	// Initialize the database if it doesn't exist.
195	if info, err := db.file.Stat(); err != nil {
196		return nil, err
197	} else if info.Size() == 0 {
198		// Initialize new files with meta pages.
199		if err := db.init(); err != nil {
200			return nil, err
201		}
202	} else {
203		// Read the first meta page to determine the page size.
204		var buf [0x1000]byte
205		if _, err := db.file.ReadAt(buf[:], 0); err == nil {
206			m := db.pageInBuffer(buf[:], 0).meta()
207			if err := m.validate(); err != nil {
208				// If we can't read the page size, we can assume it's the same
209				// as the OS -- since that's how the page size was chosen in the
210				// first place.
211				//
212				// If the first page is invalid and this OS uses a different
213				// page size than what the database was created with then we
214				// are out of luck and cannot access the database.
215				db.pageSize = os.Getpagesize()
216			} else {
217				db.pageSize = int(m.pageSize)
218			}
219		}
220	}
221
222	// Initialize page pool.
223	db.pagePool = sync.Pool{
224		New: func() interface{} {
225			return make([]byte, db.pageSize)
226		},
227	}
228
229	// Memory map the data file.
230	if err := db.mmap(options.InitialMmapSize); err != nil {
231		_ = db.close()
232		return nil, err
233	}
234
235	// Read in the freelist.
236	db.freelist = newFreelist()
237	db.freelist.read(db.page(db.meta().freelist))
238
239	// Mark the database as opened and return.
240	return db, nil
241}
242
243// mmap opens the underlying memory-mapped file and initializes the meta references.
244// minsz is the minimum size that the new mmap can be.
245func (db *DB) mmap(minsz int) error {
246	db.mmaplock.Lock()
247	defer db.mmaplock.Unlock()
248
249	info, err := db.file.Stat()
250	if err != nil {
251		return fmt.Errorf("mmap stat error: %s", err)
252	} else if int(info.Size()) < db.pageSize*2 {
253		return fmt.Errorf("file size too small")
254	}
255
256	// Ensure the size is at least the minimum size.
257	var size = int(info.Size())
258	if size < minsz {
259		size = minsz
260	}
261	size, err = db.mmapSize(size)
262	if err != nil {
263		return err
264	}
265
266	// Dereference all mmap references before unmapping.
267	if db.rwtx != nil {
268		db.rwtx.root.dereference()
269	}
270
271	// Unmap existing data before continuing.
272	if err := db.munmap(); err != nil {
273		return err
274	}
275
276	// Memory-map the data file as a byte slice.
277	if err := mmap(db, size); err != nil {
278		return err
279	}
280
281	// Save references to the meta pages.
282	db.meta0 = db.page(0).meta()
283	db.meta1 = db.page(1).meta()
284
285	// Validate the meta pages. We only return an error if both meta pages fail
286	// validation, since meta0 failing validation means that it wasn't saved
287	// properly -- but we can recover using meta1. And vice-versa.
288	err0 := db.meta0.validate()
289	err1 := db.meta1.validate()
290	if err0 != nil && err1 != nil {
291		return err0
292	}
293
294	return nil
295}
296
297// munmap unmaps the data file from memory.
298func (db *DB) munmap() error {
299	if err := munmap(db); err != nil {
300		return fmt.Errorf("unmap error: " + err.Error())
301	}
302	return nil
303}
304
305// mmapSize determines the appropriate size for the mmap given the current size
306// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
307// Returns an error if the new mmap size is greater than the max allowed.
308func (db *DB) mmapSize(size int) (int, error) {
309	// Double the size from 32KB until 1GB.
310	for i := uint(15); i <= 30; i++ {
311		if size <= 1<<i {
312			return 1 << i, nil
313		}
314	}
315
316	// Verify the requested size is not above the maximum allowed.
317	if size > maxMapSize {
318		return 0, fmt.Errorf("mmap too large")
319	}
320
321	// If larger than 1GB then grow by 1GB at a time.
322	sz := int64(size)
323	if remainder := sz % int64(maxMmapStep); remainder > 0 {
324		sz += int64(maxMmapStep) - remainder
325	}
326
327	// Ensure that the mmap size is a multiple of the page size.
328	// This should always be true since we're incrementing in MBs.
329	pageSize := int64(db.pageSize)
330	if (sz % pageSize) != 0 {
331		sz = ((sz / pageSize) + 1) * pageSize
332	}
333
334	// If we've exceeded the max size then only grow up to the max size.
335	if sz > maxMapSize {
336		sz = maxMapSize
337	}
338
339	return int(sz), nil
340}
341
342// init creates a new database file and initializes its meta pages.
343func (db *DB) init() error {
344	// Set the page size to the OS page size.
345	db.pageSize = os.Getpagesize()
346
347	// Create two meta pages on a buffer.
348	buf := make([]byte, db.pageSize*4)
349	for i := 0; i < 2; i++ {
350		p := db.pageInBuffer(buf[:], pgid(i))
351		p.id = pgid(i)
352		p.flags = metaPageFlag
353
354		// Initialize the meta page.
355		m := p.meta()
356		m.magic = magic
357		m.version = version
358		m.pageSize = uint32(db.pageSize)
359		m.freelist = 2
360		m.root = bucket{root: 3}
361		m.pgid = 4
362		m.txid = txid(i)
363		m.checksum = m.sum64()
364	}
365
366	// Write an empty freelist at page 3.
367	p := db.pageInBuffer(buf[:], pgid(2))
368	p.id = pgid(2)
369	p.flags = freelistPageFlag
370	p.count = 0
371
372	// Write an empty leaf page at page 4.
373	p = db.pageInBuffer(buf[:], pgid(3))
374	p.id = pgid(3)
375	p.flags = leafPageFlag
376	p.count = 0
377
378	// Write the buffer to our data file.
379	if _, err := db.ops.writeAt(buf, 0); err != nil {
380		return err
381	}
382	if err := fdatasync(db); err != nil {
383		return err
384	}
385
386	return nil
387}
388
389// Close releases all database resources.
390// All transactions must be closed before closing the database.
391func (db *DB) Close() error {
392	db.rwlock.Lock()
393	defer db.rwlock.Unlock()
394
395	db.metalock.Lock()
396	defer db.metalock.Unlock()
397
398	db.mmaplock.RLock()
399	defer db.mmaplock.RUnlock()
400
401	return db.close()
402}
403
404func (db *DB) close() error {
405	if !db.opened {
406		return nil
407	}
408
409	db.opened = false
410
411	db.freelist = nil
412
413	// Clear ops.
414	db.ops.writeAt = nil
415
416	// Close the mmap.
417	if err := db.munmap(); err != nil {
418		return err
419	}
420
421	// Close file handles.
422	if db.file != nil {
423		// No need to unlock read-only file.
424		if !db.readOnly {
425			// Unlock the file.
426			if err := funlock(db); err != nil {
427				log.Printf("bolt.Close(): funlock error: %s", err)
428			}
429		}
430
431		// Close the file descriptor.
432		if err := db.file.Close(); err != nil {
433			return fmt.Errorf("db file close: %s", err)
434		}
435		db.file = nil
436	}
437
438	db.path = ""
439	return nil
440}
441
442// Begin starts a new transaction.
443// Multiple read-only transactions can be used concurrently but only one
444// write transaction can be used at a time. Starting multiple write transactions
445// will cause the calls to block and be serialized until the current write
446// transaction finishes.
447//
448// Transactions should not be dependent on one another. Opening a read
449// transaction and a write transaction in the same goroutine can cause the
450// writer to deadlock because the database periodically needs to re-mmap itself
451// as it grows and it cannot do that while a read transaction is open.
452//
453// If a long running read transaction (for example, a snapshot transaction) is
454// needed, you might want to set DB.InitialMmapSize to a large enough value
455// to avoid potential blocking of write transaction.
456//
457// IMPORTANT: You must close read-only transactions after you are finished or
458// else the database will not reclaim old pages.
459func (db *DB) Begin(writable bool) (*Tx, error) {
460	if writable {
461		return db.beginRWTx()
462	}
463	return db.beginTx()
464}
465
466func (db *DB) beginTx() (*Tx, error) {
467	// Lock the meta pages while we initialize the transaction. We obtain
468	// the meta lock before the mmap lock because that's the order that the
469	// write transaction will obtain them.
470	db.metalock.Lock()
471
472	// Obtain a read-only lock on the mmap. When the mmap is remapped it will
473	// obtain a write lock so all transactions must finish before it can be
474	// remapped.
475	db.mmaplock.RLock()
476
477	// Exit if the database is not open yet.
478	if !db.opened {
479		db.mmaplock.RUnlock()
480		db.metalock.Unlock()
481		return nil, ErrDatabaseNotOpen
482	}
483
484	// Create a transaction associated with the database.
485	t := &Tx{}
486	t.init(db)
487
488	// Keep track of transaction until it closes.
489	db.txs = append(db.txs, t)
490	n := len(db.txs)
491
492	// Unlock the meta pages.
493	db.metalock.Unlock()
494
495	// Update the transaction stats.
496	db.statlock.Lock()
497	db.stats.TxN++
498	db.stats.OpenTxN = n
499	db.statlock.Unlock()
500
501	return t, nil
502}
503
504func (db *DB) beginRWTx() (*Tx, error) {
505	// If the database was opened with Options.ReadOnly, return an error.
506	if db.readOnly {
507		return nil, ErrDatabaseReadOnly
508	}
509
510	// Obtain writer lock. This is released by the transaction when it closes.
511	// This enforces only one writer transaction at a time.
512	db.rwlock.Lock()
513
514	// Once we have the writer lock then we can lock the meta pages so that
515	// we can set up the transaction.
516	db.metalock.Lock()
517	defer db.metalock.Unlock()
518
519	// Exit if the database is not open yet.
520	if !db.opened {
521		db.rwlock.Unlock()
522		return nil, ErrDatabaseNotOpen
523	}
524
525	// Create a transaction associated with the database.
526	t := &Tx{writable: true}
527	t.init(db)
528	db.rwtx = t
529
530	// Free any pages associated with closed read-only transactions.
531	var minid txid = 0xFFFFFFFFFFFFFFFF
532	for _, t := range db.txs {
533		if t.meta.txid < minid {
534			minid = t.meta.txid
535		}
536	}
537	if minid > 0 {
538		db.freelist.release(minid - 1)
539	}
540
541	return t, nil
542}
543
544// removeTx removes a transaction from the database.
545func (db *DB) removeTx(tx *Tx) {
546	// Release the read lock on the mmap.
547	db.mmaplock.RUnlock()
548
549	// Use the meta lock to restrict access to the DB object.
550	db.metalock.Lock()
551
552	// Remove the transaction.
553	for i, t := range db.txs {
554		if t == tx {
555			last := len(db.txs) - 1
556			db.txs[i] = db.txs[last]
557			db.txs[last] = nil
558			db.txs = db.txs[:last]
559			break
560		}
561	}
562	n := len(db.txs)
563
564	// Unlock the meta pages.
565	db.metalock.Unlock()
566
567	// Merge statistics.
568	db.statlock.Lock()
569	db.stats.OpenTxN = n
570	db.stats.TxStats.add(&tx.stats)
571	db.statlock.Unlock()
572}
573
574// Update executes a function within the context of a read-write managed transaction.
575// If no error is returned from the function then the transaction is committed.
576// If an error is returned then the entire transaction is rolled back.
577// Any error that is returned from the function or returned from the commit is
578// returned from the Update() method.
579//
580// Attempting to manually commit or rollback within the function will cause a panic.
581func (db *DB) Update(fn func(*Tx) error) error {
582	t, err := db.Begin(true)
583	if err != nil {
584		return err
585	}
586
587	// Make sure the transaction rolls back in the event of a panic.
588	defer func() {
589		if t.db != nil {
590			t.rollback()
591		}
592	}()
593
594	// Mark as a managed tx so that the inner function cannot manually commit.
595	t.managed = true
596
597	// If an error is returned from the function then rollback and return error.
598	err = fn(t)
599	t.managed = false
600	if err != nil {
601		_ = t.Rollback()
602		return err
603	}
604
605	return t.Commit()
606}
607
608// View executes a function within the context of a managed read-only transaction.
609// Any error that is returned from the function is returned from the View() method.
610//
611// Attempting to manually rollback within the function will cause a panic.
612func (db *DB) View(fn func(*Tx) error) error {
613	t, err := db.Begin(false)
614	if err != nil {
615		return err
616	}
617
618	// Make sure the transaction rolls back in the event of a panic.
619	defer func() {
620		if t.db != nil {
621			t.rollback()
622		}
623	}()
624
625	// Mark as a managed tx so that the inner function cannot manually rollback.
626	t.managed = true
627
628	// If an error is returned from the function then pass it through.
629	err = fn(t)
630	t.managed = false
631	if err != nil {
632		_ = t.Rollback()
633		return err
634	}
635
636	if err := t.Rollback(); err != nil {
637		return err
638	}
639
640	return nil
641}
642
643// Batch calls fn as part of a batch. It behaves similar to Update,
644// except:
645//
646// 1. concurrent Batch calls can be combined into a single Bolt
647// transaction.
648//
649// 2. the function passed to Batch may be called multiple times,
650// regardless of whether it returns error or not.
651//
652// This means that Batch function side effects must be idempotent and
653// take permanent effect only after a successful return is seen in
654// caller.
655//
656// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
657// and DB.MaxBatchDelay, respectively.
658//
659// Batch is only useful when there are multiple goroutines calling it.
660func (db *DB) Batch(fn func(*Tx) error) error {
661	errCh := make(chan error, 1)
662
663	db.batchMu.Lock()
664	if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
665		// There is no existing batch, or the existing batch is full; start a new one.
666		db.batch = &batch{
667			db: db,
668		}
669		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
670	}
671	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
672	if len(db.batch.calls) >= db.MaxBatchSize {
673		// wake up batch, it's ready to run
674		go db.batch.trigger()
675	}
676	db.batchMu.Unlock()
677
678	err := <-errCh
679	if err == trySolo {
680		err = db.Update(fn)
681	}
682	return err
683}
684
// call is one function submitted through DB.Batch together with the channel
// on which its outcome is reported back to the submitter.
type call struct {
	fn  func(*Tx) error
	err chan<- error
}
689
// batch collects calls that are executed together in one Update transaction.
// timer fires trigger after DB.MaxBatchDelay; start ensures run executes at
// most once.
type batch struct {
	db    *DB
	timer *time.Timer
	start sync.Once
	calls []call
}
696
697// trigger runs the batch if it hasn't already been run.
698func (b *batch) trigger() {
699	b.start.Do(b.run)
700}
701
702// run performs the transactions in the batch and communicates results
703// back to DB.Batch.
704func (b *batch) run() {
705	b.db.batchMu.Lock()
706	b.timer.Stop()
707	// Make sure no new work is added to this batch, but don't break
708	// other batches.
709	if b.db.batch == b {
710		b.db.batch = nil
711	}
712	b.db.batchMu.Unlock()
713
714retry:
715	for len(b.calls) > 0 {
716		var failIdx = -1
717		err := b.db.Update(func(tx *Tx) error {
718			for i, c := range b.calls {
719				if err := safelyCall(c.fn, tx); err != nil {
720					failIdx = i
721					return err
722				}
723			}
724			return nil
725		})
726
727		if failIdx >= 0 {
728			// take the failing transaction out of the batch. it's
729			// safe to shorten b.calls here because db.batch no longer
730			// points to us, and we hold the mutex anyway.
731			c := b.calls[failIdx]
732			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
733			// tell the submitter re-run it solo, continue with the rest of the batch
734			c.err <- trySolo
735			continue retry
736		}
737
738		// pass success, or bolt internal errors, to all callers
739		for _, c := range b.calls {
740			if c.err != nil {
741				c.err <- err
742			}
743		}
744		break retry
745	}
746}
747
// trySolo is a special sentinel error value used for signaling that a
// transaction function should be re-run. DB.Batch intercepts it and re-runs
// the function via Update, so it should never be seen by callers.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
752
// panicked wraps a value recovered from a panic so that it can travel as an
// error (see safelyCall).
type panicked struct {
	reason interface{}
}

// Error returns the wrapped error's own message when the panic value is an
// error, and "panic: <value>" for any other value.
func (p panicked) Error() string {
	switch v := p.reason.(type) {
	case error:
		return v.Error()
	default:
		return fmt.Sprintf("panic: %v", v)
	}
}
763
764func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
765	defer func() {
766		if p := recover(); p != nil {
767			err = panicked{p}
768		}
769	}()
770	return fn(tx)
771}
772
773// Sync executes fdatasync() against the database file handle.
774//
775// This is not necessary under normal operation, however, if you use NoSync
776// then it allows you to force the database file to sync against the disk.
777func (db *DB) Sync() error { return fdatasync(db) }
778
779// Stats retrieves ongoing performance stats for the database.
780// This is only updated when a transaction closes.
781func (db *DB) Stats() Stats {
782	db.statlock.RLock()
783	defer db.statlock.RUnlock()
784	return db.stats
785}
786
787// This is for internal access to the raw data bytes from the C cursor, use
788// carefully, or not at all.
789func (db *DB) Info() *Info {
790	return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
791}
792
793// page retrieves a page reference from the mmap based on the current page size.
794func (db *DB) page(id pgid) *page {
795	pos := id * pgid(db.pageSize)
796	return (*page)(unsafe.Pointer(&db.data[pos]))
797}
798
799// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
800func (db *DB) pageInBuffer(b []byte, id pgid) *page {
801	return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
802}
803
804// meta retrieves the current meta page reference.
805func (db *DB) meta() *meta {
806	// We have to return the meta with the highest txid which doesn't fail
807	// validation. Otherwise, we can cause errors when in fact the database is
808	// in a consistent state. metaA is the one with the higher txid.
809	metaA := db.meta0
810	metaB := db.meta1
811	if db.meta1.txid > db.meta0.txid {
812		metaA = db.meta1
813		metaB = db.meta0
814	}
815
816	// Use higher meta page if valid. Otherwise fallback to previous, if valid.
817	if err := metaA.validate(); err == nil {
818		return metaA
819	} else if err := metaB.validate(); err == nil {
820		return metaB
821	}
822
823	// This should never be reached, because both meta1 and meta0 were validated
824	// on mmap() and we do fsync() on every write.
825	panic("bolt.DB.meta(): invalid meta pages")
826}
827
828// allocate returns a contiguous block of memory starting at a given page.
829func (db *DB) allocate(count int) (*page, error) {
830	// Allocate a temporary buffer for the page.
831	var buf []byte
832	if count == 1 {
833		buf = db.pagePool.Get().([]byte)
834	} else {
835		buf = make([]byte, count*db.pageSize)
836	}
837	p := (*page)(unsafe.Pointer(&buf[0]))
838	p.overflow = uint32(count - 1)
839
840	// Use pages from the freelist if they are available.
841	if p.id = db.freelist.allocate(count); p.id != 0 {
842		return p, nil
843	}
844
845	// Resize mmap() if we're at the end.
846	p.id = db.rwtx.meta.pgid
847	var minsz = int((p.id+pgid(count))+1) * db.pageSize
848	if minsz >= db.datasz {
849		if err := db.mmap(minsz); err != nil {
850			return nil, fmt.Errorf("mmap allocate error: %s", err)
851		}
852	}
853
854	// Move the page id high water mark.
855	db.rwtx.meta.pgid += pgid(count)
856
857	return p, nil
858}
859
860// grow grows the size of the database to the given sz.
861func (db *DB) grow(sz int) error {
862	// Ignore if the new size is less than available file size.
863	if sz <= db.filesz {
864		return nil
865	}
866
867	// If the data is smaller than the alloc size then only allocate what's needed.
868	// Once it goes over the allocation size then allocate in chunks.
869	if db.datasz < db.AllocSize {
870		sz = db.datasz
871	} else {
872		sz += db.AllocSize
873	}
874
875	// Truncate and fsync to ensure file size metadata is flushed.
876	// https://github.com/boltdb/bolt/issues/284
877	if !db.NoGrowSync && !db.readOnly {
878		if runtime.GOOS != "windows" {
879			if err := db.file.Truncate(int64(sz)); err != nil {
880				return fmt.Errorf("file resize error: %s", err)
881			}
882		}
883		if err := db.file.Sync(); err != nil {
884			return fmt.Errorf("file sync error: %s", err)
885		}
886	}
887
888	db.filesz = sz
889	return nil
890}
891
892func (db *DB) IsReadOnly() bool {
893	return db.readOnly
894}
895
// Options represents the options that can be set when opening a database.
type Options struct {
	// Timeout is the amount of time to wait to obtain a file lock.
	// When set to zero it will wait indefinitely. This option is only
	// available on Darwin and Linux.
	Timeout time.Duration

	// Sets the DB.NoGrowSync flag before memory mapping the file.
	NoGrowSync bool

	// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
	// grab a shared lock (UNIX).
	ReadOnly bool

	// Sets the DB.MmapFlags flag before memory mapping the file.
	MmapFlags int

	// InitialMmapSize is the initial mmap size of the database
	// in bytes. Read transactions won't block write transaction
	// if the InitialMmapSize is large enough to hold database mmap
	// size. (See DB.Begin for more information)
	//
	// If <=0, the initial map size is 0.
	// If InitialMmapSize is smaller than the previous database size,
	// it takes no effect.
	InitialMmapSize int
}
923
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
// Open uses this pointer directly (it is not copied).
var DefaultOptions = &Options{
	Timeout:    0,
	NoGrowSync: false,
}
930
// Stats represents statistics about the database.
// A snapshot is obtained with DB.Stats; two snapshots can be compared with Sub.
type Stats struct {
	// Freelist stats
	FreePageN     int // total number of free pages on the freelist
	PendingPageN  int // total number of pending pages on the freelist
	FreeAlloc     int // total bytes allocated in free pages
	FreelistInuse int // total bytes used by the freelist

	// Transaction stats
	TxN     int // total number of started read transactions
	OpenTxN int // number of currently open read transactions

	TxStats TxStats // global, ongoing stats.
}
945
946// Sub calculates and returns the difference between two sets of database stats.
947// This is useful when obtaining stats at two different points and time and
948// you need the performance counters that occurred within that time span.
949func (s *Stats) Sub(other *Stats) Stats {
950	if other == nil {
951		return *s
952	}
953	var diff Stats
954	diff.FreePageN = s.FreePageN
955	diff.PendingPageN = s.PendingPageN
956	diff.FreeAlloc = s.FreeAlloc
957	diff.FreelistInuse = s.FreelistInuse
958	diff.TxN = s.TxN - other.TxN
959	diff.TxStats = s.TxStats.Sub(&other.TxStats)
960	return diff
961}
962
963func (s *Stats) add(other *Stats) {
964	s.TxStats.add(&other.TxStats)
965}
966
// Info is the value returned by DB.Info: the address of the first byte of
// the database's mapped data and the page size in use.
type Info struct {
	Data     uintptr
	PageSize int
}
971
// meta is the content of a meta page. checksum must remain the last field:
// sum64 hashes every byte of the struct that precedes it.
type meta struct {
	magic    uint32 // must equal the package's magic marker
	version  uint32 // must equal the package's file format version
	pageSize uint32 // page size the file was created with
	flags    uint32
	root     bucket // root bucket
	freelist pgid   // page id of the freelist page
	pgid     pgid   // high water mark; root and freelist ids must be below it
	txid     txid   // transaction id; txid % 2 selects the meta page written
	checksum uint64 // sum64 of the preceding fields
}
983
984// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
985func (m *meta) validate() error {
986	if m.magic != magic {
987		return ErrInvalid
988	} else if m.version != version {
989		return ErrVersionMismatch
990	} else if m.checksum != 0 && m.checksum != m.sum64() {
991		return ErrChecksum
992	}
993	return nil
994}
995
996// copy copies one meta object to another.
997func (m *meta) copy(dest *meta) {
998	*dest = *m
999}
1000
1001// write writes the meta onto a page.
1002func (m *meta) write(p *page) {
1003	if m.root.root >= m.pgid {
1004		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
1005	} else if m.freelist >= m.pgid {
1006		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
1007	}
1008
1009	// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
1010	p.id = pgid(m.txid % 2)
1011	p.flags |= metaPageFlag
1012
1013	// Calculate the checksum.
1014	m.checksum = m.sum64()
1015
1016	m.copy(p.meta())
1017}
1018
1019// generates the checksum for the meta.
1020func (m *meta) sum64() uint64 {
1021	var h = fnv.New64a()
1022	_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
1023	return h.Sum64()
1024}
1025
// _assert panics with "assertion failed: " followed by the formatted message
// when condition is false; it does nothing when condition is true.
func _assert(condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}
	panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
1032
// warn prints its arguments to standard error, followed by a newline.
func warn(v ...interface{}) {
	fmt.Fprintln(os.Stderr, v...)
}

// warnf prints a formatted message to standard error, followed by a newline.
func warnf(msg string, v ...interface{}) {
	fmt.Fprintf(os.Stderr, msg+"\n", v...)
}
1035
// printstack writes the current goroutine's stack trace to standard error,
// leaving out the first two lines of the debug.Stack output.
func printstack() {
	lines := strings.Split(string(debug.Stack()), "\n")
	fmt.Fprintln(os.Stderr, strings.Join(lines[2:], "\n"))
}
1040