1package bolt 2 3import ( 4 "errors" 5 "fmt" 6 "hash/fnv" 7 "log" 8 "os" 9 "runtime" 10 "runtime/debug" 11 "strings" 12 "sync" 13 "time" 14 "unsafe" 15) 16 17// The largest step that can be taken when remapping the mmap. 18const maxMmapStep = 1 << 30 // 1GB 19 20// The data file format version. 21const version = 2 22 23// Represents a marker value to indicate that a file is a Bolt DB. 24const magic uint32 = 0xED0CDAED 25 26// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when 27// syncing changes to a file. This is required as some operating systems, 28// such as OpenBSD, do not have a unified buffer cache (UBC) and writes 29// must be synchronized using the msync(2) syscall. 30const IgnoreNoSync = runtime.GOOS == "openbsd" 31 32// Default values if not set in a DB instance. 33const ( 34 DefaultMaxBatchSize int = 1000 35 DefaultMaxBatchDelay = 10 * time.Millisecond 36 DefaultAllocSize = 16 * 1024 * 1024 37) 38 39// default page size for db is set to the OS page size. 40var defaultPageSize = os.Getpagesize() 41 42// DB represents a collection of buckets persisted to a file on disk. 43// All data access is performed through transactions which can be obtained through the DB. 44// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. 45type DB struct { 46 // When enabled, the database will perform a Check() after every commit. 47 // A panic is issued if the database is in an inconsistent state. This 48 // flag has a large performance impact so it should only be used for 49 // debugging purposes. 50 StrictMode bool 51 52 // Setting the NoSync flag will cause the database to skip fsync() 53 // calls after each commit. This can be useful when bulk loading data 54 // into a database and you can restart the bulk load in the event of 55 // a system failure or database corruption. Do not set this flag for 56 // normal use. 57 // 58 // If the package global IgnoreNoSync constant is true, this value is 59 // ignored. 
See the comment on that constant for more details. 60 // 61 // THIS IS UNSAFE. PLEASE USE WITH CAUTION. 62 NoSync bool 63 64 // When true, skips the truncate call when growing the database. 65 // Setting this to true is only safe on non-ext3/ext4 systems. 66 // Skipping truncation avoids preallocation of hard drive space and 67 // bypasses a truncate() and fsync() syscall on remapping. 68 // 69 // https://github.com/boltdb/bolt/issues/284 70 NoGrowSync bool 71 72 // If you want to read the entire database fast, you can set MmapFlag to 73 // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. 74 MmapFlags int 75 76 // MaxBatchSize is the maximum size of a batch. Default value is 77 // copied from DefaultMaxBatchSize in Open. 78 // 79 // If <=0, disables batching. 80 // 81 // Do not change concurrently with calls to Batch. 82 MaxBatchSize int 83 84 // MaxBatchDelay is the maximum delay before a batch starts. 85 // Default value is copied from DefaultMaxBatchDelay in Open. 86 // 87 // If <=0, effectively disables batching. 88 // 89 // Do not change concurrently with calls to Batch. 90 MaxBatchDelay time.Duration 91 92 // AllocSize is the amount of space allocated when the database 93 // needs to create new pages. This is done to amortize the cost 94 // of truncate() and fsync() when growing the data file. 95 AllocSize int 96 97 path string 98 file *os.File 99 lockfile *os.File // windows only 100 dataref []byte // mmap'ed readonly, write throws SEGV 101 data *[maxMapSize]byte 102 datasz int 103 filesz int // current on disk file size 104 meta0 *meta 105 meta1 *meta 106 pageSize int 107 opened bool 108 rwtx *Tx 109 txs []*Tx 110 freelist *freelist 111 stats Stats 112 113 pagePool sync.Pool 114 115 batchMu sync.Mutex 116 batch *batch 117 118 rwlock sync.Mutex // Allows only one writer at a time. 119 metalock sync.Mutex // Protects meta page access. 120 mmaplock sync.RWMutex // Protects mmap access during remapping. 
121 statlock sync.RWMutex // Protects stats access. 122 123 ops struct { 124 writeAt func(b []byte, off int64) (n int, err error) 125 } 126 127 // Read only mode. 128 // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately. 129 readOnly bool 130} 131 132// Path returns the path to currently open database file. 133func (db *DB) Path() string { 134 return db.path 135} 136 137// GoString returns the Go string representation of the database. 138func (db *DB) GoString() string { 139 return fmt.Sprintf("bolt.DB{path:%q}", db.path) 140} 141 142// String returns the string representation of the database. 143func (db *DB) String() string { 144 return fmt.Sprintf("DB<%q>", db.path) 145} 146 147// Open creates and opens a database at the given path. 148// If the file does not exist then it will be created automatically. 149// Passing in nil options will cause Bolt to open the database with the default options. 150func Open(path string, mode os.FileMode, options *Options) (*DB, error) { 151 var db = &DB{opened: true} 152 153 // Set default options if no options are provided. 154 if options == nil { 155 options = DefaultOptions 156 } 157 db.NoGrowSync = options.NoGrowSync 158 db.MmapFlags = options.MmapFlags 159 160 // Set default values for later DB operations. 161 db.MaxBatchSize = DefaultMaxBatchSize 162 db.MaxBatchDelay = DefaultMaxBatchDelay 163 db.AllocSize = DefaultAllocSize 164 165 flag := os.O_RDWR 166 if options.ReadOnly { 167 flag = os.O_RDONLY 168 db.readOnly = true 169 } 170 171 // Open data file and separate sync handler for metadata writes. 172 db.path = path 173 var err error 174 if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { 175 _ = db.close() 176 return nil, err 177 } 178 179 // Lock file so that other processes using Bolt in read-write mode cannot 180 // use the database at the same time. This would cause corruption since 181 // the two processes would write meta pages and free pages separately. 
182 // The database file is locked exclusively (only one process can grab the lock) 183 // if !options.ReadOnly. 184 // The database file is locked using the shared lock (more than one process may 185 // hold a lock at the same time) otherwise (options.ReadOnly is set). 186 if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { 187 _ = db.close() 188 return nil, err 189 } 190 191 // Default values for test hooks 192 db.ops.writeAt = db.file.WriteAt 193 194 // Initialize the database if it doesn't exist. 195 if info, err := db.file.Stat(); err != nil { 196 return nil, err 197 } else if info.Size() == 0 { 198 // Initialize new files with meta pages. 199 if err := db.init(); err != nil { 200 return nil, err 201 } 202 } else { 203 // Read the first meta page to determine the page size. 204 var buf [0x1000]byte 205 if _, err := db.file.ReadAt(buf[:], 0); err == nil { 206 m := db.pageInBuffer(buf[:], 0).meta() 207 if err := m.validate(); err != nil { 208 // If we can't read the page size, we can assume it's the same 209 // as the OS -- since that's how the page size was chosen in the 210 // first place. 211 // 212 // If the first page is invalid and this OS uses a different 213 // page size than what the database was created with then we 214 // are out of luck and cannot access the database. 215 db.pageSize = os.Getpagesize() 216 } else { 217 db.pageSize = int(m.pageSize) 218 } 219 } 220 } 221 222 // Initialize page pool. 223 db.pagePool = sync.Pool{ 224 New: func() interface{} { 225 return make([]byte, db.pageSize) 226 }, 227 } 228 229 // Memory map the data file. 230 if err := db.mmap(options.InitialMmapSize); err != nil { 231 _ = db.close() 232 return nil, err 233 } 234 235 // Read in the freelist. 236 db.freelist = newFreelist() 237 db.freelist.read(db.page(db.meta().freelist)) 238 239 // Mark the database as opened and return. 240 return db, nil 241} 242 243// mmap opens the underlying memory-mapped file and initializes the meta references. 
244// minsz is the minimum size that the new mmap can be. 245func (db *DB) mmap(minsz int) error { 246 db.mmaplock.Lock() 247 defer db.mmaplock.Unlock() 248 249 info, err := db.file.Stat() 250 if err != nil { 251 return fmt.Errorf("mmap stat error: %s", err) 252 } else if int(info.Size()) < db.pageSize*2 { 253 return fmt.Errorf("file size too small") 254 } 255 256 // Ensure the size is at least the minimum size. 257 var size = int(info.Size()) 258 if size < minsz { 259 size = minsz 260 } 261 size, err = db.mmapSize(size) 262 if err != nil { 263 return err 264 } 265 266 // Dereference all mmap references before unmapping. 267 if db.rwtx != nil { 268 db.rwtx.root.dereference() 269 } 270 271 // Unmap existing data before continuing. 272 if err := db.munmap(); err != nil { 273 return err 274 } 275 276 // Memory-map the data file as a byte slice. 277 if err := mmap(db, size); err != nil { 278 return err 279 } 280 281 // Save references to the meta pages. 282 db.meta0 = db.page(0).meta() 283 db.meta1 = db.page(1).meta() 284 285 // Validate the meta pages. We only return an error if both meta pages fail 286 // validation, since meta0 failing validation means that it wasn't saved 287 // properly -- but we can recover using meta1. And vice-versa. 288 err0 := db.meta0.validate() 289 err1 := db.meta1.validate() 290 if err0 != nil && err1 != nil { 291 return err0 292 } 293 294 return nil 295} 296 297// munmap unmaps the data file from memory. 298func (db *DB) munmap() error { 299 if err := munmap(db); err != nil { 300 return fmt.Errorf("unmap error: " + err.Error()) 301 } 302 return nil 303} 304 305// mmapSize determines the appropriate size for the mmap given the current size 306// of the database. The minimum size is 32KB and doubles until it reaches 1GB. 307// Returns an error if the new mmap size is greater than the max allowed. 308func (db *DB) mmapSize(size int) (int, error) { 309 // Double the size from 32KB until 1GB. 
310 for i := uint(15); i <= 30; i++ { 311 if size <= 1<<i { 312 return 1 << i, nil 313 } 314 } 315 316 // Verify the requested size is not above the maximum allowed. 317 if size > maxMapSize { 318 return 0, fmt.Errorf("mmap too large") 319 } 320 321 // If larger than 1GB then grow by 1GB at a time. 322 sz := int64(size) 323 if remainder := sz % int64(maxMmapStep); remainder > 0 { 324 sz += int64(maxMmapStep) - remainder 325 } 326 327 // Ensure that the mmap size is a multiple of the page size. 328 // This should always be true since we're incrementing in MBs. 329 pageSize := int64(db.pageSize) 330 if (sz % pageSize) != 0 { 331 sz = ((sz / pageSize) + 1) * pageSize 332 } 333 334 // If we've exceeded the max size then only grow up to the max size. 335 if sz > maxMapSize { 336 sz = maxMapSize 337 } 338 339 return int(sz), nil 340} 341 342// init creates a new database file and initializes its meta pages. 343func (db *DB) init() error { 344 // Set the page size to the OS page size. 345 db.pageSize = os.Getpagesize() 346 347 // Create two meta pages on a buffer. 348 buf := make([]byte, db.pageSize*4) 349 for i := 0; i < 2; i++ { 350 p := db.pageInBuffer(buf[:], pgid(i)) 351 p.id = pgid(i) 352 p.flags = metaPageFlag 353 354 // Initialize the meta page. 355 m := p.meta() 356 m.magic = magic 357 m.version = version 358 m.pageSize = uint32(db.pageSize) 359 m.freelist = 2 360 m.root = bucket{root: 3} 361 m.pgid = 4 362 m.txid = txid(i) 363 m.checksum = m.sum64() 364 } 365 366 // Write an empty freelist at page 3. 367 p := db.pageInBuffer(buf[:], pgid(2)) 368 p.id = pgid(2) 369 p.flags = freelistPageFlag 370 p.count = 0 371 372 // Write an empty leaf page at page 4. 373 p = db.pageInBuffer(buf[:], pgid(3)) 374 p.id = pgid(3) 375 p.flags = leafPageFlag 376 p.count = 0 377 378 // Write the buffer to our data file. 
379 if _, err := db.ops.writeAt(buf, 0); err != nil { 380 return err 381 } 382 if err := fdatasync(db); err != nil { 383 return err 384 } 385 386 return nil 387} 388 389// Close releases all database resources. 390// All transactions must be closed before closing the database. 391func (db *DB) Close() error { 392 db.rwlock.Lock() 393 defer db.rwlock.Unlock() 394 395 db.metalock.Lock() 396 defer db.metalock.Unlock() 397 398 db.mmaplock.RLock() 399 defer db.mmaplock.RUnlock() 400 401 return db.close() 402} 403 404func (db *DB) close() error { 405 if !db.opened { 406 return nil 407 } 408 409 db.opened = false 410 411 db.freelist = nil 412 413 // Clear ops. 414 db.ops.writeAt = nil 415 416 // Close the mmap. 417 if err := db.munmap(); err != nil { 418 return err 419 } 420 421 // Close file handles. 422 if db.file != nil { 423 // No need to unlock read-only file. 424 if !db.readOnly { 425 // Unlock the file. 426 if err := funlock(db); err != nil { 427 log.Printf("bolt.Close(): funlock error: %s", err) 428 } 429 } 430 431 // Close the file descriptor. 432 if err := db.file.Close(); err != nil { 433 return fmt.Errorf("db file close: %s", err) 434 } 435 db.file = nil 436 } 437 438 db.path = "" 439 return nil 440} 441 442// Begin starts a new transaction. 443// Multiple read-only transactions can be used concurrently but only one 444// write transaction can be used at a time. Starting multiple write transactions 445// will cause the calls to block and be serialized until the current write 446// transaction finishes. 447// 448// Transactions should not be dependent on one another. Opening a read 449// transaction and a write transaction in the same goroutine can cause the 450// writer to deadlock because the database periodically needs to re-mmap itself 451// as it grows and it cannot do that while a read transaction is open. 
452// 453// If a long running read transaction (for example, a snapshot transaction) is 454// needed, you might want to set DB.InitialMmapSize to a large enough value 455// to avoid potential blocking of write transaction. 456// 457// IMPORTANT: You must close read-only transactions after you are finished or 458// else the database will not reclaim old pages. 459func (db *DB) Begin(writable bool) (*Tx, error) { 460 if writable { 461 return db.beginRWTx() 462 } 463 return db.beginTx() 464} 465 466func (db *DB) beginTx() (*Tx, error) { 467 // Lock the meta pages while we initialize the transaction. We obtain 468 // the meta lock before the mmap lock because that's the order that the 469 // write transaction will obtain them. 470 db.metalock.Lock() 471 472 // Obtain a read-only lock on the mmap. When the mmap is remapped it will 473 // obtain a write lock so all transactions must finish before it can be 474 // remapped. 475 db.mmaplock.RLock() 476 477 // Exit if the database is not open yet. 478 if !db.opened { 479 db.mmaplock.RUnlock() 480 db.metalock.Unlock() 481 return nil, ErrDatabaseNotOpen 482 } 483 484 // Create a transaction associated with the database. 485 t := &Tx{} 486 t.init(db) 487 488 // Keep track of transaction until it closes. 489 db.txs = append(db.txs, t) 490 n := len(db.txs) 491 492 // Unlock the meta pages. 493 db.metalock.Unlock() 494 495 // Update the transaction stats. 496 db.statlock.Lock() 497 db.stats.TxN++ 498 db.stats.OpenTxN = n 499 db.statlock.Unlock() 500 501 return t, nil 502} 503 504func (db *DB) beginRWTx() (*Tx, error) { 505 // If the database was opened with Options.ReadOnly, return an error. 506 if db.readOnly { 507 return nil, ErrDatabaseReadOnly 508 } 509 510 // Obtain writer lock. This is released by the transaction when it closes. 511 // This enforces only one writer transaction at a time. 512 db.rwlock.Lock() 513 514 // Once we have the writer lock then we can lock the meta pages so that 515 // we can set up the transaction. 
516 db.metalock.Lock() 517 defer db.metalock.Unlock() 518 519 // Exit if the database is not open yet. 520 if !db.opened { 521 db.rwlock.Unlock() 522 return nil, ErrDatabaseNotOpen 523 } 524 525 // Create a transaction associated with the database. 526 t := &Tx{writable: true} 527 t.init(db) 528 db.rwtx = t 529 530 // Free any pages associated with closed read-only transactions. 531 var minid txid = 0xFFFFFFFFFFFFFFFF 532 for _, t := range db.txs { 533 if t.meta.txid < minid { 534 minid = t.meta.txid 535 } 536 } 537 if minid > 0 { 538 db.freelist.release(minid - 1) 539 } 540 541 return t, nil 542} 543 544// removeTx removes a transaction from the database. 545func (db *DB) removeTx(tx *Tx) { 546 // Release the read lock on the mmap. 547 db.mmaplock.RUnlock() 548 549 // Use the meta lock to restrict access to the DB object. 550 db.metalock.Lock() 551 552 // Remove the transaction. 553 for i, t := range db.txs { 554 if t == tx { 555 last := len(db.txs) - 1 556 db.txs[i] = db.txs[last] 557 db.txs[last] = nil 558 db.txs = db.txs[:last] 559 break 560 } 561 } 562 n := len(db.txs) 563 564 // Unlock the meta pages. 565 db.metalock.Unlock() 566 567 // Merge statistics. 568 db.statlock.Lock() 569 db.stats.OpenTxN = n 570 db.stats.TxStats.add(&tx.stats) 571 db.statlock.Unlock() 572} 573 574// Update executes a function within the context of a read-write managed transaction. 575// If no error is returned from the function then the transaction is committed. 576// If an error is returned then the entire transaction is rolled back. 577// Any error that is returned from the function or returned from the commit is 578// returned from the Update() method. 579// 580// Attempting to manually commit or rollback within the function will cause a panic. 581func (db *DB) Update(fn func(*Tx) error) error { 582 t, err := db.Begin(true) 583 if err != nil { 584 return err 585 } 586 587 // Make sure the transaction rolls back in the event of a panic. 
588 defer func() { 589 if t.db != nil { 590 t.rollback() 591 } 592 }() 593 594 // Mark as a managed tx so that the inner function cannot manually commit. 595 t.managed = true 596 597 // If an error is returned from the function then rollback and return error. 598 err = fn(t) 599 t.managed = false 600 if err != nil { 601 _ = t.Rollback() 602 return err 603 } 604 605 return t.Commit() 606} 607 608// View executes a function within the context of a managed read-only transaction. 609// Any error that is returned from the function is returned from the View() method. 610// 611// Attempting to manually rollback within the function will cause a panic. 612func (db *DB) View(fn func(*Tx) error) error { 613 t, err := db.Begin(false) 614 if err != nil { 615 return err 616 } 617 618 // Make sure the transaction rolls back in the event of a panic. 619 defer func() { 620 if t.db != nil { 621 t.rollback() 622 } 623 }() 624 625 // Mark as a managed tx so that the inner function cannot manually rollback. 626 t.managed = true 627 628 // If an error is returned from the function then pass it through. 629 err = fn(t) 630 t.managed = false 631 if err != nil { 632 _ = t.Rollback() 633 return err 634 } 635 636 if err := t.Rollback(); err != nil { 637 return err 638 } 639 640 return nil 641} 642 643// Batch calls fn as part of a batch. It behaves similar to Update, 644// except: 645// 646// 1. concurrent Batch calls can be combined into a single Bolt 647// transaction. 648// 649// 2. the function passed to Batch may be called multiple times, 650// regardless of whether it returns error or not. 651// 652// This means that Batch function side effects must be idempotent and 653// take permanent effect only after a successful return is seen in 654// caller. 655// 656// The maximum batch size and delay can be adjusted with DB.MaxBatchSize 657// and DB.MaxBatchDelay, respectively. 658// 659// Batch is only useful when there are multiple goroutines calling it. 
660func (db *DB) Batch(fn func(*Tx) error) error { 661 errCh := make(chan error, 1) 662 663 db.batchMu.Lock() 664 if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) { 665 // There is no existing batch, or the existing batch is full; start a new one. 666 db.batch = &batch{ 667 db: db, 668 } 669 db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) 670 } 671 db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) 672 if len(db.batch.calls) >= db.MaxBatchSize { 673 // wake up batch, it's ready to run 674 go db.batch.trigger() 675 } 676 db.batchMu.Unlock() 677 678 err := <-errCh 679 if err == trySolo { 680 err = db.Update(fn) 681 } 682 return err 683} 684 685type call struct { 686 fn func(*Tx) error 687 err chan<- error 688} 689 690type batch struct { 691 db *DB 692 timer *time.Timer 693 start sync.Once 694 calls []call 695} 696 697// trigger runs the batch if it hasn't already been run. 698func (b *batch) trigger() { 699 b.start.Do(b.run) 700} 701 702// run performs the transactions in the batch and communicates results 703// back to DB.Batch. 704func (b *batch) run() { 705 b.db.batchMu.Lock() 706 b.timer.Stop() 707 // Make sure no new work is added to this batch, but don't break 708 // other batches. 709 if b.db.batch == b { 710 b.db.batch = nil 711 } 712 b.db.batchMu.Unlock() 713 714retry: 715 for len(b.calls) > 0 { 716 var failIdx = -1 717 err := b.db.Update(func(tx *Tx) error { 718 for i, c := range b.calls { 719 if err := safelyCall(c.fn, tx); err != nil { 720 failIdx = i 721 return err 722 } 723 } 724 return nil 725 }) 726 727 if failIdx >= 0 { 728 // take the failing transaction out of the batch. it's 729 // safe to shorten b.calls here because db.batch no longer 730 // points to us, and we hold the mutex anyway. 
731 c := b.calls[failIdx] 732 b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] 733 // tell the submitter re-run it solo, continue with the rest of the batch 734 c.err <- trySolo 735 continue retry 736 } 737 738 // pass success, or bolt internal errors, to all callers 739 for _, c := range b.calls { 740 if c.err != nil { 741 c.err <- err 742 } 743 } 744 break retry 745 } 746} 747 748// trySolo is a special sentinel error value used for signaling that a 749// transaction function should be re-run. It should never be seen by 750// callers. 751var trySolo = errors.New("batch function returned an error and should be re-run solo") 752 753type panicked struct { 754 reason interface{} 755} 756 757func (p panicked) Error() string { 758 if err, ok := p.reason.(error); ok { 759 return err.Error() 760 } 761 return fmt.Sprintf("panic: %v", p.reason) 762} 763 764func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { 765 defer func() { 766 if p := recover(); p != nil { 767 err = panicked{p} 768 } 769 }() 770 return fn(tx) 771} 772 773// Sync executes fdatasync() against the database file handle. 774// 775// This is not necessary under normal operation, however, if you use NoSync 776// then it allows you to force the database file to sync against the disk. 777func (db *DB) Sync() error { return fdatasync(db) } 778 779// Stats retrieves ongoing performance stats for the database. 780// This is only updated when a transaction closes. 781func (db *DB) Stats() Stats { 782 db.statlock.RLock() 783 defer db.statlock.RUnlock() 784 return db.stats 785} 786 787// This is for internal access to the raw data bytes from the C cursor, use 788// carefully, or not at all. 789func (db *DB) Info() *Info { 790 return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize} 791} 792 793// page retrieves a page reference from the mmap based on the current page size. 
func (db *DB) page(id pgid) *page {
	// Byte offset of the page inside the mapped data.
	pos := id * pgid(db.pageSize)
	return (*page)(unsafe.Pointer(&db.data[pos]))
}

// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
	return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
}

// meta retrieves the current meta page reference.
// It panics if neither of the two meta pages passes validation.
func (db *DB) meta() *meta {
	// We have to return the meta with the highest txid which doesn't fail
	// validation. Otherwise, we can cause errors when in fact the database is
	// in a consistent state. metaA is the one with the higher txid.
	metaA := db.meta0
	metaB := db.meta1
	if db.meta1.txid > db.meta0.txid {
		metaA = db.meta1
		metaB = db.meta0
	}

	// Use higher meta page if valid. Otherwise fallback to previous, if valid.
	if err := metaA.validate(); err == nil {
		return metaA
	} else if err := metaB.validate(); err == nil {
		return metaB
	}

	// This should never be reached, because both meta1 and meta0 were validated
	// on mmap() and we do fsync() on every write.
	panic("bolt.DB.meta(): invalid meta pages")
}

// allocate returns a contiguous block of memory starting at a given page.
// count is the number of pages requested. The returned page lives in a
// temporary in-memory buffer; its id is taken from the freelist when the
// freelist can satisfy the request, and from the write transaction's page id
// high water mark otherwise.
func (db *DB) allocate(count int) (*page, error) {
	// Allocate a temporary buffer for the page. Single pages come from the
	// page pool, larger runs are allocated directly.
	var buf []byte
	if count == 1 {
		buf = db.pagePool.Get().([]byte)
	} else {
		buf = make([]byte, count*db.pageSize)
	}
	p := (*page)(unsafe.Pointer(&buf[0]))
	p.overflow = uint32(count - 1)

	// Use pages from the freelist if they are available.
	// A zero id from the freelist means nothing suitable was found.
	if p.id = db.freelist.allocate(count); p.id != 0 {
		return p, nil
	}

	// Resize mmap() if we're at the end.
	p.id = db.rwtx.meta.pgid
	var minsz = int((p.id+pgid(count))+1) * db.pageSize
	if minsz >= db.datasz {
		if err := db.mmap(minsz); err != nil {
			return nil, fmt.Errorf("mmap allocate error: %s", err)
		}
	}

	// Move the page id high water mark.
	db.rwtx.meta.pgid += pgid(count)

	return p, nil
}

// grow grows the size of the database to the given sz.
// The recorded file size is updated even when the truncate and sync calls
// are skipped (NoGrowSync set, or the database is read-only).
func (db *DB) grow(sz int) error {
	// Ignore if the new size is less than available file size.
	if sz <= db.filesz {
		return nil
	}

	// If the data is smaller than the alloc size then only allocate what's needed.
	// Once it goes over the allocation size then allocate in chunks.
	if db.datasz < db.AllocSize {
		sz = db.datasz
	} else {
		sz += db.AllocSize
	}

	// Truncate and fsync to ensure file size metadata is flushed.
	// https://github.com/boltdb/bolt/issues/284
	if !db.NoGrowSync && !db.readOnly {
		// The truncate call is skipped on Windows; the sync still runs.
		if runtime.GOOS != "windows" {
			if err := db.file.Truncate(int64(sz)); err != nil {
				return fmt.Errorf("file resize error: %s", err)
			}
		}
		if err := db.file.Sync(); err != nil {
			return fmt.Errorf("file sync error: %s", err)
		}
	}

	db.filesz = sz
	return nil
}

// IsReadOnly reports whether the database was opened in read-only mode.
func (db *DB) IsReadOnly() bool {
	return db.readOnly
}

// Options represents the options that can be set when opening a database.
type Options struct {
	// Timeout is the amount of time to wait to obtain a file lock.
	// When set to zero it will wait indefinitely. This option is only
	// available on Darwin and Linux.
	Timeout time.Duration

	// Sets the DB.NoGrowSync flag before memory mapping the file.
	NoGrowSync bool

	// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
	// grab a shared lock (UNIX).
	ReadOnly bool

	// Sets the DB.MmapFlags flag before memory mapping the file.
	MmapFlags int

	// InitialMmapSize is the initial mmap size of the database
	// in bytes. Read transactions won't block write transaction
	// if the InitialMmapSize is large enough to hold database mmap
	// size. (See DB.Begin for more information)
	//
	// If <=0, the initial map size is 0.
	// If InitialMmapSize is smaller than the previous database size,
	// it takes no effect.
	InitialMmapSize int
}

// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
	Timeout:    0,
	NoGrowSync: false,
}

// Stats represents statistics about the database.
type Stats struct {
	// Freelist stats
	FreePageN     int // total number of free pages on the freelist
	PendingPageN  int // total number of pending pages on the freelist
	FreeAlloc     int // total bytes allocated in free pages
	FreelistInuse int // total bytes used by the freelist

	// Transaction stats
	TxN     int // total number of started read transactions
	OpenTxN int // number of currently open read transactions

	TxStats TxStats // global, ongoing stats.
}

// Sub calculates and returns the difference between two sets of database stats.
// This is useful when obtaining stats at two different points in time and
// you need the performance counters that occurred within that time span.
949func (s *Stats) Sub(other *Stats) Stats { 950 if other == nil { 951 return *s 952 } 953 var diff Stats 954 diff.FreePageN = s.FreePageN 955 diff.PendingPageN = s.PendingPageN 956 diff.FreeAlloc = s.FreeAlloc 957 diff.FreelistInuse = s.FreelistInuse 958 diff.TxN = s.TxN - other.TxN 959 diff.TxStats = s.TxStats.Sub(&other.TxStats) 960 return diff 961} 962 963func (s *Stats) add(other *Stats) { 964 s.TxStats.add(&other.TxStats) 965} 966 967type Info struct { 968 Data uintptr 969 PageSize int 970} 971 972type meta struct { 973 magic uint32 974 version uint32 975 pageSize uint32 976 flags uint32 977 root bucket 978 freelist pgid 979 pgid pgid 980 txid txid 981 checksum uint64 982} 983 984// validate checks the marker bytes and version of the meta page to ensure it matches this binary. 985func (m *meta) validate() error { 986 if m.magic != magic { 987 return ErrInvalid 988 } else if m.version != version { 989 return ErrVersionMismatch 990 } else if m.checksum != 0 && m.checksum != m.sum64() { 991 return ErrChecksum 992 } 993 return nil 994} 995 996// copy copies one meta object to another. 997func (m *meta) copy(dest *meta) { 998 *dest = *m 999} 1000 1001// write writes the meta onto a page. 1002func (m *meta) write(p *page) { 1003 if m.root.root >= m.pgid { 1004 panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)) 1005 } else if m.freelist >= m.pgid { 1006 panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)) 1007 } 1008 1009 // Page id is either going to be 0 or 1 which we can determine by the transaction ID. 1010 p.id = pgid(m.txid % 2) 1011 p.flags |= metaPageFlag 1012 1013 // Calculate the checksum. 1014 m.checksum = m.sum64() 1015 1016 m.copy(p.meta()) 1017} 1018 1019// generates the checksum for the meta. 
1020func (m *meta) sum64() uint64 { 1021 var h = fnv.New64a() 1022 _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:]) 1023 return h.Sum64() 1024} 1025 1026// _assert will panic with a given formatted message if the given condition is false. 1027func _assert(condition bool, msg string, v ...interface{}) { 1028 if !condition { 1029 panic(fmt.Sprintf("assertion failed: "+msg, v...)) 1030 } 1031} 1032 1033func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) } 1034func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) } 1035 1036func printstack() { 1037 stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n") 1038 fmt.Fprintln(os.Stderr, stack) 1039} 1040