// Copyright 2014 The lldb Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The storage space management.

package lldb

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"sort"
	"strings"
	"sync"

	"github.com/cznic/internal/buffer"
	"github.com/cznic/mathutil"
	"github.com/cznic/zappy"
)

const (
	// Upper bound for scratch buffers handling a single block:
	// the maximum content request plus a little headroom
	// (presumably block head/tail bookkeeping — TODO confirm).
	maxBuf = maxRq + 20
)

// Options are passed to the NewAllocator to amend some configuration. The
// compatibility promise is the same as of struct types in the Go standard
// library - introducing changes can be made only by adding new exported
// fields, which is backward compatible as long as client code uses field names
// to assign values of imported struct types literals.
//
// NOTE: No options are currently defined.
type Options struct{}

// AllocStats record statistics about a Filer. It can be optionally filled by
// Allocator.Verify, if successful.
type AllocStats struct {
	Handles     int64           // total valid handles in use
	Compression int64           // number of compressed blocks
	TotalAtoms  int64           // total number of atoms == AllocAtoms + FreeAtoms
	AllocBytes  int64           // bytes allocated (after decompression, if/where used)
	AllocAtoms  int64           // atoms allocated/used, including relocation atoms
	Relocations int64           // number of relocated used blocks
	FreeAtoms   int64           // atoms unused
	AllocMap    map[int64]int64 // allocated block size in atoms -> count of such blocks
	FreeMap     map[int64]int64 // free block size in atoms -> count of such blocks
}

/*

Allocator implements "raw" storage space management (allocation and
deallocation) for a low level of a DB engine. The storage is an abstraction
provided by a Filer.
55 56The terms MUST or MUST NOT, if/where used in the documentation of Allocator, 57written in all caps as seen here, are a requirement for any possible 58alternative implementations aiming for compatibility with this one. 59 60Filer file 61 62A Filer file, or simply 'file', is a linear, contiguous sequence of blocks. 63Blocks may be either free (currently unused) or allocated (currently used). 64Some blocks may eventually become virtual in a sense as they may not be 65realized in the storage (sparse files). 66 67Free Lists Table 68 69File starts with a FLT. This table records heads of 14 doubly linked free 70lists. The zero based index (I) vs minimal size of free blocks in that list, 71except the last one which registers free blocks of size 4112+ atoms: 72 73 MinSize == 2^I 74 75 For example 0 -> 1, 1 -> 2, ... 12 -> 4096. 76 77Each entry in the FLT is 8 bytes in netwtork order, MSB MUST be zero, ie. the 78slot value is effectively only 7 bytes. The value is the handle of the head of 79the respective doubly linked free list. The FLT size is 14*8 == 112(0x70) 80bytes. If the free blocks list for any particular size is empty, the respective 81FLT slot is zero. Sizes of free blocks in one list MUST NOT overlap with sizes 82of free lists in other list. For example, even though a free block of size 2 83technically is of minimal size >= 1, it MUST NOT be put to the list for slot 0 84(minimal size 1), but in slot 1( minimal size 2). 85 86 slot 0: sizes [1, 2) 87 slot 1: sizes [2, 4) 88 slot 2: sizes [4, 8) 89 ... 90 slot 11: sizes [2048, 4096) 91 slot 12: sizes [4096, 4112) 92 slot 13: sizes [4112, inf) 93 94The last FLT slot collects all free blocks bigger than its minimal size. That 95still respects the 'no overlap' invariant. 96 97File blocks 98 99A block is a linear, contiguous sequence of atoms. The first and last atoms of 100a block provide information about, for example, whether the block is free or 101used, what is the size of the block, etc. 
Details are discussed elsewhere. The 102first block of a file starts immediately after FLT, ie. at file offset 103112(0x70). 104 105Block atoms 106 107An atom is a fixed size piece of a block (and thus of a file too); it is 16 108bytes long. A consequence is that for a valid file: 109 110 filesize == 0 (mod 16) 111 112The first atom of the first block is considered to be atom #1. 113 114Block handles 115 116A handle is an integer referring to a block. The reference is the number of the 117atom the block starts with. Put in other way: 118 119 handle == offset/16 - 6 120 offset == 16 * (handle + 6) 121 122`offset` is the offset of the first byte of the block, measured in bytes 123- as in fseek(3). Handle has type `int64`, but only the lower 7 bytes may be 124nonzero while referring to a block, both in code as well as when persisted in 125the the file's internal bookkeeping structures - see 'Block types' bellow. So a 126handle is effectively only `uint56`. This also means that the maximum usable 127size of a file is 2^56 atoms. That is 2^60 bytes == 1 exabyte (10^18 bytes). 128 129Nil handles 130 131A handle with numeric value of '0' refers to no block. 132 133Zero padding 134 135A padding is used to round-up a block size to be a whole number of atoms. Any 136padding, if present, MUST be all zero bytes. Note that the size of padding is 137in [0, 15]. 138 139Content wiping 140 141When a block is deallocated, its data content is not wiped as the added 142overhead may be substantial while not necessarily needed. Client code should 143however overwrite the content of any block having sensitive data with eg. zeros 144(good compression) - before deallocating the block. 145 146Block tags 147 148Every block is tagged in its first byte (a head tag) and last byte (tail tag). 149Block types are: 150 151 1. Short content used block (head tags 0x00-0xFB) 152 2. Long content used block (head tag 0xFC) 153 3. Relocated used block (head tag 0xFD) 154 4. 
Short, single atom, free block (head tag 0xFE) 155 5. Long free block (head tag 0xFF) 156 157Note: Relocated used block, 3. above (head tag 0xFD) MUST NOT refer to blocks 158other then 1. or 2. above (head tags 0x00-0xFC). 159 160Content blocks 161 162Used blocks (head tags 0x00-0xFC) tail tag distinguish used/unused block and if 163content is compressed or not. 164 165Content compression 166 167The tail flag of an used block is one of 168 169 CC == 0 // Content is not compressed. 170 CC == 1 // Content is in zappy compression format. 171 172If compression of written content is enabled, there are two cases: If 173compressed size < original size then the compressed content should be written 174if it will save at least one atom of the block. If compressed size >= original 175size then the compressed content should not be used. 176 177It's recommended to use compression. For example the BTrees implementation 178assumes compression is used. Using compression may cause a slowdown in some 179cases while it may as well cause a speedup. 180 181Short content block 182 183Short content block carries content of length between N == 0(0x00) and N == 184251(0xFB) bytes. 185 186 |<-first atom start ... last atom end->| 187 +---++-- ... --+-- ... --++------+ 188 | 0 || 1... | 0x*...0x*E || 0x*F | 189 +---++-- ... --+-- ... --++------+ 190 | N || content | padding || CC | 191 +---++-- ... --+-- ... --++------+ 192 193 A == (N+1)/16 + 1 // The number of atoms in the block [1, 16] 194 padding == 15 - (N+1)%16 // Length of the zero padding 195 196Long content block 197 198Long content block carries content of length between N == 252(0xFC) and N == 19965787(0x100FB) bytes. 200 201 |<-first atom start ... last atom end->| 202 +------++------+-- ... --+-- ... --++------+ 203 | 0 || 1..2 | 3... | 0x*...0x*E || 0x*F | 204 +------++------+-- ... --+-- ... --++------+ 205 | 0xFC || M | content | padding || CC | 206 +------++------+-- ... --+-- ... 
--++------+ 207 208 A == (N+3)/16 + 1 // The number of atoms in the block [16, 4112] 209 M == N % 0x10000 // Stored as 2 bytes in network byte order 210 padding == 15 - (N+3)%16 // Length of the zero padding 211 212Relocated used block 213 214Relocated block allows to permanently assign a handle to some content and 215resize the content anytime afterwards without having to update all the possible 216existing references; the handle can be constant while the content size may be 217dynamic. When relocating a block, any space left by the original block content, 218above this single atom block, MUST be reclaimed. 219 220Relocations MUST point only to a used short or long block == blocks with tags 2210x00...0xFC. 222 223 +------++------+---------++----+ 224 | 0 || 1..7 | 8...14 || 15 | 225 +------++------+---------++----+ 226 | 0xFD || H | padding || 0 | 227 +------++------+---------++----+ 228 229H is the handle of the relocated block in network byte order. 230 231Free blocks 232 233Free blocks are the result of space deallocation. Free blocks are organized in 234one or more doubly linked lists, abstracted by the FLT interface. Free blocks 235MUST be "registered" by putting them in such list. Allocator MUST reuse a big 236enough free block, if such exists, before growing the file size. When a free 237block is created by deallocation or reallocation it MUST be joined with any 238adjacently existing free blocks before "registering". If the resulting free 239block is now a last block of a file, the free block MUST be discarded and the 240file size MUST be truncated accordingly instead. Put differently, there MUST 241NOT ever be a free block at the file end. 242 243A single free atom 244 245Is an unused block of size 1 atom. 
246 247 +------++------+--------++------+ 248 | 0 || 1..7 | 8...14 || 15 | 249 +------++------+--------++------+ 250 | 0xFE || P | N || 0xFE | 251 +------++------+--------++------+ 252 253P and N, stored in network byte order, are the previous and next free block 254handles in the doubly linked list to which this free block belongs. 255 256A long unused block 257 258Is an unused block of size > 1 atom. 259 260 +------++------+-------+---------+- ... -+----------++------+ 261 | 0 || 1..7 | 8..14 | 15...21 | | Z-7..Z-1 || Z | 262 +------++------+-------+---------+- ... -+----------++------+ 263 | 0xFF || S | P | N | Leak | S || 0xFF | 264 +------++------+-------+---------+- ... -+----------++------+ 265 266 Z == 16 * S - 1 267 268S is the size of this unused block in atoms. P and N are the previous and next 269free block handles in the doubly linked list to which this free block belongs. 270Leak contains any data the block had before deallocating this block. See also 271the subtitle 'Content wiping' above. S, P and N are stored in network byte 272order. Large free blocks may trigger a consideration of file hole punching of 273the Leak field - for some value of 'large'. 274 275Note: Allocator methods vs CRUD[1]: 276 277 Alloc [C]reate 278 Get [R]ead 279 Realloc [U]pdate 280 Free [D]elete 281 282Note: No Allocator method returns io.EOF. 283 284 [1]: http://en.wikipedia.org/wiki/Create,_read,_update_and_delete 285 286*/ 287type Allocator struct { 288 f Filer 289 flt flt 290 cache cache 291 m map[int64]*node 292 lru lst 293 mu sync.Mutex 294 expHit int64 295 expMiss int64 296 cacheSz int 297 hit uint16 298 miss uint16 299 Compress bool // enables content compression 300} 301 302// NewAllocator returns a new Allocator. To open an existing file, pass its 303// Filer. To create a "new" file, pass a Filer which file is of zero size. 
func NewAllocator(f Filer, opts *Options) (a *Allocator, err error) {
	if opts == nil { // Enforce *Options is always passed
		return nil, errors.New("NewAllocator: nil opts passed")
	}

	a = &Allocator{
		f:       f,
		cacheSz: 10,
	}

	a.cinit()
	// After a rollback the in-memory cache and FLT image no longer match the
	// file content; reset both.
	switch x := f.(type) {
	case *RollbackFiler:
		x.afterRollback = func() error {
			a.cinit()
			return a.flt.load(a.f, 0)
		}
	case *ACIDFiler0:
		x.RollbackFiler.afterRollback = func() error {
			a.cinit()
			return a.flt.load(a.f, 0)
		}
	}

	sz, err := f.Size()
	if err != nil {
		return
	}

	a.flt.init()
	if sz == 0 {
		// Fresh file: write an all-zero (empty) FLT within a structural
		// transaction.
		var b [fltSz]byte
		if err = a.f.BeginUpdate(); err != nil {
			return
		}

		if _, err = f.WriteAt(b[:], 0); err != nil {
			_ = a.f.Rollback()
			return
		}

		return a, a.f.EndUpdate()
	}

	// Existing file: load the persisted FLT.
	return a, a.flt.load(f, 0)
}

// CacheStats reports cache statistics.
//
//TODO return a struct perhaps.
func (a *Allocator) CacheStats() (buffersUsed, buffersTotal int, bytesUsed, bytesTotal, hits, misses int64) {
	buffersUsed = len(a.m)
	buffersTotal = buffersUsed + len(a.cache)
	bytesUsed = a.lru.size()
	bytesTotal = bytesUsed + a.cache.size()
	hits = a.expHit
	misses = a.expMiss
	return
}

// cinit moves every cached node back to the node pool and clears the
// handle map, allocating the map on first use.
func (a *Allocator) cinit() {
	for h, n := range a.m {
		a.cache.put(a.lru.remove(n))
		delete(a.m, h)
	}
	if a.m == nil {
		a.m = map[int64]*node{}
	}
}

// cadd caches content b under handle h. When the cache is at capacity the
// least recently used entry is evicted first.
func (a *Allocator) cadd(b []byte, h int64) {
	if len(a.m) < a.cacheSz {
		n := a.cache.get(len(b))
		n.h = h
		copy(n.b, b)
		a.m[h] = a.lru.pushFront(n)
		return
	}

	// cache full: evict the LRU entry, recycling its node via the pool.
	delete(a.m, a.cache.put(a.lru.removeBack()).h)
	n := a.cache.get(len(b))
	n.h = h
	copy(n.b, b)
	a.m[h] = a.lru.pushFront(n)
	return
}

// cfree drops block h from the content cache, if present.
func (a *Allocator) cfree(h int64) {
	n, ok := a.m[h]
	if !ok { // must have been evicted
		return
	}

	a.cache.put(a.lru.remove(n))
	delete(a.m, h)
}

// Alloc allocates storage space for b and returns the handle of the new block
// with content set to b or an error, if any. The returned handle is valid only
// while the block is used - until the block is deallocated. No two valid
// handles share the same value within the same Filer, but any value of a
// handle not referring to any used block may become valid any time as a result
// of Alloc.
//
// Invoking Alloc on an empty Allocator is guaranteed to return handle with
// value 1. The intended use of content of handle 1 is a root "directory" of
// other data held by an Allocator.
//
// Passing handles not obtained initially from Alloc or not anymore valid to
// any other Allocator methods can result in an irreparably corrupted database.
func (a *Allocator) Alloc(b []byte) (handle int64, err error) {
	pbuf := buffer.Get(zappy.MaxEncodedLen(len(b)))
	defer buffer.Put(pbuf)
	buf := *pbuf
	// buf gets the (possibly compressed) block image, cc the tail tag.
	buf, _, cc, err := a.makeUsedBlock(buf, b)
	if err != nil {
		return
	}

	if handle, err = a.alloc(buf, cc); err == nil {
		a.cadd(b, handle) // cache the uncompressed content
	}
	return
}

// alloc stores the encoded block image b (cc is its tail/compression tag)
// either in a suitable free block found via the FLT or, when none fits, by
// appending at the end of the file. Surplus atoms of the chosen free block
// are re-linked as a smaller free block.
func (a *Allocator) alloc(b []byte, cc byte) (h int64, err error) {
	rqAtoms := n2atoms(len(b))
	if h = a.flt.find(rqAtoms); h == 0 { // must grow
		var sz int64
		if sz, err = a.f.Size(); err != nil {
			return
		}

		h = off2h(sz)
		err = a.writeUsedBlock(h, cc, b)
		return
	}

	// Handle is the first item of a free blocks list.
	tag, s, prev, next, err := a.nfo(h)
	if err != nil {
		return
	}

	if tag != tagFreeShort && tag != tagFreeLong {
		err = &ErrILSEQ{Type: ErrExpFreeTag, Off: h2off(h), Arg: int64(tag)}
		return
	}

	if prev != 0 {
		// A list head must have no predecessor.
		err = &ErrILSEQ{Type: ErrHead, Off: h2off(h), Arg: prev}
		return
	}

	if s < int64(rqAtoms) {
		// FLT invariant: any block in this list must satisfy the request.
		err = &ErrILSEQ{Type: ErrSmall, Arg: int64(rqAtoms), Arg2: s, Off: h2off(h)}
		return
	}

	if err = a.unlink(h, s, prev, next); err != nil {
		return
	}

	if s > int64(rqAtoms) {
		// Split: return the unused tail of the free block to the FLT.
		freeH := h + int64(rqAtoms)
		freeAtoms := s - int64(rqAtoms)
		if err = a.link(freeH, freeAtoms); err != nil {
			return
		}
	}
	return h, a.writeUsedBlock(h, cc, b)
}

// Free deallocates the block referred to by handle or returns an error, if
// any.
//
// After Free succeeds, handle is invalid and must not be used.
//
// Handle must have been obtained initially from Alloc and must be still valid,
// otherwise a database may get irreparably corrupted.
func (a *Allocator) Free(handle int64) (err error) {
	if handle <= 0 || handle > maxHandle {
		return &ErrINVAL{"Allocator.Free: handle out of limits", handle}
	}

	a.cfree(handle)
	return a.free(handle, 0, true)
}

// free deallocates block h. When h is a relocation header and acceptRelocs is
// true, the relocation target is freed first; from is the referring handle,
// used only for error reporting.
func (a *Allocator) free(h, from int64, acceptRelocs bool) (err error) {
	tag, atoms, _, n, err := a.nfo(h)
	if err != nil {
		return
	}

	switch tag {
	default:
		// nop
	case tagUsedLong:
		// nop
	case tagUsedRelocated:
		if !acceptRelocs {
			// A relocation target must not itself be a relocation.
			return &ErrILSEQ{Type: ErrUnexpReloc, Off: h2off(h), Arg: h2off(from)}
		}

		// Free the relocation target before freeing the header block.
		if err = a.free(n, h, false); err != nil {
			return
		}
	case tagFreeShort, tagFreeLong:
		return &ErrINVAL{"Allocator.Free: attempt to free a free block at off", h2off(h)}
	}

	return a.free2(h, atoms)
}

// free2 returns `atoms` atoms at h to the free space pool, coalescing with any
// free left/right neighbours. When the resulting free block would be the file
// tail, the file is truncated instead (there must never be a free block at the
// file end).
func (a *Allocator) free2(h, atoms int64) (err error) {
	sz, err := a.f.Size()
	if err != nil {
		return
	}

	ltag, latoms, lp, ln, err := a.leftNfo(h)
	if err != nil {
		return
	}

	if ltag != tagFreeShort && ltag != tagFreeLong {
		latoms = 0 // left neighbour is not free -> no left join
	}

	var rtag byte
	var ratoms, rp, rn int64

	isTail := h2off(h)+atoms*16 == sz
	if !isTail {
		if rtag, ratoms, rp, rn, err = a.nfo(h + atoms); err != nil {
			return
		}
	}

	if rtag != tagFreeShort && rtag != tagFreeLong {
		ratoms = 0 // right neighbour is not free -> no right join
	}

	switch {
	case latoms == 0 && ratoms == 0:
		// -> isolated <-
		if isTail { // cut tail
			return a.f.Truncate(h2off(h))
		}

		return a.link(h, atoms)
	case latoms == 0 && ratoms != 0:
		// right join ->
		if err = a.unlink(h+atoms, ratoms, rp, rn); err != nil {
			return
		}

		return a.link(h, atoms+ratoms)
	case latoms != 0 && ratoms == 0:
		// <- left join
		if err = a.unlink(h-latoms, latoms, lp, ln); err != nil {
			return
		}

		if isTail {
			return a.f.Truncate(h2off(h - latoms))
		}

		return a.link(h-latoms, latoms+atoms)
	}

	// case latoms != 0 && ratoms != 0:
	// <- middle join ->
	lh, rh := h-latoms, h+atoms
	if err = a.unlink(lh, latoms, lp, ln); err != nil {
		return
	}

	// Prev unlink may have invalidated rp or rn
	if _, _, rp, rn, err = a.nfo(rh); err != nil {
		return
	}

	if err = a.unlink(rh, ratoms, rp, rn); err != nil {
		return
	}

	return a.link(h-latoms, latoms+atoms+ratoms)
}

// Add a free block h to the appropriate free list (as its new head).
func (a *Allocator) link(h, atoms int64) (err error) {
	if err = a.makeFree(h, atoms, 0, a.flt.head(atoms)); err != nil {
		return
	}

	return a.flt.setHead(h, atoms, a.f)
}

// Remove free block h from the free list
func (a *Allocator) unlink(h, atoms, p, n int64) (err error) {
	switch {
	case p == 0 && n == 0:
		// single item list, must be head
		return a.flt.setHead(0, atoms, a.f)
	case p == 0 && n != 0:
		// head of list (has next item[s])
		if err = a.prev(n, 0); err != nil {
			return
		}

		// new head
		return a.flt.setHead(n, atoms, a.f)
	case p != 0 && n == 0:
		// last item in list
		return a.next(p, 0)
	}
	// case p != 0 && n != 0:
	// intermediate item in a list
	if err = a.next(p, n); err != nil {
		return
	}

	return a.prev(n, p)
}

//TODO remove ?
// Return len(slice) == n, reuse src if possible.
func need(n int, src []byte) []byte {
	if cap(src) < n {
		return *buffer.Get(n)
	}

	return src[:n]
}

// Get returns the data content of a block referred to by handle or an error if
// any. The returned slice may be a sub-slice of buf if buf was large enough
// to hold the entire content. Otherwise, a newly allocated slice will be
// returned. It is valid to pass a nil buf.
//
// If the content was stored using compression then it is transparently
// returned decompressed.
//
// Handle must have been obtained initially from Alloc and must be still valid,
// otherwise invalid data may be returned without detecting the error.
//
// Get is safe for concurrent access by multiple goroutines iff no other
// goroutine mutates the DB.
func (a *Allocator) Get(buf []byte, handle int64) (b []byte, err error) {
	buf = buf[:cap(buf)]
	a.mu.Lock() // X1+
	if n, ok := a.m[handle]; ok {
		// Cache hit: serve from the LRU cache.
		a.lru.moveToFront(n)
		b = need(len(n.b), buf)
		copy(b, n.b)
		a.expHit++
		a.hit++
		a.mu.Unlock() // X1-
		return
	}

	a.expMiss++
	a.miss++
	// Adaptive sizing: while the cache is small (< 500 entries), grow it
	// whenever the recent hit rate falls below 95%.
	if a.miss > 10 && len(a.m) < 500 {
		if 100*a.hit/a.miss < 95 {
			a.cacheSz++
		}
		a.hit, a.miss = 0, 0
	}
	a.mu.Unlock() // X1-

	// Cache the content on successful read, keyed by the originally
	// requested handle (not a relocation target).
	defer func(h int64) {
		if err == nil {
			a.mu.Lock() // X2+
			a.cadd(b, h)
			a.mu.Unlock() // X2-
		}
	}(handle)

	pfirst := buffer.Get(16)
	defer buffer.Put(pfirst)
	first := *pfirst
	relocated := false
	relocSrc := handle
reloc:
	if handle <= 0 || handle > maxHandle {
		return nil, &ErrINVAL{"Allocator.Get: handle out of limits", handle}
	}

	off := h2off(handle)
	if err = a.read(first, off); err != nil {
		return
	}

	switch tag := first[0]; tag {
	default:
		// Short content used block; the head tag is the content length.
		dlen := int(tag)
		atoms := n2atoms(dlen)
		switch atoms {
		case 1:
			// Single atom: content is entirely within `first`.
			switch tag = first[15]; tag {
			default:
				return nil, &ErrILSEQ{Type: ErrTailTag, Off: off, Arg: int64(tag)}
			case tagNotCompressed:
				b = need(dlen, buf)
				copy(b, first[1:])
				return
			case tagCompressed:
				return zappy.Decode(buf, first[1:dlen+1])
			}
		default:
			pcc := buffer.Get(1)
			defer buffer.Put(pcc)
			cc := *pcc
			dlen := int(tag)
			atoms := n2atoms(dlen)
			tailOff := off + 16*int64(atoms) - 1
			// The last byte of the block holds the compression flag.
			if err = a.read(cc, tailOff); err != nil {
				return
			}

			switch tag = cc[0]; tag {
			default:
				return nil, &ErrILSEQ{Type: ErrTailTag, Off: off, Arg: int64(tag)}
			case tagNotCompressed:
				b = need(dlen, buf)
				off += 1 // content starts after the 1-byte head
				if err = a.read(b, off); err != nil {
					b = buf[:0]
				}
				return
			case tagCompressed:
				pzbuf := buffer.Get(dlen)
				defer buffer.Put(pzbuf)
				zbuf := *pzbuf
				off += 1
				if err = a.read(zbuf, off); err != nil {
					return buf[:0], err
				}

				return zappy.Decode(buf, zbuf)
			}
		}
	case 0:
		// Empty content.
		return buf[:0], nil
	case tagUsedLong:
		pcc := buffer.Get(1)
		defer buffer.Put(pcc)
		cc := *pcc
		dlen := m2n(int(first[1])<<8 | int(first[2]))
		atoms := n2atoms(dlen)
		tailOff := off + 16*int64(atoms) - 1
		if err = a.read(cc, tailOff); err != nil {
			return
		}

		switch tag = cc[0]; tag {
		default:
			return nil, &ErrILSEQ{Type: ErrTailTag, Off: off, Arg: int64(tag)}
		case tagNotCompressed:
			b = need(dlen, buf)
			off += 3 // content starts after the 1-byte head + 2-byte length
			if err = a.read(b, off); err != nil {
				b = buf[:0]
			}
			return
		case tagCompressed:
			pzbuf := buffer.Get(dlen)
			defer buffer.Put(pzbuf)
			zbuf := *pzbuf
			off += 3
			if err = a.read(zbuf, off); err != nil {
				return buf[:0], err
			}

			return zappy.Decode(buf, zbuf)
		}
	case tagFreeShort, tagFreeLong:
		return nil, &ErrILSEQ{Type: ErrExpUsedTag, Off: off, Arg: int64(tag)}
	case tagUsedRelocated:
		// Follow the relocation exactly once; chained relocations are
		// invalid by the format spec.
		if relocated {
			return nil, &ErrILSEQ{Type: ErrUnexpReloc, Off: off, Arg: relocSrc}
		}

		handle = b2h(first[1:])
		relocated = true
		goto reloc
	}
}

// reallocTestHook, when set (tests only), turns on cache auditing in Realloc.
var reallocTestHook bool

// Realloc sets the content of a block referred to by handle or returns an
// error, if any.
//
// Handle must have been obtained initially from Alloc and must be still valid,
// otherwise a database may get irreparably corrupted.
func (a *Allocator) Realloc(handle int64, b []byte) (err error) {
	if handle <= 0 || handle > maxHandle {
		return &ErrINVAL{"Realloc: handle out of limits", handle}
	}

	a.cfree(handle)
	if err = a.realloc(handle, b); err != nil {
		return
	}

	if reallocTestHook {
		if err = cacheAudit(a.m, &a.lru); err != nil {
			return
		}
	}

	a.cadd(b, handle)
	return
}

// realloc updates the content of block `handle` to b, shrinking or replacing
// in place when possible, expanding into a free right neighbour when it fits,
// and otherwise relocating the content to a new block, leaving a single-atom
// relocation header at `handle` so the handle stays valid.
func (a *Allocator) realloc(handle int64, b []byte) (err error) {
	var dlen, needAtoms0 int

	pb8 := buffer.Get(8)
	defer buffer.Put(pb8)
	b8 := *pb8
	pdst := buffer.Get(zappy.MaxEncodedLen(len(b)))
	defer buffer.Put(pdst)
	dst := *pdst
	b, needAtoms0, cc, err := a.makeUsedBlock(dst, b)
	if err != nil {
		return
	}

	needAtoms := int64(needAtoms0)
	off := h2off(handle)
	if err = a.read(b8[:], off); err != nil {
		return
	}

	switch tag := b8[0]; tag {
	default:
		// Short used block; the head tag is the content length.
		dlen = int(b8[0])
	case tagUsedLong:
		dlen = m2n(int(b8[1])<<8 | int(b8[2]))
	case tagUsedRelocated:
		// Release the old relocation target; the header atom at `handle`
		// will be reused below.
		if err = a.free(b2h(b8[1:]), handle, false); err != nil {
			return err
		}

		dlen = 0
	case tagFreeShort, tagFreeLong:
		return &ErrINVAL{"Allocator.Realloc: invalid handle", handle}
	}

	atoms := int64(n2atoms(dlen))
retry:
	switch {
	case needAtoms < atoms:
		// in place shrink
		if err = a.writeUsedBlock(handle, cc, b); err != nil {
			return
		}

		fh, fa := handle+needAtoms, atoms-needAtoms
		var sz int64
		if sz, err = a.f.Size(); err != nil {
			return err
		}

		// Freed tail at EOF is truncated away instead of linked.
		if h2off(fh)+16*fa == sz {
			return a.f.Truncate(h2off(fh))
		}

		return a.free2(fh, fa)
	case needAtoms == atoms:
		// in place replace
		return a.writeUsedBlock(handle, cc, b)
	}

	// case needAtoms > atoms:
	// in place extend or relocate
	var sz int64
	if sz, err = a.f.Size(); err != nil {
		return
	}

	off = h2off(handle)
	switch {
	case off+atoms*16 == sz:
		// relocating tail block - shortcut
		return a.writeUsedBlock(handle, cc, b)
	default:
		if off+atoms*16 < sz {
			// handle is not a tail block, check right neighbour
			rh := handle + atoms
			rtag, ratoms, p, n, e := a.nfo(rh)
			if e != nil {
				return e
			}

			if rtag == tagFreeShort || rtag == tagFreeLong {
				// Right neighbour is a free block
				if needAtoms <= atoms+ratoms {
					// can expand in place
					if err = a.unlink(rh, ratoms, p, n); err != nil {
						return
					}

					atoms += ratoms
					goto retry

				}
			}
		}
	}

	// Must relocate: first shrink the original block to a single atom
	// (reclaiming its tail), then write the content elsewhere.
	if atoms > 1 {
		if err = a.realloc(handle, nil); err != nil {
			return
		}
	}

	var newH int64
	if newH, err = a.alloc(b, cc); err != nil {
		return err
	}

	// Turn the original atom into a relocation header pointing at newH.
	prb := buffer.CGet(16)
	defer buffer.Put(prb)
	rb := *prb
	rb[0] = tagUsedRelocated
	h2b(rb[1:], newH)
	if err = a.writeAt(rb[:], h2off(handle)); err != nil {
		return
	}

	return a.writeUsedBlock(newH, cc, b)
}

// writeAt writes b at off, mapping a short write to io.ErrShortWrite.
func (a *Allocator) writeAt(b []byte, off int64) (err error) {
	var n int
	if n, err = a.f.WriteAt(b, off); err != nil {
		return
	}

	if n != len(b) {
		err = io.ErrShortWrite
	}
	return
}

// write concatenates the parts of b into one scratch buffer and stores the
// result at off with a single writeAt call.
func (a *Allocator) write(off int64, b ...[]byte) (err error) {
	rq := 0
	for _, part := range b {
		rq += len(part)
	}
	pbuf := buffer.Get(rq)
	defer buffer.Put(pbuf)
	buf := *pbuf
	buf = buf[:0]
	for _, part := range b {
		buf = append(buf, part...)
	}
	return a.writeAt(buf, off)
}

// read fills b from off; any short read is reported as file corruption
// (ErrILSEQ/ErrOther), wrapping the underlying error if present.
func (a *Allocator) read(b []byte, off int64) (err error) {
	var rn int
	if rn, err = a.f.ReadAt(b, off); rn != len(b) {
		return &ErrILSEQ{Type: ErrOther, Off: off, More: err}
	}

	return nil
}

// nfo returns h's tag. If it's a free block then return also (s)ize (in
// atoms), (p)rev and (n)ext. If it's a used block then only (s)ize is returned
// (again in atoms). If it's a used relocate block then (n)ext is set to the
// relocation target handle.
func (a *Allocator) nfo(h int64) (tag byte, s, p, n int64, err error) {
	off := h2off(h)
	// 22 bytes covers the largest header (long free block: tag+S+P+N).
	rq := int64(22)
	sz, err := a.f.Size()
	if err != nil {
		return
	}

	if off+rq >= sz {
		// Near EOF: shorten the read; at least 15 bytes must exist for
		// any valid block header.
		if rq = sz - off; rq < 15 {
			err = io.ErrUnexpectedEOF
			return
		}
	}

	pbuf := buffer.Get(22)
	defer buffer.Put(pbuf)
	buf := *pbuf
	if err = a.read(buf[:rq], off); err != nil {
		return
	}

	switch tag = buf[0]; tag {
	default:
		// Short used block: tag is the content length.
		s = int64(n2atoms(int(tag)))
	case tagUsedLong:
		s = int64(n2atoms(m2n(int(buf[1])<<8 | int(buf[2]))))
	case tagFreeLong:
		if rq < 22 {
			err = io.ErrUnexpectedEOF
			return
		}

		s, p, n = b2h(buf[1:]), b2h(buf[8:]), b2h(buf[15:])
	case tagUsedRelocated:
		s, n = 1, b2h(buf[1:])
	case tagFreeShort:
		s, p, n = 1, b2h(buf[1:]), b2h(buf[8:])
	}
	return
}

// leftNfo returns nfo for h's left neighbor if h > 1 and the left neighbor is
// a free block. Otherwise all zero values are returned instead.
func (a *Allocator) leftNfo(h int64) (tag byte, s, p, n int64, err error) {
	if !(h > 1) { // no left neighbour exists
		return
	}

	pbuf := buffer.Get(8)
	defer buffer.Put(pbuf)
	buf := *pbuf
	off := h2off(h)
	// Read the last 8 bytes of the left neighbour; its final byte is the
	// tail tag and, for a long free block, the preceding 7 bytes are its
	// size in atoms.
	if err = a.read(buf[:], off-8); err != nil {
		return
	}

	switch tag := buf[7]; tag {
	case tagFreeShort:
		return a.nfo(h - 1)
	case tagFreeLong:
		return a.nfo(h - b2h(buf[:]))
	}
	// Left neighbour is not free: report zero values.
	return
}

// Set h.prev = p
func (a *Allocator) prev(h, p int64) (err error) {
	pb := buffer.Get(7)
	defer buffer.Put(pb)
	b := *pb
	off := h2off(h)
	if err = a.read(b[:1], off); err != nil {
		return
	}

	switch tag := b[0]; tag {
	default:
		return &ErrILSEQ{Type: ErrExpFreeTag, Off: off, Arg: int64(tag)}
	case tagFreeShort:
		off += 1 // P lives at bytes 1..7 of a short free block
	case tagFreeLong:
		off += 8 // P lives at bytes 8..14 of a long free block
	}
	return a.writeAt(h2b(b[:7], p), off)
}

// Set h.next = n
func (a *Allocator) next(h, n int64) (err error) {
	pb := buffer.Get(7)
	defer buffer.Put(pb)
	b := *pb
	off := h2off(h)
	if err = a.read(b[:1], off); err != nil {
		return
	}

	switch tag := b[0]; tag {
	default:
		return &ErrILSEQ{Type: ErrExpFreeTag, Off: off, Arg: int64(tag)}
	case tagFreeShort:
		off += 8 // N lives at bytes 8..14 of a short free block
	case tagFreeLong:
		off += 15 // N lives at bytes 15..21 of a long free block
	}
	return a.writeAt(h2b(b[:7], n), off)
}

// Make the filer image @h a free block.
1093func (a *Allocator) makeFree(h, atoms, prev, next int64) (err error) { 1094 pbuf := buffer.Get(22) 1095 defer buffer.Put(pbuf) 1096 buf := *pbuf 1097 switch { 1098 case atoms == 1: 1099 buf[0], buf[15] = tagFreeShort, tagFreeShort 1100 h2b(buf[1:], prev) 1101 h2b(buf[8:], next) 1102 if err = a.write(h2off(h), buf[:16]); err != nil { 1103 return 1104 } 1105 default: 1106 1107 buf[0] = tagFreeLong 1108 h2b(buf[1:], atoms) 1109 h2b(buf[8:], prev) 1110 h2b(buf[15:], next) 1111 if err = a.write(h2off(h), buf[:22]); err != nil { 1112 return 1113 } 1114 1115 h2b(buf[:], atoms) 1116 buf[7] = tagFreeLong 1117 if err = a.write(h2off(h+atoms)-8, buf[:8]); err != nil { 1118 return 1119 } 1120 } 1121 if prev != 0 { 1122 if err = a.next(prev, h); err != nil { 1123 return 1124 } 1125 } 1126 1127 if next != 0 { 1128 err = a.prev(next, h) 1129 } 1130 return 1131} 1132 1133func (a *Allocator) makeUsedBlock(dst []byte, b []byte) (w []byte, rqAtoms int, cc byte, err error) { 1134 cc = tagNotCompressed 1135 w = b 1136 1137 var n int 1138 if n = len(b); n > maxRq { 1139 return nil, 0, 0, &ErrINVAL{"Allocator.makeUsedBlock: content size out of limits", n} 1140 } 1141 1142 rqAtoms = n2atoms(n) 1143 if a.Compress && n > 14 { // attempt compression 1144 if dst, err = zappy.Encode(dst, b); err != nil { 1145 return 1146 } 1147 1148 n2 := len(dst) 1149 if rqAtoms2 := n2atoms(n2); rqAtoms2 < rqAtoms { // compression saved at least a single atom 1150 w, rqAtoms, cc = dst, rqAtoms2, tagCompressed 1151 } 1152 } 1153 return 1154} 1155 1156func (a *Allocator) writeUsedBlock(h int64, cc byte, b []byte) (err error) { 1157 n := len(b) 1158 rq := n2atoms(n) << 4 1159 pbuf := buffer.Get(rq) 1160 defer buffer.Put(pbuf) 1161 buf := *pbuf 1162 switch n <= maxShort { 1163 case true: 1164 buf[0] = byte(n) 1165 copy(buf[1:], b) 1166 case false: 1167 m := n2m(n) 1168 buf[0], buf[1], buf[2] = tagUsedLong, byte(m>>8), byte(m) 1169 copy(buf[3:], b) 1170 } 1171 if p := n2padding(n); p != 0 { 1172 
copy(buf[rq-1-p:], zeros[:]) 1173 } 1174 buf[rq-1] = cc 1175 return a.writeAt(buf, h2off(h)) 1176} 1177 1178func (a *Allocator) verifyUnused(h, totalAtoms int64, tag byte, log func(error) bool, fast bool) (atoms, prev, next int64, err error) { 1179 switch tag { 1180 default: 1181 panic("internal error") 1182 case tagFreeShort: 1183 var b [16]byte 1184 off := h2off(h) 1185 if err = a.read(b[:], off); err != nil { 1186 return 1187 } 1188 1189 if b[15] != tagFreeShort { 1190 err = &ErrILSEQ{Type: ErrShortFreeTailTag, Off: off, Arg: int64(b[15])} 1191 log(err) 1192 return 1193 } 1194 1195 atoms, prev, next = 1, b2h(b[1:]), b2h(b[8:]) 1196 case tagFreeLong: 1197 var b [22]byte 1198 off := h2off(h) 1199 if err = a.read(b[:], off); err != nil { 1200 return 1201 } 1202 1203 atoms, prev, next = b2h(b[1:]), b2h(b[8:]), b2h(b[15:]) 1204 if fast { 1205 return 1206 } 1207 1208 if atoms < 2 { 1209 err = &ErrILSEQ{Type: ErrLongFreeBlkTooShort, Off: off, Arg: atoms} 1210 break 1211 } 1212 1213 if h+atoms-1 > totalAtoms { 1214 err = &ErrILSEQ{Type: ErrLongFreeBlkTooLong, Off: off, Arg: atoms} 1215 break 1216 } 1217 1218 if prev > totalAtoms { 1219 err = &ErrILSEQ{Type: ErrLongFreePrevBeyondEOF, Off: off, Arg: next} 1220 break 1221 } 1222 1223 if next > totalAtoms { 1224 err = &ErrILSEQ{Type: ErrLongFreeNextBeyondEOF, Off: off, Arg: next} 1225 break 1226 } 1227 1228 toff := h2off(h+atoms) - 8 1229 if err = a.read(b[:8], toff); err != nil { 1230 return 1231 } 1232 1233 if b[7] != tag { 1234 err = &ErrILSEQ{Type: ErrLongFreeTailTag, Off: off, Arg: int64(b[7])} 1235 break 1236 } 1237 1238 if s2 := b2h(b[:]); s2 != atoms { 1239 err = &ErrILSEQ{Type: ErrVerifyTailSize, Off: off, Arg: atoms, Arg2: s2} 1240 break 1241 } 1242 1243 } 1244 if err != nil { 1245 log(err) 1246 } 1247 return 1248} 1249 1250func (a *Allocator) verifyUsed(h, totalAtoms int64, tag byte, buf, ubuf []byte, log func(error) bool, fast bool) (compressed bool, dlen int, atoms, link int64, err error) { 1251 var ( 1252 
padding int 1253 doff int64 1254 padZeros [15]byte 1255 tailBuf [16]byte 1256 ) 1257 1258 switch tag { 1259 default: // Short used 1260 dlen = int(tag) 1261 atoms = int64((dlen+1)/16) + 1 1262 padding = 15 - (dlen+1)%16 1263 doff = h2off(h) + 1 1264 case tagUsedLong: 1265 off := h2off(h) + 1 1266 var b2 [2]byte 1267 if err = a.read(b2[:], off); err != nil { 1268 return 1269 } 1270 1271 dlen = m2n(int(b2[0])<<8 | int(b2[1])) 1272 atoms = int64((dlen+3)/16) + 1 1273 padding = 15 - (dlen+3)%16 1274 doff = h2off(h) + 3 1275 case tagUsedRelocated: 1276 dlen = 7 1277 atoms = 1 1278 padding = 7 1279 doff = h2off(h) + 1 1280 case tagFreeShort, tagFreeLong: 1281 panic("internal error") 1282 } 1283 1284 if fast { 1285 if tag == tagUsedRelocated { 1286 dlen = 0 1287 if err = a.read(buf[:7], doff); err != nil { 1288 return 1289 } 1290 1291 link = b2h(buf) 1292 } 1293 1294 return false, dlen, atoms, link, nil 1295 } 1296 1297 if ok := h+atoms-1 <= totalAtoms; !ok { // invalid last block 1298 err = &ErrILSEQ{Type: ErrVerifyUsedSpan, Off: h2off(h), Arg: atoms} 1299 log(err) 1300 return 1301 } 1302 1303 tailsz := 1 + padding 1304 off := h2off(h) + 16*atoms - int64(tailsz) 1305 if err = a.read(tailBuf[:tailsz], off); err != nil { 1306 return false, 0, 0, 0, err 1307 } 1308 1309 if ok := bytes.Equal(padZeros[:padding], tailBuf[:padding]); !ok { 1310 err = &ErrILSEQ{Type: ErrVerifyPadding, Off: h2off(h)} 1311 log(err) 1312 return 1313 } 1314 1315 var cc byte 1316 switch cc = tailBuf[padding]; cc { 1317 default: 1318 err = &ErrILSEQ{Type: ErrTailTag, Off: h2off(h)} 1319 log(err) 1320 return 1321 case tagCompressed: 1322 compressed = true 1323 if tag == tagUsedRelocated { 1324 err = &ErrILSEQ{Type: ErrTailTag, Off: h2off(h)} 1325 log(err) 1326 return 1327 } 1328 1329 fallthrough 1330 case tagNotCompressed: 1331 if err = a.read(buf[:dlen], doff); err != nil { 1332 return false, 0, 0, 0, err 1333 } 1334 } 1335 1336 if cc == tagCompressed { 1337 if ubuf, err = zappy.Decode(ubuf, 
buf[:dlen]); err != nil || len(ubuf) > maxRq { 1338 err = &ErrILSEQ{Type: ErrDecompress, Off: h2off(h)} 1339 log(err) 1340 return 1341 } 1342 1343 dlen = len(ubuf) 1344 } 1345 1346 if tag == tagUsedRelocated { 1347 link = b2h(buf) 1348 if link == 0 { 1349 err = &ErrILSEQ{Type: ErrNullReloc, Off: h2off(h)} 1350 log(err) 1351 return 1352 } 1353 1354 if link > totalAtoms { // invalid last block 1355 err = &ErrILSEQ{Type: ErrRelocBeyondEOF, Off: h2off(h), Arg: link} 1356 log(err) 1357 return 1358 } 1359 } 1360 1361 return 1362} 1363 1364var nolog = func(error) bool { return false } 1365 1366// Verify attempts to find any structural errors in a Filer wrt the 1367// organization of it as defined by Allocator. 'bitmap' is a scratch pad for 1368// necessary bookkeeping and will grow to at most to Allocator's 1369// Filer.Size()/128 (0,78%). Any problems found are reported to 'log' except 1370// non verify related errors like disk read fails etc. If 'log' returns false 1371// or the error doesn't allow to (reliably) continue, the verification process 1372// is stopped and an error is returned from the Verify function. Passing a nil 1373// log works like providing a log function always returning false. Any 1374// non-structural errors, like for instance Filer read errors, are NOT reported 1375// to 'log', but returned as the Verify's return value, because Verify cannot 1376// proceed in such cases. Verify returns nil only if it fully completed 1377// verifying Allocator's Filer without detecting any error. 1378// 1379// It is recommended to limit the number reported problems by returning false 1380// from 'log' after reaching some limit. Huge and corrupted DB can produce an 1381// overwhelming error report dataset. 1382// 1383// The verifying process will scan the whole DB at least 3 times (a trade 1384// between processing space and time consumed). It doesn't read the content of 1385// free blocks above the head/tail info bytes. 
If the 3rd phase detects lost 1386// free space, then a 4th scan (a faster one) is performed to precisely report 1387// all of them. 1388// 1389// If the DB/Filer to be verified is reasonably small, respective if its 1390// size/128 can comfortably fit within process's free memory, then it is 1391// recommended to consider using a MemFiler for the bit map. 1392// 1393// Statistics are returned via 'stats' if non nil. The statistics are valid 1394// only if Verify succeeded, ie. it didn't reported anything to log and it 1395// returned a nil error. 1396func (a *Allocator) Verify(bitmap Filer, log func(error) bool, stats *AllocStats) (err error) { 1397 if log == nil { 1398 log = nolog 1399 } 1400 1401 n, err := bitmap.Size() 1402 if err != nil { 1403 return 1404 } 1405 1406 if n != 0 { 1407 return &ErrINVAL{"Allocator.Verify: bit map initial size non zero (%d)", n} 1408 } 1409 1410 var bits int64 1411 bitMask := [8]byte{1, 2, 4, 8, 16, 32, 64, 128} 1412 byteBuf := []byte{0} 1413 1414 //DONE 1415 // +performance, this implementation is hopefully correct but _very_ 1416 // naive, probably good as a prototype only. Use maybe a MemFiler 1417 // "cache" etc. 1418 // ---- 1419 // Turns out the OS caching is as effective as it can probably get. 
1420 bit := func(on bool, h int64) (wasOn bool, err error) { 1421 m := bitMask[h&7] 1422 off := h >> 3 1423 var v byte 1424 sz, err := bitmap.Size() 1425 if err != nil { 1426 return 1427 } 1428 1429 if off < sz { 1430 if n, err := bitmap.ReadAt(byteBuf, off); n != 1 { 1431 return false, &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("Allocator.Verify - reading bitmap: %s", err)} 1432 } 1433 1434 v = byteBuf[0] 1435 } 1436 switch wasOn = v&m != 0; on { 1437 case true: 1438 if !wasOn { 1439 v |= m 1440 bits++ 1441 } 1442 case false: 1443 if wasOn { 1444 v ^= m 1445 bits-- 1446 } 1447 } 1448 byteBuf[0] = v 1449 if n, err := bitmap.WriteAt(byteBuf, off); n != 1 || err != nil { 1450 return false, &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("Allocator.Verify - writing bitmap: %s", err)} 1451 } 1452 1453 return 1454 } 1455 1456 // Phase 1 - sequentially scan a.f to reliably determine block 1457 // boundaries. Set a bit for every block start. 1458 var ( 1459 buf, ubuf [maxRq]byte 1460 prevH, h, atoms int64 1461 wasOn bool 1462 tag byte 1463 st = AllocStats{ 1464 AllocMap: map[int64]int64{}, 1465 FreeMap: map[int64]int64{}, 1466 } 1467 dlen int 1468 ) 1469 1470 fsz, err := a.f.Size() 1471 if err != nil { 1472 return 1473 } 1474 1475 ok := fsz%16 == 0 1476 totalAtoms := (fsz - fltSz) / atomLen 1477 if !ok { 1478 err = &ErrILSEQ{Type: ErrFileSize, Name: a.f.Name(), Arg: fsz} 1479 log(err) 1480 return 1481 } 1482 1483 st.TotalAtoms = totalAtoms 1484 prevTag := -1 1485 lastH := int64(-1) 1486 1487 for h = 1; h <= totalAtoms; h += atoms { 1488 prevH = h // For checking last block == used 1489 1490 off := h2off(h) 1491 if err = a.read(buf[:1], off); err != nil { 1492 return 1493 } 1494 1495 switch tag = buf[0]; tag { 1496 default: // Short used 1497 fallthrough 1498 case tagUsedLong, tagUsedRelocated: 1499 var compressed bool 1500 if compressed, dlen, atoms, _, err = a.verifyUsed(h, totalAtoms, tag, buf[:], ubuf[:], log, false); err != nil { 1501 return 1502 } 1503 
1504 if compressed { 1505 st.Compression++ 1506 } 1507 st.AllocAtoms += atoms 1508 switch { 1509 case tag == tagUsedRelocated: 1510 st.AllocMap[1]++ 1511 st.Relocations++ 1512 default: 1513 st.AllocMap[atoms]++ 1514 st.AllocBytes += int64(dlen) 1515 st.Handles++ 1516 } 1517 case tagFreeShort, tagFreeLong: 1518 if prevTag == tagFreeShort || prevTag == tagFreeLong { 1519 err = &ErrILSEQ{Type: ErrAdjacentFree, Off: h2off(lastH), Arg: off} 1520 log(err) 1521 return 1522 } 1523 1524 if atoms, _, _, err = a.verifyUnused(h, totalAtoms, tag, log, false); err != nil { 1525 return 1526 } 1527 1528 st.FreeMap[atoms]++ 1529 st.FreeAtoms += atoms 1530 } 1531 1532 if wasOn, err = bit(true, h); err != nil { 1533 return 1534 } 1535 1536 if wasOn { 1537 panic("internal error") 1538 } 1539 1540 prevTag = int(tag) 1541 lastH = h 1542 } 1543 1544 if totalAtoms != 0 && (tag == tagFreeShort || tag == tagFreeLong) { 1545 err = &ErrILSEQ{Type: ErrFreeTailBlock, Off: h2off(prevH)} 1546 log(err) 1547 return 1548 } 1549 1550 // Phase 2 - check used blocks, turn off the map bit for every used 1551 // block. 
1552 for h = 1; h <= totalAtoms; h += atoms { 1553 off := h2off(h) 1554 if err = a.read(buf[:1], off); err != nil { 1555 return 1556 } 1557 1558 var link int64 1559 switch tag = buf[0]; tag { 1560 default: // Short used 1561 fallthrough 1562 case tagUsedLong, tagUsedRelocated: 1563 if _, _, atoms, link, err = a.verifyUsed(h, totalAtoms, tag, buf[:], ubuf[:], log, true); err != nil { 1564 return 1565 } 1566 case tagFreeShort, tagFreeLong: 1567 if atoms, _, _, err = a.verifyUnused(h, totalAtoms, tag, log, true); err != nil { 1568 return 1569 } 1570 } 1571 1572 turnoff := true 1573 switch tag { 1574 case tagUsedRelocated: 1575 if err = a.read(buf[:1], h2off(link)); err != nil { 1576 return 1577 } 1578 1579 switch linkedTag := buf[0]; linkedTag { 1580 case tagFreeShort, tagFreeLong, tagUsedRelocated: 1581 err = &ErrILSEQ{Type: ErrInvalidRelocTarget, Off: off, Arg: link} 1582 log(err) 1583 return 1584 } 1585 1586 case tagFreeShort, tagFreeLong: 1587 turnoff = false 1588 } 1589 1590 if !turnoff { 1591 continue 1592 } 1593 1594 if wasOn, err = bit(false, h); err != nil { 1595 return 1596 } 1597 1598 if !wasOn { 1599 panic("internal error") 1600 } 1601 1602 } 1603 1604 // Phase 3 - using the flt check heads link to proper free blocks. For 1605 // every free block, walk the list, verify the {next, prev} links and 1606 // turn the respective map bit off. After processing all free lists, 1607 // the map bits count should be zero. Otherwise there are "lost" free 1608 // blocks. 
1609 1610 var prev, next, fprev, fnext int64 1611 rep := a.flt 1612 1613 for _, list := range rep { 1614 prev, next = 0, list.head 1615 for ; next != 0; prev, next = next, fnext { 1616 if wasOn, err = bit(false, next); err != nil { 1617 return 1618 } 1619 1620 if !wasOn { 1621 err = &ErrILSEQ{Type: ErrFLT, Off: h2off(next), Arg: h} 1622 log(err) 1623 return 1624 } 1625 1626 off := h2off(next) 1627 if err = a.read(buf[:1], off); err != nil { 1628 return 1629 } 1630 1631 switch tag = buf[0]; tag { 1632 default: 1633 panic("internal error") 1634 case tagFreeShort, tagFreeLong: 1635 if atoms, fprev, fnext, err = a.verifyUnused(next, totalAtoms, tag, log, true); err != nil { 1636 return 1637 } 1638 1639 if min := list.minSize; atoms < min { 1640 err = &ErrILSEQ{Type: ErrFLTSize, Off: h2off(next), Arg: atoms, Arg2: min} 1641 log(err) 1642 return 1643 } 1644 1645 if fprev != prev { 1646 err = &ErrILSEQ{Type: ErrFreeChaining, Off: h2off(next)} 1647 log(err) 1648 return 1649 } 1650 } 1651 } 1652 1653 } 1654 1655 if bits == 0 { // Verify succeeded 1656 if stats != nil { 1657 *stats = st 1658 } 1659 return 1660 } 1661 1662 // Phase 4 - if after phase 3 there are lost free blocks, report all of 1663 // them to 'log' 1664 for i := range ubuf { // setup zeros for compares 1665 ubuf[i] = 0 1666 } 1667 1668 var off, lh int64 1669 rem, err := bitmap.Size() 1670 if err != nil { 1671 return err 1672 } 1673 1674 for rem != 0 { 1675 rq := int(mathutil.MinInt64(64*1024, rem)) 1676 var n int 1677 if n, err = bitmap.ReadAt(buf[:rq], off); n != rq { 1678 return &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("bitmap ReadAt(size %d, off %#x): %s", rq, off, err)} 1679 } 1680 1681 if !bytes.Equal(buf[:rq], ubuf[:rq]) { 1682 for d, v := range buf[:rq] { 1683 if v != 0 { 1684 for i, m := range bitMask { 1685 if v&m != 0 { 1686 lh = 8*(off+int64(d)) + int64(i) 1687 err = &ErrILSEQ{Type: ErrLostFreeBlock, Off: h2off(lh)} 1688 log(err) 1689 return 1690 } 1691 } 1692 } 1693 } 1694 } 1695 1696 
off += int64(rq) 1697 rem -= int64(rq) 1698 } 1699 1700 return 1701} 1702 1703type fltSlot struct { 1704 head int64 1705 minSize int64 1706} 1707 1708func (f fltSlot) String() string { 1709 return fmt.Sprintf("head %#x, minSize %#x\n", f.head, f.minSize) 1710} 1711 1712type flt [14]fltSlot 1713 1714func (f *flt) init() { 1715 sz := 1 1716 for i := range *f { 1717 f[i].minSize, f[i].head = int64(sz), 0 1718 sz <<= 1 1719 } 1720 f[13].minSize = 4112 1721} 1722 1723func (f *flt) load(fi Filer, off int64) (err error) { 1724 pb := buffer.Get(fltSz) 1725 defer buffer.Put(pb) 1726 b := *pb 1727 if _, err = fi.ReadAt(b[:], off); err != nil { 1728 return 1729 } 1730 1731 for i := range *f { 1732 off := 8*i + 1 1733 f[i].head = b2h(b[off:]) 1734 } 1735 return 1736} 1737 1738func (f *flt) find(rq int) (h int64) { 1739 switch { 1740 case rq < 1: 1741 panic(rq) 1742 case rq >= maxFLTRq: 1743 h, f[13].head = f[13].head, 0 1744 return 1745 default: 1746 g := f[mathutil.Log2Uint16(uint16(rq)):] 1747 for i := range g { 1748 p := &g[i] 1749 if rq <= int(p.minSize) { 1750 if h = p.head; h != 0 { 1751 p.head = 0 1752 return 1753 } 1754 } 1755 } 1756 return 1757 } 1758} 1759 1760func (f *flt) head(atoms int64) (h int64) { 1761 switch { 1762 case atoms < 1: 1763 panic(atoms) 1764 case atoms >= maxFLTRq: 1765 return f[13].head 1766 default: 1767 lg := mathutil.Log2Uint16(uint16(atoms)) 1768 g := f[lg:] 1769 for i := range g { 1770 if atoms < g[i+1].minSize { 1771 return g[i].head 1772 } 1773 } 1774 panic("internal error") 1775 } 1776} 1777 1778func (f *flt) setHead(h, atoms int64, fi Filer) (err error) { 1779 switch { 1780 case atoms < 1: 1781 panic(atoms) 1782 case atoms >= maxFLTRq: 1783 pb := buffer.Get(7) 1784 defer buffer.Put(pb) 1785 b := *pb 1786 if _, err = fi.WriteAt(h2b(b[:], h), 8*13+1); err != nil { 1787 return 1788 } 1789 1790 f[13].head = h 1791 return 1792 default: 1793 lg := mathutil.Log2Uint16(uint16(atoms)) 1794 g := f[lg:] 1795 for i := range f { 1796 if atoms < 
g[i+1].minSize { 1797 pb := buffer.Get(7) 1798 defer buffer.Put(pb) 1799 b := *pb 1800 if _, err = fi.WriteAt(h2b(b[:], h), 8*int64(i+lg)+1); err != nil { 1801 return 1802 } 1803 1804 g[i].head = h 1805 return 1806 } 1807 } 1808 panic("internal error") 1809 } 1810} 1811 1812func (f *flt) String() string { 1813 a := []string{} 1814 for i, v := range *f { 1815 a = append(a, fmt.Sprintf("[%2d] %s", i, v)) 1816 } 1817 return strings.Join(a, "") 1818} 1819 1820type node struct { 1821 b []byte 1822 h int64 1823 prev, next *node 1824} 1825 1826type cache []*node 1827 1828func (c *cache) get(n int) *node { 1829 r, _ := c.get2(n) 1830 return r 1831} 1832 1833func (c *cache) get2(n int) (r *node, isZeroed bool) { 1834 s := *c 1835 lens := len(s) 1836 if lens == 0 { 1837 return &node{b: make([]byte, n, mathutil.Min(2*n, maxBuf))}, true 1838 } 1839 1840 i := sort.Search(lens, func(x int) bool { return len(s[x].b) >= n }) 1841 if i == lens { 1842 i-- 1843 s[i].b, isZeroed = make([]byte, n, mathutil.Min(2*n, maxBuf)), true 1844 } 1845 1846 r = s[i] 1847 r.b = r.b[:n] 1848 copy(s[i:], s[i+1:]) 1849 s = s[:lens-1] 1850 *c = s 1851 return 1852} 1853 1854func (c *cache) cget(n int) (r *node) { 1855 r, ok := c.get2(n) 1856 if ok { 1857 return 1858 } 1859 1860 for i := range r.b { 1861 r.b[i] = 0 1862 } 1863 return 1864} 1865 1866func (c *cache) size() (sz int64) { 1867 for _, n := range *c { 1868 sz += int64(cap(n.b)) 1869 } 1870 return 1871} 1872 1873func (c *cache) put(n *node) *node { 1874 s := *c 1875 n.b = n.b[:cap(n.b)] 1876 lenb := len(n.b) 1877 lens := len(s) 1878 i := sort.Search(lens, func(x int) bool { return len(s[x].b) >= lenb }) 1879 s = append(s, nil) 1880 copy(s[i+1:], s[i:]) 1881 s[i] = n 1882 *c = s 1883 return n 1884} 1885 1886type lst struct { 1887 front, back *node 1888} 1889 1890func (l *lst) pushFront(n *node) *node { 1891 if l.front == nil { 1892 l.front, l.back, n.prev, n.next = n, n, nil, nil 1893 return n 1894 } 1895 1896 n.prev, n.next, l.front.prev, 
l.front = nil, l.front, n, n 1897 return n 1898} 1899 1900func (l *lst) remove(n *node) *node { 1901 if n.prev == nil { 1902 l.front = n.next 1903 } else { 1904 n.prev.next = n.next 1905 } 1906 if n.next == nil { 1907 l.back = n.prev 1908 } else { 1909 n.next.prev = n.prev 1910 } 1911 n.prev, n.next = nil, nil 1912 return n 1913} 1914 1915func (l *lst) removeBack() *node { 1916 return l.remove(l.back) 1917} 1918 1919func (l *lst) moveToFront(n *node) *node { 1920 return l.pushFront(l.remove(n)) 1921} 1922 1923func (l *lst) size() (sz int64) { 1924 for n := l.front; n != nil; n = n.next { 1925 sz += int64(cap(n.b)) 1926 } 1927 return 1928} 1929 1930func cacheAudit(m map[int64]*node, l *lst) (err error) { 1931 cnt := 0 1932 for h, n := range m { 1933 if g, e := n.h, h; g != e { 1934 return fmt.Errorf("cacheAudit: invalid node handle %d != %d", g, e) 1935 } 1936 1937 if cnt, err = l.audit(n, true); err != nil { 1938 return 1939 } 1940 } 1941 1942 if g, e := cnt, len(m); g != e { 1943 return fmt.Errorf("cacheAudit: invalid cache size %d != %d", g, e) 1944 } 1945 1946 return 1947} 1948 1949func (l *lst) audit(n *node, onList bool) (cnt int, err error) { 1950 if !onList && (n.prev != nil || n.next != nil) { 1951 return -1, fmt.Errorf("lst.audit: free node with non nil linkage") 1952 } 1953 1954 if l.front == nil && l.back != nil || l.back == nil && l.front != nil { 1955 return -1, fmt.Errorf("lst.audit: one of .front/.back is nil while the other is non nil") 1956 } 1957 1958 if l.front == l.back && l.front != nil { 1959 x := l.front 1960 if x.prev != nil || x.next != nil { 1961 return -1, fmt.Errorf("lst.audit: single node has non nil linkage") 1962 } 1963 1964 if onList && x != n { 1965 return -1, fmt.Errorf("lst.audit: single node is alien") 1966 } 1967 } 1968 1969 seen := false 1970 var prev *node 1971 x := l.front 1972 for x != nil { 1973 cnt++ 1974 if x.prev != prev { 1975 return -1, fmt.Errorf("lst.audit: broken .prev linkage") 1976 } 1977 1978 if x == n { 1979 
seen = true 1980 } 1981 1982 prev = x 1983 x = x.next 1984 } 1985 1986 if prev != l.back { 1987 return -1, fmt.Errorf("lst.audit: broken .back linkage") 1988 } 1989 1990 if onList && !seen { 1991 return -1, fmt.Errorf("lst.audit: node missing in list") 1992 } 1993 1994 if !onList && seen { 1995 return -1, fmt.Errorf("lst.audit: node should not be on the list") 1996 } 1997 1998 return 1999} 2000