1// Copyright 2014 The lldb Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Structural transactions.
6
7package lldb
8
9//DONE+ TransactionalMemoryFiler
10//	----
11//	Use NewRollbackFiler(myMemFiler, ...)
12
13/*
14
15bfBits: 3
16BenchmarkRollbackFiler	20000000	       102 ns/op	   9.73 MB/s
17
18bfBits: 4
19BenchmarkRollbackFiler	50000000	        55.7 ns/op	  17.95 MB/s
20
21bfBits: 5
22BenchmarkRollbackFiler	100000000	        32.2 ns/op	  31.06 MB/s
23
24bfBits: 6
25BenchmarkRollbackFiler	100000000	        20.6 ns/op	  48.46 MB/s
26
27bfBits: 7
28BenchmarkRollbackFiler	100000000	        15.1 ns/op	  66.12 MB/s
29
30bfBits: 8
31BenchmarkRollbackFiler	100000000	        10.5 ns/op	  95.66 MB/s
32
33bfBits: 9
34BenchmarkRollbackFiler	200000000	         8.02 ns/op	 124.74 MB/s
35
36bfBits: 10
37BenchmarkRollbackFiler	200000000	         9.25 ns/op	 108.09 MB/s
38
39bfBits: 11
40BenchmarkRollbackFiler	100000000	        11.7 ns/op	  85.47 MB/s
41
42bfBits: 12
43BenchmarkRollbackFiler	100000000	        17.2 ns/op	  57.99 MB/s
44
45bfBits: 13
46BenchmarkRollbackFiler	100000000	        32.7 ns/op	  30.58 MB/s
47
48bfBits: 14
49BenchmarkRollbackFiler	50000000	        39.6 ns/op	  25.27 MB/s
50
51*/
52
53import (
54	"fmt"
55	"io"
56	"sync"
57
58	"github.com/cznic/fileutil"
59	"github.com/cznic/internal/buffer"
60	"github.com/cznic/mathutil"
61)
62
63var (
64	_ Filer = &bitFiler{}      // Ensure bitFiler is a Filer.
65	_ Filer = &RollbackFiler{} // ditto
66)
67
68const (
69	bfBits = 12
70	bfSize = 1 << bfBits
71	bfMask = bfSize - 1
72)
73
74type (
75	bitPage struct {
76		prev, next *bitPage
77		pdata      *[]byte
78		data       []byte
79		dirty      bool
80	}
81
82	bitFilerMap map[int64]*bitPage
83
84	bitFiler struct {
85		parent Filer
86		m      bitFilerMap
87		size   int64
88	}
89)
90
91func newBitFiler(parent Filer) (f *bitFiler, err error) {
92	sz, err := parent.Size()
93	if err != nil {
94		return
95	}
96
97	return &bitFiler{parent: parent, m: bitFilerMap{}, size: sz}, nil
98}
99
100func (f *bitFiler) BeginUpdate() error { panic("internal error") }
101func (f *bitFiler) EndUpdate() error   { panic("internal error") }
102func (f *bitFiler) Rollback() error    { panic("internal error") }
103func (f *bitFiler) Sync() error        { panic("internal error") }
104
105func (f *bitFiler) Close() (err error)   { return }
106func (f *bitFiler) Name() string         { return fmt.Sprintf("%p.bitfiler", f) }
107func (f *bitFiler) Size() (int64, error) { return f.size, nil }
108
109func (f *bitFiler) free() {
110	for _, pg := range f.m {
111		buffer.Put(pg.pdata)
112	}
113}
114
115func (f *bitFiler) PunchHole(off, size int64) (err error) {
116	first := off >> bfBits
117	if off&bfMask != 0 {
118		first++
119	}
120	off += size - 1
121	last := off >> bfBits
122	if off&bfMask != 0 {
123		last--
124	}
125	if limit := f.size >> bfBits; last > limit {
126		last = limit
127	}
128	for pgI := first; pgI <= last; pgI++ {
129		pg := &bitPage{}
130		pg.pdata = buffer.CGet(bfSize)
131		pg.data = *pg.pdata
132		pg.dirty = true
133		f.m[pgI] = pg
134	}
135	return
136}
137
138func (f *bitFiler) ReadAt(b []byte, off int64) (n int, err error) {
139	avail := f.size - off
140	pgI := off >> bfBits
141	pgO := int(off & bfMask)
142	rem := len(b)
143	if int64(rem) >= avail {
144		rem = int(avail)
145		err = io.EOF
146	}
147	for rem != 0 && avail > 0 {
148		pg := f.m[pgI]
149		if pg == nil {
150			pg = &bitPage{}
151			pg.pdata = buffer.CGet(bfSize)
152			pg.data = *pg.pdata
153			if f.parent != nil {
154				_, err = f.parent.ReadAt(pg.data, off&^bfMask)
155				if err != nil && !fileutil.IsEOF(err) {
156					return
157				}
158
159				err = nil
160			}
161			f.m[pgI] = pg
162		}
163		nc := copy(b[:mathutil.Min(rem, bfSize)], pg.data[pgO:])
164		pgI++
165		pgO = 0
166		rem -= nc
167		n += nc
168		b = b[nc:]
169		off += int64(nc)
170	}
171	return
172}
173
174func (f *bitFiler) Truncate(size int64) (err error) {
175	switch {
176	case size < 0:
177		return &ErrINVAL{"Truncate size", size}
178	case size == 0:
179		f.m = bitFilerMap{}
180		f.size = 0
181		return
182	}
183
184	first := size >> bfBits
185	if size&bfMask != 0 {
186		first++
187	}
188	last := f.size >> bfBits
189	if f.size&bfMask != 0 {
190		last++
191	}
192	for ; first < last; first++ {
193		if bp, ok := f.m[first]; ok {
194			buffer.Put(bp.pdata)
195		}
196		delete(f.m, first)
197	}
198
199	f.size = size
200	return
201}
202
203func (f *bitFiler) WriteAt(b []byte, off int64) (n int, err error) {
204	off0 := off
205	pgI := off >> bfBits
206	pgO := int(off & bfMask)
207	n = len(b)
208	rem := n
209	var nc int
210	for rem != 0 {
211		pg := f.m[pgI]
212		if pg == nil {
213			pg = &bitPage{}
214			pg.pdata = buffer.CGet(bfSize)
215			pg.data = *pg.pdata
216			if f.parent != nil {
217				_, err = f.parent.ReadAt(pg.data, off&^bfMask)
218				if err != nil && !fileutil.IsEOF(err) {
219					return
220				}
221
222				err = nil
223			}
224			f.m[pgI] = pg
225		}
226		nc = copy(pg.data[pgO:], b)
227		pgI++
228		pg.dirty = true
229		pgO = 0
230		rem -= nc
231		b = b[nc:]
232		off += int64(nc)
233	}
234	f.size = mathutil.MaxInt64(f.size, off0+int64(n))
235	return
236}
237
238func (f *bitFiler) link() {
239	for pgI, pg := range f.m {
240		nx, ok := f.m[pgI+1]
241		if !ok || !nx.dirty {
242			continue
243		}
244
245		nx.prev, pg.next = pg, nx
246	}
247}
248
249func (f *bitFiler) dumpDirty(w io.WriterAt) (nwr int, err error) {
250	f.link()
251	for pgI, pg := range f.m {
252		if !pg.dirty {
253			continue
254		}
255
256		for pg.prev != nil && pg.prev.dirty {
257			pg = pg.prev
258			pgI--
259		}
260
261		for pg != nil && pg.dirty {
262			if _, err := w.WriteAt(pg.data, pgI<<bfBits); err != nil {
263				return 0, err
264			}
265
266			nwr++
267			pg.dirty = false
268			pg = pg.next
269			pgI++
270		}
271	}
272	return
273}
274
275// RollbackFiler is a Filer implementing structural transaction handling.
276// Structural transactions should be small and short lived because all non
277// committed data are held in memory until committed or discarded by a
278// Rollback.
279//
280// While using RollbackFiler, every intended update of the wrapped Filler, by
281// WriteAt, Truncate or PunchHole, _must_ be made within a transaction.
282// Attempts to do it outside of a transaction will return ErrPERM. OTOH,
283// invoking ReadAt outside of a transaction is not a problem.
284//
285// No nested transactions: All updates within a transaction are held in memory.
286// On a matching EndUpdate the updates held in memory are actually written to
287// the wrapped Filer.
288//
289// Nested transactions: Correct data will be seen from RollbackFiler when any
290// level of a nested transaction is rollbacked. The actual writing to the
291// wrapped Filer happens only when the outer most transaction nesting level is
292// closed.
293//
294// Invoking Rollback is an alternative to EndUpdate. It discards all changes
295// made at the current transaction level and returns the "state" (possibly not
296// yet persisted) of the Filer to what it was before the corresponding
297// BeginUpdate.
298//
299// During an open transaction, all reads (using ReadAt) are "dirty" reads,
300// seeing the uncommitted changes made to the Filer's data.
301//
302// Lldb databases should be based upon a RollbackFiler.
303//
304// With a wrapped MemFiler one gets transactional memory. With, for example a
305// wrapped disk based SimpleFileFiler it protects against at least some HW
306// errors - if Rollback is properly invoked on such failures and/or if there's
307// some WAL or 2PC or whatever other safe mechanism based recovery procedure
308// used by the client.
309//
310// The "real" writes to the wrapped Filer (or WAL instead) go through the
311// writerAt supplied to NewRollbackFiler.
312//
313// List of functions/methods which are recommended to be wrapped in a
314// BeginUpdate/EndUpdate structural transaction:
315//
316// 	Allocator.Alloc
317// 	Allocator.Free
318// 	Allocator.Realloc
319//
320//	CreateBTree
321// 	RemoveBTree
322// 	BTree.Clear
323// 	BTree.Delete
324// 	BTree.DeleteAny
325// 	BTree.Clear
326// 	BTree.Extract
327// 	BTree.Get (it can mutate the DB)
328// 	BTree.Put
329// 	BTree.Set
330//
331// NOTE: RollbackFiler is a generic solution intended to wrap Filers provided
332// by this package which do not implement any of the transactional methods.
333// RollbackFiler thus _does not_ invoke any of the transactional methods of its
334// wrapped Filer.
335//
336// RollbackFiler is safe for concurrent use by multiple goroutines.
337type RollbackFiler struct {
338	mu           sync.RWMutex
339	inCallbackMu sync.RWMutex
340	bitFiler     *bitFiler
341	checkpoint   func(int64) error
342	f            Filer
343	writerAt     io.WriterAt
344
345	// afterRollback, if not nil, is called after performing Rollback
346	// without errros.
347	afterRollback func() error
348	tlevel        int // transaction nesting level, 0 == not in transaction
349	closed        bool
350	inCallback    bool
351}
352
353// NewRollbackFiler returns a RollbackFiler wrapping f.
354//
355// The checkpoint parameter
356//
357// The checkpoint function is called after closing (by EndUpdate) the upper
358// most level open transaction if all calls of writerAt were successful and the
359// DB (or eg. a WAL) is thus now in a consistent state (virtually, in the ideal
360// world with no write caches, no HW failures, no process crashes, ...).
361//
362// NOTE: In, for example, a 2PC it is necessary to reflect also the sz
363// parameter as the new file size (as in the parameter to Truncate). All
364// changes were successfully written already by writerAt before invoking
365// checkpoint.
366//
367// The writerAt parameter
368//
369// The writerAt interface is used to commit the updates of the wrapped Filer.
370// If any invocation of writerAt fails then a non nil error will be returned
371// from EndUpdate and checkpoint will _not_ ne called.  Neither is necessary to
372// call Rollback. The rule of thumb: The [structural] transaction [level] is
373// closed by invoking exactly once one of EndUpdate _or_ Rollback.
374//
375// It is presumed that writerAt uses WAL or 2PC or whatever other safe
376// mechanism to physically commit the updates.
377//
378// Updates performed by invocations of writerAt are byte-precise, but not
379// necessarily maximum possible length precise. IOW, for example an update
380// crossing page boundaries may be performed by more than one writerAt
381// invocation.  No offset sorting is performed.  This may change if it proves
382// to be a problem. Such change would be considered backward compatible.
383//
384// NOTE: Using RollbackFiler, but failing to ever invoke a matching "closing"
385// EndUpdate after an "opening" BeginUpdate means neither writerAt or
386// checkpoint will ever get called - with all the possible data loss
387// consequences.
388func NewRollbackFiler(f Filer, checkpoint func(sz int64) error, writerAt io.WriterAt) (r *RollbackFiler, err error) {
389	if f == nil || checkpoint == nil || writerAt == nil {
390		return nil, &ErrINVAL{Src: "lldb.NewRollbackFiler, nil argument"}
391	}
392
393	return &RollbackFiler{
394		checkpoint: checkpoint,
395		f:          f,
396		writerAt:   writerAt,
397	}, nil
398}
399
400// Implements Filer.
401func (r *RollbackFiler) BeginUpdate() (err error) {
402	r.mu.Lock()
403	defer r.mu.Unlock()
404
405	parent := r.f
406	if r.tlevel != 0 {
407		parent = r.bitFiler
408	}
409	r.bitFiler, err = newBitFiler(parent)
410	if err != nil {
411		return
412	}
413
414	r.tlevel++
415	return
416}
417
418// Implements Filer.
419//
420// Close will return an error if not invoked at nesting level 0.  However, to
421// allow emergency closing from eg. a signal handler; if Close is invoked
422// within an open transaction(s), it rollbacks any non committed open
423// transactions and performs the Close operation.
424//
425// IOW: Regardless of the transaction nesting level the Close is always
426// performed but any uncommitted transaction data are lost.
427func (r *RollbackFiler) Close() (err error) {
428	r.mu.Lock()
429	defer r.mu.Unlock()
430
431	if r.closed {
432		return &ErrPERM{r.f.Name() + ": Already closed"}
433	}
434
435	r.closed = true
436	if err = r.f.Close(); err != nil {
437		return
438	}
439
440	if r.tlevel != 0 {
441		err = &ErrPERM{r.f.Name() + ": Close inside an open transaction"}
442	}
443
444	if r.bitFiler != nil {
445		r.bitFiler.free()
446		r.bitFiler = nil
447	}
448
449	return
450}
451
452// Implements Filer.
453func (r *RollbackFiler) EndUpdate() (err error) {
454	r.mu.Lock()
455	defer r.mu.Unlock()
456
457	if r.tlevel == 0 {
458		return &ErrPERM{r.f.Name() + " : EndUpdate outside of a transaction"}
459	}
460
461	sz, err := r.size() // Cannot call .Size() -> deadlock
462	if err != nil {
463		return
464	}
465
466	r.tlevel--
467	bf := r.bitFiler
468	parent := bf.parent
469	w := r.writerAt
470	if r.tlevel != 0 {
471		w = parent
472	}
473	nwr, err := bf.dumpDirty(w)
474	if err != nil {
475		return
476	}
477
478	switch {
479	case r.tlevel == 0:
480		defer func() {
481			r.bitFiler.free()
482			r.bitFiler = nil
483		}()
484
485		if nwr == 0 {
486			return
487		}
488
489		return r.checkpoint(sz)
490	default:
491		r.bitFiler.free()
492		r.bitFiler = parent.(*bitFiler)
493		sz, _ := bf.Size() // bitFiler.Size() never returns err != nil
494		return parent.Truncate(sz)
495	}
496}
497
498// Implements Filer.
499func (r *RollbackFiler) Name() string {
500	r.mu.RLock()
501	defer r.mu.RUnlock()
502
503	return r.f.Name()
504}
505
506// Implements Filer.
507func (r *RollbackFiler) PunchHole(off, size int64) error {
508	r.mu.Lock()
509	defer r.mu.Unlock()
510
511	if r.tlevel == 0 {
512		return &ErrPERM{r.f.Name() + ": PunchHole outside of a transaction"}
513	}
514
515	if off < 0 {
516		return &ErrINVAL{r.f.Name() + ": PunchHole off", off}
517	}
518
519	if size < 0 || off+size > r.bitFiler.size {
520		return &ErrINVAL{r.f.Name() + ": PunchHole size", size}
521	}
522
523	return r.bitFiler.PunchHole(off, size)
524}
525
526// Implements Filer.
527func (r *RollbackFiler) ReadAt(b []byte, off int64) (n int, err error) {
528	r.inCallbackMu.RLock()
529	defer r.inCallbackMu.RUnlock()
530	if !r.inCallback {
531		r.mu.RLock()
532		defer r.mu.RUnlock()
533	}
534	if r.tlevel == 0 {
535		return r.f.ReadAt(b, off)
536	}
537
538	return r.bitFiler.ReadAt(b, off)
539}
540
541// Implements Filer.
542func (r *RollbackFiler) Rollback() (err error) {
543	r.mu.Lock()
544	defer r.mu.Unlock()
545
546	if r.tlevel == 0 {
547		return &ErrPERM{r.f.Name() + ": Rollback outside of a transaction"}
548	}
549
550	if r.tlevel > 1 {
551		r.bitFiler.free()
552		r.bitFiler = r.bitFiler.parent.(*bitFiler)
553	}
554	r.tlevel--
555	if f := r.afterRollback; f != nil {
556		r.inCallbackMu.Lock()
557		r.inCallback = true
558		r.inCallbackMu.Unlock()
559		defer func() {
560			r.inCallbackMu.Lock()
561			r.inCallback = false
562			r.inCallbackMu.Unlock()
563		}()
564		return f()
565	}
566	return
567}
568
569func (r *RollbackFiler) size() (sz int64, err error) {
570	if r.tlevel == 0 {
571		return r.f.Size()
572	}
573
574	return r.bitFiler.Size()
575}
576
577// Implements Filer.
578func (r *RollbackFiler) Size() (sz int64, err error) {
579	r.mu.Lock()
580	defer r.mu.Unlock()
581
582	return r.size()
583}
584
585// Implements Filer.
586func (r *RollbackFiler) Sync() error {
587	r.mu.Lock()
588	defer r.mu.Unlock()
589
590	return r.f.Sync()
591}
592
593// Implements Filer.
594func (r *RollbackFiler) Truncate(size int64) error {
595	r.mu.Lock()
596	defer r.mu.Unlock()
597
598	if r.tlevel == 0 {
599		return &ErrPERM{r.f.Name() + ": Truncate outside of a transaction"}
600	}
601
602	return r.bitFiler.Truncate(size)
603}
604
605// Implements Filer.
606func (r *RollbackFiler) WriteAt(b []byte, off int64) (n int, err error) {
607	r.mu.Lock()
608	defer r.mu.Unlock()
609
610	if r.tlevel == 0 {
611		return 0, &ErrPERM{r.f.Name() + ": WriteAt outside of a transaction"}
612	}
613
614	return r.bitFiler.WriteAt(b, off)
615}
616