1// Copyright 2017 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package backend
16
17import (
18	"bytes"
19	"sort"
20)
21
// txBuffer handles functionality shared between txWriteBuffer and txReadBuffer.
// Both of those types embed it.
type txBuffer struct {
	// buckets maps a bucket name to the buffer holding that bucket's entries.
	buckets map[string]*bucketBuffer
}
26
27func (txb *txBuffer) reset() {
28	for k, v := range txb.buckets {
29		if v.used == 0 {
30			// demote
31			delete(txb.buckets, k)
32		}
33		v.used = 0
34	}
35}
36
// txWriteBuffer buffers writes of pending updates that have not yet committed.
type txWriteBuffer struct {
	txBuffer
	// seq is cleared by put and left untouched by putSeq. While it is false,
	// writeback does not assume the buffered entries are already in key order.
	seq bool
}
42
43func (txw *txWriteBuffer) put(bucket, k, v []byte) {
44	txw.seq = false
45	txw.putSeq(bucket, k, v)
46}
47
48func (txw *txWriteBuffer) putSeq(bucket, k, v []byte) {
49	b, ok := txw.buckets[string(bucket)]
50	if !ok {
51		b = newBucketBuffer()
52		txw.buckets[string(bucket)] = b
53	}
54	b.add(k, v)
55}
56
57func (txw *txWriteBuffer) writeback(txr *txReadBuffer) {
58	for k, wb := range txw.buckets {
59		rb, ok := txr.buckets[k]
60		if !ok {
61			delete(txw.buckets, k)
62			txr.buckets[k] = wb
63			continue
64		}
65		if !txw.seq && wb.used > 1 {
66			// assume no duplicate keys
67			sort.Sort(wb)
68		}
69		rb.merge(wb)
70	}
71	txw.reset()
72}
73
// txReadBuffer accesses buffered updates.
// It layers the read-side methods (Range, ForEach, unsafeCopy) on top of the
// embedded txBuffer.
type txReadBuffer struct{ txBuffer }
76
77func (txr *txReadBuffer) Range(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
78	if b := txr.buckets[string(bucketName)]; b != nil {
79		return b.Range(key, endKey, limit)
80	}
81	return nil, nil
82}
83
84func (txr *txReadBuffer) ForEach(bucketName []byte, visitor func(k, v []byte) error) error {
85	if b := txr.buckets[string(bucketName)]; b != nil {
86		return b.ForEach(visitor)
87	}
88	return nil
89}
90
91// unsafeCopy returns a copy of txReadBuffer, caller should acquire backend.readTx.RLock()
92func (txr *txReadBuffer) unsafeCopy() txReadBuffer {
93	txrCopy := txReadBuffer{
94		txBuffer: txBuffer{
95			buckets: make(map[string]*bucketBuffer, len(txr.txBuffer.buckets)),
96		},
97	}
98	for bucketName, bucket := range txr.txBuffer.buckets {
99		txrCopy.txBuffer.buckets[bucketName] = bucket.Copy()
100	}
101	return txrCopy
102}
103
// kv is a single buffered key-value pair.
type kv struct {
	key []byte
	val []byte
}
108
// bucketBuffer buffers key-value pairs that are pending commit.
// Its Len, Less and Swap methods order the in-use entries by key, which lets
// it be passed to the sort package.
type bucketBuffer struct {
	// buf is the backing storage; only the first `used` elements are live.
	buf []kv
	// used tracks number of elements in use so buf can be reused without reallocation.
	used int
}
115
116func newBucketBuffer() *bucketBuffer {
117	return &bucketBuffer{buf: make([]kv, 512), used: 0}
118}
119
120func (bb *bucketBuffer) Range(key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte) {
121	f := func(i int) bool { return bytes.Compare(bb.buf[i].key, key) >= 0 }
122	idx := sort.Search(bb.used, f)
123	if idx < 0 {
124		return nil, nil
125	}
126	if len(endKey) == 0 {
127		if bytes.Equal(key, bb.buf[idx].key) {
128			keys = append(keys, bb.buf[idx].key)
129			vals = append(vals, bb.buf[idx].val)
130		}
131		return keys, vals
132	}
133	if bytes.Compare(endKey, bb.buf[idx].key) <= 0 {
134		return nil, nil
135	}
136	for i := idx; i < bb.used && int64(len(keys)) < limit; i++ {
137		if bytes.Compare(endKey, bb.buf[i].key) <= 0 {
138			break
139		}
140		keys = append(keys, bb.buf[i].key)
141		vals = append(vals, bb.buf[i].val)
142	}
143	return keys, vals
144}
145
146func (bb *bucketBuffer) ForEach(visitor func(k, v []byte) error) error {
147	for i := 0; i < bb.used; i++ {
148		if err := visitor(bb.buf[i].key, bb.buf[i].val); err != nil {
149			return err
150		}
151	}
152	return nil
153}
154
155func (bb *bucketBuffer) add(k, v []byte) {
156	bb.buf[bb.used].key, bb.buf[bb.used].val = k, v
157	bb.used++
158	if bb.used == len(bb.buf) {
159		buf := make([]kv, (3*len(bb.buf))/2)
160		copy(buf, bb.buf)
161		bb.buf = buf
162	}
163}
164
165// merge merges data from bbsrc into bb.
166func (bb *bucketBuffer) merge(bbsrc *bucketBuffer) {
167	for i := 0; i < bbsrc.used; i++ {
168		bb.add(bbsrc.buf[i].key, bbsrc.buf[i].val)
169	}
170	if bb.used == bbsrc.used {
171		return
172	}
173	if bytes.Compare(bb.buf[(bb.used-bbsrc.used)-1].key, bbsrc.buf[0].key) < 0 {
174		return
175	}
176
177	sort.Stable(bb)
178
179	// remove duplicates, using only newest update
180	widx := 0
181	for ridx := 1; ridx < bb.used; ridx++ {
182		if !bytes.Equal(bb.buf[ridx].key, bb.buf[widx].key) {
183			widx++
184		}
185		bb.buf[widx] = bb.buf[ridx]
186	}
187	bb.used = widx + 1
188}
189
190func (bb *bucketBuffer) Len() int { return bb.used }
191func (bb *bucketBuffer) Less(i, j int) bool {
192	return bytes.Compare(bb.buf[i].key, bb.buf[j].key) < 0
193}
194func (bb *bucketBuffer) Swap(i, j int) { bb.buf[i], bb.buf[j] = bb.buf[j], bb.buf[i] }
195
196func (bb *bucketBuffer) Copy() *bucketBuffer {
197	bbCopy := bucketBuffer{
198		buf:  make([]kv, len(bb.buf)),
199		used: bb.used,
200	}
201	copy(bbCopy.buf, bb.buf)
202	return &bbCopy
203}
204