1// Copyright 2017 The etcd Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package backend 16 17import ( 18 "bytes" 19 "sort" 20) 21 22// txBuffer handles functionality shared between txWriteBuffer and txReadBuffer. 23type txBuffer struct { 24 buckets map[string]*bucketBuffer 25} 26 27func (txb *txBuffer) reset() { 28 for k, v := range txb.buckets { 29 if v.used == 0 { 30 // demote 31 delete(txb.buckets, k) 32 } 33 v.used = 0 34 } 35} 36 37// txWriteBuffer buffers writes of pending updates that have not yet committed. 38type txWriteBuffer struct { 39 txBuffer 40 seq bool 41} 42 43func (txw *txWriteBuffer) put(bucket, k, v []byte) { 44 txw.seq = false 45 txw.putSeq(bucket, k, v) 46} 47 48func (txw *txWriteBuffer) putSeq(bucket, k, v []byte) { 49 b, ok := txw.buckets[string(bucket)] 50 if !ok { 51 b = newBucketBuffer() 52 txw.buckets[string(bucket)] = b 53 } 54 b.add(k, v) 55} 56 57func (txw *txWriteBuffer) writeback(txr *txReadBuffer) { 58 for k, wb := range txw.buckets { 59 rb, ok := txr.buckets[k] 60 if !ok { 61 delete(txw.buckets, k) 62 txr.buckets[k] = wb 63 continue 64 } 65 if !txw.seq && wb.used > 1 { 66 // assume no duplicate keys 67 sort.Sort(wb) 68 } 69 rb.merge(wb) 70 } 71 txw.reset() 72} 73 74// txReadBuffer accesses buffered updates. 75type txReadBuffer struct{ txBuffer } 76 77func (txr *txReadBuffer) Range(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) { 78 if b := txr.buckets[string(bucketName)]; b != nil { 79 return b.Range(key, endKey, limit) 80 } 81 return nil, nil 82} 83 84func (txr *txReadBuffer) ForEach(bucketName []byte, visitor func(k, v []byte) error) error { 85 if b := txr.buckets[string(bucketName)]; b != nil { 86 return b.ForEach(visitor) 87 } 88 return nil 89} 90 91// unsafeCopy returns a copy of txReadBuffer, caller should acquire backend.readTx.RLock() 92func (txr *txReadBuffer) unsafeCopy() txReadBuffer { 93 txrCopy := txReadBuffer{ 94 txBuffer: txBuffer{ 95 buckets: make(map[string]*bucketBuffer, len(txr.txBuffer.buckets)), 96 }, 97 } 98 for bucketName, bucket := range txr.txBuffer.buckets { 99 txrCopy.txBuffer.buckets[bucketName] = bucket.Copy() 100 } 101 return txrCopy 102} 103 104type kv struct { 105 key []byte 106 val []byte 107} 108 109// bucketBuffer buffers key-value pairs that are pending commit. 110type bucketBuffer struct { 111 buf []kv 112 // used tracks number of elements in use so buf can be reused without reallocation. 113 used int 114} 115 116func newBucketBuffer() *bucketBuffer { 117 return &bucketBuffer{buf: make([]kv, 512), used: 0} 118} 119 120func (bb *bucketBuffer) Range(key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte) { 121 f := func(i int) bool { return bytes.Compare(bb.buf[i].key, key) >= 0 } 122 idx := sort.Search(bb.used, f) 123 if idx < 0 { 124 return nil, nil 125 } 126 if len(endKey) == 0 { 127 if bytes.Equal(key, bb.buf[idx].key) { 128 keys = append(keys, bb.buf[idx].key) 129 vals = append(vals, bb.buf[idx].val) 130 } 131 return keys, vals 132 } 133 if bytes.Compare(endKey, bb.buf[idx].key) <= 0 { 134 return nil, nil 135 } 136 for i := idx; i < bb.used && int64(len(keys)) < limit; i++ { 137 if bytes.Compare(endKey, bb.buf[i].key) <= 0 { 138 break 139 } 140 keys = append(keys, bb.buf[i].key) 141 vals = append(vals, bb.buf[i].val) 142 } 143 return keys, vals 144} 145 146func (bb *bucketBuffer) ForEach(visitor func(k, v []byte) error) error { 147 for i := 0; i < bb.used; i++ { 148 if err := visitor(bb.buf[i].key, bb.buf[i].val); err != nil { 149 return err 150 } 151 } 152 return nil 153} 154 155func (bb *bucketBuffer) add(k, v []byte) { 156 bb.buf[bb.used].key, bb.buf[bb.used].val = k, v 157 bb.used++ 158 if bb.used == len(bb.buf) { 159 buf := make([]kv, (3*len(bb.buf))/2) 160 copy(buf, bb.buf) 161 bb.buf = buf 162 } 163} 164 165// merge merges data from bbsrc into bb. 166func (bb *bucketBuffer) merge(bbsrc *bucketBuffer) { 167 for i := 0; i < bbsrc.used; i++ { 168 bb.add(bbsrc.buf[i].key, bbsrc.buf[i].val) 169 } 170 if bb.used == bbsrc.used { 171 return 172 } 173 if bytes.Compare(bb.buf[(bb.used-bbsrc.used)-1].key, bbsrc.buf[0].key) < 0 { 174 return 175 } 176 177 sort.Stable(bb) 178 179 // remove duplicates, using only newest update 180 widx := 0 181 for ridx := 1; ridx < bb.used; ridx++ { 182 if !bytes.Equal(bb.buf[ridx].key, bb.buf[widx].key) { 183 widx++ 184 } 185 bb.buf[widx] = bb.buf[ridx] 186 } 187 bb.used = widx + 1 188} 189 190func (bb *bucketBuffer) Len() int { return bb.used } 191func (bb *bucketBuffer) Less(i, j int) bool { 192 return bytes.Compare(bb.buf[i].key, bb.buf[j].key) < 0 193} 194func (bb *bucketBuffer) Swap(i, j int) { bb.buf[i], bb.buf[j] = bb.buf[j], bb.buf[i] } 195 196func (bb *bucketBuffer) Copy() *bucketBuffer { 197 bbCopy := bucketBuffer{ 198 buf: make([]kv, len(bb.buf)), 199 used: bb.used, 200 } 201 copy(bbCopy.buf, bb.buf) 202 return &bbCopy 203} 204