1// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package raft
16
17import (
18	"errors"
19	"sync"
20
21	pb "github.com/coreos/etcd/raft/raftpb"
22)
23
24// ErrCompacted is returned by Storage.Entries/Compact when a requested
25// index is unavailable because it predates the last snapshot.
26var ErrCompacted = errors.New("requested index is unavailable due to compaction")
27
28// ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested
29// index is older than the existing snapshot.
30var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot")
31
32// ErrUnavailable is returned by Storage interface when the requested log entries
33// are unavailable.
34var ErrUnavailable = errors.New("requested entry at index is unavailable")
35
36// ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required
37// snapshot is temporarily unavailable.
38var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable")
39
40// Storage is an interface that may be implemented by the application
41// to retrieve log entries from storage.
42//
43// If any Storage method returns an error, the raft instance will
44// become inoperable and refuse to participate in elections; the
45// application is responsible for cleanup and recovery in this case.
46type Storage interface {
47	// InitialState returns the saved HardState and ConfState information.
48	InitialState() (pb.HardState, pb.ConfState, error)
49	// Entries returns a slice of log entries in the range [lo,hi).
50	// MaxSize limits the total size of the log entries returned, but
51	// Entries returns at least one entry if any.
52	Entries(lo, hi, maxSize uint64) ([]pb.Entry, error)
53	// Term returns the term of entry i, which must be in the range
54	// [FirstIndex()-1, LastIndex()]. The term of the entry before
55	// FirstIndex is retained for matching purposes even though the
56	// rest of that entry may not be available.
57	Term(i uint64) (uint64, error)
58	// LastIndex returns the index of the last entry in the log.
59	LastIndex() (uint64, error)
60	// FirstIndex returns the index of the first log entry that is
61	// possibly available via Entries (older entries have been incorporated
62	// into the latest Snapshot; if storage only contains the dummy entry the
63	// first log entry is not available).
64	FirstIndex() (uint64, error)
65	// Snapshot returns the most recent snapshot.
66	// If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable,
67	// so raft state machine could know that Storage needs some time to prepare
68	// snapshot and call Snapshot later.
69	Snapshot() (pb.Snapshot, error)
70}
71
72// MemoryStorage implements the Storage interface backed by an
73// in-memory array.
74type MemoryStorage struct {
75	// Protects access to all fields. Most methods of MemoryStorage are
76	// run on the raft goroutine, but Append() is run on an application
77	// goroutine.
78	sync.Mutex
79
80	hardState pb.HardState
81	snapshot  pb.Snapshot
82	// ents[i] has raft log position i+snapshot.Metadata.Index
83	ents []pb.Entry
84}
85
86// NewMemoryStorage creates an empty MemoryStorage.
87func NewMemoryStorage() *MemoryStorage {
88	return &MemoryStorage{
89		// When starting from scratch populate the list with a dummy entry at term zero.
90		ents: make([]pb.Entry, 1),
91	}
92}
93
94// InitialState implements the Storage interface.
95func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) {
96	return ms.hardState, ms.snapshot.Metadata.ConfState, nil
97}
98
99// SetHardState saves the current HardState.
100func (ms *MemoryStorage) SetHardState(st pb.HardState) error {
101	ms.hardState = st
102	return nil
103}
104
105// Entries implements the Storage interface.
106func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
107	ms.Lock()
108	defer ms.Unlock()
109	offset := ms.ents[0].Index
110	if lo <= offset {
111		return nil, ErrCompacted
112	}
113	if hi > ms.lastIndex()+1 {
114		raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex())
115	}
116	// only contains dummy entries.
117	if len(ms.ents) == 1 {
118		return nil, ErrUnavailable
119	}
120
121	ents := ms.ents[lo-offset : hi-offset]
122	return limitSize(ents, maxSize), nil
123}
124
125// Term implements the Storage interface.
126func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
127	ms.Lock()
128	defer ms.Unlock()
129	offset := ms.ents[0].Index
130	if i < offset {
131		return 0, ErrCompacted
132	}
133	if int(i-offset) >= len(ms.ents) {
134		return 0, ErrUnavailable
135	}
136	return ms.ents[i-offset].Term, nil
137}
138
139// LastIndex implements the Storage interface.
140func (ms *MemoryStorage) LastIndex() (uint64, error) {
141	ms.Lock()
142	defer ms.Unlock()
143	return ms.lastIndex(), nil
144}
145
146func (ms *MemoryStorage) lastIndex() uint64 {
147	return ms.ents[0].Index + uint64(len(ms.ents)) - 1
148}
149
150// FirstIndex implements the Storage interface.
151func (ms *MemoryStorage) FirstIndex() (uint64, error) {
152	ms.Lock()
153	defer ms.Unlock()
154	return ms.firstIndex(), nil
155}
156
157func (ms *MemoryStorage) firstIndex() uint64 {
158	return ms.ents[0].Index + 1
159}
160
161// Snapshot implements the Storage interface.
162func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
163	ms.Lock()
164	defer ms.Unlock()
165	return ms.snapshot, nil
166}
167
168// ApplySnapshot overwrites the contents of this Storage object with
169// those of the given snapshot.
170func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error {
171	ms.Lock()
172	defer ms.Unlock()
173
174	//handle check for old snapshot being applied
175	msIndex := ms.snapshot.Metadata.Index
176	snapIndex := snap.Metadata.Index
177	if msIndex >= snapIndex {
178		return ErrSnapOutOfDate
179	}
180
181	ms.snapshot = snap
182	ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}}
183	return nil
184}
185
186// CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and
187// can be used to reconstruct the state at that point.
188// If any configuration changes have been made since the last compaction,
189// the result of the last ApplyConfChange must be passed in.
190func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) {
191	ms.Lock()
192	defer ms.Unlock()
193	if i <= ms.snapshot.Metadata.Index {
194		return pb.Snapshot{}, ErrSnapOutOfDate
195	}
196
197	offset := ms.ents[0].Index
198	if i > ms.lastIndex() {
199		raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex())
200	}
201
202	ms.snapshot.Metadata.Index = i
203	ms.snapshot.Metadata.Term = ms.ents[i-offset].Term
204	if cs != nil {
205		ms.snapshot.Metadata.ConfState = *cs
206	}
207	ms.snapshot.Data = data
208	return ms.snapshot, nil
209}
210
211// Compact discards all log entries prior to compactIndex.
212// It is the application's responsibility to not attempt to compact an index
213// greater than raftLog.applied.
214func (ms *MemoryStorage) Compact(compactIndex uint64) error {
215	ms.Lock()
216	defer ms.Unlock()
217	offset := ms.ents[0].Index
218	if compactIndex <= offset {
219		return ErrCompacted
220	}
221	if compactIndex > ms.lastIndex() {
222		raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex())
223	}
224
225	i := compactIndex - offset
226	ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i)
227	ents[0].Index = ms.ents[i].Index
228	ents[0].Term = ms.ents[i].Term
229	ents = append(ents, ms.ents[i+1:]...)
230	ms.ents = ents
231	return nil
232}
233
234// Append the new entries to storage.
235// TODO (xiangli): ensure the entries are continuous and
236// entries[0].Index > ms.entries[0].Index
237func (ms *MemoryStorage) Append(entries []pb.Entry) error {
238	if len(entries) == 0 {
239		return nil
240	}
241
242	ms.Lock()
243	defer ms.Unlock()
244
245	first := ms.firstIndex()
246	last := entries[0].Index + uint64(len(entries)) - 1
247
248	// shortcut if there is no new entry.
249	if last < first {
250		return nil
251	}
252	// truncate compacted entries
253	if first > entries[0].Index {
254		entries = entries[first-entries[0].Index:]
255	}
256
257	offset := entries[0].Index - ms.ents[0].Index
258	switch {
259	case uint64(len(ms.ents)) > offset:
260		ms.ents = append([]pb.Entry{}, ms.ents[:offset]...)
261		ms.ents = append(ms.ents, entries...)
262	case uint64(len(ms.ents)) == offset:
263		ms.ents = append(ms.ents, entries...)
264	default:
265		raftLogger.Panicf("missing log entry [last: %d, append at: %d]",
266			ms.lastIndex(), entries[0].Index)
267	}
268	return nil
269}
270