1// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package wal
16
17import (
18	"io"
19	"os"
20	"path/filepath"
21	"time"
22
23	"go.etcd.io/etcd/pkg/fileutil"
24	"go.etcd.io/etcd/wal/walpb"
25	"go.uber.org/zap"
26)
27
28// Repair tries to repair ErrUnexpectedEOF in the
29// last wal file by truncating.
30func Repair(lg *zap.Logger, dirpath string) bool {
31	f, err := openLast(lg, dirpath)
32	if err != nil {
33		return false
34	}
35	defer f.Close()
36
37	if lg != nil {
38		lg.Info("repairing", zap.String("path", f.Name()))
39	} else {
40		plog.Noticef("repairing %v", f.Name())
41	}
42
43	rec := &walpb.Record{}
44	decoder := newDecoder(f)
45	for {
46		lastOffset := decoder.lastOffset()
47		err := decoder.decode(rec)
48		switch err {
49		case nil:
50			// update crc of the decoder when necessary
51			switch rec.Type {
52			case crcType:
53				crc := decoder.crc.Sum32()
54				// current crc of decoder must match the crc of the record.
55				// do no need to match 0 crc, since the decoder is a new one at this case.
56				if crc != 0 && rec.Validate(crc) != nil {
57					return false
58				}
59				decoder.updateCRC(rec.Crc)
60			}
61			continue
62
63		case io.EOF:
64			if lg != nil {
65				lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
66			}
67			return true
68
69		case io.ErrUnexpectedEOF:
70			bf, bferr := os.Create(f.Name() + ".broken")
71			if bferr != nil {
72				if lg != nil {
73					lg.Warn("failed to create backup file", zap.String("path", f.Name()+".broken"), zap.Error(bferr))
74				} else {
75					plog.Errorf("could not repair %v, failed to create backup file", f.Name())
76				}
77				return false
78			}
79			defer bf.Close()
80
81			if _, err = f.Seek(0, io.SeekStart); err != nil {
82				if lg != nil {
83					lg.Warn("failed to read file", zap.String("path", f.Name()), zap.Error(err))
84				} else {
85					plog.Errorf("could not repair %v, failed to read file", f.Name())
86				}
87				return false
88			}
89
90			if _, err = io.Copy(bf, f); err != nil {
91				if lg != nil {
92					lg.Warn("failed to copy", zap.String("from", f.Name()+".broken"), zap.String("to", f.Name()), zap.Error(err))
93				} else {
94					plog.Errorf("could not repair %v, failed to copy file", f.Name())
95				}
96				return false
97			}
98
99			if err = f.Truncate(lastOffset); err != nil {
100				if lg != nil {
101					lg.Warn("failed to truncate", zap.String("path", f.Name()), zap.Error(err))
102				} else {
103					plog.Errorf("could not repair %v, failed to truncate file", f.Name())
104				}
105				return false
106			}
107
108			start := time.Now()
109			if err = fileutil.Fsync(f.File); err != nil {
110				if lg != nil {
111					lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err))
112				} else {
113					plog.Errorf("could not repair %v, failed to sync file", f.Name())
114				}
115				return false
116			}
117			walFsyncSec.Observe(time.Since(start).Seconds())
118
119			if lg != nil {
120				lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF))
121			}
122			return true
123
124		default:
125			if lg != nil {
126				lg.Warn("failed to repair", zap.String("path", f.Name()), zap.Error(err))
127			} else {
128				plog.Errorf("could not repair error (%v)", err)
129			}
130			return false
131		}
132	}
133}
134
135// openLast opens the last wal file for read and write.
136func openLast(lg *zap.Logger, dirpath string) (*fileutil.LockedFile, error) {
137	names, err := readWALNames(lg, dirpath)
138	if err != nil {
139		return nil, err
140	}
141	last := filepath.Join(dirpath, names[len(names)-1])
142	return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode)
143}
144