1// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package wal
16
17import (
18	"io"
19	"os"
20	"path/filepath"
21	"time"
22
23	"go.etcd.io/etcd/pkg/fileutil"
24	"go.etcd.io/etcd/wal/walpb"
25	"go.uber.org/zap"
26)
27
28// Repair tries to repair ErrUnexpectedEOF in the
29// last wal file by truncating.
30func Repair(lg *zap.Logger, dirpath string) bool {
31	if lg == nil {
32		lg = zap.NewNop()
33	}
34	f, err := openLast(lg, dirpath)
35	if err != nil {
36		return false
37	}
38	defer f.Close()
39
40	lg.Info("repairing", zap.String("path", f.Name()))
41
42	rec := &walpb.Record{}
43	decoder := newDecoder(f)
44	for {
45		lastOffset := decoder.lastOffset()
46		err := decoder.decode(rec)
47		switch err {
48		case nil:
49			// update crc of the decoder when necessary
50			switch rec.Type {
51			case crcType:
52				crc := decoder.crc.Sum32()
53				// current crc of decoder must match the crc of the record.
54				// do no need to match 0 crc, since the decoder is a new one at this case.
55				if crc != 0 && rec.Validate(crc) != nil {
56					return false
57				}
58				decoder.updateCRC(rec.Crc)
59			}
60			continue
61
62		case io.EOF:
63			lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
64			return true
65
66		case io.ErrUnexpectedEOF:
67			bf, bferr := os.Create(f.Name() + ".broken")
68			if bferr != nil {
69				lg.Warn("failed to create backup file", zap.String("path", f.Name()+".broken"), zap.Error(bferr))
70				return false
71			}
72			defer bf.Close()
73
74			if _, err = f.Seek(0, io.SeekStart); err != nil {
75				lg.Warn("failed to read file", zap.String("path", f.Name()), zap.Error(err))
76				return false
77			}
78
79			if _, err = io.Copy(bf, f); err != nil {
80				lg.Warn("failed to copy", zap.String("from", f.Name()+".broken"), zap.String("to", f.Name()), zap.Error(err))
81				return false
82			}
83
84			if err = f.Truncate(lastOffset); err != nil {
85				lg.Warn("failed to truncate", zap.String("path", f.Name()), zap.Error(err))
86				return false
87			}
88
89			start := time.Now()
90			if err = fileutil.Fsync(f.File); err != nil {
91				lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err))
92				return false
93			}
94			walFsyncSec.Observe(time.Since(start).Seconds())
95
96			lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF))
97			return true
98
99		default:
100			lg.Warn("failed to repair", zap.String("path", f.Name()), zap.Error(err))
101			return false
102		}
103	}
104}
105
106// openLast opens the last wal file for read and write.
107func openLast(lg *zap.Logger, dirpath string) (*fileutil.LockedFile, error) {
108	names, err := readWALNames(lg, dirpath)
109	if err != nil {
110		return nil, err
111	}
112	last := filepath.Join(dirpath, names[len(names)-1])
113	return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode)
114}
115