1// Copyright 2015 The etcd Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package wal 16 17import ( 18 "io" 19 "os" 20 "path/filepath" 21 "time" 22 23 "go.etcd.io/etcd/pkg/fileutil" 24 "go.etcd.io/etcd/wal/walpb" 25 "go.uber.org/zap" 26) 27 28// Repair tries to repair ErrUnexpectedEOF in the 29// last wal file by truncating. 30func Repair(lg *zap.Logger, dirpath string) bool { 31 if lg == nil { 32 lg = zap.NewNop() 33 } 34 f, err := openLast(lg, dirpath) 35 if err != nil { 36 return false 37 } 38 defer f.Close() 39 40 lg.Info("repairing", zap.String("path", f.Name())) 41 42 rec := &walpb.Record{} 43 decoder := newDecoder(f) 44 for { 45 lastOffset := decoder.lastOffset() 46 err := decoder.decode(rec) 47 switch err { 48 case nil: 49 // update crc of the decoder when necessary 50 switch rec.Type { 51 case crcType: 52 crc := decoder.crc.Sum32() 53 // current crc of decoder must match the crc of the record. 54 // do no need to match 0 crc, since the decoder is a new one at this case. 55 if crc != 0 && rec.Validate(crc) != nil { 56 return false 57 } 58 decoder.updateCRC(rec.Crc) 59 } 60 continue 61 62 case io.EOF: 63 lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF)) 64 return true 65 66 case io.ErrUnexpectedEOF: 67 bf, bferr := os.Create(f.Name() + ".broken") 68 if bferr != nil { 69 lg.Warn("failed to create backup file", zap.String("path", f.Name()+".broken"), zap.Error(bferr)) 70 return false 71 } 72 defer bf.Close() 73 74 if _, err = f.Seek(0, io.SeekStart); err != nil { 75 lg.Warn("failed to read file", zap.String("path", f.Name()), zap.Error(err)) 76 return false 77 } 78 79 if _, err = io.Copy(bf, f); err != nil { 80 lg.Warn("failed to copy", zap.String("from", f.Name()+".broken"), zap.String("to", f.Name()), zap.Error(err)) 81 return false 82 } 83 84 if err = f.Truncate(lastOffset); err != nil { 85 lg.Warn("failed to truncate", zap.String("path", f.Name()), zap.Error(err)) 86 return false 87 } 88 89 start := time.Now() 90 if err = fileutil.Fsync(f.File); err != nil { 91 lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err)) 92 return false 93 } 94 walFsyncSec.Observe(time.Since(start).Seconds()) 95 96 lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF)) 97 return true 98 99 default: 100 lg.Warn("failed to repair", zap.String("path", f.Name()), zap.Error(err)) 101 return false 102 } 103 } 104} 105 106// openLast opens the last wal file for read and write. 107func openLast(lg *zap.Logger, dirpath string) (*fileutil.LockedFile, error) { 108 names, err := readWALNames(lg, dirpath) 109 if err != nil { 110 return nil, err 111 } 112 last := filepath.Join(dirpath, names[len(names)-1]) 113 return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode) 114} 115