1// Copyright 2015 The etcd Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package wal 16 17import ( 18 "io" 19 "os" 20 "path/filepath" 21 "time" 22 23 "go.etcd.io/etcd/pkg/fileutil" 24 "go.etcd.io/etcd/wal/walpb" 25 "go.uber.org/zap" 26) 27 28// Repair tries to repair ErrUnexpectedEOF in the 29// last wal file by truncating. 30func Repair(lg *zap.Logger, dirpath string) bool { 31 f, err := openLast(lg, dirpath) 32 if err != nil { 33 return false 34 } 35 defer f.Close() 36 37 if lg != nil { 38 lg.Info("repairing", zap.String("path", f.Name())) 39 } else { 40 plog.Noticef("repairing %v", f.Name()) 41 } 42 43 rec := &walpb.Record{} 44 decoder := newDecoder(f) 45 for { 46 lastOffset := decoder.lastOffset() 47 err := decoder.decode(rec) 48 switch err { 49 case nil: 50 // update crc of the decoder when necessary 51 switch rec.Type { 52 case crcType: 53 crc := decoder.crc.Sum32() 54 // current crc of decoder must match the crc of the record. 55 // do no need to match 0 crc, since the decoder is a new one at this case. 56 if crc != 0 && rec.Validate(crc) != nil { 57 return false 58 } 59 decoder.updateCRC(rec.Crc) 60 } 61 continue 62 63 case io.EOF: 64 if lg != nil { 65 lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF)) 66 } 67 return true 68 69 case io.ErrUnexpectedEOF: 70 bf, bferr := os.Create(f.Name() + ".broken") 71 if bferr != nil { 72 if lg != nil { 73 lg.Warn("failed to create backup file", zap.String("path", f.Name()+".broken"), zap.Error(bferr)) 74 } else { 75 plog.Errorf("could not repair %v, failed to create backup file", f.Name()) 76 } 77 return false 78 } 79 defer bf.Close() 80 81 if _, err = f.Seek(0, io.SeekStart); err != nil { 82 if lg != nil { 83 lg.Warn("failed to read file", zap.String("path", f.Name()), zap.Error(err)) 84 } else { 85 plog.Errorf("could not repair %v, failed to read file", f.Name()) 86 } 87 return false 88 } 89 90 if _, err = io.Copy(bf, f); err != nil { 91 if lg != nil { 92 lg.Warn("failed to copy", zap.String("from", f.Name()+".broken"), zap.String("to", f.Name()), zap.Error(err)) 93 } else { 94 plog.Errorf("could not repair %v, failed to copy file", f.Name()) 95 } 96 return false 97 } 98 99 if err = f.Truncate(lastOffset); err != nil { 100 if lg != nil { 101 lg.Warn("failed to truncate", zap.String("path", f.Name()), zap.Error(err)) 102 } else { 103 plog.Errorf("could not repair %v, failed to truncate file", f.Name()) 104 } 105 return false 106 } 107 108 start := time.Now() 109 if err = fileutil.Fsync(f.File); err != nil { 110 if lg != nil { 111 lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err)) 112 } else { 113 plog.Errorf("could not repair %v, failed to sync file", f.Name()) 114 } 115 return false 116 } 117 walFsyncSec.Observe(time.Since(start).Seconds()) 118 119 if lg != nil { 120 lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF)) 121 } 122 return true 123 124 default: 125 if lg != nil { 126 lg.Warn("failed to repair", zap.String("path", f.Name()), zap.Error(err)) 127 } else { 128 plog.Errorf("could not repair error (%v)", err) 129 } 130 return false 131 } 132 } 133} 134 135// openLast opens the last wal file for read and write. 136func openLast(lg *zap.Logger, dirpath string) (*fileutil.LockedFile, error) { 137 names, err := readWALNames(lg, dirpath) 138 if err != nil { 139 return nil, err 140 } 141 last := filepath.Join(dirpath, names[len(names)-1]) 142 return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode) 143} 144