1// The panicwrap package provides functions for capturing and handling 2// panics in your application. It does this by re-executing the running 3// application and monitoring stderr for any panics. At the same time, 4// stdout/stderr/etc. are set to the same values so that data is shuttled 5// through properly, making the existence of panicwrap mostly transparent. 6// 7// Panics are only detected when the subprocess exits with a non-zero 8// exit status, since this is the only time panics are real. Otherwise, 9// "panic-like" output is ignored. 10package panicwrap 11 12import ( 13 "bytes" 14 "errors" 15 "io" 16 "os" 17 "os/exec" 18 "os/signal" 19 "runtime" 20 "sync/atomic" 21 "syscall" 22 "time" 23) 24 25const ( 26 DEFAULT_COOKIE_KEY = "cccf35992f8f3cd8d1d28f0109dd953e26664531" 27 DEFAULT_COOKIE_VAL = "7c28215aca87789f95b406b8dd91aa5198406750" 28) 29 30// HandlerFunc is the type called when a panic is detected. 31type HandlerFunc func(string) 32 33// WrapConfig is the configuration for panicwrap when wrapping an existing 34// binary. To get started, in general, you only need the BasicWrap function 35// that will set this up for you. However, for more customizability, 36// WrapConfig and Wrap can be used. 37type WrapConfig struct { 38 // Handler is the function called when a panic occurs. 39 Handler HandlerFunc 40 41 // The cookie key and value are used within environmental variables 42 // to tell the child process that it is already executing so that 43 // wrap doesn't re-wrap itself. 44 CookieKey string 45 CookieValue string 46 47 // If true, the panic will not be mirrored to the configured writer 48 // and will instead ONLY go to the handler. This lets you effectively 49 // hide panics from the end user. This is not recommended because if 50 // your handler fails, the panic is effectively lost. 51 HidePanic bool 52 53 // The amount of time that a process must exit within after detecting 54 // a panic header for panicwrap to assume it is a panic. Defaults to 55 // 300 milliseconds. 56 DetectDuration time.Duration 57 58 // The writer to send the stderr to. If this is nil, then it defaults 59 // to os.Stderr. 60 Writer io.Writer 61 62 // The writer to send stdout to. If this is nil, then it defaults to 63 // os.Stdout. 64 Stdout io.Writer 65 66 // Catch and igore these signals in the parent process, let the child 67 // handle them gracefully. 68 IgnoreSignals []os.Signal 69 70 // Catch these signals in the parent process and manually forward 71 // them to the child process. Some signals such as SIGINT are usually 72 // sent to the entire process group so setting it isn't necessary. Other 73 // signals like SIGTERM are only sent to the parent process and need 74 // to be forwarded. This defaults to empty. 75 ForwardSignals []os.Signal 76} 77 78// BasicWrap calls Wrap with the given handler function, using defaults 79// for everything else. See Wrap and WrapConfig for more information on 80// functionality and return values. 81func BasicWrap(f HandlerFunc) (int, error) { 82 return Wrap(&WrapConfig{ 83 Handler: f, 84 }) 85} 86 87// Wrap wraps the current executable in a handler to catch panics. It 88// returns an error if there was an error during the wrapping process. 89// If the error is nil, then the int result indicates the exit status of the 90// child process. If the exit status is -1, then this is the child process, 91// and execution should continue as normal. Otherwise, this is the parent 92// process and the child successfully ran already, and you should exit the 93// process with the returned exit status. 94// 95// This function should be called very very early in your program's execution. 96// Ideally, this runs as the first line of code of main. 97// 98// Once this is called, the given WrapConfig shouldn't be modified or used 99// any further. 100func Wrap(c *WrapConfig) (int, error) { 101 if c.Handler == nil { 102 return -1, errors.New("Handler must be set") 103 } 104 105 if c.DetectDuration == 0 { 106 c.DetectDuration = 300 * time.Millisecond 107 } 108 109 if c.Writer == nil { 110 c.Writer = os.Stderr 111 } 112 113 // If we're already wrapped, exit out. 114 if Wrapped(c) { 115 return -1, nil 116 } 117 118 // Get the path to our current executable 119 exePath, err := os.Executable() 120 if err != nil { 121 return -1, err 122 } 123 124 // Pipe the stderr so we can read all the data as we look for panics 125 stderr_r, stderr_w := io.Pipe() 126 127 // doneCh is closed when we're done, signaling any other goroutines 128 // to end immediately. 129 doneCh := make(chan struct{}) 130 131 // panicCh is the channel on which the panic text will actually be 132 // sent. 133 panicCh := make(chan string) 134 135 // On close, make sure to finish off the copying of data to stderr 136 defer func() { 137 defer close(doneCh) 138 stderr_w.Close() 139 <-panicCh 140 }() 141 142 // Start the goroutine that will watch stderr for any panics 143 go trackPanic(stderr_r, c.Writer, c.DetectDuration, panicCh) 144 145 // Create the writer for stdout that we're going to use 146 var stdout_w io.Writer = os.Stdout 147 if c.Stdout != nil { 148 stdout_w = c.Stdout 149 } 150 151 // Build a subcommand to re-execute ourselves. We make sure to 152 // set the environmental variable to include our cookie. We also 153 // set stdin/stdout to match the config. Finally, we pipe stderr 154 // through ourselves in order to watch for panics. 155 cmd := exec.Command(exePath, os.Args[1:]...) 156 cmd.Env = append(os.Environ(), c.CookieKey+"="+c.CookieValue) 157 cmd.Stdin = os.Stdin 158 cmd.Stdout = stdout_w 159 cmd.Stderr = stderr_w 160 161 // Windows doesn't support this, but on other platforms pass in 162 // the original file descriptors so they can be used. 163 if runtime.GOOS != "windows" { 164 cmd.ExtraFiles = []*os.File{os.Stdin, os.Stdout, os.Stderr} 165 } 166 167 if err := cmd.Start(); err != nil { 168 return 1, err 169 } 170 171 // Listen to signals and capture them forever. We allow the child 172 // process to handle them in some way. 173 sigCh := make(chan os.Signal) 174 fwdSigCh := make(chan os.Signal) 175 if len(c.IgnoreSignals) == 0 { 176 c.IgnoreSignals = []os.Signal{os.Interrupt} 177 } 178 signal.Notify(sigCh, c.IgnoreSignals...) 179 signal.Notify(fwdSigCh, c.ForwardSignals...) 180 go func() { 181 defer signal.Stop(sigCh) 182 defer signal.Stop(fwdSigCh) 183 for { 184 select { 185 case <-doneCh: 186 return 187 case s := <-fwdSigCh: 188 if cmd.Process != nil { 189 cmd.Process.Signal(s) 190 } 191 case <-sigCh: 192 } 193 } 194 }() 195 196 if err := cmd.Wait(); err != nil { 197 exitErr, ok := err.(*exec.ExitError) 198 if !ok { 199 // This is some other kind of subprocessing error. 200 return 1, err 201 } 202 203 exitStatus := 1 204 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 205 exitStatus = status.ExitStatus() 206 } 207 208 // Close the writer end so that the tracker goroutine ends at some point 209 stderr_w.Close() 210 211 // Wait on the panic data 212 panicTxt := <-panicCh 213 if panicTxt != "" { 214 if !c.HidePanic { 215 c.Writer.Write([]byte(panicTxt)) 216 } 217 218 c.Handler(panicTxt) 219 } 220 221 return exitStatus, nil 222 } 223 224 return 0, nil 225} 226 227// Wrapped checks if we're already wrapped according to the configuration 228// given. 229// 230// It must be only called once with a non-nil configuration as it unsets 231// the environment variable it uses to check if we are already wrapped. 232// This prevents false positive if your program tries to execute itself 233// recursively. 234// 235// Wrapped is very cheap and can be used early to short-circuit some pre-wrap 236// logic your application may have. 237// 238// If the given configuration is nil, then this will return a cached 239// value of Wrapped. This is useful because Wrapped is usually called early 240// to verify a process hasn't been wrapped before wrapping. After this, 241// the value of Wrapped hardly changes and is process-global, so other 242// libraries can check with Wrapped(nil). 243func Wrapped(c *WrapConfig) bool { 244 if c == nil { 245 return wrapCache.Load().(bool) 246 } 247 248 if c.CookieKey == "" { 249 c.CookieKey = DEFAULT_COOKIE_KEY 250 } 251 252 if c.CookieValue == "" { 253 c.CookieValue = DEFAULT_COOKIE_VAL 254 } 255 256 // If the cookie key/value match our environment, then we are the 257 // child, so just exit now and tell the caller that we're the child 258 result := os.Getenv(c.CookieKey) == c.CookieValue 259 if result { 260 os.Unsetenv(c.CookieKey) 261 } 262 wrapCache.Store(result) 263 return result 264} 265 266// wrapCache is the cached value for Wrapped when called with nil 267var wrapCache atomic.Value 268 269func init() { 270 wrapCache.Store(false) 271} 272 273// trackPanic monitors the given reader for a panic. If a panic is detected, 274// it is outputted on the result channel. This will close the channel once 275// it is complete. 276func trackPanic(r io.Reader, w io.Writer, dur time.Duration, result chan<- string) { 277 defer close(result) 278 279 var panicTimer <-chan time.Time 280 panicBuf := new(bytes.Buffer) 281 panicHeaders := [][]byte{ 282 []byte("panic:"), 283 []byte("fatal error: fault"), 284 } 285 panicType := -1 286 287 tempBuf := make([]byte, 2048) 288 for { 289 var buf []byte 290 var n int 291 292 if panicTimer == nil && panicBuf.Len() > 0 { 293 // We're not tracking a panic but the buffer length is 294 // greater than 0. We need to clear out that buffer, but 295 // look for another panic along the way. 296 297 // First, remove the previous panic header so we don't loop 298 w.Write(panicBuf.Next(len(panicHeaders[panicType]))) 299 300 // Next, assume that this is our new buffer to inspect 301 n = panicBuf.Len() 302 buf = make([]byte, n) 303 copy(buf, panicBuf.Bytes()) 304 panicBuf.Reset() 305 } else { 306 var err error 307 buf = tempBuf 308 n, err = r.Read(buf) 309 if n <= 0 && err == io.EOF { 310 if panicBuf.Len() > 0 { 311 // We were tracking a panic, assume it was a panic 312 // and return that as the result. 313 result <- panicBuf.String() 314 } 315 316 return 317 } 318 } 319 320 if panicTimer != nil { 321 // We're tracking what we think is a panic right now. 322 // If the timer ended, then it is not a panic. 323 isPanic := true 324 select { 325 case <-panicTimer: 326 isPanic = false 327 default: 328 } 329 330 // No matter what, buffer the text some more. 331 panicBuf.Write(buf[0:n]) 332 333 if !isPanic { 334 // It isn't a panic, stop tracking. Clean-up will happen 335 // on the next iteration. 336 panicTimer = nil 337 } 338 339 continue 340 } 341 342 panicType = -1 343 flushIdx := n 344 for i, header := range panicHeaders { 345 idx := bytes.Index(buf[0:n], header) 346 if idx >= 0 { 347 panicType = i 348 flushIdx = idx 349 break 350 } 351 } 352 353 // Flush to stderr what isn't a panic 354 w.Write(buf[0:flushIdx]) 355 356 if panicType == -1 { 357 // Not a panic so just continue along 358 continue 359 } 360 361 // We have a panic header. Write we assume is a panic os far. 362 panicBuf.Write(buf[flushIdx:n]) 363 panicTimer = time.After(dur) 364 } 365} 366