// Package panicwrap provides functions for capturing and handling
2// panics in your application. It does this by re-executing the running
3// application and monitoring stderr for any panics. At the same time,
4// stdout/stderr/etc. are set to the same values so that data is shuttled
5// through properly, making the existence of panicwrap mostly transparent.
6//
7// Panics are only detected when the subprocess exits with a non-zero
8// exit status, since this is the only time panics are real. Otherwise,
9// "panic-like" output is ignored.
10package panicwrap
11
12import (
13	"bytes"
14	"errors"
15	"io"
16	"os"
17	"os/exec"
18	"os/signal"
19	"runtime"
20	"sync/atomic"
21	"syscall"
22	"time"
23)
24
// Default cookie used to mark a re-executed child process. Wrapped falls
// back to these when WrapConfig.CookieKey / WrapConfig.CookieValue are empty.
const (
	// DEFAULT_COOKIE_KEY is the name of the environment variable that
	// carries the cookie.
	DEFAULT_COOKIE_KEY = "cccf35992f8f3cd8d1d28f0109dd953e26664531"

	// DEFAULT_COOKIE_VAL is the value the environment variable must hold
	// for the process to be considered already wrapped.
	DEFAULT_COOKIE_VAL = "7c28215aca87789f95b406b8dd91aa5198406750"
)
29
// HandlerFunc is the callback invoked when a panic is detected. It
// receives the captured panic output as a single string.
type HandlerFunc func(panicText string)
32
33// WrapConfig is the configuration for panicwrap when wrapping an existing
34// binary. To get started, in general, you only need the BasicWrap function
35// that will set this up for you. However, for more customizability,
36// WrapConfig and Wrap can be used.
37type WrapConfig struct {
38	// Handler is the function called when a panic occurs.
39	Handler HandlerFunc
40
41	// The cookie key and value are used within environmental variables
42	// to tell the child process that it is already executing so that
43	// wrap doesn't re-wrap itself.
44	CookieKey   string
45	CookieValue string
46
47	// If true, the panic will not be mirrored to the configured writer
48	// and will instead ONLY go to the handler. This lets you effectively
49	// hide panics from the end user. This is not recommended because if
50	// your handler fails, the panic is effectively lost.
51	HidePanic bool
52
53	// The amount of time that a process must exit within after detecting
54	// a panic header for panicwrap to assume it is a panic. Defaults to
55	// 300 milliseconds.
56	DetectDuration time.Duration
57
58	// The writer to send the stderr to. If this is nil, then it defaults
59	// to os.Stderr.
60	Writer io.Writer
61
62	// The writer to send stdout to. If this is nil, then it defaults to
63	// os.Stdout.
64	Stdout io.Writer
65
66	// Catch and igore these signals in the parent process, let the child
67	// handle them gracefully.
68	IgnoreSignals []os.Signal
69
70	// Catch these signals in the parent process and manually forward
71	// them to the child process. Some signals such as SIGINT are usually
72	// sent to the entire process group so setting it isn't necessary. Other
73	// signals like SIGTERM are only sent to the parent process and need
74	// to be forwarded. This defaults to empty.
75	ForwardSignals []os.Signal
76}
77
78// BasicWrap calls Wrap with the given handler function, using defaults
79// for everything else. See Wrap and WrapConfig for more information on
80// functionality and return values.
81func BasicWrap(f HandlerFunc) (int, error) {
82	return Wrap(&WrapConfig{
83		Handler: f,
84	})
85}
86
87// Wrap wraps the current executable in a handler to catch panics. It
88// returns an error if there was an error during the wrapping process.
89// If the error is nil, then the int result indicates the exit status of the
90// child process. If the exit status is -1, then this is the child process,
91// and execution should continue as normal. Otherwise, this is the parent
92// process and the child successfully ran already, and you should exit the
93// process with the returned exit status.
94//
95// This function should be called very very early in your program's execution.
96// Ideally, this runs as the first line of code of main.
97//
98// Once this is called, the given WrapConfig shouldn't be modified or used
99// any further.
100func Wrap(c *WrapConfig) (int, error) {
101	if c.Handler == nil {
102		return -1, errors.New("Handler must be set")
103	}
104
105	if c.DetectDuration == 0 {
106		c.DetectDuration = 300 * time.Millisecond
107	}
108
109	if c.Writer == nil {
110		c.Writer = os.Stderr
111	}
112
113	// If we're already wrapped, exit out.
114	if Wrapped(c) {
115		return -1, nil
116	}
117
118	// Get the path to our current executable
119	exePath, err := os.Executable()
120	if err != nil {
121		return -1, err
122	}
123
124	// Pipe the stderr so we can read all the data as we look for panics
125	stderr_r, stderr_w := io.Pipe()
126
127	// doneCh is closed when we're done, signaling any other goroutines
128	// to end immediately.
129	doneCh := make(chan struct{})
130
131	// panicCh is the channel on which the panic text will actually be
132	// sent.
133	panicCh := make(chan string)
134
135	// On close, make sure to finish off the copying of data to stderr
136	defer func() {
137		defer close(doneCh)
138		stderr_w.Close()
139		<-panicCh
140	}()
141
142	// Start the goroutine that will watch stderr for any panics
143	go trackPanic(stderr_r, c.Writer, c.DetectDuration, panicCh)
144
145	// Create the writer for stdout that we're going to use
146	var stdout_w io.Writer = os.Stdout
147	if c.Stdout != nil {
148		stdout_w = c.Stdout
149	}
150
151	// Build a subcommand to re-execute ourselves. We make sure to
152	// set the environmental variable to include our cookie. We also
153	// set stdin/stdout to match the config. Finally, we pipe stderr
154	// through ourselves in order to watch for panics.
155	cmd := exec.Command(exePath, os.Args[1:]...)
156	cmd.Env = append(os.Environ(), c.CookieKey+"="+c.CookieValue)
157	cmd.Stdin = os.Stdin
158	cmd.Stdout = stdout_w
159	cmd.Stderr = stderr_w
160
161	// Windows doesn't support this, but on other platforms pass in
162	// the original file descriptors so they can be used.
163	if runtime.GOOS != "windows" {
164		cmd.ExtraFiles = []*os.File{os.Stdin, os.Stdout, os.Stderr}
165	}
166
167	if err := cmd.Start(); err != nil {
168		return 1, err
169	}
170
171	// Listen to signals and capture them forever. We allow the child
172	// process to handle them in some way.
173	sigCh := make(chan os.Signal)
174	fwdSigCh := make(chan os.Signal)
175	if len(c.IgnoreSignals) == 0 {
176		c.IgnoreSignals = []os.Signal{os.Interrupt}
177	}
178	signal.Notify(sigCh, c.IgnoreSignals...)
179	signal.Notify(fwdSigCh, c.ForwardSignals...)
180	go func() {
181		defer signal.Stop(sigCh)
182		defer signal.Stop(fwdSigCh)
183		for {
184			select {
185			case <-doneCh:
186				return
187			case s := <-fwdSigCh:
188				if cmd.Process != nil {
189					cmd.Process.Signal(s)
190				}
191			case <-sigCh:
192			}
193		}
194	}()
195
196	if err := cmd.Wait(); err != nil {
197		exitErr, ok := err.(*exec.ExitError)
198		if !ok {
199			// This is some other kind of subprocessing error.
200			return 1, err
201		}
202
203		exitStatus := 1
204		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
205			exitStatus = status.ExitStatus()
206		}
207
208		// Close the writer end so that the tracker goroutine ends at some point
209		stderr_w.Close()
210
211		// Wait on the panic data
212		panicTxt := <-panicCh
213		if panicTxt != "" {
214			if !c.HidePanic {
215				c.Writer.Write([]byte(panicTxt))
216			}
217
218			c.Handler(panicTxt)
219		}
220
221		return exitStatus, nil
222	}
223
224	return 0, nil
225}
226
227// Wrapped checks if we're already wrapped according to the configuration
228// given.
229//
230// It must be only called once with a non-nil configuration as it unsets
231// the environment variable it uses to check if we are already wrapped.
232// This prevents false positive if your program tries to execute itself
233// recursively.
234//
235// Wrapped is very cheap and can be used early to short-circuit some pre-wrap
236// logic your application may have.
237//
238// If the given configuration is nil, then this will return a cached
239// value of Wrapped. This is useful because Wrapped is usually called early
240// to verify a process hasn't been wrapped before wrapping. After this,
241// the value of Wrapped hardly changes and is process-global, so other
242// libraries can check with Wrapped(nil).
243func Wrapped(c *WrapConfig) bool {
244	if c == nil {
245		return wrapCache.Load().(bool)
246	}
247
248	if c.CookieKey == "" {
249		c.CookieKey = DEFAULT_COOKIE_KEY
250	}
251
252	if c.CookieValue == "" {
253		c.CookieValue = DEFAULT_COOKIE_VAL
254	}
255
256	// If the cookie key/value match our environment, then we are the
257	// child, so just exit now and tell the caller that we're the child
258	result := os.Getenv(c.CookieKey) == c.CookieValue
259	if result {
260		os.Unsetenv(c.CookieKey)
261	}
262	wrapCache.Store(result)
263	return result
264}
265
// wrapCache remembers the most recent result computed by Wrapped so that
// it can be returned when Wrapped is called with a nil configuration.
var wrapCache atomic.Value

// init seeds wrapCache with false so that loading it always yields a bool,
// even before Wrapped has been called with a configuration.
func init() {
	const notWrapped = false
	wrapCache.Store(notWrapped)
}
272
// trackPanic monitors the given reader for a panic. Everything that does
// not look like a panic is copied to w as it arrives. Once a panic header
// ("panic:" or "fatal error: fault") is seen, the header and everything
// after it is held back in a buffer instead of being written.
//
// If the reader ends (EOF or any other read error) while text is being
// held back, that text is sent on result as the panic. If instead more
// data arrives after dur has elapsed since the header was seen, the
// held-back text is treated as ordinary output: the header is written to
// w and the rest is scanned again for another header. The timer is only
// consulted when more data arrives, not when the reader ends.
//
// Headers that are split across two reads are not detected. Write errors
// on w are ignored; mirroring is best-effort.
//
// result is closed once trackPanic is complete.
func trackPanic(r io.Reader, w io.Writer, dur time.Duration, result chan<- string) {
	defer close(result)

	var panicTimer <-chan time.Time
	panicBuf := new(bytes.Buffer)
	panicHeaders := [][]byte{
		[]byte("panic:"),
		[]byte("fatal error: fault"),
	}
	panicType := -1

	tempBuf := make([]byte, 2048)
	for {
		var buf []byte
		var n int

		if panicTimer == nil && panicBuf.Len() > 0 {
			// We're not tracking a panic but the buffer length is
			// greater than 0. We need to clear out that buffer, but
			// look for another panic along the way.

			// First, remove the previous panic header so we don't loop
			w.Write(panicBuf.Next(len(panicHeaders[panicType])))

			// Next, assume that this is our new buffer to inspect. It is
			// copied because panicBuf may be written to again below.
			n = panicBuf.Len()
			buf = make([]byte, n)
			copy(buf, panicBuf.Bytes())
			panicBuf.Reset()
		} else {
			var err error
			buf = tempBuf
			n, err = r.Read(buf)
			if n <= 0 && err != nil {
				// The stream is over. Any error ends it, not just io.EOF:
				// a reader that keeps failing would otherwise make this
				// loop spin forever.
				if panicBuf.Len() > 0 {
					// We were tracking a panic, assume it was a panic
					// and return that as the result.
					result <- panicBuf.String()
				}

				return
			}
		}

		if panicTimer != nil {
			// We're tracking what we think is a panic right now.
			// If the timer ended, then it is not a panic.
			isPanic := true
			select {
			case <-panicTimer:
				isPanic = false
			default:
			}

			// No matter what, buffer the text some more.
			panicBuf.Write(buf[0:n])

			if !isPanic {
				// It isn't a panic, stop tracking. Clean-up will happen
				// on the next iteration.
				panicTimer = nil
			}

			continue
		}

		// Look for the header that appears EARLIEST in the data, so that
		// nothing belonging to the panic is flushed as ordinary output
		// when more than one header is present.
		panicType = -1
		flushIdx := n
		for i, header := range panicHeaders {
			idx := bytes.Index(buf[0:n], header)
			if idx >= 0 && idx < flushIdx {
				panicType = i
				flushIdx = idx
			}
		}

		// Flush to stderr what isn't a panic
		w.Write(buf[0:flushIdx])

		if panicType == -1 {
			// Not a panic so just continue along
			continue
		}

		// We have a panic header. Buffer what we assume is a panic so far
		// and start the detection window.
		panicBuf.Write(buf[flushIdx:n])
		panicTimer = time.After(dur)
	}
}
366