1// Licensed to Elasticsearch B.V. under one or more contributor
2// license agreements. See the NOTICE file distributed with
3// this work for additional information regarding copyright
4// ownership. Elasticsearch B.V. licenses this file to you under
5// the Apache License, Version 2.0 (the "License"); you may
6// not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18package apm // import "go.elastic.co/apm"
19
20import (
21	"crypto/rand"
22	"fmt"
23	"net"
24	"os"
25	"reflect"
26	"runtime"
27	"syscall"
28	"time"
29
30	"github.com/pkg/errors"
31
32	"go.elastic.co/apm/internal/pkgerrorsutil"
33	"go.elastic.co/apm/model"
34	"go.elastic.co/apm/stacktrace"
35)
36
// maxErrorTreeNodes is the maximum number of errors to report
// in an error tree. Once this number of nodes is reached, we
// stop recursing through error causes.
const maxErrorTreeNodes = 50
44
45// Recovered creates an Error with t.NewError(err), where
46// err is either v (if v implements error), or otherwise
47// fmt.Errorf("%v", v). The value v is expected to have
48// come from a panic.
49func (t *Tracer) Recovered(v interface{}) *Error {
50	var e *Error
51	switch v := v.(type) {
52	case error:
53		e = t.NewError(v)
54	default:
55		e = t.NewError(fmt.Errorf("%v", v))
56	}
57	return e
58}
59
60// NewError returns a new Error with details taken from err.
61// NewError will panic if called with a nil error.
62//
63// The exception message will be set to err.Error().
64// The exception module and type will be set to the package
65// and type name of the cause of the error, respectively,
66// where the cause has the same definition as given by
67// github.com/pkg/errors.
68//
69// If err implements
70//   type interface {
71//       StackTrace() github.com/pkg/errors.StackTrace
72//   }
73// or
74//   type interface {
75//       StackTrace() []stacktrace.Frame
76//   }
77// then one of those will be used to set the error
78// stacktrace. Otherwise, NewError will take a stacktrace.
79//
80// If err implements
81//   type interface {Type() string}
82// then that will be used to set the error type.
83//
84// If err implements
85//   type interface {Code() string}
86// or
87//   type interface {Code() float64}
88// then one of those will be used to set the error code.
89func (t *Tracer) NewError(err error) *Error {
90	if err == nil {
91		panic("NewError must be called with a non-nil error")
92	}
93	e := t.newError()
94	e.cause = err
95	e.err = err.Error()
96	if e.recording {
97		rand.Read(e.ID[:]) // ignore error, can't do anything about it
98		initException(&e.exception, err, e.stackTraceLimit)
99		if len(e.exception.stacktrace) == 0 {
100			e.SetStacktrace(2)
101		}
102	}
103	return e
104}
105
106// NewErrorLog returns a new Error for the given ErrorLogRecord.
107//
108// The resulting Error's stacktrace will not be set. Call the
109// SetStacktrace method to set it, if desired.
110//
111// If r.Message is empty, "[EMPTY]" will be used.
112func (t *Tracer) NewErrorLog(r ErrorLogRecord) *Error {
113	e := t.newError()
114	e.cause = r.Error
115	e.err = e.log.Message
116	if e.recording {
117		e.log = ErrorLogRecord{
118			Message:       truncateString(r.Message),
119			MessageFormat: truncateString(r.MessageFormat),
120			Level:         truncateString(r.Level),
121			LoggerName:    truncateString(r.LoggerName),
122		}
123		if e.log.Message == "" {
124			e.log.Message = "[EMPTY]"
125		}
126		rand.Read(e.ID[:]) // ignore error, can't do anything about it
127		if r.Error != nil {
128			initException(&e.exception, r.Error, e.stackTraceLimit)
129		}
130	}
131	return e
132}
133
134// newError returns a new Error associated with the Tracer.
135func (t *Tracer) newError() *Error {
136	e, _ := t.errorDataPool.Get().(*ErrorData)
137	if e == nil {
138		e = &ErrorData{
139			tracer: t,
140			Context: Context{
141				captureBodyMask: CaptureBodyErrors,
142			},
143		}
144	}
145
146	instrumentationConfig := t.instrumentationConfig()
147	e.recording = instrumentationConfig.recording
148	if e.recording {
149		e.Timestamp = time.Now()
150		e.Context.captureHeaders = instrumentationConfig.captureHeaders
151		e.Context.sanitizedFieldNames = instrumentationConfig.sanitizedFieldNames
152		e.stackTraceLimit = instrumentationConfig.stackTraceLimit
153	}
154
155	return &Error{ErrorData: e}
156}
157
// Error describes an error occurring in the monitored service.
type Error struct {
	// ErrorData holds the error data. This field is set to nil when
	// the error's Send method is called.
	*ErrorData

	// cause holds the original error.
	//
	// It is accessible via the Cause method:
	// https://godoc.org/github.com/pkg/errors#Cause
	cause error

	// err holds the original error string, as returned by the
	// Error method.
	err string
}
173
// ErrorData holds the details for an error, and is embedded inside Error.
// When the error is sent, its ErrorData field will be set to nil.
type ErrorData struct {
	// tracer is the Tracer that created this ErrorData. The error is
	// enqueued on its events channel, and the ErrorData is returned to
	// its errorDataPool when reset.
	tracer             *Tracer
	// recording is copied from the tracer's instrumentation config when
	// the error is created; details are only collected, and the error
	// only enqueued, when it is true.
	recording          bool
	// stackTraceLimit is the stack trace limit copied from the tracer's
	// instrumentation config when the error is created.
	stackTraceLimit    int
	// exception holds the details extracted from the error value.
	exception          exceptionData
	// log holds the (truncated) log record set by NewErrorLog.
	log                ErrorLogRecord
	// logStacktrace holds the stacktrace set by SetStacktrace when
	// log.Message is non-empty.
	logStacktrace      []stacktrace.Frame
	// transactionSampled and transactionType are set by SetTransaction
	// and SetSpan; transactionType is only recorded when the trace
	// context is marked as recorded.
	transactionSampled bool
	transactionType    string

	// ID is the unique identifier of the error. This is set by
	// the various error constructors, and is exposed only so
	// the error ID can be logged or displayed to the user.
	ID ErrorID

	// TraceID is the unique identifier of the trace in which
	// this error occurred. If the error is not associated with
	// a trace, this will be the zero value.
	TraceID TraceID

	// TransactionID is the unique identifier of the transaction
	// in which this error occurred. If the error is not associated
	// with a transaction, this will be the zero value.
	TransactionID SpanID

	// ParentID is the unique identifier of the transaction or span
	// in which this error occurred. If the error is not associated
	// with a transaction or span, this will be the zero value.
	ParentID SpanID

	// Culprit is the name of the function that caused the error.
	//
	// This is initially unset; if it remains unset by the time
	// Send is invoked, and the error has a stacktrace, the first
	// non-library frame in the stacktrace will be considered the
	// culprit.
	Culprit string

	// Timestamp records the time at which the error occurred.
	// This is set when the Error object is created, but may
	// be overridden any time before the Send method is called.
	Timestamp time.Time

	// Handled records whether or not the error was handled. This
	// is ignored by "log" errors with no associated error value.
	Handled bool

	// Context holds the context for this error.
	Context Context
}
226
227// Cause returns original error assigned to Error, nil if Error or Error.cause is nil.
228// https://godoc.org/github.com/pkg/errors#Cause
229func (e *Error) Cause() error {
230	if e != nil {
231		return e.cause
232	}
233	return nil
234}
235
236// Error returns string message for error.
237// if Error or Error.cause is nil, "[EMPTY]" will be used.
238func (e *Error) Error() string {
239	if e != nil {
240		return e.err
241	}
242	return "[EMPTY]"
243}
244
245// SetTransaction sets TraceID, TransactionID, and ParentID to the transaction's
246// IDs, and records the transaction's Type and whether or not it was sampled.
247//
248// If any custom context has been recorded in tx, it will also be carried across
249// to e, but will not override any custom context already recorded on e.
250func (e *Error) SetTransaction(tx *Transaction) {
251	tx.mu.RLock()
252	traceContext := tx.traceContext
253	var txType string
254	var custom model.IfaceMap
255	if !tx.ended() {
256		txType = tx.Type
257		custom = tx.Context.model.Custom
258	}
259	tx.mu.RUnlock()
260	e.setSpanData(traceContext, traceContext.Span, txType, custom)
261}
262
263// SetSpan sets TraceID, TransactionID, and ParentID to the span's IDs.
264//
265// There is no need to call both SetTransaction and SetSpan. If you do call
266// both, then SetSpan must be called second in order to set the error's
267// ParentID correctly.
268//
269// If any custom context has been recorded in s's transaction, it will
270// also be carried across to e, but will not override any custom context
271// already recorded on e.
272func (e *Error) SetSpan(s *Span) {
273	var txType string
274	var custom model.IfaceMap
275	if s.tx != nil {
276		s.tx.mu.RLock()
277		if !s.tx.ended() {
278			txType = s.tx.Type
279			custom = s.tx.Context.model.Custom
280		}
281		s.tx.mu.RUnlock()
282	}
283	e.setSpanData(s.traceContext, s.transactionID, txType, custom)
284}
285
286func (e *Error) setSpanData(
287	traceContext TraceContext,
288	transactionID SpanID,
289	transactionType string,
290	customContext model.IfaceMap,
291) {
292	e.TraceID = traceContext.Trace
293	e.ParentID = traceContext.Span
294	e.TransactionID = transactionID
295	e.transactionSampled = traceContext.Options.Recorded()
296	if e.transactionSampled {
297		e.transactionType = transactionType
298	}
299	if n := len(customContext); n != 0 {
300		m := len(e.Context.model.Custom)
301		e.Context.model.Custom = append(e.Context.model.Custom, customContext...)
302		// If there was already custom context in e, shift the custom context from
303		// tx to the beginning of the slice so that e's context takes precedence.
304		if m != 0 {
305			copy(e.Context.model.Custom[n:], e.Context.model.Custom[:m])
306			copy(e.Context.model.Custom[:n], customContext)
307		}
308	}
309}
310
311// Send enqueues the error for sending to the Elastic APM server.
312//
313// Send will set e.ErrorData to nil, so the error must not be
314// modified after Send returns.
315func (e *Error) Send() {
316	if e == nil || e.sent() {
317		return
318	}
319	if e.recording {
320		e.ErrorData.enqueue()
321	} else {
322		e.reset()
323	}
324	e.ErrorData = nil
325}
326
// sent reports whether the error has already been sent, i.e. whether
// its ErrorData has been set to nil by Send.
func (e *Error) sent() bool {
	return e.ErrorData == nil
}
330
331func (e *ErrorData) enqueue() {
332	select {
333	case e.tracer.events <- tracerEvent{eventType: errorEvent, err: e}:
334	default:
335		// Enqueuing an error should never block.
336		e.tracer.statsMu.Lock()
337		e.tracer.stats.ErrorsDropped++
338		e.tracer.statsMu.Unlock()
339		e.reset()
340	}
341}
342
343func (e *ErrorData) reset() {
344	*e = ErrorData{
345		tracer:        e.tracer,
346		logStacktrace: e.logStacktrace[:0],
347		Context:       e.Context,
348		exception:     e.exception,
349	}
350	e.Context.reset()
351	e.exception.reset()
352	e.tracer.errorDataPool.Put(e)
353}
354
// exceptionData holds the details extracted from a single error value,
// along with the same details for each of its causes.
type exceptionData struct {
	// message is the truncated result of the error's Error method,
	// or "[EMPTY]" if that is empty.
	message    string
	// stacktrace holds the stack frames recorded for the error.
	stacktrace []stacktrace.Frame
	// cause holds the exception data built for each entry of
	// ErrorDetails.Cause that was visited.
	cause      []exceptionData
	ErrorDetails
}
361
362func (e *exceptionData) reset() {
363	*e = exceptionData{
364		cause:      e.cause[:0],
365		stacktrace: e.stacktrace[:0],
366		ErrorDetails: ErrorDetails{
367			attrs: e.ErrorDetails.attrs,
368			Cause: e.ErrorDetails.Cause[:0],
369		},
370	}
371	for k := range e.attrs {
372		delete(e.attrs, k)
373	}
374}
375
376func initException(e *exceptionData, err error, stackTraceLimit int) {
377	b := exceptionDataBuilder{stackTraceLimit: stackTraceLimit}
378	b.init(e, err)
379}
380
// exceptionDataBuilder carries the state shared across the recursive
// construction of an exceptionData tree.
type exceptionDataBuilder struct {
	// stackTraceLimit bounds the number of frames copied from an
	// error's own stack trace; a negative value means no limit.
	stackTraceLimit int
	// errorCount is the number of errors visited so far; it is compared
	// against maxErrorTreeNodes to bound the size of the tree.
	errorCount      int
	// pointerErrors records the pointer values of the pointer-like
	// errors already visited, to detect cyclic error causes. It is
	// allocated lazily.
	pointerErrors   map[uintptr]struct{}
}
386
// init populates e from err and, recursively, from err's causes.
//
// It returns false, leaving e untouched, if err is a pointer-like value
// that this builder has already visited; otherwise it returns true. Every
// call counts towards b.errorCount (including calls that return false),
// and causes are no longer descended into once that count reaches
// maxErrorTreeNodes.
//
// NOTE(review): err must be non-nil; reflect.ValueOf(nil).Type() panics,
// so a nil entry appended to ErrorDetails.Cause by a detailer would panic
// here.
func (b *exceptionDataBuilder) init(e *exceptionData, err error) bool {
	b.errorCount++
	reflectValue := reflect.ValueOf(err)
	reflectType := reflectValue.Type()
	switch reflectType.Kind() {
	case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.Slice, reflect.UnsafePointer:
		// Prevent infinite recursion due to cyclic error causes.
		ptrVal := reflectValue.Pointer()
		if b.pointerErrors == nil {
			b.pointerErrors = map[uintptr]struct{}{ptrVal: struct{}{}}
		} else {
			if _, ok := b.pointerErrors[ptrVal]; ok {
				return false
			}
			b.pointerErrors[ptrVal] = struct{}{}
		}
	}

	e.message = truncateString(err.Error())
	if e.message == "" {
		e.message = "[EMPTY]"
	}

	// For an unnamed pointer type, report the name and package of the
	// type it points to.
	namedType := reflectType
	if reflectType.Name() == "" && reflectType.Kind() == reflect.Ptr {
		namedType = reflectType.Elem()
	}
	e.Type.Name = namedType.Name()
	e.Type.PackagePath = namedType.PkgPath()

	// If the error implements Type, use that to
	// override the type name determined through
	// reflection.
	if err, ok := err.(interface {
		Type() string
	}); ok {
		e.Type.Name = err.Type()
	}

	// If the error implements a Code method, use
	// that to set the exception code.
	switch err := err.(type) {
	case interface {
		Code() string
	}:
		e.Code.String = err.Code()
	case interface {
		Code() float64
	}:
		e.Code.Number = err.Code()
	}

	// If the error implements an Unwrap or Cause method, use that to set the cause error.
	// Unwrap is defined by errors wrapped using fmt.Errorf, while Cause is defined by
	// errors wrapped using pkg/errors.Wrap.
	//
	// Only the first matching case is used, so Unwrap takes precedence
	// for errors implementing both methods.
	switch err := err.(type) {
	case interface{ Unwrap() error }:
		if cause := err.Unwrap(); cause != nil {
			e.ErrorDetails.Cause = append(e.ErrorDetails.Cause, cause)
		}
	case interface{ Cause() error }:
		if cause := err.Cause(); cause != nil {
			e.ErrorDetails.Cause = append(e.ErrorDetails.Cause, cause)
		}
	}

	// Run registered ErrorDetailers over the error: first those registered
	// for err's exact dynamic type, then the global ones. Detailers may
	// alter any of the details set above, including the list of causes.
	for _, ed := range typeErrorDetailers[reflectType] {
		ed.ErrorDetails(err, &e.ErrorDetails)
	}
	for _, ed := range errorDetailers {
		ed.ErrorDetails(err, &e.ErrorDetails)
	}

	// Truncate only after the detailers have run, since they may have
	// replaced these values.
	e.Code.String = truncateString(e.Code.String)
	e.Type.Name = truncateString(e.Type.Name)
	e.Type.PackagePath = truncateString(e.Type.PackagePath)
	b.initErrorStacktrace(&e.stacktrace, err)

	// Recurse into the causes, stopping once the node budget is used up.
	// Causes that were already visited (init returns false) are omitted.
	for _, err := range e.ErrorDetails.Cause {
		if b.errorCount >= maxErrorTreeNodes {
			break
		}
		var data exceptionData
		if b.init(&data, err) {
			e.cause = append(e.cause, data)
		}
	}
	return true
}
477
478func (b *exceptionDataBuilder) initErrorStacktrace(out *[]stacktrace.Frame, err error) {
479	type internalStackTracer interface {
480		StackTrace() []stacktrace.Frame
481	}
482	type errorsStackTracer interface {
483		StackTrace() errors.StackTrace
484	}
485	type runtimeStackTracer interface {
486		StackTrace() *runtime.Frames
487	}
488	switch stackTracer := err.(type) {
489	case internalStackTracer:
490		stackTrace := stackTracer.StackTrace()
491		if b.stackTraceLimit >= 0 && len(stackTrace) > b.stackTraceLimit {
492			stackTrace = stackTrace[:b.stackTraceLimit]
493		}
494		*out = append(*out, stackTrace...)
495	case errorsStackTracer:
496		stackTrace := stackTracer.StackTrace()
497		pkgerrorsutil.AppendStacktrace(stackTrace, out, b.stackTraceLimit)
498	case runtimeStackTracer:
499		frames := stackTracer.StackTrace()
500		count := 0
501		for {
502			if b.stackTraceLimit >= 0 && count == b.stackTraceLimit {
503				break
504			}
505			frame, more := frames.Next()
506			*out = append(*out, stacktrace.RuntimeFrame(frame))
507			if !more {
508				break
509			}
510			count++
511		}
512	}
513}
514
515// SetStacktrace sets the stacktrace for the error,
516// skipping the first skip number of frames, excluding
517// the SetStacktrace function.
518func (e *Error) SetStacktrace(skip int) {
519	out := &e.exception.stacktrace
520	if e.log.Message != "" {
521		out = &e.logStacktrace
522	}
523	*out = stacktrace.AppendStacktrace((*out)[:0], skip+1, e.stackTraceLimit)
524}
525
// ErrorLogRecord holds details of an error log record.
type ErrorLogRecord struct {
	// Message holds the message for the log record,
	// e.g. "failed to connect to localhost".
	//
	// If this is empty, "[EMPTY]" will be used.
	Message string

	// MessageFormat holds the non-interpolated format
	// of the log record, e.g. "failed to connect to %s".
	//
	// This is optional.
	MessageFormat string

	// Level holds the severity level of the log record.
	//
	// This is optional.
	Level string

	// LoggerName holds the name of the logger used.
	//
	// This is optional.
	LoggerName string

	// Error is an error associated with the log record.
	//
	// This is optional.
	Error error
}
555
// ErrorID uniquely identifies an error. It has the same underlying
// representation as TraceID.
type ErrorID TraceID

// String returns id in its hex-encoded format, as produced by
// TraceID.String.
func (id ErrorID) String() string {
	return TraceID(id).String()
}
563
564func init() {
565	RegisterErrorDetailer(ErrorDetailerFunc(func(err error, details *ErrorDetails) {
566		if errTemporary(err) {
567			details.SetAttr("temporary", true)
568		}
569		if errTimeout(err) {
570			details.SetAttr("timeout", true)
571		}
572	}))
573	RegisterTypeErrorDetailer(reflect.TypeOf(&net.OpError{}), ErrorDetailerFunc(func(err error, details *ErrorDetails) {
574		opErr := err.(*net.OpError)
575		details.SetAttr("op", opErr.Op)
576		details.SetAttr("net", opErr.Net)
577		if opErr.Source != nil {
578			if addr := opErr.Source; addr != nil {
579				details.SetAttr("source", fmt.Sprintf("%s:%s", addr.Network(), addr.String()))
580			}
581		}
582		if opErr.Addr != nil {
583			if addr := opErr.Addr; addr != nil {
584				details.SetAttr("addr", fmt.Sprintf("%s:%s", addr.Network(), addr.String()))
585			}
586		}
587		details.Cause = append(details.Cause, opErr.Err)
588	}))
589	RegisterTypeErrorDetailer(reflect.TypeOf(&os.LinkError{}), ErrorDetailerFunc(func(err error, details *ErrorDetails) {
590		linkErr := err.(*os.LinkError)
591		details.SetAttr("op", linkErr.Op)
592		details.SetAttr("old", linkErr.Old)
593		details.SetAttr("new", linkErr.New)
594		details.Cause = append(details.Cause, linkErr.Err)
595	}))
596	RegisterTypeErrorDetailer(reflect.TypeOf(&os.PathError{}), ErrorDetailerFunc(func(err error, details *ErrorDetails) {
597		pathErr := err.(*os.PathError)
598		details.SetAttr("op", pathErr.Op)
599		details.SetAttr("path", pathErr.Path)
600		details.Cause = append(details.Cause, pathErr.Err)
601	}))
602	RegisterTypeErrorDetailer(reflect.TypeOf(&os.SyscallError{}), ErrorDetailerFunc(func(err error, details *ErrorDetails) {
603		syscallErr := err.(*os.SyscallError)
604		details.SetAttr("syscall", syscallErr.Syscall)
605		details.Cause = append(details.Cause, syscallErr.Err)
606	}))
607	RegisterTypeErrorDetailer(reflect.TypeOf(syscall.Errno(0)), ErrorDetailerFunc(func(err error, details *ErrorDetails) {
608		errno := err.(syscall.Errno)
609		details.Code.String = errnoName(errno)
610		if details.Code.String == "" {
611			details.Code.Number = float64(errno)
612		}
613	}))
614}
615
// errTemporary reports whether err has a Temporary method
// that returns true.
func errTemporary(err error) bool {
	if t, ok := err.(interface{ Temporary() bool }); ok {
		return t.Temporary()
	}
	return false
}
623
// errTimeout reports whether err has a Timeout method
// that returns true.
func errTimeout(err error) bool {
	if t, ok := err.(interface{ Timeout() bool }); ok {
		return t.Timeout()
	}
	return false
}
631
632// RegisterTypeErrorDetailer registers e to be called for any error with
633// the concrete type t.
634//
635// Each ErrorDetailer registered in this way will be called, in the order
636// registered, for each error of type t created via Tracer.NewError or
637// Tracer.NewErrorLog.
638//
639// RegisterTypeErrorDetailer must not be called during tracer operation;
640// it is intended to be called at package init time.
641func RegisterTypeErrorDetailer(t reflect.Type, e ErrorDetailer) {
642	typeErrorDetailers[t] = append(typeErrorDetailers[t], e)
643}
644
// RegisterErrorDetailer registers e in the global list of ErrorDetailers.
//
// Each ErrorDetailer registered in this way will be called, in the order
// registered, for each error created via Tracer.NewError or Tracer.NewErrorLog.
// Global detailers run after any detailers registered for the error's
// concrete type with RegisterTypeErrorDetailer.
//
// RegisterErrorDetailer must not be called during tracer operation; it is
// intended to be called at package init time. The list is appended to
// without any locking.
func RegisterErrorDetailer(e ErrorDetailer) {
	errorDetailers = append(errorDetailers, e)
}
655
var (
	// typeErrorDetailers holds the ErrorDetailers registered with
	// RegisterTypeErrorDetailer, keyed by the concrete error type.
	typeErrorDetailers = make(map[reflect.Type][]ErrorDetailer)
	// errorDetailers holds the ErrorDetailers registered with
	// RegisterErrorDetailer, in registration order.
	errorDetailers     []ErrorDetailer
)
660
// ErrorDetails holds details of an error, which can be altered or
// extended by registering an ErrorDetailer with RegisterErrorDetailer
// or RegisterTypeErrorDetailer.
type ErrorDetails struct {
	// attrs holds the attributes recorded with SetAttr. It is
	// allocated on first use.
	attrs map[string]interface{}

	// Type holds information about the error type, initialized
	// with the type name and type package path using reflection.
	Type struct {
		// Name holds the error type name.
		Name string

		// PackagePath holds the error type package path.
		PackagePath string
	}

	// Code holds an error code.
	Code struct {
		// String holds a string-based error code. If this is set, then Number is ignored.
		//
		// This field will be initialized to the result of calling an error's Code method,
		// if the error implements the following interface:
		//
		//     type StringCoder interface {
		//         Code() string
		//     }
		String string

		// Number holds a numerical error code. This is ignored if String is set.
		//
		// This field will be initialized to the result of calling an error's Code
		// method, if the error implements the following interface:
		//
		//     type NumberCoder interface {
		//         Code() float64
		//     }
		Number float64
	}

	// Cause holds the errors that were the cause of this error.
	Cause []error
}

// SetAttr sets the attribute with key k to value v, replacing any
// value previously recorded under the same key.
func (d *ErrorDetails) SetAttr(k string, v interface{}) {
	if d.attrs != nil {
		d.attrs[k] = v
		return
	}
	d.attrs = map[string]interface{}{k: v}
}
711
// ErrorDetailer defines an interface for altering or extending the ErrorDetails for an error.
//
// ErrorDetailers can be registered using the package-level functions RegisterErrorDetailer and
// RegisterTypeErrorDetailer.
type ErrorDetailer interface {
	// ErrorDetails is called to update or alter details for err.
	//
	// details has already been initialized from err (type, code and
	// cause) by the time it is passed in; the implementation may modify
	// any of its exported fields or record attributes with SetAttr.
	ErrorDetails(err error, details *ErrorDetails)
}
720
// ErrorDetailerFunc is a function type implementing ErrorDetailer,
// allowing an ordinary function to be registered as a detailer.
type ErrorDetailerFunc func(error, *ErrorDetails)

// ErrorDetails calls f(err, details).
func (f ErrorDetailerFunc) ErrorDetails(err error, details *ErrorDetails) {
	f(err, details)
}
728