1package notifier 2 3import ( 4 "context" 5 "errors" 6 "fmt" 7 "net/url" 8 "sort" 9 "time" 10 11 apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" 12 "github.com/prometheus/alertmanager/notify" 13 "github.com/prometheus/alertmanager/types" 14 "github.com/prometheus/common/model" 15 "golang.org/x/sync/errgroup" 16) 17 18const ( 19 maxTestReceiversWorkers = 10 20) 21 22var ( 23 ErrNoReceivers = errors.New("no receivers") 24) 25 26type TestReceiversResult struct { 27 Alert types.Alert 28 Receivers []TestReceiverResult 29 NotifedAt time.Time 30} 31 32type TestReceiverResult struct { 33 Name string 34 Configs []TestReceiverConfigResult 35} 36 37type TestReceiverConfigResult struct { 38 Name string 39 UID string 40 Status string 41 Error error 42} 43 44type InvalidReceiverError struct { 45 Receiver *apimodels.PostableGrafanaReceiver 46 Err error 47} 48 49func (e InvalidReceiverError) Error() string { 50 return fmt.Sprintf("the receiver is invalid: %s", e.Err) 51} 52 53type ReceiverTimeoutError struct { 54 Receiver *apimodels.PostableGrafanaReceiver 55 Err error 56} 57 58func (e ReceiverTimeoutError) Error() string { 59 return fmt.Sprintf("the receiver timed out: %s", e.Err) 60} 61 62func (am *Alertmanager) TestReceivers(ctx context.Context, c apimodels.TestReceiversConfigParams) (*TestReceiversResult, error) { 63 // now represents the start time of the test 64 now := time.Now() 65 testAlert := newTestAlert(c, now, now) 66 67 // we must set a group key that is unique per test as some receivers use this key to deduplicate alerts 68 ctx = notify.WithGroupKey(ctx, testAlert.Labels.String()+now.String()) 69 70 tmpl, err := am.getTemplate() 71 if err != nil { 72 return nil, fmt.Errorf("failed to get template: %w", err) 73 } 74 75 // job contains all metadata required to test a receiver 76 type job struct { 77 Config *apimodels.PostableGrafanaReceiver 78 ReceiverName string 79 Notifier notify.Notifier 80 } 81 82 // result contains the receiver that was tested and an error that is non-nil if the test failed 83 type result struct { 84 Config *apimodels.PostableGrafanaReceiver 85 ReceiverName string 86 Error error 87 } 88 89 newTestReceiversResult := func(alert types.Alert, results []result, notifiedAt time.Time) *TestReceiversResult { 90 m := make(map[string]TestReceiverResult) 91 for _, receiver := range c.Receivers { 92 // set up the result for this receiver 93 m[receiver.Name] = TestReceiverResult{ 94 Name: receiver.Name, 95 // A Grafana receiver can have multiple nested receivers 96 Configs: make([]TestReceiverConfigResult, 0, len(receiver.GrafanaManagedReceivers)), 97 } 98 } 99 for _, next := range results { 100 tmp := m[next.ReceiverName] 101 status := "ok" 102 if next.Error != nil { 103 status = "failed" 104 } 105 tmp.Configs = append(tmp.Configs, TestReceiverConfigResult{ 106 Name: next.Config.Name, 107 UID: next.Config.UID, 108 Status: status, 109 Error: processNotifierError(next.Config, next.Error), 110 }) 111 m[next.ReceiverName] = tmp 112 } 113 v := new(TestReceiversResult) 114 v.Alert = alert 115 v.Receivers = make([]TestReceiverResult, 0, len(c.Receivers)) 116 v.NotifedAt = notifiedAt 117 for _, next := range m { 118 v.Receivers = append(v.Receivers, next) 119 } 120 121 // Make sure the return order is deterministic. 122 sort.Slice(v.Receivers, func(i, j int) bool { 123 return v.Receivers[i].Name < v.Receivers[j].Name 124 }) 125 126 return v 127 } 128 129 // invalid keeps track of all invalid receiver configurations 130 invalid := make([]result, 0, len(c.Receivers)) 131 // jobs keeps track of all receivers that need to be sent test notifications 132 jobs := make([]job, 0, len(c.Receivers)) 133 134 for _, receiver := range c.Receivers { 135 for _, next := range receiver.GrafanaManagedReceivers { 136 n, err := am.buildReceiverIntegration(next, tmpl) 137 if err != nil { 138 invalid = append(invalid, result{ 139 Config: next, 140 ReceiverName: next.Name, 141 Error: err, 142 }) 143 } else { 144 jobs = append(jobs, job{ 145 Config: next, 146 ReceiverName: receiver.Name, 147 Notifier: n, 148 }) 149 } 150 } 151 } 152 153 if len(invalid)+len(jobs) == 0 { 154 return nil, ErrNoReceivers 155 } 156 157 if len(jobs) == 0 { 158 return newTestReceiversResult(testAlert, invalid, now), nil 159 } 160 161 numWorkers := maxTestReceiversWorkers 162 if numWorkers > len(jobs) { 163 numWorkers = len(jobs) 164 } 165 166 resultCh := make(chan result, len(jobs)) 167 workCh := make(chan job, len(jobs)) 168 for _, job := range jobs { 169 workCh <- job 170 } 171 close(workCh) 172 173 g, ctx := errgroup.WithContext(ctx) 174 for i := 0; i < numWorkers; i++ { 175 g.Go(func() error { 176 for next := range workCh { 177 v := result{ 178 Config: next.Config, 179 ReceiverName: next.ReceiverName, 180 } 181 if _, err := next.Notifier.Notify(ctx, &testAlert); err != nil { 182 v.Error = err 183 } 184 resultCh <- v 185 } 186 return nil 187 }) 188 } 189 g.Wait() // nolint 190 close(resultCh) 191 192 results := make([]result, 0, len(jobs)) 193 for next := range resultCh { 194 results = append(results, next) 195 } 196 197 return newTestReceiversResult(testAlert, append(invalid, results...), now), nil 198} 199 200func newTestAlert(c apimodels.TestReceiversConfigParams, startsAt, updatedAt time.Time) types.Alert { 201 var ( 202 defaultAnnotations = model.LabelSet{ 203 "summary": "Notification test", 204 "__value_string__": "[ metric='foo' labels={instance=bar} value=10 ]", 205 } 206 defaultLabels = model.LabelSet{ 207 "alertname": "TestAlert", 208 "instance": "Grafana", 209 } 210 ) 211 212 alert := types.Alert{ 213 Alert: model.Alert{ 214 Labels: defaultLabels, 215 Annotations: defaultAnnotations, 216 StartsAt: startsAt, 217 }, 218 UpdatedAt: updatedAt, 219 } 220 221 if c.Alert != nil { 222 if c.Alert.Annotations != nil { 223 for k, v := range c.Alert.Annotations { 224 alert.Annotations[k] = v 225 } 226 } 227 if c.Alert.Labels != nil { 228 for k, v := range c.Alert.Labels { 229 alert.Labels[k] = v 230 } 231 } 232 } 233 234 return alert 235} 236 237func processNotifierError(config *apimodels.PostableGrafanaReceiver, err error) error { 238 if err == nil { 239 return nil 240 } 241 242 var urlError *url.Error 243 if errors.As(err, &urlError) { 244 if urlError.Timeout() { 245 return ReceiverTimeoutError{ 246 Receiver: config, 247 Err: err, 248 } 249 } 250 } 251 252 if errors.Is(err, context.DeadlineExceeded) { 253 return ReceiverTimeoutError{ 254 Receiver: config, 255 Err: err, 256 } 257 } 258 259 return err 260} 261