// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package promhttp provides tooling around HTTP servers and clients.
//
// First, the package allows the creation of http.Handler instances to expose
// Prometheus metrics via HTTP. promhttp.Handler acts on the
// prometheus.DefaultGatherer. With HandlerFor, you can create a handler for a
// custom registry or anything that implements the Gatherer interface. It also
// allows the creation of handlers that act differently on errors or that log
// errors.
//
// Second, the package provides tooling to instrument instances of http.Handler
// via middleware. Middleware wrappers follow the naming scheme
// InstrumentHandlerX, where X describes the intended use of the middleware.
// See each function's doc comment for specific details.
//
// Finally, the package allows for an http.RoundTripper to be instrumented via
// middleware. Middleware wrappers follow the naming scheme
// InstrumentRoundTripperX, where X describes the intended use of the
// middleware. See each function's doc comment for specific details.
package promhttp

import (
	"compress/gzip"
	"fmt"
	"io"
	"net/http"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/common/expfmt"

	"github.com/prometheus/client_golang/prometheus"
)

// HTTP header names used when serving and content-negotiating metrics
// responses.
const (
	contentTypeHeader     = "Content-Type"
	contentEncodingHeader = "Content-Encoding"
	acceptEncodingHeader  = "Accept-Encoding"
)

// gzipPool recycles gzip.Writers across requests so that a new compressor
// does not have to be allocated for every scrape. Writers taken from the
// pool must be Reset against the target writer before use.
var gzipPool = sync.Pool{
	New: func() interface{} {
		return gzip.NewWriter(nil)
	},
}

// Handler returns an http.Handler for the prometheus.DefaultGatherer, using
// default HandlerOpts, i.e. it reports the first error as an HTTP error, it has
// no error logging, and it applies compression if requested by the client.
//
// The returned http.Handler is already instrumented using the
// InstrumentMetricHandler function and the prometheus.DefaultRegisterer. If you
// create multiple http.Handlers by separate calls of the Handler function, the
// metrics used for instrumentation will be shared between them, providing
// global scrape counts.
//
// This function is meant to cover the bulk of basic use cases. If you are doing
// anything that requires more customization (including using a non-default
// Gatherer, different instrumentation, and non-default HandlerOpts), use the
// HandlerFor function. See there for details.
func Handler() http.Handler {
	return InstrumentMetricHandler(
		prometheus.DefaultRegisterer, HandlerFor(prometheus.DefaultGatherer, HandlerOpts{}),
	)
}

// HandlerFor returns an uninstrumented http.Handler for the provided
// Gatherer. The behavior of the Handler is defined by the provided
// HandlerOpts. Thus, HandlerFor is useful to create http.Handlers for custom
// Gatherers, with non-default HandlerOpts, and/or with custom (or no)
// instrumentation. Use the InstrumentMetricHandler function to apply the same
// kind of instrumentation as it is used by the Handler function.
func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
	var (
		// inFlightSem stays nil (i.e. no limiting) unless
		// opts.MaxRequestsInFlight is positive; see below.
		inFlightSem chan struct{}
		errCnt      = prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "promhttp_metric_handler_errors_total",
				Help: "Total number of internal errors encountered by the promhttp metric handler.",
			},
			[]string{"cause"},
		)
	)

	if opts.MaxRequestsInFlight > 0 {
		inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
	}
	if opts.Registry != nil {
		// Initialize all possibilities that can occur below, so that the
		// series exist with a value of 0 before any error happens.
		errCnt.WithLabelValues("gathering")
		errCnt.WithLabelValues("encoding")
		if err := opts.Registry.Register(errCnt); err != nil {
			if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
				// Another HandlerFor call already registered the counter;
				// share the existing one instead of failing.
				errCnt = are.ExistingCollector.(*prometheus.CounterVec)
			} else {
				panic(err)
			}
		}
	}

	h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
		if inFlightSem != nil {
			// Non-blocking acquire: if the semaphore is full, reject the
			// request immediately rather than queuing it.
			select {
			case inFlightSem <- struct{}{}: // All good, carry on.
				defer func() { <-inFlightSem }()
			default:
				http.Error(rsp, fmt.Sprintf(
					"Limit of concurrent requests reached (%d), try again later.", opts.MaxRequestsInFlight,
				), http.StatusServiceUnavailable)
				return
			}
		}
		mfs, err := reg.Gather()
		if err != nil {
			if opts.ErrorLog != nil {
				opts.ErrorLog.Println("error gathering metrics:", err)
			}
			errCnt.WithLabelValues("gathering").Inc()
			switch opts.ErrorHandling {
			case PanicOnError:
				panic(err)
			case ContinueOnError:
				if len(mfs) == 0 {
					// Still report the error if no metrics have been gathered.
					httpError(rsp, err)
					return
				}
			case HTTPErrorOnError:
				httpError(rsp, err)
				return
			}
		}

		contentType := expfmt.Negotiate(req.Header)
		header := rsp.Header()
		header.Set(contentTypeHeader, string(contentType))

		w := io.Writer(rsp)
		if !opts.DisableCompression && gzipAccepted(req.Header) {
			header.Set(contentEncodingHeader, "gzip")
			gz := gzipPool.Get().(*gzip.Writer)
			defer gzipPool.Put(gz)

			// The pooled writer may have been used for a previous response;
			// point it at the current one before writing.
			gz.Reset(w)
			defer gz.Close()

			w = gz
		}

		enc := expfmt.NewEncoder(w, contentType)

		var lastErr error
		for _, mf := range mfs {
			if err := enc.Encode(mf); err != nil {
				lastErr = err
				if opts.ErrorLog != nil {
					opts.ErrorLog.Println("error encoding and sending metric family:", err)
				}
				errCnt.WithLabelValues("encoding").Inc()
				switch opts.ErrorHandling {
				case PanicOnError:
					panic(err)
				case ContinueOnError:
					// Handled later.
				case HTTPErrorOnError:
					httpError(rsp, err)
					return
				}
			}
		}

		// NOTE(review): at this point parts of the body (and status 200) may
		// already have been sent, so this error report is best-effort only;
		// see the httpError doc comment.
		if lastErr != nil {
			httpError(rsp, lastErr)
		}
	})

	if opts.Timeout <= 0 {
		return h
	}
	return http.TimeoutHandler(h, opts.Timeout, fmt.Sprintf(
		"Exceeded configured timeout of %v.\n",
		opts.Timeout,
	))
}

// InstrumentMetricHandler is usually used with an http.Handler returned by the
// HandlerFor function. It instruments the provided http.Handler with two
// metrics: A counter vector "promhttp_metric_handler_requests_total" to count
// scrapes partitioned by HTTP status code, and a gauge
// "promhttp_metric_handler_requests_in_flight" to track the number of
// simultaneous scrapes. This function idempotently registers collectors for
// both metrics with the provided Registerer. It panics if the registration
// fails.
The provided metrics are useful to see how many scrapes hit the 207// monitored target (which could be from different Prometheus servers or other 208// scrapers), and how often they overlap (which would result in more than one 209// scrape in flight at the same time). Note that the scrapes-in-flight gauge 210// will contain the scrape by which it is exposed, while the scrape counter will 211// only get incremented after the scrape is complete (as only then the status 212// code is known). For tracking scrape durations, use the 213// "scrape_duration_seconds" gauge created by the Prometheus server upon each 214// scrape. 215func InstrumentMetricHandler(reg prometheus.Registerer, handler http.Handler) http.Handler { 216 cnt := prometheus.NewCounterVec( 217 prometheus.CounterOpts{ 218 Name: "promhttp_metric_handler_requests_total", 219 Help: "Total number of scrapes by HTTP status code.", 220 }, 221 []string{"code"}, 222 ) 223 // Initialize the most likely HTTP status codes. 224 cnt.WithLabelValues("200") 225 cnt.WithLabelValues("500") 226 cnt.WithLabelValues("503") 227 if err := reg.Register(cnt); err != nil { 228 if are, ok := err.(prometheus.AlreadyRegisteredError); ok { 229 cnt = are.ExistingCollector.(*prometheus.CounterVec) 230 } else { 231 panic(err) 232 } 233 } 234 235 gge := prometheus.NewGauge(prometheus.GaugeOpts{ 236 Name: "promhttp_metric_handler_requests_in_flight", 237 Help: "Current number of scrapes being served.", 238 }) 239 if err := reg.Register(gge); err != nil { 240 if are, ok := err.(prometheus.AlreadyRegisteredError); ok { 241 gge = are.ExistingCollector.(prometheus.Gauge) 242 } else { 243 panic(err) 244 } 245 } 246 247 return InstrumentHandlerCounter(cnt, InstrumentHandlerInFlight(gge, handler)) 248} 249 250// HandlerErrorHandling defines how a Handler serving metrics will handle 251// errors. 252type HandlerErrorHandling int 253 254// These constants cause handlers serving metrics to behave as described if 255// errors are encountered. 
const (
	// Serve an HTTP status code 500 upon the first error
	// encountered. Report the error message in the body.
	HTTPErrorOnError HandlerErrorHandling = iota
	// Ignore errors and try to serve as many metrics as possible. However,
	// if no metrics can be served, serve an HTTP status code 500 and the
	// last error message in the body. Only use this in deliberate "best
	// effort" metrics collection scenarios. In this case, it is highly
	// recommended to provide other means of detecting errors: By setting an
	// ErrorLog in HandlerOpts, the errors are logged. By providing a
	// Registry in HandlerOpts, the exposed metrics include an error counter
	// "promhttp_metric_handler_errors_total", which can be used for
	// alerts.
	ContinueOnError
	// Panic upon the first error encountered (useful for "crash only" apps).
	PanicOnError
)

// Logger is the minimal interface HandlerOpts needs for logging. Note that
// log.Logger from the standard library implements this interface, and it is
// easy to implement by custom loggers, if they don't do so already anyway.
type Logger interface {
	Println(v ...interface{})
}

// HandlerOpts specifies options how to serve metrics via an http.Handler. The
// zero value of HandlerOpts is a reasonable default.
type HandlerOpts struct {
	// ErrorLog specifies an optional logger for errors collecting and
	// serving metrics. If nil, errors are not logged at all.
	ErrorLog Logger
	// ErrorHandling defines how errors are handled. Note that errors are
	// logged regardless of the configured ErrorHandling provided ErrorLog
	// is not nil.
	ErrorHandling HandlerErrorHandling
	// If Registry is not nil, it is used to register a metric
	// "promhttp_metric_handler_errors_total", partitioned by "cause". A
	// failed registration causes a panic. Note that this error counter is
	// different from the instrumentation you get from the various
	// InstrumentHandler... helpers. It counts errors that don't necessarily
	// result in a non-2xx HTTP status code. There are two typical cases:
	// (1) Encoding errors that only happen after streaming of the HTTP body
	// has already started (and the status code 200 has been sent). This
	// should only happen with custom collectors. (2) Collection errors with
	// no effect on the HTTP status code because ErrorHandling is set to
	// ContinueOnError.
	Registry prometheus.Registerer
	// If DisableCompression is true, the handler will never compress the
	// response, even if requested by the client.
	DisableCompression bool
	// The number of concurrent HTTP requests is limited to
	// MaxRequestsInFlight. Additional requests are responded to with 503
	// Service Unavailable and a suitable message in the body. If
	// MaxRequestsInFlight is 0 or negative, no limit is applied.
	MaxRequestsInFlight int
	// If handling a request takes longer than Timeout, it is responded to
	// with 503 Service Unavailable and a suitable message. No timeout is
	// applied if Timeout is 0 or negative. Note that with the current
	// implementation, reaching the timeout simply ends the HTTP request as
	// described above (and even that only if sending of the body hasn't
	// started yet), while the bulk work of gathering all the metrics keeps
	// running in the background (with the eventual result to be thrown
	// away). Until the implementation is improved, it is recommended to
	// implement a separate timeout in potentially slow Collectors.
	Timeout time.Duration
}

// gzipAccepted returns whether the client will accept gzip-encoded content.
324func gzipAccepted(header http.Header) bool { 325 a := header.Get(acceptEncodingHeader) 326 parts := strings.Split(a, ",") 327 for _, part := range parts { 328 part = strings.TrimSpace(part) 329 if part == "gzip" || strings.HasPrefix(part, "gzip;") { 330 return true 331 } 332 } 333 return false 334} 335 336// httpError removes any content-encoding header and then calls http.Error with 337// the provided error and http.StatusInternalServerErrer. Error contents is 338// supposed to be uncompressed plain text. However, same as with a plain 339// http.Error, any header settings will be void if the header has already been 340// sent. The error message will still be written to the writer, but it will 341// probably be of limited use. 342func httpError(rsp http.ResponseWriter, err error) { 343 rsp.Header().Del(contentEncodingHeader) 344 http.Error( 345 rsp, 346 "An error has occurred while serving metrics:\n\n"+err.Error(), 347 http.StatusInternalServerError, 348 ) 349} 350