1package configutil 2 3import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 "github.com/hashicorp/vault/sdk/helper/parseutil" 10 11 monitoring "cloud.google.com/go/monitoring/apiv3" 12 "github.com/armon/go-metrics" 13 "github.com/armon/go-metrics/circonus" 14 "github.com/armon/go-metrics/datadog" 15 "github.com/armon/go-metrics/prometheus" 16 stackdriver "github.com/google/go-metrics-stackdriver" 17 stackdrivervault "github.com/google/go-metrics-stackdriver/vault" 18 "github.com/hashicorp/go-multierror" 19 "github.com/hashicorp/hcl" 20 "github.com/hashicorp/hcl/hcl/ast" 21 "github.com/hashicorp/vault/helper/metricsutil" 22 "github.com/mitchellh/cli" 23 "google.golang.org/api/option" 24) 25 26const ( 27 PrometheusDefaultRetentionTime = 24 * time.Hour 28 UsageGaugeDefaultPeriod = 10 * time.Minute 29 MaximumGaugeCardinalityDefault = 500 30 LeaseMetricsEpsilonDefault = time.Hour 31 NumLeaseMetricsTimeBucketsDefault = 168 32) 33 34// Telemetry is the telemetry configuration for the server 35type Telemetry struct { 36 FoundKeys []string `hcl:",decodedFields"` 37 UnusedKeys UnusedKeyMap `hcl:",unusedKeyPositions"` 38 StatsiteAddr string `hcl:"statsite_address"` 39 StatsdAddr string `hcl:"statsd_address"` 40 41 DisableHostname bool `hcl:"disable_hostname"` 42 EnableHostnameLabel bool `hcl:"enable_hostname_label"` 43 MetricsPrefix string `hcl:"metrics_prefix"` 44 UsageGaugePeriod time.Duration 45 UsageGaugePeriodRaw interface{} `hcl:"usage_gauge_period,alias:UsageGaugePeriod"` 46 47 MaximumGaugeCardinality int `hcl:"maximum_gauge_cardinality"` 48 49 // Circonus: see https://github.com/circonus-labs/circonus-gometrics 50 // for more details on the various configuration options. 51 // Valid configuration combinations: 52 // - CirconusAPIToken 53 // metric management enabled (search for existing check or create a new one) 54 // - CirconusSubmissionUrl 55 // metric management disabled (use check with specified submission_url, 56 // broker must be using a public SSL certificate) 57 // - CirconusAPIToken + CirconusCheckSubmissionURL 58 // metric management enabled (use check with specified submission_url) 59 // - CirconusAPIToken + CirconusCheckID 60 // metric management enabled (use check with specified id) 61 62 // CirconusAPIToken is a valid API Token used to create/manage check. If provided, 63 // metric management is enabled. 64 // Default: none 65 CirconusAPIToken string `hcl:"circonus_api_token"` 66 // CirconusAPIApp is an app name associated with API token. 67 // Default: "consul" 68 CirconusAPIApp string `hcl:"circonus_api_app"` 69 // CirconusAPIURL is the base URL to use for contacting the Circonus API. 70 // Default: "https://api.circonus.com/v2" 71 CirconusAPIURL string `hcl:"circonus_api_url"` 72 // CirconusSubmissionInterval is the interval at which metrics are submitted to Circonus. 73 // Default: 10s 74 CirconusSubmissionInterval string `hcl:"circonus_submission_interval"` 75 // CirconusCheckSubmissionURL is the check.config.submission_url field from a 76 // previously created HTTPTRAP check. 77 // Default: none 78 CirconusCheckSubmissionURL string `hcl:"circonus_submission_url"` 79 // CirconusCheckID is the check id (not check bundle id) from a previously created 80 // HTTPTRAP check. The numeric portion of the check._cid field. 81 // Default: none 82 CirconusCheckID string `hcl:"circonus_check_id"` 83 // CirconusCheckForceMetricActivation will force enabling metrics, as they are encountered, 84 // if the metric already exists and is NOT active. If check management is enabled, the default 85 // behavior is to add new metrics as they are encountered. If the metric already exists in the 86 // check, it will *NOT* be activated. This setting overrides that behavior. 87 // Default: "false" 88 CirconusCheckForceMetricActivation string `hcl:"circonus_check_force_metric_activation"` 89 // CirconusCheckInstanceID serves to uniquely identify the metrics coming from this "instance". 90 // It can be used to maintain metric continuity with transient or ephemeral instances as 91 // they move around within an infrastructure. 92 // Default: hostname:app 93 CirconusCheckInstanceID string `hcl:"circonus_check_instance_id"` 94 // CirconusCheckSearchTag is a special tag which, when coupled with the instance id, helps to 95 // narrow down the search results when neither a Submission URL or Check ID is provided. 96 // Default: service:app (e.g. service:consul) 97 CirconusCheckSearchTag string `hcl:"circonus_check_search_tag"` 98 // CirconusCheckTags is a comma separated list of tags to apply to the check. Note that 99 // the value of CirconusCheckSearchTag will always be added to the check. 100 // Default: none 101 CirconusCheckTags string `hcl:"circonus_check_tags"` 102 // CirconusCheckDisplayName is the name for the check which will be displayed in the Circonus UI. 103 // Default: value of CirconusCheckInstanceID 104 CirconusCheckDisplayName string `hcl:"circonus_check_display_name"` 105 // CirconusBrokerID is an explicit broker to use when creating a new check. The numeric portion 106 // of broker._cid. If metric management is enabled and neither a Submission URL nor Check ID 107 // is provided, an attempt will be made to search for an existing check using Instance ID and 108 // Search Tag. If one is not found, a new HTTPTRAP check will be created. 109 // Default: use Select Tag if provided, otherwise, a random Enterprise Broker associated 110 // with the specified API token or the default Circonus Broker. 111 // Default: none 112 CirconusBrokerID string `hcl:"circonus_broker_id"` 113 // CirconusBrokerSelectTag is a special tag which will be used to select a broker when 114 // a Broker ID is not provided. The best use of this is to as a hint for which broker 115 // should be used based on *where* this particular instance is running. 116 // (e.g. a specific geo location or datacenter, dc:sfo) 117 // Default: none 118 CirconusBrokerSelectTag string `hcl:"circonus_broker_select_tag"` 119 120 // Dogstats: 121 // DogStatsdAddr is the address of a dogstatsd instance. If provided, 122 // metrics will be sent to that instance 123 DogStatsDAddr string `hcl:"dogstatsd_addr"` 124 125 // DogStatsdTags are the global tags that should be sent with each packet to dogstatsd 126 // It is a list of strings, where each string looks like "my_tag_name:my_tag_value" 127 DogStatsDTags []string `hcl:"dogstatsd_tags"` 128 129 // Prometheus: 130 // PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0. 131 // Default: 24h 132 PrometheusRetentionTime time.Duration `hcl:"-"` 133 PrometheusRetentionTimeRaw interface{} `hcl:"prometheus_retention_time"` 134 135 // Stackdriver: 136 // StackdriverProjectID is the project to publish stackdriver metrics to. 137 StackdriverProjectID string `hcl:"stackdriver_project_id"` 138 // StackdriverLocation is the GCP or AWS region of the monitored resource. 139 StackdriverLocation string `hcl:"stackdriver_location"` 140 // StackdriverNamespace is the namespace identifier, such as a cluster name. 141 StackdriverNamespace string `hcl:"stackdriver_namespace"` 142 // StackdriverDebugLogs will write additional stackdriver related debug logs to stderr. 143 StackdriverDebugLogs bool `hcl:"stackdriver_debug_logs"` 144 145 // How often metrics for lease expiry will be aggregated 146 LeaseMetricsEpsilon time.Duration 147 LeaseMetricsEpsilonRaw interface{} `hcl:"lease_metrics_epsilon"` 148 149 // Number of buckets by time that will be used in lease aggregation 150 NumLeaseMetricsTimeBuckets int `hcl:"num_lease_metrics_buckets"` 151 152 // Whether or not telemetry should add labels for namespaces 153 LeaseMetricsNameSpaceLabels bool `hcl:"add_lease_metrics_namespace_labels"` 154 155 // FilterDefault is the default for whether to allow a metric that's not 156 // covered by the prefix filter. 157 FilterDefault *bool `hcl:"filter_default"` 158 159 // PrefixFilter is a list of filter rules to apply for allowing 160 // or blocking metrics by prefix. 161 PrefixFilter []string `hcl:"prefix_filter"` 162} 163 164func (t *Telemetry) Validate(source string) []ConfigError { 165 return ValidateUnusedFields(t.UnusedKeys, source) 166} 167 168func (t *Telemetry) GoString() string { 169 return fmt.Sprintf("*%#v", *t) 170} 171 172func parseTelemetry(result *SharedConfig, list *ast.ObjectList) error { 173 if len(list.Items) > 1 { 174 return fmt.Errorf("only one 'telemetry' block is permitted") 175 } 176 177 // Get our one item 178 item := list.Items[0] 179 180 if result.Telemetry == nil { 181 result.Telemetry = &Telemetry{} 182 } 183 184 if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil { 185 return multierror.Prefix(err, "telemetry:") 186 } 187 188 if result.Telemetry.PrometheusRetentionTimeRaw != nil { 189 var err error 190 if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil { 191 return err 192 } 193 result.Telemetry.PrometheusRetentionTimeRaw = nil 194 } else { 195 result.Telemetry.PrometheusRetentionTime = PrometheusDefaultRetentionTime 196 } 197 198 if result.Telemetry.UsageGaugePeriodRaw != nil { 199 if result.Telemetry.UsageGaugePeriodRaw == "none" { 200 result.Telemetry.UsageGaugePeriod = 0 201 } else { 202 var err error 203 if result.Telemetry.UsageGaugePeriod, err = parseutil.ParseDurationSecond(result.Telemetry.UsageGaugePeriodRaw); err != nil { 204 return err 205 } 206 result.Telemetry.UsageGaugePeriodRaw = nil 207 } 208 } else { 209 result.Telemetry.UsageGaugePeriod = UsageGaugeDefaultPeriod 210 } 211 212 if result.Telemetry.MaximumGaugeCardinality == 0 { 213 result.Telemetry.MaximumGaugeCardinality = MaximumGaugeCardinalityDefault 214 } 215 216 if result.Telemetry.LeaseMetricsEpsilonRaw != nil { 217 if result.Telemetry.LeaseMetricsEpsilonRaw == "none" { 218 result.Telemetry.LeaseMetricsEpsilonRaw = 0 219 } else { 220 var err error 221 if result.Telemetry.LeaseMetricsEpsilon, err = parseutil.ParseDurationSecond(result.Telemetry.LeaseMetricsEpsilonRaw); err != nil { 222 return err 223 } 224 result.Telemetry.LeaseMetricsEpsilonRaw = nil 225 } 226 } else { 227 result.Telemetry.LeaseMetricsEpsilon = LeaseMetricsEpsilonDefault 228 } 229 230 if result.Telemetry.NumLeaseMetricsTimeBuckets == 0 { 231 result.Telemetry.NumLeaseMetricsTimeBuckets = NumLeaseMetricsTimeBucketsDefault 232 } 233 234 return nil 235} 236 237type SetupTelemetryOpts struct { 238 Config *Telemetry 239 Ui cli.Ui 240 ServiceName string 241 DisplayName string 242 UserAgent string 243 ClusterName string 244} 245 246// SetupTelemetry is used to setup the telemetry sub-systems and returns the 247// in-memory sink to be used in http configuration 248func SetupTelemetry(opts *SetupTelemetryOpts) (*metrics.InmemSink, *metricsutil.ClusterMetricSink, bool, error) { 249 if opts == nil { 250 return nil, nil, false, errors.New("nil opts passed into SetupTelemetry") 251 } 252 253 if opts.Config == nil { 254 opts.Config = &Telemetry{} 255 } 256 257 /* Setup telemetry 258 Aggregate on 10 second intervals for 1 minute. Expose the 259 metrics over stderr when there is a SIGUSR1 received. 260 */ 261 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 262 metrics.DefaultInmemSignal(inm) 263 264 if opts.Config.MetricsPrefix != "" { 265 opts.ServiceName = opts.Config.MetricsPrefix 266 } 267 268 metricsConf := metrics.DefaultConfig(opts.ServiceName) 269 metricsConf.EnableHostname = !opts.Config.DisableHostname 270 metricsConf.EnableHostnameLabel = opts.Config.EnableHostnameLabel 271 if opts.Config.FilterDefault != nil { 272 metricsConf.FilterDefault = *opts.Config.FilterDefault 273 } 274 275 // Configure the statsite sink 276 var fanout metrics.FanoutSink 277 var prometheusEnabled bool 278 279 // Configure the Prometheus sink 280 if opts.Config.PrometheusRetentionTime != 0 { 281 prometheusEnabled = true 282 prometheusOpts := prometheus.PrometheusOpts{ 283 Expiration: opts.Config.PrometheusRetentionTime, 284 } 285 286 sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts) 287 if err != nil { 288 return nil, nil, false, err 289 } 290 fanout = append(fanout, sink) 291 } 292 293 if opts.Config.StatsiteAddr != "" { 294 sink, err := metrics.NewStatsiteSink(opts.Config.StatsiteAddr) 295 if err != nil { 296 return nil, nil, false, err 297 } 298 fanout = append(fanout, sink) 299 } 300 301 // Configure the statsd sink 302 if opts.Config.StatsdAddr != "" { 303 sink, err := metrics.NewStatsdSink(opts.Config.StatsdAddr) 304 if err != nil { 305 return nil, nil, false, err 306 } 307 fanout = append(fanout, sink) 308 } 309 310 // Configure the Circonus sink 311 if opts.Config.CirconusAPIToken != "" || opts.Config.CirconusCheckSubmissionURL != "" { 312 cfg := &circonus.Config{} 313 cfg.Interval = opts.Config.CirconusSubmissionInterval 314 cfg.CheckManager.API.TokenKey = opts.Config.CirconusAPIToken 315 cfg.CheckManager.API.TokenApp = opts.Config.CirconusAPIApp 316 cfg.CheckManager.API.URL = opts.Config.CirconusAPIURL 317 cfg.CheckManager.Check.SubmissionURL = opts.Config.CirconusCheckSubmissionURL 318 cfg.CheckManager.Check.ID = opts.Config.CirconusCheckID 319 cfg.CheckManager.Check.ForceMetricActivation = opts.Config.CirconusCheckForceMetricActivation 320 cfg.CheckManager.Check.InstanceID = opts.Config.CirconusCheckInstanceID 321 cfg.CheckManager.Check.SearchTag = opts.Config.CirconusCheckSearchTag 322 cfg.CheckManager.Check.DisplayName = opts.Config.CirconusCheckDisplayName 323 cfg.CheckManager.Check.Tags = opts.Config.CirconusCheckTags 324 cfg.CheckManager.Broker.ID = opts.Config.CirconusBrokerID 325 cfg.CheckManager.Broker.SelectTag = opts.Config.CirconusBrokerSelectTag 326 327 if cfg.CheckManager.API.TokenApp == "" { 328 cfg.CheckManager.API.TokenApp = opts.ServiceName 329 } 330 331 if cfg.CheckManager.Check.DisplayName == "" { 332 cfg.CheckManager.Check.DisplayName = opts.DisplayName 333 } 334 335 if cfg.CheckManager.Check.SearchTag == "" { 336 cfg.CheckManager.Check.SearchTag = fmt.Sprintf("service:%s", opts.ServiceName) 337 } 338 339 sink, err := circonus.NewCirconusSink(cfg) 340 if err != nil { 341 return nil, nil, false, err 342 } 343 sink.Start() 344 fanout = append(fanout, sink) 345 } 346 347 if opts.Config.DogStatsDAddr != "" { 348 var tags []string 349 350 if opts.Config.DogStatsDTags != nil { 351 tags = opts.Config.DogStatsDTags 352 } 353 354 sink, err := datadog.NewDogStatsdSink(opts.Config.DogStatsDAddr, metricsConf.HostName) 355 if err != nil { 356 return nil, nil, false, fmt.Errorf("failed to start DogStatsD sink: %w", err) 357 } 358 sink.SetTags(tags) 359 fanout = append(fanout, sink) 360 } 361 362 // Configure the stackdriver sink 363 if opts.Config.StackdriverProjectID != "" { 364 client, err := monitoring.NewMetricClient(context.Background(), option.WithUserAgent(opts.UserAgent)) 365 if err != nil { 366 return nil, nil, false, fmt.Errorf("Failed to create stackdriver client: %v", err) 367 } 368 sink := stackdriver.NewSink(client, &stackdriver.Config{ 369 LabelExtractor: stackdrivervault.Extractor, 370 Bucketer: stackdrivervault.Bucketer, 371 ProjectID: opts.Config.StackdriverProjectID, 372 Location: opts.Config.StackdriverLocation, 373 Namespace: opts.Config.StackdriverNamespace, 374 DebugLogs: opts.Config.StackdriverDebugLogs, 375 }) 376 fanout = append(fanout, sink) 377 } 378 379 // Initialize the global sink 380 if len(fanout) > 1 { 381 // Hostname enabled will create poor quality metrics name for prometheus 382 if !opts.Config.DisableHostname { 383 opts.Ui.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.") 384 } 385 } else { 386 metricsConf.EnableHostname = false 387 } 388 fanout = append(fanout, inm) 389 globalMetrics, err := metrics.NewGlobal(metricsConf, fanout) 390 if err != nil { 391 return nil, nil, false, err 392 } 393 394 // Intialize a wrapper around the global sink; this will be passed to Core 395 // and to any backend. 396 wrapper := metricsutil.NewClusterMetricSink(opts.ClusterName, globalMetrics) 397 wrapper.MaxGaugeCardinality = opts.Config.MaximumGaugeCardinality 398 wrapper.GaugeInterval = opts.Config.UsageGaugePeriod 399 wrapper.TelemetryConsts.LeaseMetricsEpsilon = opts.Config.LeaseMetricsEpsilon 400 wrapper.TelemetryConsts.LeaseMetricsNameSpaceLabels = opts.Config.LeaseMetricsNameSpaceLabels 401 wrapper.TelemetryConsts.NumLeaseMetricsTimeBuckets = opts.Config.NumLeaseMetricsTimeBuckets 402 403 // Parse the metric filters 404 telemetryAllowedPrefixes, telemetryBlockedPrefixes, err := parsePrefixFilter(opts.Config.PrefixFilter) 405 406 if err != nil { 407 return nil, nil, false, err 408 } 409 410 metrics.UpdateFilter(telemetryAllowedPrefixes, telemetryBlockedPrefixes) 411 return inm, wrapper, prometheusEnabled, nil 412} 413 414func parsePrefixFilter(prefixFilters []string) ([]string, []string, error) { 415 var telemetryAllowedPrefixes, telemetryBlockedPrefixes []string 416 417 for _, rule := range prefixFilters { 418 if rule == "" { 419 return nil, nil, fmt.Errorf("Cannot have empty filter rule in prefix_filter") 420 } 421 switch rule[0] { 422 case '+': 423 telemetryAllowedPrefixes = append(telemetryAllowedPrefixes, rule[1:]) 424 case '-': 425 telemetryBlockedPrefixes = append(telemetryBlockedPrefixes, rule[1:]) 426 default: 427 return nil, nil, fmt.Errorf("Filter rule must begin with either '+' or '-': %q", rule) 428 } 429 } 430 return telemetryAllowedPrefixes, telemetryBlockedPrefixes, nil 431} 432