1package configutil
2
3import (
4	"context"
5	"errors"
6	"fmt"
7	"time"
8
9	"github.com/hashicorp/vault/sdk/helper/parseutil"
10
11	monitoring "cloud.google.com/go/monitoring/apiv3"
12	"github.com/armon/go-metrics"
13	"github.com/armon/go-metrics/circonus"
14	"github.com/armon/go-metrics/datadog"
15	"github.com/armon/go-metrics/prometheus"
16	stackdriver "github.com/google/go-metrics-stackdriver"
17	stackdrivervault "github.com/google/go-metrics-stackdriver/vault"
18	"github.com/hashicorp/go-multierror"
19	"github.com/hashicorp/hcl"
20	"github.com/hashicorp/hcl/hcl/ast"
21	"github.com/hashicorp/vault/helper/metricsutil"
22	"github.com/mitchellh/cli"
23	"google.golang.org/api/option"
24)
25
// Defaults that parseTelemetry applies when the matching telemetry setting is
// absent from the configuration (or, for the two integer settings, is 0):
//   - PrometheusDefaultRetentionTime: prometheus_retention_time
//   - UsageGaugeDefaultPeriod: usage_gauge_period
//   - MaximumGaugeCardinalityDefault: maximum_gauge_cardinality
//   - LeaseMetricsEpsilonDefault: lease_metrics_epsilon
//   - NumLeaseMetricsTimeBucketsDefault: num_lease_metrics_buckets
const (
	PrometheusDefaultRetentionTime    = 24 * time.Hour
	UsageGaugeDefaultPeriod           = 10 * time.Minute
	MaximumGaugeCardinalityDefault    = 500
	LeaseMetricsEpsilonDefault        = time.Hour
	NumLeaseMetricsTimeBucketsDefault = 168
)
33
// Telemetry is the telemetry configuration for the server. It is decoded from
// the 'telemetry' HCL block by parseTelemetry and consumed by SetupTelemetry.
type Telemetry struct {
	// FoundKeys and UnusedKeys are bookkeeping fields filled in by the HCL
	// decoder according to their decodedFields / unusedKeyPositions tags;
	// Validate reports on UnusedKeys. StatsiteAddr and StatsdAddr are the
	// addresses of a statsite and a statsd instance: SetupTelemetry adds the
	// corresponding sink when the address is non-empty.
	FoundKeys    []string     `hcl:",decodedFields"`
	UnusedKeys   UnusedKeyMap `hcl:",unusedKeyPositions"`
	StatsiteAddr string       `hcl:"statsite_address"`
	StatsdAddr   string       `hcl:"statsd_address"`

	// DisableHostname is inverted into the go-metrics EnableHostname setting
	// and EnableHostnameLabel is passed through to the go-metrics setting of
	// the same name. MetricsPrefix, when non-empty, replaces the service name
	// handed to go-metrics. UsageGaugePeriod is derived from
	// UsageGaugePeriodRaw by parseTelemetry: the string "none" yields 0, and
	// an unset value yields UsageGaugeDefaultPeriod.
	DisableHostname     bool   `hcl:"disable_hostname"`
	EnableHostnameLabel bool   `hcl:"enable_hostname_label"`
	MetricsPrefix       string `hcl:"metrics_prefix"`
	UsageGaugePeriod    time.Duration
	UsageGaugePeriodRaw interface{} `hcl:"usage_gauge_period,alias:UsageGaugePeriod"`

	// MaximumGaugeCardinality is copied to the cluster metric sink's
	// MaxGaugeCardinality. parseTelemetry substitutes
	// MaximumGaugeCardinalityDefault when it is 0.
	MaximumGaugeCardinality int `hcl:"maximum_gauge_cardinality"`

	// Circonus: see https://github.com/circonus-labs/circonus-gometrics
	// for more details on the various configuration options.
	// SetupTelemetry only configures the Circonus sink when CirconusAPIToken
	// or CirconusCheckSubmissionURL is set.
	// Valid configuration combinations:
	//    - CirconusAPIToken
	//      metric management enabled (search for existing check or create a new one)
	//    - CirconusCheckSubmissionURL
	//      metric management disabled (use check with specified submission_url,
	//      broker must be using a public SSL certificate)
	//    - CirconusAPIToken + CirconusCheckSubmissionURL
	//      metric management enabled (use check with specified submission_url)
	//    - CirconusAPIToken + CirconusCheckID
	//      metric management enabled (use check with specified id)

	// CirconusAPIToken is a valid API Token used to create/manage check. If provided,
	// metric management is enabled.
	// Default: none
	CirconusAPIToken string `hcl:"circonus_api_token"`
	// CirconusAPIApp is an app name associated with API token.
	// Default: the ServiceName given to SetupTelemetry
	CirconusAPIApp string `hcl:"circonus_api_app"`
	// CirconusAPIURL is the base URL to use for contacting the Circonus API.
	// Default: "https://api.circonus.com/v2"
	CirconusAPIURL string `hcl:"circonus_api_url"`
	// CirconusSubmissionInterval is the interval at which metrics are submitted to Circonus.
	// Default: 10s
	CirconusSubmissionInterval string `hcl:"circonus_submission_interval"`
	// CirconusCheckSubmissionURL is the check.config.submission_url field from a
	// previously created HTTPTRAP check.
	// Default: none
	CirconusCheckSubmissionURL string `hcl:"circonus_submission_url"`
	// CirconusCheckID is the check id (not check bundle id) from a previously created
	// HTTPTRAP check. The numeric portion of the check._cid field.
	// Default: none
	CirconusCheckID string `hcl:"circonus_check_id"`
	// CirconusCheckForceMetricActivation will force enabling metrics, as they are encountered,
	// if the metric already exists and is NOT active. If check management is enabled, the default
	// behavior is to add new metrics as they are encountered. If the metric already exists in the
	// check, it will *NOT* be activated. This setting overrides that behavior.
	// Default: "false"
	CirconusCheckForceMetricActivation string `hcl:"circonus_check_force_metric_activation"`
	// CirconusCheckInstanceID serves to uniquely identify the metrics coming from this "instance".
	// It can be used to maintain metric continuity with transient or ephemeral instances as
	// they move around within an infrastructure.
	// Default: hostname:app
	CirconusCheckInstanceID string `hcl:"circonus_check_instance_id"`
	// CirconusCheckSearchTag is a special tag which, when coupled with the instance id, helps to
	// narrow down the search results when neither a Submission URL nor a Check ID is provided.
	// Default: "service:" followed by the ServiceName given to SetupTelemetry
	CirconusCheckSearchTag string `hcl:"circonus_check_search_tag"`
	// CirconusCheckTags is a comma separated list of tags to apply to the check. Note that
	// the value of CirconusCheckSearchTag will always be added to the check.
	// Default: none
	CirconusCheckTags string `hcl:"circonus_check_tags"`
	// CirconusCheckDisplayName is the name for the check which will be displayed in the Circonus UI.
	// Default: the DisplayName given to SetupTelemetry. NOTE(review): if that is
	// empty too, the Circonus library presumably falls back to the value of
	// CirconusCheckInstanceID - confirm against the library.
	CirconusCheckDisplayName string `hcl:"circonus_check_display_name"`
	// CirconusBrokerID is an explicit broker to use when creating a new check. The numeric portion
	// of broker._cid. If metric management is enabled and neither a Submission URL nor Check ID
	// is provided, an attempt will be made to search for an existing check using Instance ID and
	// Search Tag. If one is not found, a new HTTPTRAP check will be created.
	// Default: none. When unset, the broker is chosen using the Select Tag if provided,
	// otherwise a random Enterprise Broker associated with the specified API token or
	// the default Circonus Broker is used.
	CirconusBrokerID string `hcl:"circonus_broker_id"`
	// CirconusBrokerSelectTag is a special tag which will be used to select a broker when
	// a Broker ID is not provided. The best use of this is as a hint for which broker
	// should be used based on *where* this particular instance is running.
	// (e.g. a specific geo location or datacenter, dc:sfo)
	// Default: none
	CirconusBrokerSelectTag string `hcl:"circonus_broker_select_tag"`

	// DogStatsD:
	// DogStatsDAddr is the address of a dogstatsd instance. If provided,
	// metrics will be sent to that instance
	DogStatsDAddr string `hcl:"dogstatsd_addr"`

	// DogStatsDTags are the global tags that should be sent with each packet to dogstatsd
	// It is a list of strings, where each string looks like "my_tag_name:my_tag_value"
	DogStatsDTags []string `hcl:"dogstatsd_tags"`

	// Prometheus:
	// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
	// It is parsed from PrometheusRetentionTimeRaw by parseTelemetry. SetupTelemetry
	// skips the Prometheus sink entirely when it is 0.
	// Default: 24h
	PrometheusRetentionTime    time.Duration `hcl:"-"`
	PrometheusRetentionTimeRaw interface{}   `hcl:"prometheus_retention_time"`

	// Stackdriver:
	// StackdriverProjectID is the project to publish stackdriver metrics to.
	// SetupTelemetry only configures the Stackdriver sink when it is non-empty.
	StackdriverProjectID string `hcl:"stackdriver_project_id"`
	// StackdriverLocation is the GCP or AWS region of the monitored resource.
	StackdriverLocation string `hcl:"stackdriver_location"`
	// StackdriverNamespace is the namespace identifier, such as a cluster name.
	StackdriverNamespace string `hcl:"stackdriver_namespace"`
	// StackdriverDebugLogs will write additional stackdriver related debug logs to stderr.
	StackdriverDebugLogs bool `hcl:"stackdriver_debug_logs"`

	// How often metrics for lease expiry will be aggregated. Parsed from
	// LeaseMetricsEpsilonRaw by parseTelemetry; LeaseMetricsEpsilonDefault is
	// used when the raw value is unset.
	LeaseMetricsEpsilon    time.Duration
	LeaseMetricsEpsilonRaw interface{} `hcl:"lease_metrics_epsilon"`

	// Number of buckets by time that will be used in lease aggregation.
	// parseTelemetry substitutes NumLeaseMetricsTimeBucketsDefault when it is 0.
	NumLeaseMetricsTimeBuckets int `hcl:"num_lease_metrics_buckets"`

	// Whether or not telemetry should add labels for namespaces
	LeaseMetricsNameSpaceLabels bool `hcl:"add_lease_metrics_namespace_labels"`

	// FilterDefault is the default for whether to allow a metric that's not
	// covered by the prefix filter. When nil, SetupTelemetry leaves the
	// go-metrics default untouched.
	FilterDefault *bool `hcl:"filter_default"`

	// PrefixFilter is a list of filter rules to apply for allowing
	// or blocking metrics by prefix. Each rule is a prefix preceded by '+'
	// (allow) or '-' (block); any other form is rejected by SetupTelemetry.
	PrefixFilter []string `hcl:"prefix_filter"`
}
163
164func (t *Telemetry) Validate(source string) []ConfigError {
165	return ValidateUnusedFields(t.UnusedKeys, source)
166}
167
168func (t *Telemetry) GoString() string {
169	return fmt.Sprintf("*%#v", *t)
170}
171
172func parseTelemetry(result *SharedConfig, list *ast.ObjectList) error {
173	if len(list.Items) > 1 {
174		return fmt.Errorf("only one 'telemetry' block is permitted")
175	}
176
177	// Get our one item
178	item := list.Items[0]
179
180	if result.Telemetry == nil {
181		result.Telemetry = &Telemetry{}
182	}
183
184	if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil {
185		return multierror.Prefix(err, "telemetry:")
186	}
187
188	if result.Telemetry.PrometheusRetentionTimeRaw != nil {
189		var err error
190		if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil {
191			return err
192		}
193		result.Telemetry.PrometheusRetentionTimeRaw = nil
194	} else {
195		result.Telemetry.PrometheusRetentionTime = PrometheusDefaultRetentionTime
196	}
197
198	if result.Telemetry.UsageGaugePeriodRaw != nil {
199		if result.Telemetry.UsageGaugePeriodRaw == "none" {
200			result.Telemetry.UsageGaugePeriod = 0
201		} else {
202			var err error
203			if result.Telemetry.UsageGaugePeriod, err = parseutil.ParseDurationSecond(result.Telemetry.UsageGaugePeriodRaw); err != nil {
204				return err
205			}
206			result.Telemetry.UsageGaugePeriodRaw = nil
207		}
208	} else {
209		result.Telemetry.UsageGaugePeriod = UsageGaugeDefaultPeriod
210	}
211
212	if result.Telemetry.MaximumGaugeCardinality == 0 {
213		result.Telemetry.MaximumGaugeCardinality = MaximumGaugeCardinalityDefault
214	}
215
216	if result.Telemetry.LeaseMetricsEpsilonRaw != nil {
217		if result.Telemetry.LeaseMetricsEpsilonRaw == "none" {
218			result.Telemetry.LeaseMetricsEpsilonRaw = 0
219		} else {
220			var err error
221			if result.Telemetry.LeaseMetricsEpsilon, err = parseutil.ParseDurationSecond(result.Telemetry.LeaseMetricsEpsilonRaw); err != nil {
222				return err
223			}
224			result.Telemetry.LeaseMetricsEpsilonRaw = nil
225		}
226	} else {
227		result.Telemetry.LeaseMetricsEpsilon = LeaseMetricsEpsilonDefault
228	}
229
230	if result.Telemetry.NumLeaseMetricsTimeBuckets == 0 {
231		result.Telemetry.NumLeaseMetricsTimeBuckets = NumLeaseMetricsTimeBucketsDefault
232	}
233
234	return nil
235}
236
// SetupTelemetryOpts carries the inputs to SetupTelemetry.
//
//   - Config is the parsed telemetry configuration; when nil, SetupTelemetry
//     substitutes an empty Telemetry.
//   - Ui receives the warning SetupTelemetry emits about disable_hostname.
//   - ServiceName is the service name handed to go-metrics; it is overwritten
//     by Config.MetricsPrefix when that is set, and is also the fallback for
//     the Circonus API app name and search tag.
//   - DisplayName is the fallback Circonus check display name.
//   - UserAgent is the user agent set on the Stackdriver monitoring client.
//   - ClusterName is passed to the cluster metric sink wrapper.
type SetupTelemetryOpts struct {
	Config      *Telemetry
	Ui          cli.Ui
	ServiceName string
	DisplayName string
	UserAgent   string
	ClusterName string
}
245
246// SetupTelemetry is used to setup the telemetry sub-systems and returns the
247// in-memory sink to be used in http configuration
248func SetupTelemetry(opts *SetupTelemetryOpts) (*metrics.InmemSink, *metricsutil.ClusterMetricSink, bool, error) {
249	if opts == nil {
250		return nil, nil, false, errors.New("nil opts passed into SetupTelemetry")
251	}
252
253	if opts.Config == nil {
254		opts.Config = &Telemetry{}
255	}
256
257	/* Setup telemetry
258	Aggregate on 10 second intervals for 1 minute. Expose the
259	metrics over stderr when there is a SIGUSR1 received.
260	*/
261	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
262	metrics.DefaultInmemSignal(inm)
263
264	if opts.Config.MetricsPrefix != "" {
265		opts.ServiceName = opts.Config.MetricsPrefix
266	}
267
268	metricsConf := metrics.DefaultConfig(opts.ServiceName)
269	metricsConf.EnableHostname = !opts.Config.DisableHostname
270	metricsConf.EnableHostnameLabel = opts.Config.EnableHostnameLabel
271	if opts.Config.FilterDefault != nil {
272		metricsConf.FilterDefault = *opts.Config.FilterDefault
273	}
274
275	// Configure the statsite sink
276	var fanout metrics.FanoutSink
277	var prometheusEnabled bool
278
279	// Configure the Prometheus sink
280	if opts.Config.PrometheusRetentionTime != 0 {
281		prometheusEnabled = true
282		prometheusOpts := prometheus.PrometheusOpts{
283			Expiration: opts.Config.PrometheusRetentionTime,
284		}
285
286		sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
287		if err != nil {
288			return nil, nil, false, err
289		}
290		fanout = append(fanout, sink)
291	}
292
293	if opts.Config.StatsiteAddr != "" {
294		sink, err := metrics.NewStatsiteSink(opts.Config.StatsiteAddr)
295		if err != nil {
296			return nil, nil, false, err
297		}
298		fanout = append(fanout, sink)
299	}
300
301	// Configure the statsd sink
302	if opts.Config.StatsdAddr != "" {
303		sink, err := metrics.NewStatsdSink(opts.Config.StatsdAddr)
304		if err != nil {
305			return nil, nil, false, err
306		}
307		fanout = append(fanout, sink)
308	}
309
310	// Configure the Circonus sink
311	if opts.Config.CirconusAPIToken != "" || opts.Config.CirconusCheckSubmissionURL != "" {
312		cfg := &circonus.Config{}
313		cfg.Interval = opts.Config.CirconusSubmissionInterval
314		cfg.CheckManager.API.TokenKey = opts.Config.CirconusAPIToken
315		cfg.CheckManager.API.TokenApp = opts.Config.CirconusAPIApp
316		cfg.CheckManager.API.URL = opts.Config.CirconusAPIURL
317		cfg.CheckManager.Check.SubmissionURL = opts.Config.CirconusCheckSubmissionURL
318		cfg.CheckManager.Check.ID = opts.Config.CirconusCheckID
319		cfg.CheckManager.Check.ForceMetricActivation = opts.Config.CirconusCheckForceMetricActivation
320		cfg.CheckManager.Check.InstanceID = opts.Config.CirconusCheckInstanceID
321		cfg.CheckManager.Check.SearchTag = opts.Config.CirconusCheckSearchTag
322		cfg.CheckManager.Check.DisplayName = opts.Config.CirconusCheckDisplayName
323		cfg.CheckManager.Check.Tags = opts.Config.CirconusCheckTags
324		cfg.CheckManager.Broker.ID = opts.Config.CirconusBrokerID
325		cfg.CheckManager.Broker.SelectTag = opts.Config.CirconusBrokerSelectTag
326
327		if cfg.CheckManager.API.TokenApp == "" {
328			cfg.CheckManager.API.TokenApp = opts.ServiceName
329		}
330
331		if cfg.CheckManager.Check.DisplayName == "" {
332			cfg.CheckManager.Check.DisplayName = opts.DisplayName
333		}
334
335		if cfg.CheckManager.Check.SearchTag == "" {
336			cfg.CheckManager.Check.SearchTag = fmt.Sprintf("service:%s", opts.ServiceName)
337		}
338
339		sink, err := circonus.NewCirconusSink(cfg)
340		if err != nil {
341			return nil, nil, false, err
342		}
343		sink.Start()
344		fanout = append(fanout, sink)
345	}
346
347	if opts.Config.DogStatsDAddr != "" {
348		var tags []string
349
350		if opts.Config.DogStatsDTags != nil {
351			tags = opts.Config.DogStatsDTags
352		}
353
354		sink, err := datadog.NewDogStatsdSink(opts.Config.DogStatsDAddr, metricsConf.HostName)
355		if err != nil {
356			return nil, nil, false, fmt.Errorf("failed to start DogStatsD sink: %w", err)
357		}
358		sink.SetTags(tags)
359		fanout = append(fanout, sink)
360	}
361
362	// Configure the stackdriver sink
363	if opts.Config.StackdriverProjectID != "" {
364		client, err := monitoring.NewMetricClient(context.Background(), option.WithUserAgent(opts.UserAgent))
365		if err != nil {
366			return nil, nil, false, fmt.Errorf("Failed to create stackdriver client: %v", err)
367		}
368		sink := stackdriver.NewSink(client, &stackdriver.Config{
369			LabelExtractor: stackdrivervault.Extractor,
370			Bucketer:       stackdrivervault.Bucketer,
371			ProjectID:      opts.Config.StackdriverProjectID,
372			Location:       opts.Config.StackdriverLocation,
373			Namespace:      opts.Config.StackdriverNamespace,
374			DebugLogs:      opts.Config.StackdriverDebugLogs,
375		})
376		fanout = append(fanout, sink)
377	}
378
379	// Initialize the global sink
380	if len(fanout) > 1 {
381		// Hostname enabled will create poor quality metrics name for prometheus
382		if !opts.Config.DisableHostname {
383			opts.Ui.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.")
384		}
385	} else {
386		metricsConf.EnableHostname = false
387	}
388	fanout = append(fanout, inm)
389	globalMetrics, err := metrics.NewGlobal(metricsConf, fanout)
390	if err != nil {
391		return nil, nil, false, err
392	}
393
394	// Intialize a wrapper around the global sink; this will be passed to Core
395	// and to any backend.
396	wrapper := metricsutil.NewClusterMetricSink(opts.ClusterName, globalMetrics)
397	wrapper.MaxGaugeCardinality = opts.Config.MaximumGaugeCardinality
398	wrapper.GaugeInterval = opts.Config.UsageGaugePeriod
399	wrapper.TelemetryConsts.LeaseMetricsEpsilon = opts.Config.LeaseMetricsEpsilon
400	wrapper.TelemetryConsts.LeaseMetricsNameSpaceLabels = opts.Config.LeaseMetricsNameSpaceLabels
401	wrapper.TelemetryConsts.NumLeaseMetricsTimeBuckets = opts.Config.NumLeaseMetricsTimeBuckets
402
403	// Parse the metric filters
404	telemetryAllowedPrefixes, telemetryBlockedPrefixes, err := parsePrefixFilter(opts.Config.PrefixFilter)
405
406	if err != nil {
407		return nil, nil, false, err
408	}
409
410	metrics.UpdateFilter(telemetryAllowedPrefixes, telemetryBlockedPrefixes)
411	return inm, wrapper, prometheusEnabled, nil
412}
413
// parsePrefixFilter splits the prefix_filter rules into allowed and blocked
// metric prefixes. Each rule must start with '+' (allow) or '-' (block); the
// remainder of the rule is the prefix. It returns the allowed prefixes, the
// blocked prefixes, and an error for the first empty or malformed rule, in
// which case both slices are nil.
func parsePrefixFilter(prefixFilters []string) ([]string, []string, error) {
	var allowed, blocked []string

	for _, filter := range prefixFilters {
		if len(filter) == 0 {
			return nil, nil, fmt.Errorf("Cannot have empty filter rule in prefix_filter")
		}

		// The first byte selects the list; everything after it is the prefix.
		sign, prefix := filter[0], filter[1:]
		if sign == '+' {
			allowed = append(allowed, prefix)
			continue
		}
		if sign == '-' {
			blocked = append(blocked, prefix)
			continue
		}
		return nil, nil, fmt.Errorf("Filter rule must begin with either '+' or '-': %q", filter)
	}

	return allowed, blocked, nil
}
432