1// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package etcdserver
16
17import (
18	goruntime "runtime"
19	"time"
20
21	"github.com/coreos/etcd/pkg/runtime"
22	"github.com/coreos/etcd/version"
23	"github.com/prometheus/client_golang/prometheus"
24)
25
26var (
27	hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{
28		Namespace: "etcd",
29		Subsystem: "server",
30		Name:      "has_leader",
31		Help:      "Whether or not a leader exists. 1 is existence, 0 is not.",
32	})
33	isLeader = prometheus.NewGauge(prometheus.GaugeOpts{
34		Namespace: "etcd",
35		Subsystem: "server",
36		Name:      "is_leader",
37		Help:      "Whether or not this member is a leader. 1 if is, 0 otherwise.",
38	})
39	leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{
40		Namespace: "etcd",
41		Subsystem: "server",
42		Name:      "leader_changes_seen_total",
43		Help:      "The number of leader changes seen.",
44	})
45	heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{
46		Namespace: "etcd",
47		Subsystem: "server",
48		Name:      "heartbeat_send_failures_total",
49		Help:      "The total number of leader heartbeat send failures (likely overloaded from slow disk).",
50	})
51	slowApplies = prometheus.NewCounter(prometheus.CounterOpts{
52		Namespace: "etcd",
53		Subsystem: "server",
54		Name:      "slow_apply_total",
55		Help:      "The total number of slow apply requests (likely overloaded from slow disk).",
56	})
57	proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{
58		Namespace: "etcd",
59		Subsystem: "server",
60		Name:      "proposals_committed_total",
61		Help:      "The total number of consensus proposals committed.",
62	})
63	proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{
64		Namespace: "etcd",
65		Subsystem: "server",
66		Name:      "proposals_applied_total",
67		Help:      "The total number of consensus proposals applied.",
68	})
69	proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{
70		Namespace: "etcd",
71		Subsystem: "server",
72		Name:      "proposals_pending",
73		Help:      "The current number of pending proposals to commit.",
74	})
75	proposalsFailed = prometheus.NewCounter(prometheus.CounterOpts{
76		Namespace: "etcd",
77		Subsystem: "server",
78		Name:      "proposals_failed_total",
79		Help:      "The total number of failed proposals seen.",
80	})
81	leaseExpired = prometheus.NewCounter(prometheus.CounterOpts{
82		Namespace: "etcd_debugging",
83		Subsystem: "server",
84		Name:      "lease_expired_total",
85		Help:      "The total number of expired leases.",
86	})
87	slowReadIndex = prometheus.NewCounter(prometheus.CounterOpts{
88		Namespace: "etcd",
89		Subsystem: "server",
90		Name:      "slow_read_indexes_total",
91		Help:      "The total number of pending read indexes not in sync with leader's or timed out read index requests.",
92	})
93	readIndexFailed = prometheus.NewCounter(prometheus.CounterOpts{
94		Namespace: "etcd",
95		Subsystem: "server",
96		Name:      "read_indexes_failed_total",
97		Help:      "The total number of failed read indexes seen.",
98	})
99	quotaBackendBytes = prometheus.NewGauge(prometheus.GaugeOpts{
100		Namespace: "etcd",
101		Subsystem: "server",
102		Name:      "quota_backend_bytes",
103		Help:      "Current backend storage quota size in bytes.",
104	})
105	currentVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
106		Namespace: "etcd",
107		Subsystem: "server",
108		Name:      "version",
109		Help:      "Which version is running. 1 for 'server_version' label with current version.",
110	},
111		[]string{"server_version"})
112	currentGoVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
113		Namespace: "etcd",
114		Subsystem: "server",
115		Name:      "go_version",
116		Help:      "Which Go version server is running with. 1 for 'server_go_version' label with current version.",
117	},
118		[]string{"server_go_version"})
119	serverID = prometheus.NewGaugeVec(prometheus.GaugeOpts{
120		Namespace: "etcd",
121		Subsystem: "server",
122		Name:      "id",
123		Help:      "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
124	},
125		[]string{"server_id"})
126)
127
128func init() {
129	prometheus.MustRegister(hasLeader)
130	prometheus.MustRegister(isLeader)
131	prometheus.MustRegister(leaderChanges)
132	prometheus.MustRegister(heartbeatSendFailures)
133	prometheus.MustRegister(slowApplies)
134	prometheus.MustRegister(proposalsCommitted)
135	prometheus.MustRegister(proposalsApplied)
136	prometheus.MustRegister(proposalsPending)
137	prometheus.MustRegister(proposalsFailed)
138	prometheus.MustRegister(leaseExpired)
139	prometheus.MustRegister(slowReadIndex)
140	prometheus.MustRegister(readIndexFailed)
141	prometheus.MustRegister(quotaBackendBytes)
142	prometheus.MustRegister(currentVersion)
143	prometheus.MustRegister(currentGoVersion)
144	prometheus.MustRegister(serverID)
145
146	currentVersion.With(prometheus.Labels{
147		"server_version": version.Version,
148	}).Set(1)
149	currentGoVersion.With(prometheus.Labels{
150		"server_go_version": goruntime.Version(),
151	}).Set(1)
152}
153
154func monitorFileDescriptor(done <-chan struct{}) {
155	ticker := time.NewTicker(5 * time.Second)
156	defer ticker.Stop()
157	for {
158		used, err := runtime.FDUsage()
159		if err != nil {
160			plog.Errorf("cannot monitor file descriptor usage (%v)", err)
161			return
162		}
163		limit, err := runtime.FDLimit()
164		if err != nil {
165			plog.Errorf("cannot monitor file descriptor usage (%v)", err)
166			return
167		}
168		if used >= limit/5*4 {
169			plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit)
170		}
171		select {
172		case <-ticker.C:
173		case <-done:
174			return
175		}
176	}
177}
178