1// Copyright 2015 The etcd Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package etcdserver 16 17import ( 18 goruntime "runtime" 19 "time" 20 21 "github.com/coreos/etcd/pkg/runtime" 22 "github.com/coreos/etcd/version" 23 "github.com/prometheus/client_golang/prometheus" 24) 25 26var ( 27 hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{ 28 Namespace: "etcd", 29 Subsystem: "server", 30 Name: "has_leader", 31 Help: "Whether or not a leader exists. 1 is existence, 0 is not.", 32 }) 33 isLeader = prometheus.NewGauge(prometheus.GaugeOpts{ 34 Namespace: "etcd", 35 Subsystem: "server", 36 Name: "is_leader", 37 Help: "Whether or not this member is a leader. 1 if is, 0 otherwise.", 38 }) 39 leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{ 40 Namespace: "etcd", 41 Subsystem: "server", 42 Name: "leader_changes_seen_total", 43 Help: "The number of leader changes seen.", 44 }) 45 heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{ 46 Namespace: "etcd", 47 Subsystem: "server", 48 Name: "heartbeat_send_failures_total", 49 Help: "The total number of leader heartbeat send failures (likely overloaded from slow disk).", 50 }) 51 slowApplies = prometheus.NewCounter(prometheus.CounterOpts{ 52 Namespace: "etcd", 53 Subsystem: "server", 54 Name: "slow_apply_total", 55 Help: "The total number of slow apply requests (likely overloaded from slow disk).", 56 }) 57 proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{ 58 Namespace: "etcd", 59 Subsystem: "server", 60 Name: "proposals_committed_total", 61 Help: "The total number of consensus proposals committed.", 62 }) 63 proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{ 64 Namespace: "etcd", 65 Subsystem: "server", 66 Name: "proposals_applied_total", 67 Help: "The total number of consensus proposals applied.", 68 }) 69 proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{ 70 Namespace: "etcd", 71 Subsystem: "server", 72 Name: "proposals_pending", 73 Help: "The current number of pending proposals to commit.", 74 }) 75 proposalsFailed = prometheus.NewCounter(prometheus.CounterOpts{ 76 Namespace: "etcd", 77 Subsystem: "server", 78 Name: "proposals_failed_total", 79 Help: "The total number of failed proposals seen.", 80 }) 81 slowReadIndex = prometheus.NewCounter(prometheus.CounterOpts{ 82 Namespace: "etcd", 83 Subsystem: "server", 84 Name: "slow_read_indexes_total", 85 Help: "The total number of pending read indexes not in sync with leader's or timed out read index requests.", 86 }) 87 readIndexFailed = prometheus.NewCounter(prometheus.CounterOpts{ 88 Namespace: "etcd", 89 Subsystem: "server", 90 Name: "read_indexes_failed_total", 91 Help: "The total number of failed read indexes seen.", 92 }) 93 quotaBackendBytes = prometheus.NewGauge(prometheus.GaugeOpts{ 94 Namespace: "etcd", 95 Subsystem: "server", 96 Name: "quota_backend_bytes", 97 Help: "Current backend storage quota size in bytes.", 98 }) 99 currentVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 100 Namespace: "etcd", 101 Subsystem: "server", 102 Name: "version", 103 Help: "Which version is running. 1 for 'server_version' label with current version.", 104 }, 105 []string{"server_version"}) 106 currentGoVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 107 Namespace: "etcd", 108 Subsystem: "server", 109 Name: "go_version", 110 Help: "Which Go version server is running with. 1 for 'server_go_version' label with current version.", 111 }, 112 []string{"server_go_version"}) 113 serverID = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 114 Namespace: "etcd", 115 Subsystem: "server", 116 Name: "id", 117 Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.", 118 }, 119 []string{"server_id"}) 120) 121 122func init() { 123 prometheus.MustRegister(hasLeader) 124 prometheus.MustRegister(isLeader) 125 prometheus.MustRegister(leaderChanges) 126 prometheus.MustRegister(heartbeatSendFailures) 127 prometheus.MustRegister(slowApplies) 128 prometheus.MustRegister(proposalsCommitted) 129 prometheus.MustRegister(proposalsApplied) 130 prometheus.MustRegister(proposalsPending) 131 prometheus.MustRegister(proposalsFailed) 132 prometheus.MustRegister(slowReadIndex) 133 prometheus.MustRegister(readIndexFailed) 134 prometheus.MustRegister(quotaBackendBytes) 135 prometheus.MustRegister(currentVersion) 136 prometheus.MustRegister(currentGoVersion) 137 prometheus.MustRegister(serverID) 138 139 currentVersion.With(prometheus.Labels{ 140 "server_version": version.Version, 141 }).Set(1) 142 currentGoVersion.With(prometheus.Labels{ 143 "server_go_version": goruntime.Version(), 144 }).Set(1) 145} 146 147func monitorFileDescriptor(done <-chan struct{}) { 148 ticker := time.NewTicker(5 * time.Second) 149 defer ticker.Stop() 150 for { 151 used, err := runtime.FDUsage() 152 if err != nil { 153 plog.Errorf("cannot monitor file descriptor usage (%v)", err) 154 return 155 } 156 limit, err := runtime.FDLimit() 157 if err != nil { 158 plog.Errorf("cannot monitor file descriptor usage (%v)", err) 159 return 160 } 161 if used >= limit/5*4 { 162 plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit) 163 } 164 select { 165 case <-ticker.C: 166 case <-done: 167 return 168 } 169 } 170} 171