1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 package org.apache.hadoop.hdfs.server.datanode.metrics;
19 
20 import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId;
21 
22 import org.apache.hadoop.classification.InterfaceAudience;
23 import org.apache.hadoop.conf.Configuration;
24 import org.apache.hadoop.hdfs.DFSConfigKeys;
25 import org.apache.hadoop.hdfs.DFSUtil;
26 import org.apache.hadoop.metrics2.MetricsSystem;
27 import org.apache.hadoop.metrics2.annotation.Metric;
28 import org.apache.hadoop.metrics2.annotation.Metrics;
29 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
30 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
31 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
32 import org.apache.hadoop.metrics2.lib.MutableQuantiles;
33 import org.apache.hadoop.metrics2.lib.MutableRate;
34 import org.apache.hadoop.metrics2.source.JvmMetrics;
35 
36 /**
37  *
38  * This class is for maintaining  the various DataNode statistics
39  * and publishing them through the metrics interfaces.
40  * This also registers the JMX MBean for RPC.
41  * <p>
42  * This class has a number of metrics variables that are publicly accessible;
43  * these variables (objects) have methods to update their values;
44  *  for example:
45  *  <p> {@link #blocksRead}.inc()
46  *
47  */
48 @InterfaceAudience.Private
49 @Metrics(about="DataNode metrics", context="dfs")
50 public class DataNodeMetrics {
51 
52   @Metric MutableCounterLong bytesWritten;
53   @Metric("Milliseconds spent writing")
54   MutableCounterLong totalWriteTime;
55   @Metric MutableCounterLong bytesRead;
56   @Metric("Milliseconds spent reading")
57   MutableCounterLong totalReadTime;
58   @Metric MutableCounterLong blocksWritten;
59   @Metric MutableCounterLong blocksRead;
60   @Metric MutableCounterLong blocksReplicated;
61   @Metric MutableCounterLong blocksRemoved;
62   @Metric MutableCounterLong blocksVerified;
63   @Metric MutableCounterLong blockVerificationFailures;
64   @Metric MutableCounterLong blocksCached;
65   @Metric MutableCounterLong blocksUncached;
66   @Metric MutableCounterLong readsFromLocalClient;
67   @Metric MutableCounterLong readsFromRemoteClient;
68   @Metric MutableCounterLong writesFromLocalClient;
69   @Metric MutableCounterLong writesFromRemoteClient;
70   @Metric MutableCounterLong blocksGetLocalPathInfo;
71   @Metric("Bytes read by remote client")
72   MutableCounterLong remoteBytesRead;
73   @Metric("Bytes written by remote client")
74   MutableCounterLong remoteBytesWritten;
75 
76   // RamDisk metrics on read/write
77   @Metric MutableCounterLong ramDiskBlocksWrite;
78   @Metric MutableCounterLong ramDiskBlocksWriteFallback;
79   @Metric MutableCounterLong ramDiskBytesWrite;
80   @Metric MutableCounterLong ramDiskBlocksReadHits;
81 
82   // RamDisk metrics on eviction
83   @Metric MutableCounterLong ramDiskBlocksEvicted;
84   @Metric MutableCounterLong ramDiskBlocksEvictedWithoutRead;
85   @Metric MutableRate        ramDiskBlocksEvictionWindowMs;
86   final MutableQuantiles[]   ramDiskBlocksEvictionWindowMsQuantiles;
87 
88 
89   // RamDisk metrics on lazy persist
90   @Metric MutableCounterLong ramDiskBlocksLazyPersisted;
91   @Metric MutableCounterLong ramDiskBlocksDeletedBeforeLazyPersisted;
92   @Metric MutableCounterLong ramDiskBytesLazyPersisted;
93   @Metric MutableRate        ramDiskBlocksLazyPersistWindowMs;
94   final MutableQuantiles[]   ramDiskBlocksLazyPersistWindowMsQuantiles;
95 
96   @Metric MutableCounterLong fsyncCount;
97 
98   @Metric MutableCounterLong volumeFailures;
99 
100   @Metric("Count of network errors on the datanode")
101   MutableCounterLong datanodeNetworkErrors;
102 
103   @Metric MutableRate readBlockOp;
104   @Metric MutableRate writeBlockOp;
105   @Metric MutableRate blockChecksumOp;
106   @Metric MutableRate copyBlockOp;
107   @Metric MutableRate replaceBlockOp;
108   @Metric MutableRate heartbeats;
109   @Metric MutableRate blockReports;
110   @Metric MutableRate incrementalBlockReports;
111   @Metric MutableRate cacheReports;
112   @Metric MutableRate packetAckRoundTripTimeNanos;
113   final MutableQuantiles[] packetAckRoundTripTimeNanosQuantiles;
114 
115   @Metric MutableRate flushNanos;
116   final MutableQuantiles[] flushNanosQuantiles;
117 
118   @Metric MutableRate fsyncNanos;
119   final MutableQuantiles[] fsyncNanosQuantiles;
120 
121   @Metric MutableRate sendDataPacketBlockedOnNetworkNanos;
122   final MutableQuantiles[] sendDataPacketBlockedOnNetworkNanosQuantiles;
123   @Metric MutableRate sendDataPacketTransferNanos;
124   final MutableQuantiles[] sendDataPacketTransferNanosQuantiles;
125 
126   final MetricsRegistry registry = new MetricsRegistry("datanode");
127   final String name;
128   JvmMetrics jvmMetrics = null;
129 
DataNodeMetrics(String name, String sessionId, int[] intervals, final JvmMetrics jvmMetrics)130   public DataNodeMetrics(String name, String sessionId, int[] intervals,
131       final JvmMetrics jvmMetrics) {
132     this.name = name;
133     this.jvmMetrics = jvmMetrics;
134     registry.tag(SessionId, sessionId);
135 
136     final int len = intervals.length;
137     packetAckRoundTripTimeNanosQuantiles = new MutableQuantiles[len];
138     flushNanosQuantiles = new MutableQuantiles[len];
139     fsyncNanosQuantiles = new MutableQuantiles[len];
140     sendDataPacketBlockedOnNetworkNanosQuantiles = new MutableQuantiles[len];
141     sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
142     ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len];
143     ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len];
144 
145     for (int i = 0; i < len; i++) {
146       int interval = intervals[i];
147       packetAckRoundTripTimeNanosQuantiles[i] = registry.newQuantiles(
148           "packetAckRoundTripTimeNanos" + interval + "s",
149           "Packet Ack RTT in ns", "ops", "latency", interval);
150       flushNanosQuantiles[i] = registry.newQuantiles(
151           "flushNanos" + interval + "s",
152           "Disk flush latency in ns", "ops", "latency", interval);
153       fsyncNanosQuantiles[i] = registry.newQuantiles(
154           "fsyncNanos" + interval + "s", "Disk fsync latency in ns",
155           "ops", "latency", interval);
156       sendDataPacketBlockedOnNetworkNanosQuantiles[i] = registry.newQuantiles(
157           "sendDataPacketBlockedOnNetworkNanos" + interval + "s",
158           "Time blocked on network while sending a packet in ns",
159           "ops", "latency", interval);
160       sendDataPacketTransferNanosQuantiles[i] = registry.newQuantiles(
161           "sendDataPacketTransferNanos" + interval + "s",
162           "Time reading from disk and writing to network while sending " +
163           "a packet in ns", "ops", "latency", interval);
164       ramDiskBlocksEvictionWindowMsQuantiles[i] = registry.newQuantiles(
165           "ramDiskBlocksEvictionWindows" + interval + "s",
166           "Time between the RamDisk block write and eviction in ms",
167           "ops", "latency", interval);
168       ramDiskBlocksLazyPersistWindowMsQuantiles[i] = registry.newQuantiles(
169           "ramDiskBlocksLazyPersistWindows" + interval + "s",
170           "Time between the RamDisk block write and disk persist in ms",
171           "ops", "latency", interval);
172     }
173   }
174 
create(Configuration conf, String dnName)175   public static DataNodeMetrics create(Configuration conf, String dnName) {
176     String sessionId = conf.get(DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY);
177     MetricsSystem ms = DefaultMetricsSystem.instance();
178     JvmMetrics jm = JvmMetrics.create("DataNode", sessionId, ms);
179     String name = "DataNodeActivity-"+ (dnName.isEmpty()
180         ? "UndefinedDataNodeName"+ DFSUtil.getRandom().nextInt()
181             : dnName.replace(':', '-'));
182 
183     // Percentile measurement is off by default, by watching no intervals
184     int[] intervals =
185         conf.getInts(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY);
186 
187     return ms.register(name, null, new DataNodeMetrics(name, sessionId,
188         intervals, jm));
189   }
190 
name()191   public String name() { return name; }
192 
getJvmMetrics()193   public JvmMetrics getJvmMetrics() {
194     return jvmMetrics;
195   }
196 
addHeartbeat(long latency)197   public void addHeartbeat(long latency) {
198     heartbeats.add(latency);
199   }
200 
addBlockReport(long latency)201   public void addBlockReport(long latency) {
202     blockReports.add(latency);
203   }
204 
addIncrementalBlockReport(long latency)205   public void addIncrementalBlockReport(long latency) {
206     incrementalBlockReports.add(latency);
207   }
208 
addCacheReport(long latency)209   public void addCacheReport(long latency) {
210     cacheReports.add(latency);
211   }
212 
incrBlocksReplicated(int delta)213   public void incrBlocksReplicated(int delta) {
214     blocksReplicated.incr(delta);
215   }
216 
incrBlocksWritten()217   public void incrBlocksWritten() {
218     blocksWritten.incr();
219   }
220 
incrBlocksRemoved(int delta)221   public void incrBlocksRemoved(int delta) {
222     blocksRemoved.incr(delta);
223   }
224 
incrBytesWritten(int delta)225   public void incrBytesWritten(int delta) {
226     bytesWritten.incr(delta);
227   }
228 
incrBlockVerificationFailures()229   public void incrBlockVerificationFailures() {
230     blockVerificationFailures.incr();
231   }
232 
incrBlocksVerified()233   public void incrBlocksVerified() {
234     blocksVerified.incr();
235   }
236 
237 
incrBlocksCached(int delta)238   public void incrBlocksCached(int delta) {
239     blocksCached.incr(delta);
240   }
241 
incrBlocksUncached(int delta)242   public void incrBlocksUncached(int delta) {
243     blocksUncached.incr(delta);
244   }
245 
addReadBlockOp(long latency)246   public void addReadBlockOp(long latency) {
247     readBlockOp.add(latency);
248   }
249 
addWriteBlockOp(long latency)250   public void addWriteBlockOp(long latency) {
251     writeBlockOp.add(latency);
252   }
253 
addReplaceBlockOp(long latency)254   public void addReplaceBlockOp(long latency) {
255     replaceBlockOp.add(latency);
256   }
257 
addCopyBlockOp(long latency)258   public void addCopyBlockOp(long latency) {
259     copyBlockOp.add(latency);
260   }
261 
addBlockChecksumOp(long latency)262   public void addBlockChecksumOp(long latency) {
263     blockChecksumOp.add(latency);
264   }
265 
incrBytesRead(int delta)266   public void incrBytesRead(int delta) {
267     bytesRead.incr(delta);
268   }
269 
incrBlocksRead()270   public void incrBlocksRead() {
271     blocksRead.incr();
272   }
273 
incrFsyncCount()274   public void incrFsyncCount() {
275     fsyncCount.incr();
276   }
277 
incrTotalWriteTime(long timeTaken)278   public void incrTotalWriteTime(long timeTaken) {
279     totalWriteTime.incr(timeTaken);
280   }
281 
incrTotalReadTime(long timeTaken)282   public void incrTotalReadTime(long timeTaken) {
283     totalReadTime.incr(timeTaken);
284   }
285 
286 
addPacketAckRoundTripTimeNanos(long latencyNanos)287   public void addPacketAckRoundTripTimeNanos(long latencyNanos) {
288     packetAckRoundTripTimeNanos.add(latencyNanos);
289     for (MutableQuantiles q : packetAckRoundTripTimeNanosQuantiles) {
290       q.add(latencyNanos);
291     }
292   }
293 
addFlushNanos(long latencyNanos)294   public void addFlushNanos(long latencyNanos) {
295     flushNanos.add(latencyNanos);
296     for (MutableQuantiles q : flushNanosQuantiles) {
297       q.add(latencyNanos);
298     }
299   }
300 
addFsyncNanos(long latencyNanos)301   public void addFsyncNanos(long latencyNanos) {
302     fsyncNanos.add(latencyNanos);
303     for (MutableQuantiles q : fsyncNanosQuantiles) {
304       q.add(latencyNanos);
305     }
306   }
307 
shutdown()308   public void shutdown() {
309     DefaultMetricsSystem.shutdown();
310   }
311 
incrWritesFromClient(boolean local, long size)312   public void incrWritesFromClient(boolean local, long size) {
313     if(local) {
314       writesFromLocalClient.incr();
315     } else {
316       writesFromRemoteClient.incr();
317       remoteBytesWritten.incr(size);
318     }
319   }
320 
incrReadsFromClient(boolean local, long size)321   public void incrReadsFromClient(boolean local, long size) {
322 
323     if (local) {
324       readsFromLocalClient.incr();
325     } else {
326       readsFromRemoteClient.incr();
327       remoteBytesRead.incr(size);
328     }
329   }
330 
incrVolumeFailures()331   public void incrVolumeFailures() {
332     volumeFailures.incr();
333   }
334 
incrDatanodeNetworkErrors()335   public void incrDatanodeNetworkErrors() {
336     datanodeNetworkErrors.incr();
337   }
338 
339   /** Increment for getBlockLocalPathInfo calls */
incrBlocksGetLocalPathInfo()340   public void incrBlocksGetLocalPathInfo() {
341     blocksGetLocalPathInfo.incr();
342   }
343 
addSendDataPacketBlockedOnNetworkNanos(long latencyNanos)344   public void addSendDataPacketBlockedOnNetworkNanos(long latencyNanos) {
345     sendDataPacketBlockedOnNetworkNanos.add(latencyNanos);
346     for (MutableQuantiles q : sendDataPacketBlockedOnNetworkNanosQuantiles) {
347       q.add(latencyNanos);
348     }
349   }
350 
addSendDataPacketTransferNanos(long latencyNanos)351   public void addSendDataPacketTransferNanos(long latencyNanos) {
352     sendDataPacketTransferNanos.add(latencyNanos);
353     for (MutableQuantiles q : sendDataPacketTransferNanosQuantiles) {
354       q.add(latencyNanos);
355     }
356   }
357 
incrRamDiskBlocksWrite()358   public void incrRamDiskBlocksWrite() {
359     ramDiskBlocksWrite.incr();
360   }
361 
incrRamDiskBlocksWriteFallback()362   public void incrRamDiskBlocksWriteFallback() {
363     ramDiskBlocksWriteFallback.incr();
364   }
365 
addRamDiskBytesWrite(long bytes)366   public void addRamDiskBytesWrite(long bytes) {
367     ramDiskBytesWrite.incr(bytes);
368   }
369 
incrRamDiskBlocksReadHits()370   public void incrRamDiskBlocksReadHits() {
371     ramDiskBlocksReadHits.incr();
372   }
373 
incrRamDiskBlocksEvicted()374   public void incrRamDiskBlocksEvicted() {
375     ramDiskBlocksEvicted.incr();
376   }
377 
incrRamDiskBlocksEvictedWithoutRead()378   public void incrRamDiskBlocksEvictedWithoutRead() {
379     ramDiskBlocksEvictedWithoutRead.incr();
380   }
381 
addRamDiskBlocksEvictionWindowMs(long latencyMs)382   public void addRamDiskBlocksEvictionWindowMs(long latencyMs) {
383     ramDiskBlocksEvictionWindowMs.add(latencyMs);
384     for (MutableQuantiles q : ramDiskBlocksEvictionWindowMsQuantiles) {
385       q.add(latencyMs);
386     }
387   }
388 
incrRamDiskBlocksLazyPersisted()389   public void incrRamDiskBlocksLazyPersisted() {
390     ramDiskBlocksLazyPersisted.incr();
391   }
392 
incrRamDiskBlocksDeletedBeforeLazyPersisted()393   public void incrRamDiskBlocksDeletedBeforeLazyPersisted() {
394     ramDiskBlocksDeletedBeforeLazyPersisted.incr();
395   }
396 
incrRamDiskBytesLazyPersisted(long bytes)397   public void incrRamDiskBytesLazyPersisted(long bytes) {
398     ramDiskBytesLazyPersisted.incr(bytes);
399   }
400 
addRamDiskBlocksLazyPersistWindowMs(long latencyMs)401   public void addRamDiskBlocksLazyPersistWindowMs(long latencyMs) {
402     ramDiskBlocksLazyPersistWindowMs.add(latencyMs);
403     for (MutableQuantiles q : ramDiskBlocksLazyPersistWindowMsQuantiles) {
404       q.add(latencyMs);
405     }
406   }
407 }
408