1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.regionserver; 19 20 import java.util.Collection; 21 import java.util.List; 22 import java.util.concurrent.ScheduledExecutorService; 23 import java.util.concurrent.TimeUnit; 24 25 import org.apache.commons.lang.StringUtils; 26 import org.apache.commons.logging.Log; 27 import org.apache.commons.logging.LogFactory; 28 import org.apache.hadoop.hbase.classification.InterfaceAudience; 29 import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 30 import org.apache.hadoop.hbase.HConstants; 31 import org.apache.hadoop.hbase.HDFSBlocksDistribution; 32 import org.apache.hadoop.hbase.HRegionInfo; 33 import org.apache.hadoop.hbase.ServerName; 34 import org.apache.hadoop.hbase.io.hfile.BlockCache; 35 import org.apache.hadoop.hbase.io.hfile.CacheConfig; 36 import org.apache.hadoop.hbase.io.hfile.CacheStats; 37 import org.apache.hadoop.hbase.wal.BoundedRegionGroupingProvider; 38 import org.apache.hadoop.hbase.wal.DefaultWALProvider; 39 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 40 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; 41 import org.apache.hadoop.metrics2.MetricsExecutor; 42 43 /** 44 * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system. 45 */ 46 @InterfaceAudience.Private 47 class MetricsRegionServerWrapperImpl 48 implements MetricsRegionServerWrapper { 49 50 private static final Log LOG = LogFactory.getLog(MetricsRegionServerWrapperImpl.class); 51 52 private final HRegionServer regionServer; 53 54 private BlockCache blockCache; 55 56 private volatile long numStores = 0; 57 private volatile long numWALFiles = 0; 58 private volatile long walFileSize = 0; 59 private volatile long numStoreFiles = 0; 60 private volatile long memstoreSize = 0; 61 private volatile long storeFileSize = 0; 62 private volatile double requestsPerSecond = 0.0; 63 private volatile long readRequestsCount = 0; 64 private volatile long writeRequestsCount = 0; 65 private volatile long checkAndMutateChecksFailed = 0; 66 private volatile long checkAndMutateChecksPassed = 0; 67 private volatile long storefileIndexSize = 0; 68 private volatile long totalStaticIndexSize = 0; 69 private volatile long totalStaticBloomSize = 0; 70 private volatile long numMutationsWithoutWAL = 0; 71 private volatile long dataInMemoryWithoutWAL = 0; 72 private volatile double percentFileLocal = 0; 73 private volatile double percentFileLocalSecondaryRegions = 0; 74 private volatile long flushedCellsCount = 0; 75 private volatile long compactedCellsCount = 0; 76 private volatile long majorCompactedCellsCount = 0; 77 private volatile long flushedCellsSize = 0; 78 private volatile long compactedCellsSize = 0; 79 private volatile long majorCompactedCellsSize = 0; 80 private volatile long blockedRequestsCount = 0L; 81 82 private CacheStats cacheStats; 83 private ScheduledExecutorService executor; 84 private Runnable runnable; 85 private long period; 86 MetricsRegionServerWrapperImpl(final HRegionServer regionServer)87 public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { 88 this.regionServer = regionServer; 89 initBlockCache(); 90 91 this.period = 92 regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD, 93 HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD); 94 95 this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor(); 96 this.runnable = new RegionServerMetricsWrapperRunnable(); 97 this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period, 98 TimeUnit.MILLISECONDS); 99 100 if (LOG.isInfoEnabled()) { 101 LOG.info("Computing regionserver metrics every " + this.period + " milliseconds"); 102 } 103 } 104 105 /** 106 * It's possible that due to threading the block cache could not be initialized 107 * yet (testing multiple region servers in one jvm). So we need to try and initialize 108 * the blockCache and cacheStats reference multiple times until we succeed. 109 */ initBlockCache()110 private synchronized void initBlockCache() { 111 CacheConfig cacheConfig = this.regionServer.cacheConfig; 112 if (cacheConfig != null && this.blockCache == null) { 113 this.blockCache = cacheConfig.getBlockCache(); 114 } 115 116 if (this.blockCache != null && this.cacheStats == null) { 117 this.cacheStats = blockCache.getStats(); 118 } 119 } 120 121 @Override getClusterId()122 public String getClusterId() { 123 return regionServer.getClusterId(); 124 } 125 126 @Override getStartCode()127 public long getStartCode() { 128 return regionServer.getStartcode(); 129 } 130 131 @Override getZookeeperQuorum()132 public String getZookeeperQuorum() { 133 ZooKeeperWatcher zk = regionServer.getZooKeeper(); 134 if (zk == null) { 135 return ""; 136 } 137 return zk.getQuorum(); 138 } 139 140 @Override getCoprocessors()141 public String getCoprocessors() { 142 String[] coprocessors = regionServer.getRegionServerCoprocessors(); 143 if (coprocessors == null || coprocessors.length == 0) { 144 return ""; 145 } 146 return StringUtils.join(coprocessors, ", "); 147 } 148 149 @Override getServerName()150 public String getServerName() { 151 ServerName serverName = regionServer.getServerName(); 152 if (serverName == null) { 153 return ""; 154 } 155 return serverName.getServerName(); 156 } 157 158 @Override getNumOnlineRegions()159 public long getNumOnlineRegions() { 160 Collection<Region> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext(); 161 if (onlineRegionsLocalContext == null) { 162 return 0; 163 } 164 return onlineRegionsLocalContext.size(); 165 } 166 167 @Override getTotalRequestCount()168 public long getTotalRequestCount() { 169 return regionServer.rpcServices.requestCount.get(); 170 } 171 172 @Override getSplitQueueSize()173 public int getSplitQueueSize() { 174 if (this.regionServer.compactSplitThread == null) { 175 return 0; 176 } 177 return this.regionServer.compactSplitThread.getSplitQueueSize(); 178 } 179 180 @Override getCompactionQueueSize()181 public int getCompactionQueueSize() { 182 //The thread could be zero. if so assume there is no queue. 183 if (this.regionServer.compactSplitThread == null) { 184 return 0; 185 } 186 return this.regionServer.compactSplitThread.getCompactionQueueSize(); 187 } 188 189 @Override getSmallCompactionQueueSize()190 public int getSmallCompactionQueueSize() { 191 //The thread could be zero. if so assume there is no queue. 192 if (this.regionServer.compactSplitThread == null) { 193 return 0; 194 } 195 return this.regionServer.compactSplitThread.getSmallCompactionQueueSize(); 196 } 197 198 @Override getLargeCompactionQueueSize()199 public int getLargeCompactionQueueSize() { 200 //The thread could be zero. if so assume there is no queue. 201 if (this.regionServer.compactSplitThread == null) { 202 return 0; 203 } 204 return this.regionServer.compactSplitThread.getLargeCompactionQueueSize(); 205 } 206 207 @Override getFlushQueueSize()208 public int getFlushQueueSize() { 209 //If there is no flusher there should be no queue. 210 if (this.regionServer.cacheFlusher == null) { 211 return 0; 212 } 213 return this.regionServer.cacheFlusher.getFlushQueueSize(); 214 } 215 216 @Override getBlockCacheCount()217 public long getBlockCacheCount() { 218 if (this.blockCache == null) { 219 return 0; 220 } 221 return this.blockCache.getBlockCount(); 222 } 223 224 @Override getBlockCacheSize()225 public long getBlockCacheSize() { 226 if (this.blockCache == null) { 227 return 0; 228 } 229 return this.blockCache.getCurrentSize(); 230 } 231 232 @Override getBlockCacheFreeSize()233 public long getBlockCacheFreeSize() { 234 if (this.blockCache == null) { 235 return 0; 236 } 237 return this.blockCache.getFreeSize(); 238 } 239 240 @Override getBlockCacheHitCount()241 public long getBlockCacheHitCount() { 242 if (this.cacheStats == null) { 243 return 0; 244 } 245 return this.cacheStats.getHitCount(); 246 } 247 248 @Override getBlockCachePrimaryHitCount()249 public long getBlockCachePrimaryHitCount() { 250 if (this.cacheStats == null) { 251 return 0; 252 } 253 return this.cacheStats.getPrimaryHitCount(); 254 } 255 256 @Override getBlockCacheMissCount()257 public long getBlockCacheMissCount() { 258 if (this.cacheStats == null) { 259 return 0; 260 } 261 return this.cacheStats.getMissCount(); 262 } 263 264 @Override getBlockCachePrimaryMissCount()265 public long getBlockCachePrimaryMissCount() { 266 if (this.cacheStats == null) { 267 return 0; 268 } 269 return this.cacheStats.getPrimaryMissCount(); 270 } 271 272 @Override getBlockCacheEvictedCount()273 public long getBlockCacheEvictedCount() { 274 if (this.cacheStats == null) { 275 return 0; 276 } 277 return this.cacheStats.getEvictedCount(); 278 } 279 280 @Override getBlockCachePrimaryEvictedCount()281 public long getBlockCachePrimaryEvictedCount() { 282 if (this.cacheStats == null) { 283 return 0; 284 } 285 return this.cacheStats.getPrimaryEvictedCount(); 286 } 287 288 @Override getBlockCacheHitPercent()289 public double getBlockCacheHitPercent() { 290 if (this.cacheStats == null) { 291 return 0; 292 } 293 double ratio = this.cacheStats.getHitRatio(); 294 if (Double.isNaN(ratio)) { 295 ratio = 0; 296 } 297 return (ratio * 100); 298 } 299 300 @Override getBlockCacheHitCachingPercent()301 public double getBlockCacheHitCachingPercent() { 302 if (this.cacheStats == null) { 303 return 0; 304 } 305 306 double ratio = this.cacheStats.getHitCachingRatio(); 307 308 if (Double.isNaN(ratio)) { 309 ratio = 0; 310 } 311 return (ratio * 100); 312 } 313 314 @Override getBlockCacheFailedInsertions()315 public long getBlockCacheFailedInsertions() { 316 return this.cacheStats.getFailedInserts(); 317 } 318 forceRecompute()319 @Override public void forceRecompute() { 320 this.runnable.run(); 321 } 322 323 @Override getNumStores()324 public long getNumStores() { 325 return numStores; 326 } 327 328 @Override getNumWALFiles()329 public long getNumWALFiles() { 330 return numWALFiles; 331 } 332 333 @Override getWALFileSize()334 public long getWALFileSize() { 335 return walFileSize; 336 } 337 338 @Override getNumStoreFiles()339 public long getNumStoreFiles() { 340 return numStoreFiles; 341 } 342 343 @Override getMemstoreSize()344 public long getMemstoreSize() { 345 return memstoreSize; 346 } 347 348 @Override getStoreFileSize()349 public long getStoreFileSize() { 350 return storeFileSize; 351 } 352 getRequestsPerSecond()353 @Override public double getRequestsPerSecond() { 354 return requestsPerSecond; 355 } 356 357 @Override getReadRequestsCount()358 public long getReadRequestsCount() { 359 return readRequestsCount; 360 } 361 362 @Override getWriteRequestsCount()363 public long getWriteRequestsCount() { 364 return writeRequestsCount; 365 } 366 367 @Override getCheckAndMutateChecksFailed()368 public long getCheckAndMutateChecksFailed() { 369 return checkAndMutateChecksFailed; 370 } 371 372 @Override getCheckAndMutateChecksPassed()373 public long getCheckAndMutateChecksPassed() { 374 return checkAndMutateChecksPassed; 375 } 376 377 @Override getStoreFileIndexSize()378 public long getStoreFileIndexSize() { 379 return storefileIndexSize; 380 } 381 382 @Override getTotalStaticIndexSize()383 public long getTotalStaticIndexSize() { 384 return totalStaticIndexSize; 385 } 386 387 @Override getTotalStaticBloomSize()388 public long getTotalStaticBloomSize() { 389 return totalStaticBloomSize; 390 } 391 392 @Override getNumMutationsWithoutWAL()393 public long getNumMutationsWithoutWAL() { 394 return numMutationsWithoutWAL; 395 } 396 397 @Override getDataInMemoryWithoutWAL()398 public long getDataInMemoryWithoutWAL() { 399 return dataInMemoryWithoutWAL; 400 } 401 402 @Override getPercentFileLocal()403 public double getPercentFileLocal() { 404 return percentFileLocal; 405 } 406 407 @Override getPercentFileLocalSecondaryRegions()408 public double getPercentFileLocalSecondaryRegions() { 409 return percentFileLocalSecondaryRegions; 410 } 411 412 @Override getUpdatesBlockedTime()413 public long getUpdatesBlockedTime() { 414 if (this.regionServer.cacheFlusher == null) { 415 return 0; 416 } 417 return this.regionServer.cacheFlusher.getUpdatesBlockedMsHighWater().get(); 418 } 419 420 @Override getFlushedCellsCount()421 public long getFlushedCellsCount() { 422 return flushedCellsCount; 423 } 424 425 @Override getCompactedCellsCount()426 public long getCompactedCellsCount() { 427 return compactedCellsCount; 428 } 429 430 @Override getMajorCompactedCellsCount()431 public long getMajorCompactedCellsCount() { 432 return majorCompactedCellsCount; 433 } 434 435 @Override getFlushedCellsSize()436 public long getFlushedCellsSize() { 437 return flushedCellsSize; 438 } 439 440 @Override getCompactedCellsSize()441 public long getCompactedCellsSize() { 442 return compactedCellsSize; 443 } 444 445 @Override getMajorCompactedCellsSize()446 public long getMajorCompactedCellsSize() { 447 return majorCompactedCellsSize; 448 } 449 450 /** 451 * This is the runnable that will be executed on the executor every PERIOD number of seconds 452 * It will take metrics/numbers from all of the regions and use them to compute point in 453 * time metrics. 454 */ 455 public class RegionServerMetricsWrapperRunnable implements Runnable { 456 457 private long lastRan = 0; 458 private long lastRequestCount = 0; 459 460 @Override run()461 synchronized public void run() { 462 try { 463 initBlockCache(); 464 cacheStats = blockCache.getStats(); 465 466 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); 467 HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions = 468 new HDFSBlocksDistribution(); 469 470 long tempNumStores = 0; 471 long tempNumStoreFiles = 0; 472 long tempMemstoreSize = 0; 473 long tempStoreFileSize = 0; 474 long tempReadRequestsCount = 0; 475 long tempWriteRequestsCount = 0; 476 long tempCheckAndMutateChecksFailed = 0; 477 long tempCheckAndMutateChecksPassed = 0; 478 long tempStorefileIndexSize = 0; 479 long tempTotalStaticIndexSize = 0; 480 long tempTotalStaticBloomSize = 0; 481 long tempNumMutationsWithoutWAL = 0; 482 long tempDataInMemoryWithoutWAL = 0; 483 double tempPercentFileLocal = 0; 484 double tempPercentFileLocalSecondaryRegions = 0; 485 long tempFlushedCellsCount = 0; 486 long tempCompactedCellsCount = 0; 487 long tempMajorCompactedCellsCount = 0; 488 long tempFlushedCellsSize = 0; 489 long tempCompactedCellsSize = 0; 490 long tempMajorCompactedCellsSize = 0; 491 long tempBlockedRequestsCount = 0L; 492 493 for (Region r : regionServer.getOnlineRegionsLocalContext()) { 494 tempNumMutationsWithoutWAL += r.getNumMutationsWithoutWAL(); 495 tempDataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL(); 496 tempReadRequestsCount += r.getReadRequestsCount(); 497 tempWriteRequestsCount += r.getWriteRequestsCount(); 498 tempCheckAndMutateChecksFailed += r.getCheckAndMutateChecksFailed(); 499 tempCheckAndMutateChecksPassed += r.getCheckAndMutateChecksPassed(); 500 tempBlockedRequestsCount += r.getBlockedRequestsCount(); 501 List<Store> storeList = r.getStores(); 502 tempNumStores += storeList.size(); 503 for (Store store : storeList) { 504 tempNumStoreFiles += store.getStorefilesCount(); 505 tempMemstoreSize += store.getMemStoreSize(); 506 tempStoreFileSize += store.getStorefilesSize(); 507 tempStorefileIndexSize += store.getStorefilesIndexSize(); 508 tempTotalStaticBloomSize += store.getTotalStaticBloomSize(); 509 tempTotalStaticIndexSize += store.getTotalStaticIndexSize(); 510 tempFlushedCellsCount += store.getFlushedCellsCount(); 511 tempCompactedCellsCount += store.getCompactedCellsCount(); 512 tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount(); 513 tempFlushedCellsSize += store.getFlushedCellsSize(); 514 tempCompactedCellsSize += store.getCompactedCellsSize(); 515 tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize(); 516 } 517 518 HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution(); 519 hdfsBlocksDistribution.add(distro); 520 if (r.getRegionInfo().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { 521 hdfsBlocksDistributionSecondaryRegions.add(distro); 522 } 523 } 524 525 float localityIndex = 526 hdfsBlocksDistribution 527 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 528 tempPercentFileLocal = Double.isNaN(tempBlockedRequestsCount) ? 0 : (localityIndex * 100); 529 530 float localityIndexSecondaryRegions = 531 hdfsBlocksDistributionSecondaryRegions.getBlockLocalityIndex(regionServer 532 .getServerName().getHostname()); 533 tempPercentFileLocalSecondaryRegions = Double 534 .isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100); 535 536 // Compute the number of requests per second 537 long currentTime = EnvironmentEdgeManager.currentTime(); 538 539 // assume that it took PERIOD seconds to start the executor. 540 // this is a guess but it's a pretty good one. 541 if (lastRan == 0) { 542 lastRan = currentTime - period; 543 } 544 545 // If we've time traveled keep the last requests per second. 546 if ((currentTime - lastRan) > 0) { 547 long currentRequestCount = getTotalRequestCount(); 548 requestsPerSecond = 549 (currentRequestCount - lastRequestCount) / ((currentTime - lastRan) / 1000.0); 550 lastRequestCount = currentRequestCount; 551 } 552 lastRan = currentTime; 553 554 numWALFiles = 555 DefaultWALProvider.getNumLogFiles(regionServer.walFactory) 556 + BoundedRegionGroupingProvider.getNumLogFiles(regionServer.walFactory); 557 walFileSize = 558 DefaultWALProvider.getLogFileSize(regionServer.walFactory) 559 + BoundedRegionGroupingProvider.getLogFileSize(regionServer.walFactory); 560 // Copy over computed values so that no thread sees half computed values. 561 numStores = tempNumStores; 562 numStoreFiles = tempNumStoreFiles; 563 memstoreSize = tempMemstoreSize; 564 storeFileSize = tempStoreFileSize; 565 readRequestsCount = tempReadRequestsCount; 566 writeRequestsCount = tempWriteRequestsCount; 567 checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed; 568 checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed; 569 storefileIndexSize = tempStorefileIndexSize; 570 totalStaticIndexSize = tempTotalStaticIndexSize; 571 totalStaticBloomSize = tempTotalStaticBloomSize; 572 numMutationsWithoutWAL = tempNumMutationsWithoutWAL; 573 dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL; 574 percentFileLocal = tempPercentFileLocal; 575 percentFileLocalSecondaryRegions = tempPercentFileLocalSecondaryRegions; 576 flushedCellsCount = tempFlushedCellsCount; 577 compactedCellsCount = tempCompactedCellsCount; 578 majorCompactedCellsCount = tempMajorCompactedCellsCount; 579 flushedCellsSize = tempFlushedCellsSize; 580 compactedCellsSize = tempCompactedCellsSize; 581 majorCompactedCellsSize = tempMajorCompactedCellsSize; 582 blockedRequestsCount = tempBlockedRequestsCount; 583 } catch (Throwable e) { 584 LOG.warn("Caught exception! Will suppress and retry.", e); 585 } 586 } 587 } 588 589 @Override getBlockedRequestsCount()590 public long getBlockedRequestsCount() { 591 return blockedRequestsCount; 592 } 593 } 594