1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.util; 19 20 import java.io.Closeable; 21 import java.io.FileNotFoundException; 22 import java.io.IOException; 23 import java.io.InterruptedIOException; 24 import java.io.PrintWriter; 25 import java.io.StringWriter; 26 import java.net.InetAddress; 27 import java.net.URI; 28 import java.util.ArrayList; 29 import java.util.Arrays; 30 import java.util.Collection; 31 import java.util.Collections; 32 import java.util.Comparator; 33 import java.util.HashMap; 34 import java.util.HashSet; 35 import java.util.Iterator; 36 import java.util.List; 37 import java.util.Map; 38 import java.util.Map.Entry; 39 import java.util.Set; 40 import java.util.SortedMap; 41 import java.util.SortedSet; 42 import java.util.TreeMap; 43 import java.util.TreeSet; 44 import java.util.concurrent.Callable; 45 import java.util.concurrent.ConcurrentSkipListMap; 46 import java.util.concurrent.ExecutionException; 47 import java.util.concurrent.ExecutorService; 48 import java.util.concurrent.Executors; 49 import java.util.concurrent.Future; 50 import java.util.concurrent.FutureTask; 51 import java.util.concurrent.ScheduledThreadPoolExecutor; 52 import java.util.concurrent.TimeUnit; 53 import java.util.concurrent.TimeoutException; 54 import java.util.concurrent.atomic.AtomicBoolean; 55 import java.util.concurrent.atomic.AtomicInteger; 56 57 import org.apache.commons.io.IOUtils; 58 import org.apache.commons.lang.StringUtils; 59 import org.apache.commons.logging.Log; 60 import org.apache.commons.logging.LogFactory; 61 import org.apache.hadoop.conf.Configuration; 62 import org.apache.hadoop.conf.Configured; 63 import org.apache.hadoop.fs.FSDataOutputStream; 64 import org.apache.hadoop.fs.FileStatus; 65 import org.apache.hadoop.fs.FileSystem; 66 import org.apache.hadoop.fs.Path; 67 import org.apache.hadoop.fs.permission.FsAction; 68 import org.apache.hadoop.fs.permission.FsPermission; 69 import org.apache.hadoop.hbase.Abortable; 70 import org.apache.hadoop.hbase.Cell; 71 import org.apache.hadoop.hbase.ClusterStatus; 72 import org.apache.hadoop.hbase.CoordinatedStateException; 73 import org.apache.hadoop.hbase.HBaseConfiguration; 74 import org.apache.hadoop.hbase.HBaseInterfaceAudience; 75 import org.apache.hadoop.hbase.HColumnDescriptor; 76 import org.apache.hadoop.hbase.HConstants; 77 import org.apache.hadoop.hbase.HRegionInfo; 78 import org.apache.hadoop.hbase.HRegionLocation; 79 import org.apache.hadoop.hbase.HTableDescriptor; 80 import org.apache.hadoop.hbase.KeyValue; 81 import org.apache.hadoop.hbase.MasterNotRunningException; 82 import org.apache.hadoop.hbase.MetaTableAccessor; 83 import org.apache.hadoop.hbase.RegionLocations; 84 import org.apache.hadoop.hbase.ServerName; 85 import org.apache.hadoop.hbase.TableName; 86 import org.apache.hadoop.hbase.ZooKeeperConnectionException; 87 import org.apache.hadoop.hbase.classification.InterfaceAudience; 88 import org.apache.hadoop.hbase.classification.InterfaceStability; 89 import org.apache.hadoop.hbase.client.Admin; 90 import org.apache.hadoop.hbase.client.ClusterConnection; 91 import org.apache.hadoop.hbase.client.ConnectionFactory; 92 import org.apache.hadoop.hbase.client.Delete; 93 import org.apache.hadoop.hbase.client.Get; 94 import org.apache.hadoop.hbase.client.HBaseAdmin; 95 import org.apache.hadoop.hbase.client.HConnectable; 96 import org.apache.hadoop.hbase.client.HConnection; 97 import org.apache.hadoop.hbase.client.HConnectionManager; 98 import org.apache.hadoop.hbase.client.MetaScanner; 99 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; 100 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; 101 import org.apache.hadoop.hbase.client.Put; 102 import org.apache.hadoop.hbase.client.RegionReplicaUtil; 103 import org.apache.hadoop.hbase.client.Result; 104 import org.apache.hadoop.hbase.client.RowMutations; 105 import org.apache.hadoop.hbase.client.Table; 106 import org.apache.hadoop.hbase.io.hfile.CacheConfig; 107 import org.apache.hadoop.hbase.io.hfile.HFile; 108 import org.apache.hadoop.hbase.master.MasterFileSystem; 109 import org.apache.hadoop.hbase.master.RegionState; 110 import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 111 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface; 112 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; 113 import org.apache.hadoop.hbase.regionserver.HRegion; 114 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 115 import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 116 import org.apache.hadoop.hbase.security.AccessDeniedException; 117 import org.apache.hadoop.hbase.security.UserProvider; 118 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator; 119 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; 120 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; 121 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler; 122 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl; 123 import org.apache.hadoop.hbase.util.hbck.TableLockChecker; 124 import org.apache.hadoop.hbase.wal.WALSplitter; 125 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 126 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader; 127 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager; 128 import org.apache.hadoop.hbase.zookeeper.ZKUtil; 129 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; 130 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 131 import org.apache.hadoop.ipc.RemoteException; 132 import org.apache.hadoop.security.UserGroupInformation; 133 import org.apache.hadoop.util.ReflectionUtils; 134 import org.apache.hadoop.util.Tool; 135 import org.apache.hadoop.util.ToolRunner; 136 import org.apache.zookeeper.KeeperException; 137 138 import com.google.common.base.Joiner; 139 import com.google.common.base.Preconditions; 140 import com.google.common.collect.ImmutableList; 141 import com.google.common.collect.Lists; 142 import com.google.common.collect.Multimap; 143 import com.google.common.collect.Ordering; 144 import com.google.common.collect.TreeMultimap; 145 import com.google.protobuf.ServiceException; 146 147 /** 148 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and 149 * table integrity problems in a corrupted HBase. 150 * <p> 151 * Region consistency checks verify that hbase:meta, region deployment on region 152 * servers and the state of data in HDFS (.regioninfo files) all are in 153 * accordance. 154 * <p> 155 * Table integrity checks verify that all possible row keys resolve to exactly 156 * one region of a table. This means there are no individual degenerate 157 * or backwards regions; no holes between regions; and that there are no 158 * overlapping regions. 159 * <p> 160 * The general repair strategy works in two phases: 161 * <ol> 162 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions) 163 * <li> Repair Region Consistency with hbase:meta and assignments 164 * </ol> 165 * <p> 166 * For table integrity repairs, the tables' region directories are scanned 167 * for .regioninfo files. Each table's integrity is then verified. If there 168 * are any orphan regions (regions with no .regioninfo files) or holes, new 169 * regions are fabricated. Backwards regions are sidelined as well as empty 170 * degenerate (endkey==startkey) regions. If there are any overlapping regions, 171 * a new region is created and all data is merged into the new region. 172 * <p> 173 * Table integrity repairs deal solely with HDFS and could potentially be done 174 * offline -- the hbase region servers or master do not need to be running. 175 * This phase can eventually be used to completely reconstruct the hbase:meta table in 176 * an offline fashion. 177 * <p> 178 * Region consistency requires three conditions -- 1) valid .regioninfo file 179 * present in an HDFS region dir, 2) valid row with .regioninfo data in META, 180 * and 3) a region is deployed only at the regionserver that was assigned to 181 * with proper state in the master. 182 * <p> 183 * Region consistency repairs require hbase to be online so that hbck can 184 * contact the HBase master and region servers. The hbck#connect() method must 185 * first be called successfully. Much of the region consistency information 186 * is transient and less risky to repair. 187 * <p> 188 * If hbck is run from the command line, there are a handful of arguments that 189 * can be used to limit the kinds of repairs hbck will do. See the code in 190 * {@link #printUsageAndExit()} for more details. 191 */ 192 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) 193 @InterfaceStability.Evolving 194 public class HBaseFsck extends Configured implements Closeable { 195 public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute 196 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000; 197 private static final int MAX_NUM_THREADS = 50; // #threads to contact regions 198 private static boolean rsSupportsOffline = true; 199 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2; 200 private static final int DEFAULT_MAX_MERGE = 5; 201 private static final String TO_BE_LOADED = "to_be_loaded"; 202 private static final String HBCK_LOCK_FILE = "hbase-hbck.lock"; 203 private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5; 204 private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds 205 private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds 206 // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD. 207 // In HADOOP-2.6 and later, the Namenode proxy now created with custom RetryPolicy for 208 // AlreadyBeingCreatedException which is implies timeout on this operations up to 209 // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). 210 private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds 211 212 /********************** 213 * Internal resources 214 **********************/ 215 private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName()); 216 private ClusterStatus status; 217 private ClusterConnection connection; 218 private Admin admin; 219 private Table meta; 220 // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions 221 protected ExecutorService executor; 222 private long startMillis = EnvironmentEdgeManager.currentTime(); 223 private HFileCorruptionChecker hfcc; 224 private int retcode = 0; 225 private Path HBCK_LOCK_PATH; 226 private FSDataOutputStream hbckOutFd; 227 // This lock is to prevent cleanup of balancer resources twice between 228 // ShutdownHook and the main code. We cleanup only if the connect() is 229 // successful 230 private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false); 231 232 /*********** 233 * Options 234 ***********/ 235 private static boolean details = false; // do we display the full report 236 private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older 237 private static boolean forceExclusive = false; // only this hbck can modify HBase 238 private static boolean disableBalancer = false; // disable load balancer to keep regions stable 239 private boolean fixAssignments = false; // fix assignment errors? 240 private boolean fixMeta = false; // fix meta errors? 241 private boolean checkHdfs = true; // load and check fs consistency? 242 private boolean fixHdfsHoles = false; // fix fs holes? 243 private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky) 244 private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo) 245 private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo) 246 private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs 247 private boolean fixSplitParents = false; // fix lingering split parents 248 private boolean fixReferenceFiles = false; // fix lingering reference store file 249 private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows 250 private boolean fixTableLocks = false; // fix table locks which are expired 251 private boolean fixTableZNodes = false; // fix table Znodes which are orphaned 252 private boolean fixAny = false; // Set to true if any of the fix is required. 253 254 // limit checking/fixes to listed tables, if empty attempt to check/fix all 255 // hbase:meta are always checked 256 private Set<TableName> tablesIncluded = new HashSet<TableName>(); 257 private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge 258 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline 259 private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions 260 private Path sidelineDir = null; 261 262 private boolean rerun = false; // if we tried to fix something, rerun hbck 263 private static boolean SUMMARY = false; // if we want to print less output 264 private boolean checkMetaOnly = false; 265 private boolean checkRegionBoundaries = false; 266 private boolean ignorePreCheckPermission = false; // if pre-check permission 267 268 /********* 269 * State 270 *********/ 271 final private ErrorReporter errors; 272 int fixes = 0; 273 274 /** 275 * This map contains the state of all hbck items. It maps from encoded region 276 * name to HbckInfo structure. The information contained in HbckInfo is used 277 * to detect and correct consistency (hdfs/meta/deployment) problems. 278 */ 279 private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>(); 280 private TreeSet<TableName> disabledTables = 281 new TreeSet<TableName>(); 282 // Empty regioninfo qualifiers in hbase:meta 283 private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>(); 284 285 /** 286 * This map from Tablename -> TableInfo contains the structures necessary to 287 * detect table consistency problems (holes, dupes, overlaps). It is sorted 288 * to prevent dupes. 289 * 290 * If tablesIncluded is empty, this map contains all tables. 291 * Otherwise, it contains only meta tables and tables in tablesIncluded, 292 * unless checkMetaOnly is specified, in which case, it contains only 293 * the meta table 294 */ 295 private SortedMap<TableName, TableInfo> tablesInfo = 296 new ConcurrentSkipListMap<TableName, TableInfo>(); 297 298 /** 299 * When initially looking at HDFS, we attempt to find any orphaned data. 300 */ 301 private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>()); 302 303 private Map<TableName, Set<String>> orphanTableDirs = 304 new HashMap<TableName, Set<String>>(); 305 306 private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>(); 307 308 /** 309 * List of orphaned table ZNodes 310 */ 311 private Set<TableName> orphanedTableZNodes = new HashSet<TableName>(); 312 private final RetryCounterFactory lockFileRetryCounterFactory; 313 314 315 /** 316 * Constructor 317 * 318 * @param conf Configuration object 319 * @throws MasterNotRunningException if the master is not running 320 * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper 321 */ HBaseFsck(Configuration conf)322 public HBaseFsck(Configuration conf) throws MasterNotRunningException, 323 ZooKeeperConnectionException, IOException, ClassNotFoundException { 324 this(conf, createThreadPool(conf)); 325 } 326 createThreadPool(Configuration conf)327 private static ExecutorService createThreadPool(Configuration conf) { 328 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS); 329 return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck")); 330 } 331 332 /** 333 * Constructor 334 * 335 * @param conf 336 * Configuration object 337 * @throws MasterNotRunningException 338 * if the master is not running 339 * @throws ZooKeeperConnectionException 340 * if unable to connect to ZooKeeper 341 */ HBaseFsck(Configuration conf, ExecutorService exec)342 public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException, 343 ZooKeeperConnectionException, IOException, ClassNotFoundException { 344 super(conf); 345 errors = getErrorReporter(getConf()); 346 this.executor = exec; 347 lockFileRetryCounterFactory = new RetryCounterFactory( 348 getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), 349 getConf().getInt( 350 "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), 351 getConf().getInt( 352 "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); 353 } 354 355 private class FileLockCallable implements Callable<FSDataOutputStream> { 356 RetryCounter retryCounter; 357 FileLockCallable(RetryCounter retryCounter)358 public FileLockCallable(RetryCounter retryCounter) { 359 this.retryCounter = retryCounter; 360 } 361 @Override call()362 public FSDataOutputStream call() throws IOException { 363 try { 364 FileSystem fs = FSUtils.getCurrentFileSystem(getConf()); 365 FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(), 366 HConstants.DATA_FILE_UMASK_KEY); 367 Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY); 368 fs.mkdirs(tmpDir); 369 HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE); 370 final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms); 371 out.writeBytes(InetAddress.getLocalHost().toString()); 372 out.flush(); 373 return out; 374 } catch(RemoteException e) { 375 if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){ 376 return null; 377 } else { 378 throw e; 379 } 380 } 381 } 382 createFileWithRetries(final FileSystem fs, final Path hbckLockFilePath, final FsPermission defaultPerms)383 private FSDataOutputStream createFileWithRetries(final FileSystem fs, 384 final Path hbckLockFilePath, final FsPermission defaultPerms) 385 throws IOException { 386 387 IOException exception = null; 388 do { 389 try { 390 return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false); 391 } catch (IOException ioe) { 392 LOG.info("Failed to create lock file " + hbckLockFilePath.getName() 393 + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of " 394 + retryCounter.getMaxAttempts()); 395 LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), 396 ioe); 397 try { 398 exception = ioe; 399 retryCounter.sleepUntilNextRetry(); 400 } catch (InterruptedException ie) { 401 throw (InterruptedIOException) new InterruptedIOException( 402 "Can't create lock file " + hbckLockFilePath.getName()) 403 .initCause(ie); 404 } 405 } 406 } while (retryCounter.shouldRetry()); 407 408 throw exception; 409 } 410 } 411 412 /** 413 * This method maintains a lock using a file. If the creation fails we return null 414 * 415 * @return FSDataOutputStream object corresponding to the newly opened lock file 416 * @throws IOException 417 */ checkAndMarkRunningHbck()418 private FSDataOutputStream checkAndMarkRunningHbck() throws IOException { 419 RetryCounter retryCounter = lockFileRetryCounterFactory.create(); 420 FileLockCallable callable = new FileLockCallable(retryCounter); 421 ExecutorService executor = Executors.newFixedThreadPool(1); 422 FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable); 423 executor.execute(futureTask); 424 final int timeoutInSeconds = getConf().getInt( 425 "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT); 426 FSDataOutputStream stream = null; 427 try { 428 stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS); 429 } catch (ExecutionException ee) { 430 LOG.warn("Encountered exception when opening lock file", ee); 431 } catch (InterruptedException ie) { 432 LOG.warn("Interrupted when opening lock file", ie); 433 Thread.currentThread().interrupt(); 434 } catch (TimeoutException exception) { 435 // took too long to obtain lock 436 LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock"); 437 futureTask.cancel(true); 438 } finally { 439 executor.shutdownNow(); 440 } 441 return stream; 442 } 443 unlockHbck()444 private void unlockHbck() { 445 if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) { 446 RetryCounter retryCounter = lockFileRetryCounterFactory.create(); 447 do { 448 try { 449 IOUtils.closeQuietly(hbckOutFd); 450 FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), 451 HBCK_LOCK_PATH, true); 452 LOG.info("Finishing hbck"); 453 return; 454 } catch (IOException ioe) { 455 LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try=" 456 + (retryCounter.getAttemptTimes() + 1) + " of " 457 + retryCounter.getMaxAttempts()); 458 LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe); 459 try { 460 retryCounter.sleepUntilNextRetry(); 461 } catch (InterruptedException ie) { 462 Thread.currentThread().interrupt(); 463 LOG.warn("Interrupted while deleting lock file" + 464 HBCK_LOCK_PATH); 465 return; 466 } 467 } 468 } while (retryCounter.shouldRetry()); 469 } 470 } 471 472 /** 473 * To repair region consistency, one must call connect() in order to repair 474 * online state. 475 */ connect()476 public void connect() throws IOException { 477 478 if (isExclusive()) { 479 // Grab the lock 480 hbckOutFd = checkAndMarkRunningHbck(); 481 if (hbckOutFd == null) { 482 setRetCode(-1); 483 LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " + 484 "[If you are sure no other instance is running, delete the lock file " + 485 HBCK_LOCK_PATH + " and rerun the tool]"); 486 throw new IOException("Duplicate hbck - Abort"); 487 } 488 489 // Make sure to cleanup the lock 490 hbckLockCleanup.set(true); 491 } 492 493 494 // Add a shutdown hook to this thread, in case user tries to 495 // kill the hbck with a ctrl-c, we want to cleanup the lock so that 496 // it is available for further calls 497 Runtime.getRuntime().addShutdownHook(new Thread() { 498 @Override 499 public void run() { 500 IOUtils.closeQuietly(HBaseFsck.this); 501 unlockHbck(); 502 } 503 }); 504 505 LOG.info("Launching hbck"); 506 507 connection = (ClusterConnection)ConnectionFactory.createConnection(getConf()); 508 admin = connection.getAdmin(); 509 meta = connection.getTable(TableName.META_TABLE_NAME); 510 status = admin.getClusterStatus(); 511 } 512 513 /** 514 * Get deployed regions according to the region servers. 515 */ loadDeployedRegions()516 private void loadDeployedRegions() throws IOException, InterruptedException { 517 // From the master, get a list of all known live region servers 518 Collection<ServerName> regionServers = status.getServers(); 519 errors.print("Number of live region servers: " + regionServers.size()); 520 if (details) { 521 for (ServerName rsinfo: regionServers) { 522 errors.print(" " + rsinfo.getServerName()); 523 } 524 } 525 526 // From the master, get a list of all dead region servers 527 Collection<ServerName> deadRegionServers = status.getDeadServerNames(); 528 errors.print("Number of dead region servers: " + deadRegionServers.size()); 529 if (details) { 530 for (ServerName name: deadRegionServers) { 531 errors.print(" " + name); 532 } 533 } 534 535 // Print the current master name and state 536 errors.print("Master: " + status.getMaster()); 537 538 // Print the list of all backup masters 539 Collection<ServerName> backupMasters = status.getBackupMasters(); 540 errors.print("Number of backup masters: " + backupMasters.size()); 541 if (details) { 542 for (ServerName name: backupMasters) { 543 errors.print(" " + name); 544 } 545 } 546 547 errors.print("Average load: " + status.getAverageLoad()); 548 errors.print("Number of requests: " + status.getRequestsCount()); 549 errors.print("Number of regions: " + status.getRegionsCount()); 550 551 Map<String, RegionState> rits = status.getRegionsInTransition(); 552 errors.print("Number of regions in transition: " + rits.size()); 553 if (details) { 554 for (RegionState state: rits.values()) { 555 errors.print(" " + state.toDescriptiveString()); 556 } 557 } 558 559 // Determine what's deployed 560 processRegionServers(regionServers); 561 } 562 563 /** 564 * Clear the current state of hbck. 565 */ clearState()566 private void clearState() { 567 // Make sure regionInfo is empty before starting 568 fixes = 0; 569 regionInfoMap.clear(); 570 emptyRegionInfoQualifiers.clear(); 571 disabledTables.clear(); 572 errors.clear(); 573 tablesInfo.clear(); 574 orphanHdfsDirs.clear(); 575 skippedRegions.clear(); 576 } 577 578 /** 579 * This repair method analyzes hbase data in hdfs and repairs it to satisfy 580 * the table integrity rules. HBase doesn't need to be online for this 581 * operation to work. 582 */ offlineHdfsIntegrityRepair()583 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException { 584 // Initial pass to fix orphans. 585 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles() 586 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) { 587 LOG.info("Loading regioninfos HDFS"); 588 // if nothing is happening this should always complete in two iterations. 589 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3); 590 int curIter = 0; 591 do { 592 clearState(); // clears hbck state and reset fixes to 0 and. 593 // repair what's on HDFS 594 restoreHdfsIntegrity(); 595 curIter++;// limit the number of iterations. 596 } while (fixes > 0 && curIter <= maxIterations); 597 598 // Repairs should be done in the first iteration and verification in the second. 599 // If there are more than 2 passes, something funny has happened. 600 if (curIter > 2) { 601 if (curIter == maxIterations) { 602 LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. " 603 + "Tables integrity may not be fully repaired!"); 604 } else { 605 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations"); 606 } 607 } 608 } 609 } 610 611 /** 612 * This repair method requires the cluster to be online since it contacts 613 * region servers and the masters. It makes each region's state in HDFS, in 614 * hbase:meta, and deployments consistent. 615 * 616 * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable 617 * error. If 0, we have a clean hbase. 618 */ onlineConsistencyRepair()619 public int onlineConsistencyRepair() throws IOException, KeeperException, 620 InterruptedException { 621 clearState(); 622 623 // get regions according to what is online on each RegionServer 624 loadDeployedRegions(); 625 // check whether hbase:meta is deployed and online 626 recordMetaRegion(); 627 // Check if hbase:meta is found only once and in the right place 628 if (!checkMetaRegion()) { 629 String errorMsg = "hbase:meta table is not consistent. "; 630 if (shouldFixAssignments()) { 631 errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state."; 632 } else { 633 errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency."; 634 } 635 errors.reportError(errorMsg + " Exiting..."); 636 return -2; 637 } 638 // Not going with further consistency check for tables when hbase:meta itself is not consistent. 639 LOG.info("Loading regionsinfo from the hbase:meta table"); 640 boolean success = loadMetaEntries(); 641 if (!success) return -1; 642 643 // Empty cells in hbase:meta? 644 reportEmptyMetaCells(); 645 646 // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta 647 if (shouldFixEmptyMetaCells()) { 648 fixEmptyMetaCells(); 649 } 650 651 // get a list of all tables that have not changed recently. 652 if (!checkMetaOnly) { 653 reportTablesInFlux(); 654 } 655 656 // load regiondirs and regioninfos from HDFS 657 if (shouldCheckHdfs()) { 658 LOG.info("Loading region directories from HDFS"); 659 loadHdfsRegionDirs(); 660 LOG.info("Loading region information from HDFS"); 661 loadHdfsRegionInfos(); 662 } 663 664 // Get disabled tables from ZooKeeper 665 loadDisabledTables(); 666 667 // fix the orphan tables 668 fixOrphanTables(); 669 670 LOG.info("Checking and fixing region consistency"); 671 // Check and fix consistency 672 checkAndFixConsistency(); 673 674 // Check integrity (does not fix) 675 checkIntegrity(); 676 return errors.getErrorList().size(); 677 } 678 679 /** 680 * Contacts the master and prints out cluster-wide information 681 * @return 0 on success, non-zero on failure 682 */ onlineHbck()683 public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException { 684 // print hbase server version 685 errors.print("Version: " + status.getHBaseVersion()); 686 offlineHdfsIntegrityRepair(); 687 688 boolean oldBalancer = false; 689 if (shouldDisableBalancer()) { 690 oldBalancer = admin.setBalancerRunning(false, true); 691 } 692 693 try { 694 onlineConsistencyRepair(); 695 } 696 finally { 697 // Only restore the balancer if it was true when we started repairing and 698 // we actually disabled it. Otherwise, we might clobber another run of 699 // hbck that has just restored it. 700 if (shouldDisableBalancer() && oldBalancer) { 701 admin.setBalancerRunning(oldBalancer, false); 702 } 703 } 704 705 if (checkRegionBoundaries) { 706 checkRegionBoundaries(); 707 } 708 709 offlineReferenceFileRepair(); 710 711 checkAndFixTableLocks(); 712 713 // Check (and fix if requested) orphaned table ZNodes 714 checkAndFixOrphanedTableZNodes(); 715 716 // Remove the hbck lock 717 unlockHbck(); 718 719 // Print table summary 720 printTableSummary(tablesInfo); 721 return errors.summarize(); 722 } 723 keyOnly(byte[] b)724 public static byte[] keyOnly (byte[] b) { 725 if (b == null) 726 return b; 727 int rowlength = Bytes.toShort(b, 0); 728 byte[] result = new byte[rowlength]; 729 System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength); 730 return result; 731 } 732 733 @Override close()734 public void close() throws IOException { 735 IOUtils.closeQuietly(admin); 736 IOUtils.closeQuietly(meta); 737 IOUtils.closeQuietly(connection); 738 } 739 740 private static class RegionBoundariesInformation { 741 public byte [] regionName; 742 public byte [] metaFirstKey; 743 public byte [] metaLastKey; 744 public byte [] storesFirstKey; 745 public byte [] storesLastKey; 746 @Override toString()747 public String toString () { 748 return "regionName=" + Bytes.toStringBinary(regionName) + 749 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) + 750 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) + 751 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) + 752 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey); 753 } 754 } 755 checkRegionBoundaries()756 public void checkRegionBoundaries() { 757 try { 758 ByteArrayComparator comparator = new ByteArrayComparator(); 759 List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), connection, false); 760 final RegionBoundariesInformation currentRegionBoundariesInformation = 761 new RegionBoundariesInformation(); 762 Path hbaseRoot = FSUtils.getRootDir(getConf()); 763 for (HRegionInfo regionInfo : regions) { 764 Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable()); 765 currentRegionBoundariesInformation.regionName = regionInfo.getRegionName(); 766 // For each region, get the start and stop key from the META and compare them to the 767 // same information from the Stores. 768 Path path = new Path(tableDir, regionInfo.getEncodedName()); 769 FileSystem fs = path.getFileSystem(getConf()); 770 FileStatus[] files = fs.listStatus(path); 771 // For all the column families in this region... 772 byte[] storeFirstKey = null; 773 byte[] storeLastKey = null; 774 for (FileStatus file : files) { 775 String fileName = file.getPath().toString(); 776 fileName = fileName.substring(fileName.lastIndexOf("/") + 1); 777 if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) { 778 FileStatus[] storeFiles = fs.listStatus(file.getPath()); 779 // For all the stores in this column family. 780 for (FileStatus storeFile : storeFiles) { 781 HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig( 782 getConf()), getConf()); 783 if ((reader.getFirstKey() != null) 784 && ((storeFirstKey == null) || (comparator.compare(storeFirstKey, 785 reader.getFirstKey()) > 0))) { 786 storeFirstKey = reader.getFirstKey(); 787 } 788 if ((reader.getLastKey() != null) 789 && ((storeLastKey == null) || (comparator.compare(storeLastKey, 790 reader.getLastKey())) < 0)) { 791 storeLastKey = reader.getLastKey(); 792 } 793 reader.close(); 794 } 795 } 796 } 797 currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey(); 798 currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey(); 799 currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey); 800 currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey); 801 if (currentRegionBoundariesInformation.metaFirstKey.length == 0) 802 currentRegionBoundariesInformation.metaFirstKey = null; 803 if (currentRegionBoundariesInformation.metaLastKey.length == 0) 804 currentRegionBoundariesInformation.metaLastKey = null; 805 806 // For a region to be correct, we need the META start key to be smaller or equal to the 807 // smallest start key from all the stores, and the start key from the next META entry to 808 // be bigger than the last key from all the current stores. First region start key is null; 809 // Last region end key is null; some regions can be empty and not have any store. 810 811 boolean valid = true; 812 // Checking start key. 813 if ((currentRegionBoundariesInformation.storesFirstKey != null) 814 && (currentRegionBoundariesInformation.metaFirstKey != null)) { 815 valid = valid 816 && comparator.compare(currentRegionBoundariesInformation.storesFirstKey, 817 currentRegionBoundariesInformation.metaFirstKey) >= 0; 818 } 819 // Checking stop key. 820 if ((currentRegionBoundariesInformation.storesLastKey != null) 821 && (currentRegionBoundariesInformation.metaLastKey != null)) { 822 valid = valid 823 && comparator.compare(currentRegionBoundariesInformation.storesLastKey, 824 currentRegionBoundariesInformation.metaLastKey) < 0; 825 } 826 if (!valid) { 827 errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries", 828 tablesInfo.get(regionInfo.getTable())); 829 LOG.warn("Region's boundaries not alligned between stores and META for:"); 830 LOG.warn(currentRegionBoundariesInformation); 831 } 832 } 833 } catch (IOException e) { 834 LOG.error(e); 835 } 836 } 837 838 /** 839 * Iterates through the list of all orphan/invalid regiondirs. 840 */ 841 private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException { 842 for (HbckInfo hi : orphanHdfsDirs) { 843 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir()); 844 adoptHdfsOrphan(hi); 845 } 846 } 847 848 /** 849 * Orphaned regions are regions without a .regioninfo file in them. We "adopt" 850 * these orphans by creating a new region, and moving the column families, 851 * recovered edits, WALs, into the new region dir. We determine the region 852 * startkey and endkeys by looking at all of the hfiles inside the column 853 * families to identify the min and max keys. The resulting region will 854 * likely violate table integrity but will be dealt with by merging 855 * overlapping regions. 856 */ 857 @SuppressWarnings("deprecation") 858 private void adoptHdfsOrphan(HbckInfo hi) throws IOException { 859 Path p = hi.getHdfsRegionDir(); 860 FileSystem fs = p.getFileSystem(getConf()); 861 FileStatus[] dirs = fs.listStatus(p); 862 if (dirs == null) { 863 LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " + 864 p + ". This dir could probably be deleted."); 865 return ; 866 } 867 868 TableName tableName = hi.getTableName(); 869 TableInfo tableInfo = tablesInfo.get(tableName); 870 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!"); 871 HTableDescriptor template = tableInfo.getHTD(); 872 873 // find min and max key values 874 Pair<byte[],byte[]> orphanRegionRange = null; 875 for (FileStatus cf : dirs) { 876 String cfName= cf.getPath().getName(); 877 // TODO Figure out what the special dirs are 878 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue; 879 880 FileStatus[] hfiles = fs.listStatus(cf.getPath()); 881 for (FileStatus hfile : hfiles) { 882 byte[] start, end; 883 HFile.Reader hf = null; 884 try { 885 CacheConfig cacheConf = new CacheConfig(getConf()); 886 hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf()); 887 hf.loadFileInfo(); 888 KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey()); 889 start = startKv.getRow(); 890 KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey()); 891 end = endKv.getRow(); 892 } catch (IOException ioe) { 893 LOG.warn("Problem reading orphan file " + hfile + ", skipping"); 894 continue; 895 } catch (NullPointerException ioe) { 896 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping"); 897 continue; 898 } finally { 899 if (hf != null) { 900 hf.close(); 901 } 902 } 903 904 // expand the range to include the range of all hfiles 905 if (orphanRegionRange == null) { 906 // first range 907 orphanRegionRange = new Pair<byte[], byte[]>(start, end); 908 } else { 909 // TODO add test 910 911 // expand range only if the hfile is wider. 912 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) { 913 orphanRegionRange.setFirst(start); 914 } 915 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) { 916 orphanRegionRange.setSecond(end); 917 } 918 } 919 } 920 } 921 if (orphanRegionRange == null) { 922 LOG.warn("No data in dir " + p + ", sidelining data"); 923 fixes++; 924 sidelineRegionDir(fs, hi); 925 return; 926 } 927 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " + 928 Bytes.toString(orphanRegionRange.getSecond()) + ")"); 929 930 // create new region on hdfs. move data into place. 931 HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond()); 932 LOG.info("Creating new region : " + hri); 933 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template); 934 Path target = region.getRegionFileSystem().getRegionDir(); 935 936 // rename all the data to new region 937 mergeRegionDirs(target, hi); 938 fixes++; 939 } 940 941 /** 942 * This method determines if there are table integrity errors in HDFS. If 943 * there are errors and the appropriate "fix" options are enabled, the method 944 * will first correct orphan regions making them into legit regiondirs, and 945 * then reload to merge potentially overlapping regions. 946 * 947 * @return number of table integrity errors found 948 */ restoreHdfsIntegrity()949 private int restoreHdfsIntegrity() throws IOException, InterruptedException { 950 // Determine what's on HDFS 951 LOG.info("Loading HBase regioninfo from HDFS..."); 952 loadHdfsRegionDirs(); // populating regioninfo table. 953 954 int errs = errors.getErrorList().size(); 955 // First time just get suggestions. 956 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs. 957 checkHdfsIntegrity(false, false); 958 959 if (errors.getErrorList().size() == errs) { 960 LOG.info("No integrity errors. We are done with this phase. Glorious."); 961 return 0; 962 } 963 964 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) { 965 adoptHdfsOrphans(orphanHdfsDirs); 966 // TODO optimize by incrementally adding instead of reloading. 967 } 968 969 // Make sure there are no holes now. 970 if (shouldFixHdfsHoles()) { 971 clearState(); // this also resets # fixes. 972 loadHdfsRegionDirs(); 973 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs. 974 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false); 975 } 976 977 // Now we fix overlaps 978 if (shouldFixHdfsOverlaps()) { 979 // second pass we fix overlaps. 980 clearState(); // this also resets # fixes. 981 loadHdfsRegionDirs(); 982 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs. 983 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps()); 984 } 985 986 return errors.getErrorList().size(); 987 } 988 989 /** 990 * Scan all the store file names to find any lingering reference files, 991 * which refer to some none-exiting files. If "fix" option is enabled, 992 * any lingering reference file will be sidelined if found. 993 * <p> 994 * Lingering reference file prevents a region from opening. It has to 995 * be fixed before a cluster can start properly. 996 */ offlineReferenceFileRepair()997 private void offlineReferenceFileRepair() throws IOException { 998 Configuration conf = getConf(); 999 Path hbaseRoot = FSUtils.getRootDir(conf); 1000 FileSystem fs = hbaseRoot.getFileSystem(conf); 1001 LOG.info("Computing mapping of all store files"); 1002 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors); 1003 errors.print(""); 1004 LOG.info("Validating mapping using HDFS state"); 1005 for (Path path: allFiles.values()) { 1006 boolean isReference = false; 1007 try { 1008 isReference = StoreFileInfo.isReference(path); 1009 } catch (Throwable t) { 1010 // Ignore. Some files may not be store files at all. 1011 // For example, files under .oldlogs folder in hbase:meta 1012 // Warning message is already logged by 1013 // StoreFile#isReference. 1014 } 1015 if (!isReference) continue; 1016 1017 Path referredToFile = StoreFileInfo.getReferredToFile(path); 1018 if (fs.exists(referredToFile)) continue; // good, expected 1019 1020 // Found a lingering reference file 1021 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE, 1022 "Found lingering reference file " + path); 1023 if (!shouldFixReferenceFiles()) continue; 1024 1025 // Now, trying to fix it since requested 1026 boolean success = false; 1027 String pathStr = path.toString(); 1028 1029 // A reference file path should be like 1030 // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name 1031 // Up 5 directories to get the root folder. 1032 // So the file will be sidelined to a similar folder structure. 1033 int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR); 1034 for (int i = 0; index > 0 && i < 5; i++) { 1035 index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1); 1036 } 1037 if (index > 0) { 1038 Path rootDir = getSidelineDir(); 1039 Path dst = new Path(rootDir, pathStr.substring(index + 1)); 1040 fs.mkdirs(dst.getParent()); 1041 LOG.info("Trying to sildeline reference file " 1042 + path + " to " + dst); 1043 setShouldRerun(); 1044 1045 success = fs.rename(path, dst); 1046 } 1047 if (!success) { 1048 LOG.error("Failed to sideline reference file " + path); 1049 } 1050 } 1051 } 1052 1053 /** 1054 * TODO -- need to add tests for this. 1055 */ reportEmptyMetaCells()1056 private void reportEmptyMetaCells() { 1057 errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " + 1058 emptyRegionInfoQualifiers.size()); 1059 if (details) { 1060 for (Result r: emptyRegionInfoQualifiers) { 1061 errors.print(" " + r); 1062 } 1063 } 1064 } 1065 1066 /** 1067 * TODO -- need to add tests for this. 1068 */ reportTablesInFlux()1069 private void reportTablesInFlux() { 1070 AtomicInteger numSkipped = new AtomicInteger(0); 1071 HTableDescriptor[] allTables = getTables(numSkipped); 1072 errors.print("Number of Tables: " + allTables.length); 1073 if (details) { 1074 if (numSkipped.get() > 0) { 1075 errors.detail("Number of Tables in flux: " + numSkipped.get()); 1076 } 1077 for (HTableDescriptor td : allTables) { 1078 errors.detail(" Table: " + td.getTableName() + "\t" + 1079 (td.isReadOnly() ? "ro" : "rw") + "\t" + 1080 (td.isMetaRegion() ? "META" : " ") + "\t" + 1081 " families: " + td.getFamilies().size()); 1082 } 1083 } 1084 } 1085 getErrors()1086 public ErrorReporter getErrors() { 1087 return errors; 1088 } 1089 1090 /** 1091 * Read the .regioninfo file from the file system. If there is no 1092 * .regioninfo, add it to the orphan hdfs region list. 1093 */ loadHdfsRegioninfo(HbckInfo hbi)1094 private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException { 1095 Path regionDir = hbi.getHdfsRegionDir(); 1096 if (regionDir == null) { 1097 LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry); 1098 return; 1099 } 1100 1101 if (hbi.hdfsEntry.hri != null) { 1102 // already loaded data 1103 return; 1104 } 1105 1106 FileSystem fs = FileSystem.get(getConf()); 1107 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir); 1108 LOG.debug("HRegionInfo read: " + hri.toString()); 1109 hbi.hdfsEntry.hri = hri; 1110 } 1111 1112 /** 1113 * Exception thrown when a integrity repair operation fails in an 1114 * unresolvable way. 1115 */ 1116 public static class RegionRepairException extends IOException { 1117 private static final long serialVersionUID = 1L; 1118 final IOException ioe; RegionRepairException(String s, IOException ioe)1119 public RegionRepairException(String s, IOException ioe) { 1120 super(s); 1121 this.ioe = ioe; 1122 } 1123 } 1124 1125 /** 1126 * Populate hbi's from regionInfos loaded from file system. 1127 */ loadHdfsRegionInfos()1128 private SortedMap<TableName, TableInfo> loadHdfsRegionInfos() 1129 throws IOException, InterruptedException { 1130 tablesInfo.clear(); // regenerating the data 1131 // generate region split structure 1132 Collection<HbckInfo> hbckInfos = regionInfoMap.values(); 1133 1134 // Parallelized read of .regioninfo files. 1135 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size()); 1136 List<Future<Void>> hbiFutures; 1137 1138 for (HbckInfo hbi : hbckInfos) { 1139 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors); 1140 hbis.add(work); 1141 } 1142 1143 // Submit and wait for completion 1144 hbiFutures = executor.invokeAll(hbis); 1145 1146 for(int i=0; i<hbiFutures.size(); i++) { 1147 WorkItemHdfsRegionInfo work = hbis.get(i); 1148 Future<Void> f = hbiFutures.get(i); 1149 try { 1150 f.get(); 1151 } catch(ExecutionException e) { 1152 LOG.warn("Failed to read .regioninfo file for region " + 1153 work.hbi.getRegionNameAsString(), e.getCause()); 1154 } 1155 } 1156 1157 Path hbaseRoot = FSUtils.getRootDir(getConf()); 1158 FileSystem fs = hbaseRoot.getFileSystem(getConf()); 1159 // serialized table info gathering. 1160 for (HbckInfo hbi: hbckInfos) { 1161 1162 if (hbi.getHdfsHRI() == null) { 1163 // was an orphan 1164 continue; 1165 } 1166 1167 1168 // get table name from hdfs, populate various HBaseFsck tables. 1169 TableName tableName = hbi.getTableName(); 1170 if (tableName == null) { 1171 // There was an entry in hbase:meta not in the HDFS? 1172 LOG.warn("tableName was null for: " + hbi); 1173 continue; 1174 } 1175 1176 TableInfo modTInfo = tablesInfo.get(tableName); 1177 if (modTInfo == null) { 1178 // only executed once per table. 1179 modTInfo = new TableInfo(tableName); 1180 tablesInfo.put(tableName, modTInfo); 1181 try { 1182 HTableDescriptor htd = 1183 FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName); 1184 modTInfo.htds.add(htd); 1185 } catch (IOException ioe) { 1186 if (!orphanTableDirs.containsKey(tableName)) { 1187 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe); 1188 //should only report once for each table 1189 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, 1190 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName); 1191 Set<String> columns = new HashSet<String>(); 1192 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi)); 1193 } 1194 } 1195 } 1196 if (!hbi.isSkipChecks()) { 1197 modTInfo.addRegionInfo(hbi); 1198 } 1199 } 1200 1201 loadTableInfosForTablesWithNoRegion(); 1202 errors.print(""); 1203 1204 return tablesInfo; 1205 } 1206 1207 /** 1208 * To get the column family list according to the column family dirs 1209 * @param columns 1210 * @param hbi 1211 * @return a set of column families 1212 * @throws IOException 1213 */ getColumnFamilyList(Set<String> columns, HbckInfo hbi)1214 private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException { 1215 Path regionDir = hbi.getHdfsRegionDir(); 1216 FileSystem fs = regionDir.getFileSystem(getConf()); 1217 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs)); 1218 for (FileStatus subdir : subDirs) { 1219 String columnfamily = subdir.getPath().getName(); 1220 columns.add(columnfamily); 1221 } 1222 return columns; 1223 } 1224 1225 /** 1226 * To fabricate a .tableinfo file with following contents<br> 1227 * 1. the correct tablename <br> 1228 * 2. the correct colfamily list<br> 1229 * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br> 1230 * @throws IOException 1231 */ fabricateTableInfo(FSTableDescriptors fstd, TableName tableName, Set<String> columns)1232 private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName, 1233 Set<String> columns) throws IOException { 1234 if (columns ==null || columns.isEmpty()) return false; 1235 HTableDescriptor htd = new HTableDescriptor(tableName); 1236 for (String columnfamimly : columns) { 1237 htd.addFamily(new HColumnDescriptor(columnfamimly)); 1238 } 1239 fstd.createTableDescriptor(htd, true); 1240 return true; 1241 } 1242 1243 /** 1244 * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br> 1245 * @throws IOException 1246 */ fixEmptyMetaCells()1247 public void fixEmptyMetaCells() throws IOException { 1248 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) { 1249 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows."); 1250 for (Result region : emptyRegionInfoQualifiers) { 1251 deleteMetaRegion(region.getRow()); 1252 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL); 1253 } 1254 emptyRegionInfoQualifiers.clear(); 1255 } 1256 } 1257 1258 /** 1259 * To fix orphan table by creating a .tableinfo file under tableDir <br> 1260 * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br> 1261 * 2. else create a default .tableinfo file with following items<br> 1262 * 2.1 the correct tablename <br> 1263 * 2.2 the correct colfamily list<br> 1264 * 2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br> 1265 * @throws IOException 1266 */ fixOrphanTables()1267 public void fixOrphanTables() throws IOException { 1268 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) { 1269 1270 List<TableName> tmpList = new ArrayList<TableName>(); 1271 tmpList.addAll(orphanTableDirs.keySet()); 1272 HTableDescriptor[] htds = getHTableDescriptors(tmpList); 1273 Iterator<Entry<TableName, Set<String>>> iter = 1274 orphanTableDirs.entrySet().iterator(); 1275 int j = 0; 1276 int numFailedCase = 0; 1277 FSTableDescriptors fstd = new FSTableDescriptors(getConf()); 1278 while (iter.hasNext()) { 1279 Entry<TableName, Set<String>> entry = 1280 iter.next(); 1281 TableName tableName = entry.getKey(); 1282 LOG.info("Trying to fix orphan table error: " + tableName); 1283 if (j < htds.length) { 1284 if (tableName.equals(htds[j].getTableName())) { 1285 HTableDescriptor htd = htds[j]; 1286 LOG.info("fixing orphan table: " + tableName + " from cache"); 1287 fstd.createTableDescriptor(htd, true); 1288 j++; 1289 iter.remove(); 1290 } 1291 } else { 1292 if (fabricateTableInfo(fstd, tableName, entry.getValue())) { 1293 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file"); 1294 LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName); 1295 iter.remove(); 1296 } else { 1297 LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information"); 1298 numFailedCase++; 1299 } 1300 } 1301 fixes++; 1302 } 1303 1304 if (orphanTableDirs.isEmpty()) { 1305 // all orphanTableDirs are luckily recovered 1306 // re-run doFsck after recovering the .tableinfo file 1307 setShouldRerun(); 1308 LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed"); 1309 } else if (numFailedCase > 0) { 1310 LOG.error("Failed to fix " + numFailedCase 1311 + " OrphanTables with default .tableinfo files"); 1312 } 1313 1314 } 1315 //cleanup the list 1316 orphanTableDirs.clear(); 1317 1318 } 1319 1320 /** 1321 * This borrows code from MasterFileSystem.bootstrap() 1322 * 1323 * @return an open hbase:meta HRegion 1324 */ createNewMeta()1325 private HRegion createNewMeta() throws IOException { 1326 Path rootdir = FSUtils.getRootDir(getConf()); 1327 Configuration c = getConf(); 1328 HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO); 1329 HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME); 1330 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false); 1331 HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor); 1332 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true); 1333 return meta; 1334 } 1335 1336 /** 1337 * Generate set of puts to add to new meta. This expects the tables to be 1338 * clean with no overlaps or holes. If there are any problems it returns null. 1339 * 1340 * @return An array list of puts to do in bulk, null if tables have problems 1341 */ generatePuts( SortedMap<TableName, TableInfo> tablesInfo)1342 private ArrayList<Put> generatePuts( 1343 SortedMap<TableName, TableInfo> tablesInfo) throws IOException { 1344 ArrayList<Put> puts = new ArrayList<Put>(); 1345 boolean hasProblems = false; 1346 for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) { 1347 TableName name = e.getKey(); 1348 1349 // skip "hbase:meta" 1350 if (name.compareTo(TableName.META_TABLE_NAME) == 0) { 1351 continue; 1352 } 1353 1354 TableInfo ti = e.getValue(); 1355 for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap() 1356 .entrySet()) { 1357 Collection<HbckInfo> his = spl.getValue(); 1358 int sz = his.size(); 1359 if (sz != 1) { 1360 // problem 1361 LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey()) 1362 + " had " + sz + " regions instead of exactly 1." ); 1363 hasProblems = true; 1364 continue; 1365 } 1366 1367 // add the row directly to meta. 1368 HbckInfo hi = his.iterator().next(); 1369 HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry; 1370 Put p = MetaTableAccessor.makePutFromRegionInfo(hri); 1371 puts.add(p); 1372 } 1373 } 1374 return hasProblems ? null : puts; 1375 } 1376 1377 /** 1378 * Suggest fixes for each table 1379 */ suggestFixes( SortedMap<TableName, TableInfo> tablesInfo)1380 private void suggestFixes( 1381 SortedMap<TableName, TableInfo> tablesInfo) throws IOException { 1382 logParallelMerge(); 1383 for (TableInfo tInfo : tablesInfo.values()) { 1384 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors); 1385 tInfo.checkRegionChain(handler); 1386 } 1387 } 1388 1389 /** 1390 * Rebuilds meta from information in hdfs/fs. Depends on configuration 1391 * settings passed into hbck constructor to point to a particular fs/dir. 1392 * 1393 * @param fix flag that determines if method should attempt to fix holes 1394 * @return true if successful, false if attempt failed. 1395 */ rebuildMeta(boolean fix)1396 public boolean rebuildMeta(boolean fix) throws IOException, 1397 InterruptedException { 1398 1399 // TODO check to make sure hbase is offline. (or at least the table 1400 // currently being worked on is off line) 1401 1402 // Determine what's on HDFS 1403 LOG.info("Loading HBase regioninfo from HDFS..."); 1404 loadHdfsRegionDirs(); // populating regioninfo table. 1405 1406 int errs = errors.getErrorList().size(); 1407 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs. 1408 checkHdfsIntegrity(false, false); 1409 1410 // make sure ok. 1411 if (errors.getErrorList().size() != errs) { 1412 // While in error state, iterate until no more fixes possible 1413 while(true) { 1414 fixes = 0; 1415 suggestFixes(tablesInfo); 1416 errors.clear(); 1417 loadHdfsRegionInfos(); // update tableInfos based on region info in fs. 1418 checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps()); 1419 1420 int errCount = errors.getErrorList().size(); 1421 1422 if (fixes == 0) { 1423 if (errCount > 0) { 1424 return false; // failed to fix problems. 1425 } else { 1426 break; // no fixes and no problems? drop out and fix stuff! 1427 } 1428 } 1429 } 1430 } 1431 1432 // we can rebuild, move old meta out of the way and start 1433 LOG.info("HDFS regioninfo's seems good. Sidelining old hbase:meta"); 1434 Path backupDir = sidelineOldMeta(); 1435 1436 LOG.info("Creating new hbase:meta"); 1437 HRegion meta = createNewMeta(); 1438 1439 // populate meta 1440 List<Put> puts = generatePuts(tablesInfo); 1441 if (puts == null) { 1442 LOG.fatal("Problem encountered when creating new hbase:meta entries. " + 1443 "You may need to restore the previously sidelined hbase:meta"); 1444 return false; 1445 } 1446 meta.batchMutate(puts.toArray(new Put[puts.size()])); 1447 HRegion.closeHRegion(meta); 1448 LOG.info("Success! hbase:meta table rebuilt."); 1449 LOG.info("Old hbase:meta is moved into " + backupDir); 1450 return true; 1451 } 1452 1453 /** 1454 * Log an appropriate message about whether or not overlapping merges are computed in parallel. 1455 */ logParallelMerge()1456 private void logParallelMerge() { 1457 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) { 1458 LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" + 1459 " false to run serially."); 1460 } else { 1461 LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" + 1462 " true to run in parallel."); 1463 } 1464 } 1465 checkHdfsIntegrity(boolean fixHoles, boolean fixOverlaps)1466 private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles, 1467 boolean fixOverlaps) throws IOException { 1468 LOG.info("Checking HBase region split map from HDFS data..."); 1469 logParallelMerge(); 1470 for (TableInfo tInfo : tablesInfo.values()) { 1471 TableIntegrityErrorHandler handler; 1472 if (fixHoles || fixOverlaps) { 1473 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), 1474 fixHoles, fixOverlaps); 1475 } else { 1476 handler = tInfo.new IntegrityFixSuggester(tInfo, errors); 1477 } 1478 if (!tInfo.checkRegionChain(handler)) { 1479 // should dump info as well. 1480 errors.report("Found inconsistency in table " + tInfo.getName()); 1481 } 1482 } 1483 return tablesInfo; 1484 } 1485 getSidelineDir()1486 private Path getSidelineDir() throws IOException { 1487 if (sidelineDir == null) { 1488 Path hbaseDir = FSUtils.getRootDir(getConf()); 1489 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME); 1490 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" 1491 + startMillis); 1492 } 1493 return sidelineDir; 1494 } 1495 1496 /** 1497 * Sideline a region dir (instead of deleting it) 1498 */ sidelineRegionDir(FileSystem fs, HbckInfo hi)1499 Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException { 1500 return sidelineRegionDir(fs, null, hi); 1501 } 1502 1503 /** 1504 * Sideline a region dir (instead of deleting it) 1505 * 1506 * @param parentDir if specified, the region will be sidelined to 1507 * folder like .../parentDir/<table name>/<region name>. The purpose 1508 * is to group together similar regions sidelined, for example, those 1509 * regions should be bulk loaded back later on. If null, it is ignored. 1510 */ sidelineRegionDir(FileSystem fs, String parentDir, HbckInfo hi)1511 Path sidelineRegionDir(FileSystem fs, 1512 String parentDir, HbckInfo hi) throws IOException { 1513 TableName tableName = hi.getTableName(); 1514 Path regionDir = hi.getHdfsRegionDir(); 1515 1516 if (!fs.exists(regionDir)) { 1517 LOG.warn("No previous " + regionDir + " exists. Continuing."); 1518 return null; 1519 } 1520 1521 Path rootDir = getSidelineDir(); 1522 if (parentDir != null) { 1523 rootDir = new Path(rootDir, parentDir); 1524 } 1525 Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName); 1526 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName()); 1527 fs.mkdirs(sidelineRegionDir); 1528 boolean success = false; 1529 FileStatus[] cfs = fs.listStatus(regionDir); 1530 if (cfs == null) { 1531 LOG.info("Region dir is empty: " + regionDir); 1532 } else { 1533 for (FileStatus cf : cfs) { 1534 Path src = cf.getPath(); 1535 Path dst = new Path(sidelineRegionDir, src.getName()); 1536 if (fs.isFile(src)) { 1537 // simple file 1538 success = fs.rename(src, dst); 1539 if (!success) { 1540 String msg = "Unable to rename file " + src + " to " + dst; 1541 LOG.error(msg); 1542 throw new IOException(msg); 1543 } 1544 continue; 1545 } 1546 1547 // is a directory. 1548 fs.mkdirs(dst); 1549 1550 LOG.info("Sidelining files from " + src + " into containing region " + dst); 1551 // FileSystem.rename is inconsistent with directories -- if the 1552 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir, 1553 // it moves the src into the dst dir resulting in (foo/a/b). If 1554 // the dst does not exist, and the src a dir, src becomes dst. (foo/b) 1555 FileStatus[] hfiles = fs.listStatus(src); 1556 if (hfiles != null && hfiles.length > 0) { 1557 for (FileStatus hfile : hfiles) { 1558 success = fs.rename(hfile.getPath(), dst); 1559 if (!success) { 1560 String msg = "Unable to rename file " + src + " to " + dst; 1561 LOG.error(msg); 1562 throw new IOException(msg); 1563 } 1564 } 1565 } 1566 LOG.debug("Sideline directory contents:"); 1567 debugLsr(sidelineRegionDir); 1568 } 1569 } 1570 1571 LOG.info("Removing old region dir: " + regionDir); 1572 success = fs.delete(regionDir, true); 1573 if (!success) { 1574 String msg = "Unable to delete dir " + regionDir; 1575 LOG.error(msg); 1576 throw new IOException(msg); 1577 } 1578 return sidelineRegionDir; 1579 } 1580 1581 /** 1582 * Side line an entire table. 1583 */ sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir, Path backupHbaseDir)1584 void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir, 1585 Path backupHbaseDir) throws IOException { 1586 Path tableDir = FSUtils.getTableDir(hbaseDir, tableName); 1587 if (fs.exists(tableDir)) { 1588 Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName); 1589 fs.mkdirs(backupTableDir.getParent()); 1590 boolean success = fs.rename(tableDir, backupTableDir); 1591 if (!success) { 1592 throw new IOException("Failed to move " + tableName + " from " 1593 + tableDir + " to " + backupTableDir); 1594 } 1595 } else { 1596 LOG.info("No previous " + tableName + " exists. Continuing."); 1597 } 1598 } 1599 1600 /** 1601 * @return Path to backup of original directory 1602 */ sidelineOldMeta()1603 Path sidelineOldMeta() throws IOException { 1604 // put current hbase:meta aside. 1605 Path hbaseDir = FSUtils.getRootDir(getConf()); 1606 FileSystem fs = hbaseDir.getFileSystem(getConf()); 1607 Path backupDir = getSidelineDir(); 1608 fs.mkdirs(backupDir); 1609 1610 try { 1611 sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir); 1612 } catch (IOException e) { 1613 LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore " 1614 + "try to rename hbase:meta in " + backupDir.getName() + " to " 1615 + hbaseDir.getName() + ".", e); 1616 throw e; // throw original exception 1617 } 1618 return backupDir; 1619 } 1620 1621 /** 1622 * Load the list of disabled tables in ZK into local set. 1623 * @throws ZooKeeperConnectionException 1624 * @throws IOException 1625 */ loadDisabledTables()1626 private void loadDisabledTables() 1627 throws ZooKeeperConnectionException, IOException { 1628 HConnectionManager.execute(new HConnectable<Void>(getConf()) { 1629 @Override 1630 public Void connect(HConnection connection) throws IOException { 1631 ZooKeeperWatcher zkw = createZooKeeperWatcher(); 1632 try { 1633 for (TableName tableName : 1634 ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) { 1635 disabledTables.add(tableName); 1636 } 1637 } catch (KeeperException ke) { 1638 throw new IOException(ke); 1639 } catch (InterruptedException e) { 1640 throw new InterruptedIOException(); 1641 } finally { 1642 zkw.close(); 1643 } 1644 return null; 1645 } 1646 }); 1647 } 1648 1649 /** 1650 * Check if the specified region's table is disabled. 1651 */ isTableDisabled(HRegionInfo regionInfo)1652 private boolean isTableDisabled(HRegionInfo regionInfo) { 1653 return disabledTables.contains(regionInfo.getTable()); 1654 } 1655 1656 /** 1657 * Scan HDFS for all regions, recording their information into 1658 * regionInfoMap 1659 */ loadHdfsRegionDirs()1660 public void loadHdfsRegionDirs() throws IOException, InterruptedException { 1661 Path rootDir = FSUtils.getRootDir(getConf()); 1662 FileSystem fs = rootDir.getFileSystem(getConf()); 1663 1664 // list all tables from HDFS 1665 List<FileStatus> tableDirs = Lists.newArrayList(); 1666 1667 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME)); 1668 1669 List<Path> paths = FSUtils.getTableDirs(fs, rootDir); 1670 for (Path path : paths) { 1671 TableName tableName = FSUtils.getTableName(path); 1672 if ((!checkMetaOnly && 1673 isTableIncluded(tableName)) || 1674 tableName.equals(TableName.META_TABLE_NAME)) { 1675 tableDirs.add(fs.getFileStatus(path)); 1676 } 1677 } 1678 1679 // verify that version file exists 1680 if (!foundVersionFile) { 1681 errors.reportError(ERROR_CODE.NO_VERSION_FILE, 1682 "Version file does not exist in root dir " + rootDir); 1683 if (shouldFixVersionFile()) { 1684 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME 1685 + " file."); 1686 setShouldRerun(); 1687 FSUtils.setVersion(fs, rootDir, getConf().getInt( 1688 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt( 1689 HConstants.VERSION_FILE_WRITE_ATTEMPTS, 1690 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS)); 1691 } 1692 } 1693 1694 // level 1: <HBASE_DIR>/* 1695 List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size()); 1696 List<Future<Void>> dirsFutures; 1697 1698 for (FileStatus tableDir : tableDirs) { 1699 LOG.debug("Loading region dirs from " +tableDir.getPath()); 1700 dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir)); 1701 } 1702 1703 // Invoke and wait for Callables to complete 1704 dirsFutures = executor.invokeAll(dirs); 1705 1706 for(Future<Void> f: dirsFutures) { 1707 try { 1708 f.get(); 1709 } catch(ExecutionException e) { 1710 LOG.warn("Could not load region dir " , e.getCause()); 1711 } 1712 } 1713 errors.print(""); 1714 } 1715 1716 /** 1717 * Record the location of the hbase:meta region as found in ZooKeeper. 1718 */ recordMetaRegion()1719 private boolean recordMetaRegion() throws IOException { 1720 RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME, 1721 HConstants.EMPTY_START_ROW, false, false); 1722 if (rl == null) { 1723 errors.reportError(ERROR_CODE.NULL_META_REGION, 1724 "META region or some of its attributes are null."); 1725 return false; 1726 } 1727 for (HRegionLocation metaLocation : rl.getRegionLocations()) { 1728 // Check if Meta region is valid and existing 1729 if (metaLocation == null || metaLocation.getRegionInfo() == null || 1730 metaLocation.getHostname() == null) { 1731 errors.reportError(ERROR_CODE.NULL_META_REGION, 1732 "META region or some of its attributes are null."); 1733 return false; 1734 } 1735 ServerName sn = metaLocation.getServerName(); 1736 MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime()); 1737 HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName()); 1738 if (hbckInfo == null) { 1739 regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m)); 1740 } else { 1741 hbckInfo.metaEntry = m; 1742 } 1743 } 1744 return true; 1745 } 1746 createZooKeeperWatcher()1747 private ZooKeeperWatcher createZooKeeperWatcher() throws IOException { 1748 return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() { 1749 @Override 1750 public void abort(String why, Throwable e) { 1751 LOG.error(why, e); 1752 System.exit(1); 1753 } 1754 1755 @Override 1756 public boolean isAborted() { 1757 return false; 1758 } 1759 1760 }); 1761 } 1762 getMetaRegionServerName(int replicaId)1763 private ServerName getMetaRegionServerName(int replicaId) 1764 throws IOException, KeeperException { 1765 ZooKeeperWatcher zkw = createZooKeeperWatcher(); 1766 ServerName sn = null; 1767 try { 1768 sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId); 1769 } finally { 1770 zkw.close(); 1771 } 1772 return sn; 1773 } 1774 1775 /** 1776 * Contacts each regionserver and fetches metadata about regions. 1777 * @param regionServerList - the list of region servers to connect to 1778 * @throws IOException if a remote or network exception occurs 1779 */ processRegionServers(Collection<ServerName> regionServerList)1780 void processRegionServers(Collection<ServerName> regionServerList) 1781 throws IOException, InterruptedException { 1782 1783 List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size()); 1784 List<Future<Void>> workFutures; 1785 1786 // loop to contact each region server in parallel 1787 for (ServerName rsinfo: regionServerList) { 1788 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection)); 1789 } 1790 1791 workFutures = executor.invokeAll(workItems); 1792 1793 for(int i=0; i<workFutures.size(); i++) { 1794 WorkItemRegion item = workItems.get(i); 1795 Future<Void> f = workFutures.get(i); 1796 try { 1797 f.get(); 1798 } catch(ExecutionException e) { 1799 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(), 1800 e.getCause()); 1801 } 1802 } 1803 } 1804 1805 /** 1806 * Check consistency of all regions that have been found in previous phases. 1807 */ checkAndFixConsistency()1808 private void checkAndFixConsistency() 1809 throws IOException, KeeperException, InterruptedException { 1810 // Divide the checks in two phases. One for default/primary replicas and another 1811 // for the non-primary ones. Keeps code cleaner this way. 1812 List<CheckRegionConsistencyWorkItem> workItems = 1813 new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size()); 1814 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) { 1815 if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { 1816 workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue())); 1817 } 1818 } 1819 checkRegionConsistencyConcurrently(workItems); 1820 1821 boolean prevHdfsCheck = shouldCheckHdfs(); 1822 setCheckHdfs(false); //replicas don't have any hdfs data 1823 // Run a pass over the replicas and fix any assignment issues that exist on the currently 1824 // deployed/undeployed replicas. 1825 List<CheckRegionConsistencyWorkItem> replicaWorkItems = 1826 new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size()); 1827 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) { 1828 if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { 1829 replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue())); 1830 } 1831 } 1832 checkRegionConsistencyConcurrently(replicaWorkItems); 1833 setCheckHdfs(prevHdfsCheck); 1834 1835 // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might 1836 // not get accurate state of the hbase if continuing. The config here allows users to tune 1837 // the tolerance of number of skipped region. 1838 // TODO: evaluate the consequence to continue the hbck operation without config. 1839 int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0); 1840 int numOfSkippedRegions = skippedRegions.size(); 1841 if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) { 1842 throw new IOException(numOfSkippedRegions 1843 + " region(s) could not be checked or repaired. See logs for detail."); 1844 } 1845 } 1846 1847 /** 1848 * Check consistency of all regions using mulitple threads concurrently. 1849 */ checkRegionConsistencyConcurrently( final List<CheckRegionConsistencyWorkItem> workItems)1850 private void checkRegionConsistencyConcurrently( 1851 final List<CheckRegionConsistencyWorkItem> workItems) 1852 throws IOException, KeeperException, InterruptedException { 1853 if (workItems.isEmpty()) { 1854 return; // nothing to check 1855 } 1856 1857 List<Future<Void>> workFutures = executor.invokeAll(workItems); 1858 for(Future<Void> f: workFutures) { 1859 try { 1860 f.get(); 1861 } catch(ExecutionException e1) { 1862 LOG.warn("Could not check region consistency " , e1.getCause()); 1863 if (e1.getCause() instanceof IOException) { 1864 throw (IOException)e1.getCause(); 1865 } else if (e1.getCause() instanceof KeeperException) { 1866 throw (KeeperException)e1.getCause(); 1867 } else if (e1.getCause() instanceof InterruptedException) { 1868 throw (InterruptedException)e1.getCause(); 1869 } else { 1870 throw new IOException(e1.getCause()); 1871 } 1872 } 1873 } 1874 } 1875 1876 class CheckRegionConsistencyWorkItem implements Callable<Void> { 1877 private final String key; 1878 private final HbckInfo hbi; 1879 CheckRegionConsistencyWorkItem(String key, HbckInfo hbi)1880 CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) { 1881 this.key = key; 1882 this.hbi = hbi; 1883 } 1884 1885 @Override call()1886 public synchronized Void call() throws Exception { 1887 try { 1888 checkRegionConsistency(key, hbi); 1889 } catch (Exception e) { 1890 // If the region is non-META region, skip this region and send warning/error message; if 1891 // the region is META region, we should not continue. 1892 LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString() 1893 + "'.", e); 1894 if (hbi.getHdfsHRI().isMetaRegion()) { 1895 throw e; 1896 } 1897 LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'"); 1898 addSkippedRegion(hbi); 1899 } 1900 return null; 1901 } 1902 } 1903 addSkippedRegion(final HbckInfo hbi)1904 private void addSkippedRegion(final HbckInfo hbi) { 1905 Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName()); 1906 if (skippedRegionNames == null) { 1907 skippedRegionNames = new HashSet<String>(); 1908 } 1909 skippedRegionNames.add(hbi.getRegionNameAsString()); 1910 skippedRegions.put(hbi.getTableName(), skippedRegionNames); 1911 } 1912 preCheckPermission()1913 private void preCheckPermission() throws IOException, AccessDeniedException { 1914 if (shouldIgnorePreCheckPermission()) { 1915 return; 1916 } 1917 1918 Path hbaseDir = FSUtils.getRootDir(getConf()); 1919 FileSystem fs = hbaseDir.getFileSystem(getConf()); 1920 UserProvider userProvider = UserProvider.instantiate(getConf()); 1921 UserGroupInformation ugi = userProvider.getCurrent().getUGI(); 1922 FileStatus[] files = fs.listStatus(hbaseDir); 1923 for (FileStatus file : files) { 1924 try { 1925 FSUtils.checkAccess(ugi, file, FsAction.WRITE); 1926 } catch (AccessDeniedException ace) { 1927 LOG.warn("Got AccessDeniedException when preCheckPermission ", ace); 1928 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName() 1929 + " does not have write perms to " + file.getPath() 1930 + ". Please rerun hbck as hdfs user " + file.getOwner()); 1931 throw ace; 1932 } 1933 } 1934 } 1935 1936 /** 1937 * Deletes region from meta table 1938 */ deleteMetaRegion(HbckInfo hi)1939 private void deleteMetaRegion(HbckInfo hi) throws IOException { 1940 deleteMetaRegion(hi.metaEntry.getRegionName()); 1941 } 1942 1943 /** 1944 * Deletes region from meta table 1945 */ deleteMetaRegion(byte[] metaKey)1946 private void deleteMetaRegion(byte[] metaKey) throws IOException { 1947 Delete d = new Delete(metaKey); 1948 meta.delete(d); 1949 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" ); 1950 } 1951 1952 /** 1953 * Reset the split parent region info in meta table 1954 */ resetSplitParent(HbckInfo hi)1955 private void resetSplitParent(HbckInfo hi) throws IOException { 1956 RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName()); 1957 Delete d = new Delete(hi.metaEntry.getRegionName()); 1958 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER); 1959 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER); 1960 mutations.add(d); 1961 1962 HRegionInfo hri = new HRegionInfo(hi.metaEntry); 1963 hri.setOffline(false); 1964 hri.setSplit(false); 1965 Put p = MetaTableAccessor.makePutFromRegionInfo(hri); 1966 mutations.add(p); 1967 1968 meta.mutateRow(mutations); 1969 LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" ); 1970 } 1971 1972 /** 1973 * This backwards-compatibility wrapper for permanently offlining a region 1974 * that should not be alive. If the region server does not support the 1975 * "offline" method, it will use the closest unassign method instead. This 1976 * will basically work until one attempts to disable or delete the affected 1977 * table. The problem has to do with in-memory only master state, so 1978 * restarting the HMaster or failing over to another should fix this. 1979 */ offline(byte[] regionName)1980 private void offline(byte[] regionName) throws IOException { 1981 String regionString = Bytes.toStringBinary(regionName); 1982 if (!rsSupportsOffline) { 1983 LOG.warn("Using unassign region " + regionString 1984 + " instead of using offline method, you should" 1985 + " restart HMaster after these repairs"); 1986 admin.unassign(regionName, true); 1987 return; 1988 } 1989 1990 // first time we assume the rs's supports #offline. 1991 try { 1992 LOG.info("Offlining region " + regionString); 1993 admin.offline(regionName); 1994 } catch (IOException ioe) { 1995 String notFoundMsg = "java.lang.NoSuchMethodException: " + 1996 "org.apache.hadoop.hbase.master.HMaster.offline([B)"; 1997 if (ioe.getMessage().contains(notFoundMsg)) { 1998 LOG.warn("Using unassign region " + regionString 1999 + " instead of using offline method, you should" 2000 + " restart HMaster after these repairs"); 2001 rsSupportsOffline = false; // in the future just use unassign 2002 admin.unassign(regionName, true); 2003 return; 2004 } 2005 throw ioe; 2006 } 2007 } 2008 undeployRegions(HbckInfo hi)2009 private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException { 2010 undeployRegionsForHbi(hi); 2011 // undeploy replicas of the region (but only if the method is invoked for the primary) 2012 if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { 2013 return; 2014 } 2015 int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication(); 2016 for (int i = 1; i < numReplicas; i++) { 2017 if (hi.getPrimaryHRIForDeployedReplica() == null) continue; 2018 HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica( 2019 hi.getPrimaryHRIForDeployedReplica(), i); 2020 HbckInfo h = regionInfoMap.get(hri.getEncodedName()); 2021 if (h != null) { 2022 undeployRegionsForHbi(h); 2023 //set skip checks; we undeployed it, and we don't want to evaluate this anymore 2024 //in consistency checks 2025 h.setSkipChecks(true); 2026 } 2027 } 2028 } 2029 undeployRegionsForHbi(HbckInfo hi)2030 private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException { 2031 for (OnlineEntry rse : hi.deployedEntries) { 2032 LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa); 2033 try { 2034 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri); 2035 offline(rse.hri.getRegionName()); 2036 } catch (IOException ioe) { 2037 LOG.warn("Got exception when attempting to offline region " 2038 + Bytes.toString(rse.hri.getRegionName()), ioe); 2039 } 2040 } 2041 } 2042 2043 /** 2044 * Attempts to undeploy a region from a region server based in information in 2045 * META. Any operations that modify the file system should make sure that 2046 * its corresponding region is not deployed to prevent data races. 2047 * 2048 * A separate call is required to update the master in-memory region state 2049 * kept in the AssignementManager. Because disable uses this state instead of 2050 * that found in META, we can't seem to cleanly disable/delete tables that 2051 * have been hbck fixed. When used on a version of HBase that does not have 2052 * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master 2053 * restart or failover may be required. 2054 */ closeRegion(HbckInfo hi)2055 private void closeRegion(HbckInfo hi) throws IOException, InterruptedException { 2056 if (hi.metaEntry == null && hi.hdfsEntry == null) { 2057 undeployRegions(hi); 2058 return; 2059 } 2060 2061 // get assignment info and hregioninfo from meta. 2062 Get get = new Get(hi.getRegionName()); 2063 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 2064 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); 2065 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER); 2066 // also get the locations of the replicas to close if the primary region is being closed 2067 if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { 2068 int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication(); 2069 for (int i = 0; i < numReplicas; i++) { 2070 get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i)); 2071 get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i)); 2072 } 2073 } 2074 Result r = meta.get(get); 2075 RegionLocations rl = MetaTableAccessor.getRegionLocations(r); 2076 if (rl == null) { 2077 LOG.warn("Unable to close region " + hi.getRegionNameAsString() + 2078 " since meta does not have handle to reach it"); 2079 return; 2080 } 2081 for (HRegionLocation h : rl.getRegionLocations()) { 2082 ServerName serverName = h.getServerName(); 2083 if (serverName == null) { 2084 errors.reportError("Unable to close region " 2085 + hi.getRegionNameAsString() + " because meta does not " 2086 + "have handle to reach it."); 2087 continue; 2088 } 2089 HRegionInfo hri = h.getRegionInfo(); 2090 if (hri == null) { 2091 LOG.warn("Unable to close region " + hi.getRegionNameAsString() 2092 + " because hbase:meta had invalid or missing " 2093 + HConstants.CATALOG_FAMILY_STR + ":" 2094 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) 2095 + " qualifier value."); 2096 continue; 2097 } 2098 // close the region -- close files and remove assignment 2099 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri); 2100 } 2101 } 2102 tryAssignmentRepair(HbckInfo hbi, String msg)2103 private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException, 2104 KeeperException, InterruptedException { 2105 // If we are trying to fix the errors 2106 if (shouldFixAssignments()) { 2107 errors.print(msg); 2108 undeployRegions(hbi); 2109 setShouldRerun(); 2110 HRegionInfo hri = hbi.getHdfsHRI(); 2111 if (hri == null) { 2112 hri = hbi.metaEntry; 2113 } 2114 HBaseFsckRepair.fixUnassigned(admin, hri); 2115 HBaseFsckRepair.waitUntilAssigned(admin, hri); 2116 2117 // also assign replicas if needed (do it only when this call operates on a primary replica) 2118 if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return; 2119 int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication(); 2120 for (int i = 1; i < replicationCount; i++) { 2121 hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i); 2122 HbckInfo h = regionInfoMap.get(hri.getEncodedName()); 2123 if (h != null) { 2124 undeployRegions(h); 2125 //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore 2126 //in consistency checks 2127 h.setSkipChecks(true); 2128 } 2129 HBaseFsckRepair.fixUnassigned(admin, hri); 2130 HBaseFsckRepair.waitUntilAssigned(admin, hri); 2131 } 2132 2133 } 2134 } 2135 2136 /** 2137 * Check a single region for consistency and correct deployment. 2138 */ checkRegionConsistency(final String key, final HbckInfo hbi)2139 private void checkRegionConsistency(final String key, final HbckInfo hbi) 2140 throws IOException, KeeperException, InterruptedException { 2141 2142 if (hbi.isSkipChecks()) return; 2143 String descriptiveName = hbi.toString(); 2144 boolean inMeta = hbi.metaEntry != null; 2145 // In case not checking HDFS, assume the region is on HDFS 2146 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null; 2147 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null; 2148 boolean isDeployed = !hbi.deployedOn.isEmpty(); 2149 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1; 2150 boolean deploymentMatchesMeta = 2151 hasMetaAssignment && isDeployed && !isMultiplyDeployed && 2152 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0)); 2153 boolean splitParent = 2154 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline(); 2155 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry); 2156 boolean recentlyModified = inHdfs && 2157 hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime(); 2158 2159 // ========== First the healthy cases ============= 2160 if (hbi.containsOnlyHdfsEdits()) { 2161 return; 2162 } 2163 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) { 2164 return; 2165 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) { 2166 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " + 2167 "tabled that is not deployed"); 2168 return; 2169 } else if (recentlyModified) { 2170 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping"); 2171 return; 2172 } 2173 // ========== Cases where the region is not in hbase:meta ============= 2174 else if (!inMeta && !inHdfs && !isDeployed) { 2175 // We shouldn't have record of this region at all then! 2176 assert false : "Entry for region with no data"; 2177 } else if (!inMeta && !inHdfs && isDeployed) { 2178 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region " 2179 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " + 2180 "deployed on " + Joiner.on(", ").join(hbi.deployedOn)); 2181 if (shouldFixAssignments()) { 2182 undeployRegions(hbi); 2183 } 2184 2185 } else if (!inMeta && inHdfs && !isDeployed) { 2186 if (hbi.isMerged()) { 2187 // This region has already been merged, the remaining hdfs file will be 2188 // cleaned by CatalogJanitor later 2189 hbi.setSkipChecks(true); 2190 LOG.info("Region " + descriptiveName 2191 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later"); 2192 return; 2193 } 2194 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " 2195 + descriptiveName + " on HDFS, but not listed in hbase:meta " + 2196 "or deployed on any region server"); 2197 // restore region consistency of an adopted orphan 2198 if (shouldFixMeta()) { 2199 if (!hbi.isHdfsRegioninfoPresent()) { 2200 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired" 2201 + " in table integrity repair phase if -fixHdfsOrphans was" + 2202 " used."); 2203 return; 2204 } 2205 2206 HRegionInfo hri = hbi.getHdfsHRI(); 2207 TableInfo tableInfo = tablesInfo.get(hri.getTable()); 2208 2209 for (HRegionInfo region : tableInfo.getRegionsFromMeta()) { 2210 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0 2211 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(), 2212 hri.getEndKey()) >= 0) 2213 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) { 2214 if(region.isSplit() || region.isOffline()) continue; 2215 Path regionDir = hbi.getHdfsRegionDir(); 2216 FileSystem fs = regionDir.getFileSystem(getConf()); 2217 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir); 2218 for (Path familyDir : familyDirs) { 2219 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir); 2220 for (Path referenceFilePath : referenceFilePaths) { 2221 Path parentRegionDir = 2222 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent(); 2223 if (parentRegionDir.toString().endsWith(region.getEncodedName())) { 2224 LOG.warn(hri + " start and stop keys are in the range of " + region 2225 + ". The region might not be cleaned up from hdfs when region " + region 2226 + " split failed. Hence deleting from hdfs."); 2227 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, 2228 regionDir.getParent(), hri); 2229 return; 2230 } 2231 } 2232 } 2233 } 2234 } 2235 2236 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI()); 2237 int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication(); 2238 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(), 2239 admin.getClusterStatus().getServers(), numReplicas); 2240 2241 tryAssignmentRepair(hbi, "Trying to reassign region..."); 2242 } 2243 2244 } else if (!inMeta && inHdfs && isDeployed) { 2245 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName 2246 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn)); 2247 debugLsr(hbi.getHdfsRegionDir()); 2248 if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { 2249 // for replicas, this means that we should undeploy the region (we would have 2250 // gone over the primaries and fixed meta holes in first phase under 2251 // checkAndFixConsistency; we shouldn't get the condition !inMeta at 2252 // this stage unless unwanted replica) 2253 if (shouldFixAssignments()) { 2254 undeployRegionsForHbi(hbi); 2255 } 2256 } 2257 if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { 2258 if (!hbi.isHdfsRegioninfoPresent()) { 2259 LOG.error("This should have been repaired in table integrity repair phase"); 2260 return; 2261 } 2262 2263 LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI()); 2264 int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication(); 2265 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(), 2266 admin.getClusterStatus().getServers(), numReplicas); 2267 tryAssignmentRepair(hbi, "Trying to fix unassigned region..."); 2268 } 2269 2270 // ========== Cases where the region is in hbase:meta ============= 2271 } else if (inMeta && inHdfs && !isDeployed && splitParent) { 2272 // check whether this is an actual error, or just transient state where parent 2273 // is not cleaned 2274 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) { 2275 // check that split daughters are there 2276 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName()); 2277 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName()); 2278 if (infoA != null && infoB != null) { 2279 // we already processed or will process daughters. Move on, nothing to see here. 2280 hbi.setSkipChecks(true); 2281 return; 2282 } 2283 } 2284 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region " 2285 + descriptiveName + " is a split parent in META, in HDFS, " 2286 + "and not deployed on any region server. This could be transient."); 2287 if (shouldFixSplitParents()) { 2288 setShouldRerun(); 2289 resetSplitParent(hbi); 2290 } 2291 } else if (inMeta && !inHdfs && !isDeployed) { 2292 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " 2293 + descriptiveName + " found in META, but not in HDFS " 2294 + "or deployed on any region server."); 2295 if (shouldFixMeta()) { 2296 deleteMetaRegion(hbi); 2297 } 2298 } else if (inMeta && !inHdfs && isDeployed) { 2299 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName 2300 + " found in META, but not in HDFS, " + 2301 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn)); 2302 // We treat HDFS as ground truth. Any information in meta is transient 2303 // and equivalent data can be regenerated. So, lets unassign and remove 2304 // these problems from META. 2305 if (shouldFixAssignments()) { 2306 errors.print("Trying to fix unassigned region..."); 2307 undeployRegions(hbi); 2308 } 2309 if (shouldFixMeta()) { 2310 // wait for it to complete 2311 deleteMetaRegion(hbi); 2312 } 2313 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) { 2314 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName 2315 + " not deployed on any region server."); 2316 tryAssignmentRepair(hbi, "Trying to fix unassigned region..."); 2317 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) { 2318 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, 2319 "Region " + descriptiveName + " should not be deployed according " + 2320 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn)); 2321 if (shouldFixAssignments()) { 2322 errors.print("Trying to close the region " + descriptiveName); 2323 setShouldRerun(); 2324 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn); 2325 } 2326 } else if (inMeta && inHdfs && isMultiplyDeployed) { 2327 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName 2328 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer 2329 + " but is multiply assigned to region servers " + 2330 Joiner.on(", ").join(hbi.deployedOn)); 2331 // If we are trying to fix the errors 2332 if (shouldFixAssignments()) { 2333 errors.print("Trying to fix assignment error..."); 2334 setShouldRerun(); 2335 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn); 2336 } 2337 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) { 2338 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region " 2339 + descriptiveName + " listed in hbase:meta on region server " + 2340 hbi.metaEntry.regionServer + " but found on region server " + 2341 hbi.deployedOn.get(0)); 2342 // If we are trying to fix the errors 2343 if (shouldFixAssignments()) { 2344 errors.print("Trying to fix assignment error..."); 2345 setShouldRerun(); 2346 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn); 2347 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI()); 2348 } 2349 } else { 2350 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName + 2351 " is in an unforeseen state:" + 2352 " inMeta=" + inMeta + 2353 " inHdfs=" + inHdfs + 2354 " isDeployed=" + isDeployed + 2355 " isMultiplyDeployed=" + isMultiplyDeployed + 2356 " deploymentMatchesMeta=" + deploymentMatchesMeta + 2357 " shouldBeDeployed=" + shouldBeDeployed); 2358 } 2359 } 2360 2361 /** 2362 * Checks tables integrity. Goes over all regions and scans the tables. 2363 * Collects all the pieces for each table and checks if there are missing, 2364 * repeated or overlapping ones. 2365 * @throws IOException 2366 */ checkIntegrity()2367 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException { 2368 tablesInfo = new TreeMap<TableName,TableInfo> (); 2369 LOG.debug("There are " + regionInfoMap.size() + " region info entries"); 2370 for (HbckInfo hbi : regionInfoMap.values()) { 2371 // Check only valid, working regions 2372 if (hbi.metaEntry == null) { 2373 // this assumes that consistency check has run loadMetaEntry 2374 Path p = hbi.getHdfsRegionDir(); 2375 if (p == null) { 2376 errors.report("No regioninfo in Meta or HDFS. " + hbi); 2377 } 2378 2379 // TODO test. 2380 continue; 2381 } 2382 if (hbi.metaEntry.regionServer == null) { 2383 errors.detail("Skipping region because no region server: " + hbi); 2384 continue; 2385 } 2386 if (hbi.metaEntry.isOffline()) { 2387 errors.detail("Skipping region because it is offline: " + hbi); 2388 continue; 2389 } 2390 if (hbi.containsOnlyHdfsEdits()) { 2391 errors.detail("Skipping region because it only contains edits" + hbi); 2392 continue; 2393 } 2394 2395 // Missing regionDir or over-deployment is checked elsewhere. Include 2396 // these cases in modTInfo, so we can evaluate those regions as part of 2397 // the region chain in META 2398 //if (hbi.foundRegionDir == null) continue; 2399 //if (hbi.deployedOn.size() != 1) continue; 2400 if (hbi.deployedOn.size() == 0) continue; 2401 2402 // We should be safe here 2403 TableName tableName = hbi.metaEntry.getTable(); 2404 TableInfo modTInfo = tablesInfo.get(tableName); 2405 if (modTInfo == null) { 2406 modTInfo = new TableInfo(tableName); 2407 } 2408 for (ServerName server : hbi.deployedOn) { 2409 modTInfo.addServer(server); 2410 } 2411 2412 if (!hbi.isSkipChecks()) { 2413 modTInfo.addRegionInfo(hbi); 2414 } 2415 2416 tablesInfo.put(tableName, modTInfo); 2417 } 2418 2419 loadTableInfosForTablesWithNoRegion(); 2420 2421 logParallelMerge(); 2422 for (TableInfo tInfo : tablesInfo.values()) { 2423 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors); 2424 if (!tInfo.checkRegionChain(handler)) { 2425 errors.report("Found inconsistency in table " + tInfo.getName()); 2426 } 2427 } 2428 return tablesInfo; 2429 } 2430 2431 /** Loads table info's for tables that may not have been included, since there are no 2432 * regions reported for the table, but table dir is there in hdfs 2433 */ loadTableInfosForTablesWithNoRegion()2434 private void loadTableInfosForTablesWithNoRegion() throws IOException { 2435 Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll(); 2436 for (HTableDescriptor htd : allTables.values()) { 2437 if (checkMetaOnly && !htd.isMetaTable()) { 2438 continue; 2439 } 2440 2441 TableName tableName = htd.getTableName(); 2442 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) { 2443 TableInfo tableInfo = new TableInfo(tableName); 2444 tableInfo.htds.add(htd); 2445 tablesInfo.put(htd.getTableName(), tableInfo); 2446 } 2447 } 2448 } 2449 2450 /** 2451 * Merge hdfs data by moving from contained HbckInfo into targetRegionDir. 2452 * @return number of file move fixes done to merge regions. 2453 */ mergeRegionDirs(Path targetRegionDir, HbckInfo contained)2454 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException { 2455 int fileMoves = 0; 2456 String thread = Thread.currentThread().getName(); 2457 LOG.debug("[" + thread + "] Contained region dir after close and pause"); 2458 debugLsr(contained.getHdfsRegionDir()); 2459 2460 // rename the contained into the container. 2461 FileSystem fs = targetRegionDir.getFileSystem(getConf()); 2462 FileStatus[] dirs = null; 2463 try { 2464 dirs = fs.listStatus(contained.getHdfsRegionDir()); 2465 } catch (FileNotFoundException fnfe) { 2466 // region we are attempting to merge in is not present! Since this is a merge, there is 2467 // no harm skipping this region if it does not exist. 2468 if (!fs.exists(contained.getHdfsRegionDir())) { 2469 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 2470 + " is missing. Assuming already sidelined or moved."); 2471 } else { 2472 sidelineRegionDir(fs, contained); 2473 } 2474 return fileMoves; 2475 } 2476 2477 if (dirs == null) { 2478 if (!fs.exists(contained.getHdfsRegionDir())) { 2479 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 2480 + " already sidelined."); 2481 } else { 2482 sidelineRegionDir(fs, contained); 2483 } 2484 return fileMoves; 2485 } 2486 2487 for (FileStatus cf : dirs) { 2488 Path src = cf.getPath(); 2489 Path dst = new Path(targetRegionDir, src.getName()); 2490 2491 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) { 2492 // do not copy the old .regioninfo file. 2493 continue; 2494 } 2495 2496 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) { 2497 // do not copy the .oldlogs files 2498 continue; 2499 } 2500 2501 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst); 2502 // FileSystem.rename is inconsistent with directories -- if the 2503 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir, 2504 // it moves the src into the dst dir resulting in (foo/a/b). If 2505 // the dst does not exist, and the src a dir, src becomes dst. (foo/b) 2506 for (FileStatus hfile : fs.listStatus(src)) { 2507 boolean success = fs.rename(hfile.getPath(), dst); 2508 if (success) { 2509 fileMoves++; 2510 } 2511 } 2512 LOG.debug("[" + thread + "] Sideline directory contents:"); 2513 debugLsr(targetRegionDir); 2514 } 2515 2516 // if all success. 2517 sidelineRegionDir(fs, contained); 2518 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " + 2519 getSidelineDir()); 2520 debugLsr(contained.getHdfsRegionDir()); 2521 2522 return fileMoves; 2523 } 2524 2525 2526 static class WorkItemOverlapMerge implements Callable<Void> { 2527 private TableIntegrityErrorHandler handler; 2528 Collection<HbckInfo> overlapgroup; 2529 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler)2530 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) { 2531 this.handler = handler; 2532 this.overlapgroup = overlapgroup; 2533 } 2534 2535 @Override call()2536 public Void call() throws Exception { 2537 handler.handleOverlapGroup(overlapgroup); 2538 return null; 2539 } 2540 }; 2541 2542 2543 /** 2544 * Maintain information about a particular table. 2545 */ 2546 public class TableInfo { 2547 TableName tableName; 2548 TreeSet <ServerName> deployedOn; 2549 2550 // backwards regions 2551 final List<HbckInfo> backwards = new ArrayList<HbckInfo>(); 2552 2553 // sidelined big overlapped regions 2554 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>(); 2555 2556 // region split calculator 2557 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp); 2558 2559 // Histogram of different HTableDescriptors found. Ideally there is only one! 2560 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>(); 2561 2562 // key = start split, values = set of splits in problem group 2563 final Multimap<byte[], HbckInfo> overlapGroups = 2564 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp); 2565 2566 // list of regions derived from meta entries. 2567 private ImmutableList<HRegionInfo> regionsFromMeta = null; 2568 TableInfo(TableName name)2569 TableInfo(TableName name) { 2570 this.tableName = name; 2571 deployedOn = new TreeSet <ServerName>(); 2572 } 2573 2574 /** 2575 * @return descriptor common to all regions. null if are none or multiple! 2576 */ getHTD()2577 private HTableDescriptor getHTD() { 2578 if (htds.size() == 1) { 2579 return (HTableDescriptor)htds.toArray()[0]; 2580 } else { 2581 LOG.error("None/Multiple table descriptors found for table '" 2582 + tableName + "' regions: " + htds); 2583 } 2584 return null; 2585 } 2586 addRegionInfo(HbckInfo hir)2587 public void addRegionInfo(HbckInfo hir) { 2588 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) { 2589 // end key is absolute end key, just add it. 2590 // ignore replicas other than primary for these checks 2591 if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir); 2592 return; 2593 } 2594 2595 // if not the absolute end key, check for cycle 2596 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) { 2597 errors.reportError( 2598 ERROR_CODE.REGION_CYCLE, 2599 String.format("The endkey for this region comes before the " 2600 + "startkey, startkey=%s, endkey=%s", 2601 Bytes.toStringBinary(hir.getStartKey()), 2602 Bytes.toStringBinary(hir.getEndKey())), this, hir); 2603 backwards.add(hir); 2604 return; 2605 } 2606 2607 // main case, add to split calculator 2608 // ignore replicas other than primary for these checks 2609 if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir); 2610 } 2611 addServer(ServerName server)2612 public void addServer(ServerName server) { 2613 this.deployedOn.add(server); 2614 } 2615 getName()2616 public TableName getName() { 2617 return tableName; 2618 } 2619 getNumRegions()2620 public int getNumRegions() { 2621 return sc.getStarts().size() + backwards.size(); 2622 } 2623 getRegionsFromMeta()2624 public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() { 2625 // lazy loaded, synchronized to ensure a single load 2626 if (regionsFromMeta == null) { 2627 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(); 2628 for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) { 2629 if (tableName.equals(h.getTableName())) { 2630 if (h.metaEntry != null) { 2631 regions.add((HRegionInfo) h.metaEntry); 2632 } 2633 } 2634 } 2635 regionsFromMeta = Ordering.natural().immutableSortedCopy(regions); 2636 } 2637 2638 return regionsFromMeta; 2639 } 2640 2641 2642 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl { 2643 ErrorReporter errors; 2644 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors)2645 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) { 2646 this.errors = errors; 2647 setTableInfo(ti); 2648 } 2649 2650 @Override handleRegionStartKeyNotEmpty(HbckInfo hi)2651 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{ 2652 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY, 2653 "First region should start with an empty key. You need to " 2654 + " create a new region and regioninfo in HDFS to plug the hole.", 2655 getTableInfo(), hi); 2656 } 2657 2658 @Override handleRegionEndKeyNotEmpty(byte[] curEndKey)2659 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException { 2660 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY, 2661 "Last region should end with an empty key. You need to " 2662 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo()); 2663 } 2664 2665 @Override handleDegenerateRegion(HbckInfo hi)2666 public void handleDegenerateRegion(HbckInfo hi) throws IOException{ 2667 errors.reportError(ERROR_CODE.DEGENERATE_REGION, 2668 "Region has the same start and end key.", getTableInfo(), hi); 2669 } 2670 2671 @Override handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2)2672 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{ 2673 byte[] key = r1.getStartKey(); 2674 // dup start key 2675 errors.reportError(ERROR_CODE.DUPE_STARTKEYS, 2676 "Multiple regions have the same startkey: " 2677 + Bytes.toStringBinary(key), getTableInfo(), r1); 2678 errors.reportError(ERROR_CODE.DUPE_STARTKEYS, 2679 "Multiple regions have the same startkey: " 2680 + Bytes.toStringBinary(key), getTableInfo(), r2); 2681 } 2682 2683 @Override handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2)2684 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{ 2685 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN, 2686 "There is an overlap in the region chain.", 2687 getTableInfo(), hi1, hi2); 2688 } 2689 2690 @Override handleHoleInRegionChain(byte[] holeStart, byte[] holeStop)2691 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{ 2692 errors.reportError( 2693 ERROR_CODE.HOLE_IN_REGION_CHAIN, 2694 "There is a hole in the region chain between " 2695 + Bytes.toStringBinary(holeStart) + " and " 2696 + Bytes.toStringBinary(holeStop) 2697 + ". You need to create a new .regioninfo and region " 2698 + "dir in hdfs to plug the hole."); 2699 } 2700 }; 2701 2702 /** 2703 * This handler fixes integrity errors from hdfs information. There are 2704 * basically three classes of integrity problems 1) holes, 2) overlaps, and 2705 * 3) invalid regions. 2706 * 2707 * This class overrides methods that fix holes and the overlap group case. 2708 * Individual cases of particular overlaps are handled by the general 2709 * overlap group merge repair case. 2710 * 2711 * If hbase is online, this forces regions offline before doing merge 2712 * operations. 2713 */ 2714 private class HDFSIntegrityFixer extends IntegrityFixSuggester { 2715 Configuration conf; 2716 2717 boolean fixOverlaps = true; 2718 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf, boolean fixHoles, boolean fixOverlaps)2719 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf, 2720 boolean fixHoles, boolean fixOverlaps) { 2721 super(ti, errors); 2722 this.conf = conf; 2723 this.fixOverlaps = fixOverlaps; 2724 // TODO properly use fixHoles 2725 } 2726 2727 /** 2728 * This is a special case hole -- when the first region of a table is 2729 * missing from META, HBase doesn't acknowledge the existance of the 2730 * table. 2731 */ 2732 @Override handleRegionStartKeyNotEmpty(HbckInfo next)2733 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException { 2734 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY, 2735 "First region should start with an empty key. Creating a new " + 2736 "region and regioninfo in HDFS to plug the hole.", 2737 getTableInfo(), next); 2738 HTableDescriptor htd = getTableInfo().getHTD(); 2739 // from special EMPTY_START_ROW to next region's startKey 2740 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), 2741 HConstants.EMPTY_START_ROW, next.getStartKey()); 2742 2743 // TODO test 2744 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); 2745 LOG.info("Table region start key was not empty. Created new empty region: " 2746 + newRegion + " " +region); 2747 fixes++; 2748 } 2749 2750 @Override handleRegionEndKeyNotEmpty(byte[] curEndKey)2751 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException { 2752 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY, 2753 "Last region should end with an empty key. Creating a new " 2754 + "region and regioninfo in HDFS to plug the hole.", getTableInfo()); 2755 HTableDescriptor htd = getTableInfo().getHTD(); 2756 // from curEndKey to EMPTY_START_ROW 2757 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey, 2758 HConstants.EMPTY_START_ROW); 2759 2760 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); 2761 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion 2762 + " " + region); 2763 fixes++; 2764 } 2765 2766 /** 2767 * There is a hole in the hdfs regions that violates the table integrity 2768 * rules. Create a new empty region that patches the hole. 2769 */ 2770 @Override handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey)2771 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException { 2772 errors.reportError( 2773 ERROR_CODE.HOLE_IN_REGION_CHAIN, 2774 "There is a hole in the region chain between " 2775 + Bytes.toStringBinary(holeStartKey) + " and " 2776 + Bytes.toStringBinary(holeStopKey) 2777 + ". Creating a new regioninfo and region " 2778 + "dir in hdfs to plug the hole."); 2779 HTableDescriptor htd = getTableInfo().getHTD(); 2780 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey); 2781 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); 2782 LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region); 2783 fixes++; 2784 } 2785 2786 /** 2787 * This takes set of overlapping regions and merges them into a single 2788 * region. This covers cases like degenerate regions, shared start key, 2789 * general overlaps, duplicate ranges, and partial overlapping regions. 2790 * 2791 * Cases: 2792 * - Clean regions that overlap 2793 * - Only .oldlogs regions (can't find start/stop range, or figure out) 2794 * 2795 * This is basically threadsafe, except for the fixer increment in mergeOverlaps. 2796 */ 2797 @Override handleOverlapGroup(Collection<HbckInfo> overlap)2798 public void handleOverlapGroup(Collection<HbckInfo> overlap) 2799 throws IOException { 2800 Preconditions.checkNotNull(overlap); 2801 Preconditions.checkArgument(overlap.size() >0); 2802 2803 if (!this.fixOverlaps) { 2804 LOG.warn("Not attempting to repair overlaps."); 2805 return; 2806 } 2807 2808 if (overlap.size() > maxMerge) { 2809 LOG.warn("Overlap group has " + overlap.size() + " overlapping " + 2810 "regions which is greater than " + maxMerge + ", the max number of regions to merge"); 2811 if (sidelineBigOverlaps) { 2812 // we only sideline big overlapped groups that exceeds the max number of regions to merge 2813 sidelineBigOverlaps(overlap); 2814 } 2815 return; 2816 } 2817 2818 mergeOverlaps(overlap); 2819 } 2820 mergeOverlaps(Collection<HbckInfo> overlap)2821 void mergeOverlaps(Collection<HbckInfo> overlap) 2822 throws IOException { 2823 String thread = Thread.currentThread().getName(); 2824 LOG.info("== [" + thread + "] Merging regions into one region: " 2825 + Joiner.on(",").join(overlap)); 2826 // get the min / max range and close all concerned regions 2827 Pair<byte[], byte[]> range = null; 2828 for (HbckInfo hi : overlap) { 2829 if (range == null) { 2830 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey()); 2831 } else { 2832 if (RegionSplitCalculator.BYTES_COMPARATOR 2833 .compare(hi.getStartKey(), range.getFirst()) < 0) { 2834 range.setFirst(hi.getStartKey()); 2835 } 2836 if (RegionSplitCalculator.BYTES_COMPARATOR 2837 .compare(hi.getEndKey(), range.getSecond()) > 0) { 2838 range.setSecond(hi.getEndKey()); 2839 } 2840 } 2841 // need to close files so delete can happen. 2842 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi); 2843 LOG.debug("[" + thread + "] Contained region dir before close"); 2844 debugLsr(hi.getHdfsRegionDir()); 2845 try { 2846 LOG.info("[" + thread + "] Closing region: " + hi); 2847 closeRegion(hi); 2848 } catch (IOException ioe) { 2849 LOG.warn("[" + thread + "] Was unable to close region " + hi 2850 + ". Just continuing... ", ioe); 2851 } catch (InterruptedException e) { 2852 LOG.warn("[" + thread + "] Was unable to close region " + hi 2853 + ". Just continuing... ", e); 2854 } 2855 2856 try { 2857 LOG.info("[" + thread + "] Offlining region: " + hi); 2858 offline(hi.getRegionName()); 2859 } catch (IOException ioe) { 2860 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi 2861 + ". Just continuing... ", ioe); 2862 } 2863 } 2864 2865 // create new empty container region. 2866 HTableDescriptor htd = getTableInfo().getHTD(); 2867 // from start key to end Key 2868 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(), 2869 range.getSecond()); 2870 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); 2871 LOG.info("[" + thread + "] Created new empty container region: " + 2872 newRegion + " to contain regions: " + Joiner.on(",").join(overlap)); 2873 debugLsr(region.getRegionFileSystem().getRegionDir()); 2874 2875 // all target regions are closed, should be able to safely cleanup. 2876 boolean didFix= false; 2877 Path target = region.getRegionFileSystem().getRegionDir(); 2878 for (HbckInfo contained : overlap) { 2879 LOG.info("[" + thread + "] Merging " + contained + " into " + target ); 2880 int merges = mergeRegionDirs(target, contained); 2881 if (merges > 0) { 2882 didFix = true; 2883 } 2884 } 2885 if (didFix) { 2886 fixes++; 2887 } 2888 } 2889 2890 /** 2891 * Sideline some regions in a big overlap group so that it 2892 * will have fewer regions, and it is easier to merge them later on. 2893 * 2894 * @param bigOverlap the overlapped group with regions more than maxMerge 2895 * @throws IOException 2896 */ sidelineBigOverlaps( Collection<HbckInfo> bigOverlap)2897 void sidelineBigOverlaps( 2898 Collection<HbckInfo> bigOverlap) throws IOException { 2899 int overlapsToSideline = bigOverlap.size() - maxMerge; 2900 if (overlapsToSideline > maxOverlapsToSideline) { 2901 overlapsToSideline = maxOverlapsToSideline; 2902 } 2903 List<HbckInfo> regionsToSideline = 2904 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline); 2905 FileSystem fs = FileSystem.get(conf); 2906 for (HbckInfo regionToSideline: regionsToSideline) { 2907 try { 2908 LOG.info("Closing region: " + regionToSideline); 2909 closeRegion(regionToSideline); 2910 } catch (IOException ioe) { 2911 LOG.warn("Was unable to close region " + regionToSideline 2912 + ". Just continuing... ", ioe); 2913 } catch (InterruptedException e) { 2914 LOG.warn("Was unable to close region " + regionToSideline 2915 + ". Just continuing... ", e); 2916 } 2917 2918 try { 2919 LOG.info("Offlining region: " + regionToSideline); 2920 offline(regionToSideline.getRegionName()); 2921 } catch (IOException ioe) { 2922 LOG.warn("Unable to offline region from master: " + regionToSideline 2923 + ". Just continuing... ", ioe); 2924 } 2925 2926 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString()); 2927 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline); 2928 if (sidelineRegionDir != null) { 2929 sidelinedRegions.put(sidelineRegionDir, regionToSideline); 2930 LOG.info("After sidelined big overlapped region: " 2931 + regionToSideline.getRegionNameAsString() 2932 + " to " + sidelineRegionDir.toString()); 2933 fixes++; 2934 } 2935 } 2936 } 2937 } 2938 2939 /** 2940 * Check the region chain (from META) of this table. We are looking for 2941 * holes, overlaps, and cycles. 2942 * @return false if there are errors 2943 * @throws IOException 2944 */ checkRegionChain(TableIntegrityErrorHandler handler)2945 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException { 2946 // When table is disabled no need to check for the region chain. Some of the regions 2947 // accidently if deployed, this below code might report some issues like missing start 2948 // or end regions or region hole in chain and may try to fix which is unwanted. 2949 if (disabledTables.contains(this.tableName)) { 2950 return true; 2951 } 2952 int originalErrorsCount = errors.getErrorList().size(); 2953 Multimap<byte[], HbckInfo> regions = sc.calcCoverage(); 2954 SortedSet<byte[]> splits = sc.getSplits(); 2955 2956 byte[] prevKey = null; 2957 byte[] problemKey = null; 2958 2959 if (splits.size() == 0) { 2960 // no region for this table 2961 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW); 2962 } 2963 2964 for (byte[] key : splits) { 2965 Collection<HbckInfo> ranges = regions.get(key); 2966 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) { 2967 for (HbckInfo rng : ranges) { 2968 handler.handleRegionStartKeyNotEmpty(rng); 2969 } 2970 } 2971 2972 // check for degenerate ranges 2973 for (HbckInfo rng : ranges) { 2974 // special endkey case converts '' to null 2975 byte[] endKey = rng.getEndKey(); 2976 endKey = (endKey.length == 0) ? null : endKey; 2977 if (Bytes.equals(rng.getStartKey(),endKey)) { 2978 handler.handleDegenerateRegion(rng); 2979 } 2980 } 2981 2982 if (ranges.size() == 1) { 2983 // this split key is ok -- no overlap, not a hole. 2984 if (problemKey != null) { 2985 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key)); 2986 } 2987 problemKey = null; // fell through, no more problem. 2988 } else if (ranges.size() > 1) { 2989 // set the new problem key group name, if already have problem key, just 2990 // keep using it. 2991 if (problemKey == null) { 2992 // only for overlap regions. 2993 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key)); 2994 problemKey = key; 2995 } 2996 overlapGroups.putAll(problemKey, ranges); 2997 2998 // record errors 2999 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges); 3000 // this dumb and n^2 but this shouldn't happen often 3001 for (HbckInfo r1 : ranges) { 3002 if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue; 3003 subRange.remove(r1); 3004 for (HbckInfo r2 : subRange) { 3005 if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue; 3006 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) { 3007 handler.handleDuplicateStartKeys(r1,r2); 3008 } else { 3009 // overlap 3010 handler.handleOverlapInRegionChain(r1, r2); 3011 } 3012 } 3013 } 3014 3015 } else if (ranges.size() == 0) { 3016 if (problemKey != null) { 3017 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key)); 3018 } 3019 problemKey = null; 3020 3021 byte[] holeStopKey = sc.getSplits().higher(key); 3022 // if higher key is null we reached the top. 3023 if (holeStopKey != null) { 3024 // hole 3025 handler.handleHoleInRegionChain(key, holeStopKey); 3026 } 3027 } 3028 prevKey = key; 3029 } 3030 3031 // When the last region of a table is proper and having an empty end key, 'prevKey' 3032 // will be null. 3033 if (prevKey != null) { 3034 handler.handleRegionEndKeyNotEmpty(prevKey); 3035 } 3036 3037 // TODO fold this into the TableIntegrityHandler 3038 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) { 3039 boolean ok = handleOverlapsParallel(handler, prevKey); 3040 if (!ok) { 3041 return false; 3042 } 3043 } else { 3044 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) { 3045 handler.handleOverlapGroup(overlap); 3046 } 3047 } 3048 3049 if (details) { 3050 // do full region split map dump 3051 errors.print("---- Table '" + this.tableName 3052 + "': region split map"); 3053 dump(splits, regions); 3054 errors.print("---- Table '" + this.tableName 3055 + "': overlap groups"); 3056 dumpOverlapProblems(overlapGroups); 3057 errors.print("There are " + overlapGroups.keySet().size() 3058 + " overlap groups with " + overlapGroups.size() 3059 + " overlapping regions"); 3060 } 3061 if (!sidelinedRegions.isEmpty()) { 3062 LOG.warn("Sidelined big overlapped regions, please bulk load them!"); 3063 errors.print("---- Table '" + this.tableName 3064 + "': sidelined big overlapped regions"); 3065 dumpSidelinedRegions(sidelinedRegions); 3066 } 3067 return errors.getErrorList().size() == originalErrorsCount; 3068 } 3069 handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)3070 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey) 3071 throws IOException { 3072 // we parallelize overlap handler for the case we have lots of groups to fix. We can 3073 // safely assume each group is independent. 3074 List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size()); 3075 List<Future<Void>> rets; 3076 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) { 3077 // 3078 merges.add(new WorkItemOverlapMerge(overlap, handler)); 3079 } 3080 try { 3081 rets = executor.invokeAll(merges); 3082 } catch (InterruptedException e) { 3083 LOG.error("Overlap merges were interrupted", e); 3084 return false; 3085 } 3086 for(int i=0; i<merges.size(); i++) { 3087 WorkItemOverlapMerge work = merges.get(i); 3088 Future<Void> f = rets.get(i); 3089 try { 3090 f.get(); 3091 } catch(ExecutionException e) { 3092 LOG.warn("Failed to merge overlap group" + work, e.getCause()); 3093 } catch (InterruptedException e) { 3094 LOG.error("Waiting for overlap merges was interrupted", e); 3095 return false; 3096 } 3097 } 3098 return true; 3099 } 3100 3101 /** 3102 * This dumps data in a visually reasonable way for visual debugging 3103 * 3104 * @param splits 3105 * @param regions 3106 */ dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions)3107 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) { 3108 // we display this way because the last end key should be displayed as well. 3109 StringBuilder sb = new StringBuilder(); 3110 for (byte[] k : splits) { 3111 sb.setLength(0); // clear out existing buffer, if any. 3112 sb.append(Bytes.toStringBinary(k) + ":\t"); 3113 for (HbckInfo r : regions.get(k)) { 3114 sb.append("[ "+ r.toString() + ", " 3115 + Bytes.toStringBinary(r.getEndKey())+ "]\t"); 3116 } 3117 errors.print(sb.toString()); 3118 } 3119 } 3120 } 3121 dumpOverlapProblems(Multimap<byte[], HbckInfo> regions)3122 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) { 3123 // we display this way because the last end key should be displayed as 3124 // well. 3125 for (byte[] k : regions.keySet()) { 3126 errors.print(Bytes.toStringBinary(k) + ":"); 3127 for (HbckInfo r : regions.get(k)) { 3128 errors.print("[ " + r.toString() + ", " 3129 + Bytes.toStringBinary(r.getEndKey()) + "]"); 3130 } 3131 errors.print("----"); 3132 } 3133 } 3134 dumpSidelinedRegions(Map<Path, HbckInfo> regions)3135 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) { 3136 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) { 3137 TableName tableName = entry.getValue().getTableName(); 3138 Path path = entry.getKey(); 3139 errors.print("This sidelined region dir should be bulk loaded: " 3140 + path.toString()); 3141 errors.print("Bulk load command looks like: " 3142 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles " 3143 + path.toUri().getPath() + " "+ tableName); 3144 } 3145 } 3146 getOverlapGroups( TableName table)3147 public Multimap<byte[], HbckInfo> getOverlapGroups( 3148 TableName table) { 3149 TableInfo ti = tablesInfo.get(table); 3150 return ti.overlapGroups; 3151 } 3152 3153 /** 3154 * Return a list of user-space table names whose metadata have not been 3155 * modified in the last few milliseconds specified by timelag 3156 * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER, 3157 * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last 3158 * milliseconds specified by timelag, then the table is a candidate to be returned. 3159 * @return tables that have not been modified recently 3160 * @throws IOException if an error is encountered 3161 */ getTables(AtomicInteger numSkipped)3162 HTableDescriptor[] getTables(AtomicInteger numSkipped) { 3163 List<TableName> tableNames = new ArrayList<TableName>(); 3164 long now = EnvironmentEdgeManager.currentTime(); 3165 3166 for (HbckInfo hbi : regionInfoMap.values()) { 3167 MetaEntry info = hbi.metaEntry; 3168 3169 // if the start key is zero, then we have found the first region of a table. 3170 // pick only those tables that were not modified in the last few milliseconds. 3171 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) { 3172 if (info.modTime + timelag < now) { 3173 tableNames.add(info.getTable()); 3174 } else { 3175 numSkipped.incrementAndGet(); // one more in-flux table 3176 } 3177 } 3178 } 3179 return getHTableDescriptors(tableNames); 3180 } 3181 getHTableDescriptors(List<TableName> tableNames)3182 HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) { 3183 HTableDescriptor[] htd = new HTableDescriptor[0]; 3184 Admin admin = null; 3185 try { 3186 LOG.info("getHTableDescriptors == tableNames => " + tableNames); 3187 admin = new HBaseAdmin(getConf()); 3188 htd = admin.getTableDescriptorsByTableName(tableNames); 3189 } catch (IOException e) { 3190 LOG.debug("Exception getting table descriptors", e); 3191 } finally { 3192 if (admin != null) { 3193 try { 3194 admin.close(); 3195 } catch (IOException e) { 3196 LOG.debug("Exception closing HBaseAdmin", e); 3197 } 3198 } 3199 } 3200 return htd; 3201 } 3202 3203 /** 3204 * Gets the entry in regionInfo corresponding to the the given encoded 3205 * region name. If the region has not been seen yet, a new entry is added 3206 * and returned. 3207 */ getOrCreateInfo(String name)3208 private synchronized HbckInfo getOrCreateInfo(String name) { 3209 HbckInfo hbi = regionInfoMap.get(name); 3210 if (hbi == null) { 3211 hbi = new HbckInfo(null); 3212 regionInfoMap.put(name, hbi); 3213 } 3214 return hbi; 3215 } 3216 checkAndFixTableLocks()3217 private void checkAndFixTableLocks() throws IOException { 3218 ZooKeeperWatcher zkw = createZooKeeperWatcher(); 3219 3220 try { 3221 TableLockChecker checker = new TableLockChecker(zkw, errors); 3222 checker.checkTableLocks(); 3223 3224 if (this.fixTableLocks) { 3225 checker.fixExpiredTableLocks(); 3226 } 3227 } finally { 3228 zkw.close(); 3229 } 3230 } 3231 3232 /** 3233 * Check whether a orphaned table ZNode exists and fix it if requested. 3234 * @throws IOException 3235 * @throws KeeperException 3236 * @throws InterruptedException 3237 */ checkAndFixOrphanedTableZNodes()3238 private void checkAndFixOrphanedTableZNodes() 3239 throws IOException, KeeperException, InterruptedException { 3240 ZooKeeperWatcher zkw = createZooKeeperWatcher(); 3241 3242 try { 3243 Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw); 3244 String msg; 3245 TableInfo tableInfo; 3246 3247 for (TableName tableName : enablingTables) { 3248 // Check whether the table exists in hbase 3249 tableInfo = tablesInfo.get(tableName); 3250 if (tableInfo != null) { 3251 // Table exists. This table state is in transit. No problem for this table. 3252 continue; 3253 } 3254 3255 msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found."; 3256 LOG.warn(msg); 3257 orphanedTableZNodes.add(tableName); 3258 errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg); 3259 } 3260 3261 if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) { 3262 ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw); 3263 3264 for (TableName tableName : orphanedTableZNodes) { 3265 try { 3266 // Set the table state to be disabled so that if we made mistake, we can trace 3267 // the history and figure it out. 3268 // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode. 3269 // Both approaches works. 3270 zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED); 3271 } catch (CoordinatedStateException e) { 3272 // This exception should not happen here 3273 LOG.error( 3274 "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName, 3275 e); 3276 } 3277 } 3278 } 3279 } finally { 3280 zkw.close(); 3281 } 3282 } 3283 3284 /** 3285 * Check values in regionInfo for hbase:meta 3286 * Check if zero or more than one regions with hbase:meta are found. 3287 * If there are inconsistencies (i.e. zero or more than one regions 3288 * pretend to be holding the hbase:meta) try to fix that and report an error. 3289 * @throws IOException from HBaseFsckRepair functions 3290 * @throws KeeperException 3291 * @throws InterruptedException 3292 */ checkMetaRegion()3293 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException { 3294 Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>(); 3295 for (HbckInfo value : regionInfoMap.values()) { 3296 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) { 3297 metaRegions.put(value.getReplicaId(), value); 3298 } 3299 } 3300 int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME) 3301 .getRegionReplication(); 3302 boolean noProblem = true; 3303 // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas 3304 // Check the deployed servers. It should be exactly one server for each replica. 3305 for (int i = 0; i < metaReplication; i++) { 3306 HbckInfo metaHbckInfo = metaRegions.remove(i); 3307 List<ServerName> servers = new ArrayList<ServerName>(); 3308 if (metaHbckInfo != null) { 3309 servers = metaHbckInfo.deployedOn; 3310 } 3311 if (servers.size() != 1) { 3312 noProblem = false; 3313 if (servers.size() == 0) { 3314 assignMetaReplica(i); 3315 } else if (servers.size() > 1) { 3316 errors 3317 .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " + 3318 metaHbckInfo.getReplicaId() + " is found on more than one region."); 3319 if (shouldFixAssignments()) { 3320 errors.print("Trying to fix a problem with hbase:meta, replicaId " + 3321 metaHbckInfo.getReplicaId() +".."); 3322 setShouldRerun(); 3323 // try fix it (treat is a dupe assignment) 3324 HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers); 3325 } 3326 } 3327 } 3328 } 3329 // unassign whatever is remaining in metaRegions. They are excess replicas. 3330 for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) { 3331 noProblem = false; 3332 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, 3333 "hbase:meta replicas are deployed in excess. Configured " + metaReplication + 3334 ", deployed " + metaRegions.size()); 3335 if (shouldFixAssignments()) { 3336 errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() + 3337 " of hbase:meta.."); 3338 setShouldRerun(); 3339 unassignMetaReplica(entry.getValue()); 3340 } 3341 } 3342 // if noProblem is false, rerun hbck with hopefully fixed META 3343 // if noProblem is true, no errors, so continue normally 3344 return noProblem; 3345 } 3346 unassignMetaReplica(HbckInfo hi)3347 private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException, 3348 KeeperException { 3349 undeployRegions(hi); 3350 ZooKeeperWatcher zkw = createZooKeeperWatcher(); 3351 try { 3352 ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId())); 3353 } finally { 3354 zkw.close(); 3355 } 3356 } 3357 assignMetaReplica(int replicaId)3358 private void assignMetaReplica(int replicaId) 3359 throws IOException, KeeperException, InterruptedException { 3360 errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " + 3361 replicaId +" is not found on any region."); 3362 if (shouldFixAssignments()) { 3363 errors.print("Trying to fix a problem with hbase:meta.."); 3364 setShouldRerun(); 3365 // try to fix it (treat it as unassigned region) 3366 HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( 3367 HRegionInfo.FIRST_META_REGIONINFO, replicaId); 3368 HBaseFsckRepair.fixUnassigned(admin, h); 3369 HBaseFsckRepair.waitUntilAssigned(admin, h); 3370 } 3371 } 3372 3373 /** 3374 * Scan hbase:meta, adding all regions found to the regionInfo map. 3375 * @throws IOException if an error is encountered 3376 */ loadMetaEntries()3377 boolean loadMetaEntries() throws IOException { 3378 MetaScannerVisitor visitor = new MetaScannerVisitorBase() { 3379 int countRecord = 1; 3380 3381 // comparator to sort KeyValues with latest modtime 3382 final Comparator<Cell> comp = new Comparator<Cell>() { 3383 @Override 3384 public int compare(Cell k1, Cell k2) { 3385 return (int)(k1.getTimestamp() - k2.getTimestamp()); 3386 } 3387 }; 3388 3389 @Override 3390 public boolean processRow(Result result) throws IOException { 3391 try { 3392 3393 // record the latest modification of this META record 3394 long ts = Collections.max(result.listCells(), comp).getTimestamp(); 3395 RegionLocations rl = MetaTableAccessor.getRegionLocations(result); 3396 if (rl == null) { 3397 emptyRegionInfoQualifiers.add(result); 3398 errors.reportError(ERROR_CODE.EMPTY_META_CELL, 3399 "Empty REGIONINFO_QUALIFIER found in hbase:meta"); 3400 return true; 3401 } 3402 ServerName sn = null; 3403 if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null || 3404 rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) { 3405 emptyRegionInfoQualifiers.add(result); 3406 errors.reportError(ERROR_CODE.EMPTY_META_CELL, 3407 "Empty REGIONINFO_QUALIFIER found in hbase:meta"); 3408 return true; 3409 } 3410 HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo(); 3411 if (!(isTableIncluded(hri.getTable()) 3412 || hri.isMetaRegion())) { 3413 return true; 3414 } 3415 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result); 3416 for (HRegionLocation h : rl.getRegionLocations()) { 3417 if (h == null || h.getRegionInfo() == null) { 3418 continue; 3419 } 3420 sn = h.getServerName(); 3421 hri = h.getRegionInfo(); 3422 3423 MetaEntry m = null; 3424 if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { 3425 m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond()); 3426 } else { 3427 m = new MetaEntry(hri, sn, ts, null, null); 3428 } 3429 HbckInfo previous = regionInfoMap.get(hri.getEncodedName()); 3430 if (previous == null) { 3431 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m)); 3432 } else if (previous.metaEntry == null) { 3433 previous.metaEntry = m; 3434 } else { 3435 throw new IOException("Two entries in hbase:meta are same " + previous); 3436 } 3437 } 3438 PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result); 3439 for (HRegionInfo mergeRegion : new HRegionInfo[] { 3440 mergeRegions.getFirst(), mergeRegions.getSecond() }) { 3441 if (mergeRegion != null) { 3442 // This region is already been merged 3443 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName()); 3444 hbInfo.setMerged(true); 3445 } 3446 } 3447 3448 // show proof of progress to the user, once for every 100 records. 3449 if (countRecord % 100 == 0) { 3450 errors.progress(); 3451 } 3452 countRecord++; 3453 return true; 3454 } catch (RuntimeException e) { 3455 LOG.error("Result=" + result); 3456 throw e; 3457 } 3458 } 3459 }; 3460 if (!checkMetaOnly) { 3461 // Scan hbase:meta to pick up user regions 3462 MetaScanner.metaScan(connection, visitor); 3463 } 3464 3465 errors.print(""); 3466 return true; 3467 } 3468 3469 /** 3470 * Stores the regioninfo entries scanned from META 3471 */ 3472 static class MetaEntry extends HRegionInfo { 3473 ServerName regionServer; // server hosting this region 3474 long modTime; // timestamp of most recent modification metadata 3475 HRegionInfo splitA, splitB; //split daughters 3476 MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime)3477 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) { 3478 this(rinfo, regionServer, modTime, null, null); 3479 } 3480 MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime, HRegionInfo splitA, HRegionInfo splitB)3481 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime, 3482 HRegionInfo splitA, HRegionInfo splitB) { 3483 super(rinfo); 3484 this.regionServer = regionServer; 3485 this.modTime = modTime; 3486 this.splitA = splitA; 3487 this.splitB = splitB; 3488 } 3489 3490 @Override equals(Object o)3491 public boolean equals(Object o) { 3492 boolean superEq = super.equals(o); 3493 if (!superEq) { 3494 return superEq; 3495 } 3496 3497 MetaEntry me = (MetaEntry) o; 3498 if (!regionServer.equals(me.regionServer)) { 3499 return false; 3500 } 3501 return (modTime == me.modTime); 3502 } 3503 3504 @Override hashCode()3505 public int hashCode() { 3506 int hash = Arrays.hashCode(getRegionName()); 3507 hash ^= getRegionId(); 3508 hash ^= Arrays.hashCode(getStartKey()); 3509 hash ^= Arrays.hashCode(getEndKey()); 3510 hash ^= Boolean.valueOf(isOffline()).hashCode(); 3511 hash ^= getTable().hashCode(); 3512 if (regionServer != null) { 3513 hash ^= regionServer.hashCode(); 3514 } 3515 hash ^= modTime; 3516 return hash; 3517 } 3518 } 3519 3520 /** 3521 * Stores the regioninfo entries from HDFS 3522 */ 3523 static class HdfsEntry { 3524 HRegionInfo hri; 3525 Path hdfsRegionDir = null; 3526 long hdfsRegionDirModTime = 0; 3527 boolean hdfsRegioninfoFilePresent = false; 3528 boolean hdfsOnlyEdits = false; 3529 } 3530 3531 /** 3532 * Stores the regioninfo retrieved from Online region servers. 3533 */ 3534 static class OnlineEntry { 3535 HRegionInfo hri; 3536 ServerName hsa; 3537 3538 @Override toString()3539 public String toString() { 3540 return hsa.toString() + ";" + hri.getRegionNameAsString(); 3541 } 3542 } 3543 3544 /** 3545 * Maintain information about a particular region. It gathers information 3546 * from three places -- HDFS, META, and region servers. 3547 */ 3548 public static class HbckInfo implements KeyRange { 3549 private MetaEntry metaEntry = null; // info in META 3550 private HdfsEntry hdfsEntry = null; // info in HDFS 3551 private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server 3552 private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's 3553 private boolean skipChecks = false; // whether to skip further checks to this region info. 3554 private boolean isMerged = false;// whether this region has already been merged into another one 3555 private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID; 3556 private HRegionInfo primaryHRIForDeployedReplica = null; 3557 HbckInfo(MetaEntry metaEntry)3558 HbckInfo(MetaEntry metaEntry) { 3559 this.metaEntry = metaEntry; 3560 } 3561 getReplicaId()3562 public synchronized int getReplicaId() { 3563 return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId; 3564 } 3565 addServer(HRegionInfo hri, ServerName server)3566 public synchronized void addServer(HRegionInfo hri, ServerName server) { 3567 OnlineEntry rse = new OnlineEntry() ; 3568 rse.hri = hri; 3569 rse.hsa = server; 3570 this.deployedEntries.add(rse); 3571 this.deployedOn.add(server); 3572 // save the replicaId that we see deployed in the cluster 3573 this.deployedReplicaId = hri.getReplicaId(); 3574 this.primaryHRIForDeployedReplica = 3575 RegionReplicaUtil.getRegionInfoForDefaultReplica(hri); 3576 } 3577 3578 @Override toString()3579 public synchronized String toString() { 3580 StringBuilder sb = new StringBuilder(); 3581 sb.append("{ meta => "); 3582 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null"); 3583 sb.append( ", hdfs => " + getHdfsRegionDir()); 3584 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries)); 3585 sb.append( ", replicaId => " + getReplicaId()); 3586 sb.append(" }"); 3587 return sb.toString(); 3588 } 3589 3590 @Override getStartKey()3591 public byte[] getStartKey() { 3592 if (this.metaEntry != null) { 3593 return this.metaEntry.getStartKey(); 3594 } else if (this.hdfsEntry != null) { 3595 return this.hdfsEntry.hri.getStartKey(); 3596 } else { 3597 LOG.error("Entry " + this + " has no meta or hdfs region start key."); 3598 return null; 3599 } 3600 } 3601 3602 @Override getEndKey()3603 public byte[] getEndKey() { 3604 if (this.metaEntry != null) { 3605 return this.metaEntry.getEndKey(); 3606 } else if (this.hdfsEntry != null) { 3607 return this.hdfsEntry.hri.getEndKey(); 3608 } else { 3609 LOG.error("Entry " + this + " has no meta or hdfs region start key."); 3610 return null; 3611 } 3612 } 3613 getTableName()3614 public TableName getTableName() { 3615 if (this.metaEntry != null) { 3616 return this.metaEntry.getTable(); 3617 } else if (this.hdfsEntry != null) { 3618 // we are only guaranteed to have a path and not an HRI for hdfsEntry, 3619 // so we get the name from the Path 3620 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent(); 3621 return FSUtils.getTableName(tableDir); 3622 } else { 3623 // return the info from the first online/deployed hri 3624 for (OnlineEntry e : deployedEntries) { 3625 return e.hri.getTable(); 3626 } 3627 return null; 3628 } 3629 } 3630 getRegionNameAsString()3631 public String getRegionNameAsString() { 3632 if (metaEntry != null) { 3633 return metaEntry.getRegionNameAsString(); 3634 } else if (hdfsEntry != null) { 3635 if (hdfsEntry.hri != null) { 3636 return hdfsEntry.hri.getRegionNameAsString(); 3637 } 3638 } else { 3639 // return the info from the first online/deployed hri 3640 for (OnlineEntry e : deployedEntries) { 3641 return e.hri.getRegionNameAsString(); 3642 } 3643 } 3644 return null; 3645 } 3646 getRegionName()3647 public byte[] getRegionName() { 3648 if (metaEntry != null) { 3649 return metaEntry.getRegionName(); 3650 } else if (hdfsEntry != null) { 3651 return hdfsEntry.hri.getRegionName(); 3652 } else { 3653 // return the info from the first online/deployed hri 3654 for (OnlineEntry e : deployedEntries) { 3655 return e.hri.getRegionName(); 3656 } 3657 return null; 3658 } 3659 } 3660 getPrimaryHRIForDeployedReplica()3661 public HRegionInfo getPrimaryHRIForDeployedReplica() { 3662 return primaryHRIForDeployedReplica; 3663 } 3664 getHdfsRegionDir()3665 Path getHdfsRegionDir() { 3666 if (hdfsEntry == null) { 3667 return null; 3668 } 3669 return hdfsEntry.hdfsRegionDir; 3670 } 3671 containsOnlyHdfsEdits()3672 boolean containsOnlyHdfsEdits() { 3673 if (hdfsEntry == null) { 3674 return false; 3675 } 3676 return hdfsEntry.hdfsOnlyEdits; 3677 } 3678 isHdfsRegioninfoPresent()3679 boolean isHdfsRegioninfoPresent() { 3680 if (hdfsEntry == null) { 3681 return false; 3682 } 3683 return hdfsEntry.hdfsRegioninfoFilePresent; 3684 } 3685 getModTime()3686 long getModTime() { 3687 if (hdfsEntry == null) { 3688 return 0; 3689 } 3690 return hdfsEntry.hdfsRegionDirModTime; 3691 } 3692 getHdfsHRI()3693 HRegionInfo getHdfsHRI() { 3694 if (hdfsEntry == null) { 3695 return null; 3696 } 3697 return hdfsEntry.hri; 3698 } 3699 setSkipChecks(boolean skipChecks)3700 public void setSkipChecks(boolean skipChecks) { 3701 this.skipChecks = skipChecks; 3702 } 3703 isSkipChecks()3704 public boolean isSkipChecks() { 3705 return skipChecks; 3706 } 3707 setMerged(boolean isMerged)3708 public void setMerged(boolean isMerged) { 3709 this.isMerged = isMerged; 3710 } 3711 isMerged()3712 public boolean isMerged() { 3713 return this.isMerged; 3714 } 3715 } 3716 3717 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() { 3718 @Override 3719 public int compare(HbckInfo l, HbckInfo r) { 3720 if (l == r) { 3721 // same instance 3722 return 0; 3723 } 3724 3725 int tableCompare = l.getTableName().compareTo(r.getTableName()); 3726 if (tableCompare != 0) { 3727 return tableCompare; 3728 } 3729 3730 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare( 3731 l.getStartKey(), r.getStartKey()); 3732 if (startComparison != 0) { 3733 return startComparison; 3734 } 3735 3736 // Special case for absolute endkey 3737 byte[] endKey = r.getEndKey(); 3738 endKey = (endKey.length == 0) ? null : endKey; 3739 byte[] endKey2 = l.getEndKey(); 3740 endKey2 = (endKey2.length == 0) ? null : endKey2; 3741 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare( 3742 endKey2, endKey); 3743 3744 if (endComparison != 0) { 3745 return endComparison; 3746 } 3747 3748 // use regionId as tiebreaker. 3749 // Null is considered after all possible values so make it bigger. 3750 if (l.hdfsEntry == null && r.hdfsEntry == null) { 3751 return 0; 3752 } 3753 if (l.hdfsEntry == null && r.hdfsEntry != null) { 3754 return 1; 3755 } 3756 // l.hdfsEntry must not be null 3757 if (r.hdfsEntry == null) { 3758 return -1; 3759 } 3760 // both l.hdfsEntry and r.hdfsEntry must not be null. 3761 return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId()); 3762 } 3763 }; 3764 3765 /** 3766 * Prints summary of all tables found on the system. 3767 */ printTableSummary(SortedMap<TableName, TableInfo> tablesInfo)3768 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) { 3769 StringBuilder sb = new StringBuilder(); 3770 int numOfSkippedRegions; 3771 errors.print("Summary:"); 3772 for (TableInfo tInfo : tablesInfo.values()) { 3773 numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ? 3774 skippedRegions.get(tInfo.getName()).size() : 0; 3775 3776 if (errors.tableHasErrors(tInfo)) { 3777 errors.print("Table " + tInfo.getName() + " is inconsistent."); 3778 } else if (numOfSkippedRegions > 0){ 3779 errors.print("Table " + tInfo.getName() + " is okay (with " 3780 + numOfSkippedRegions + " skipped regions)."); 3781 } 3782 else { 3783 errors.print("Table " + tInfo.getName() + " is okay."); 3784 } 3785 errors.print(" Number of regions: " + tInfo.getNumRegions()); 3786 if (numOfSkippedRegions > 0) { 3787 Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName()); 3788 System.out.println(" Number of skipped regions: " + numOfSkippedRegions); 3789 System.out.println(" List of skipped regions:"); 3790 for(String sr : skippedRegionStrings) { 3791 System.out.println(" " + sr); 3792 } 3793 } 3794 sb.setLength(0); // clear out existing buffer, if any. 3795 sb.append(" Deployed on: "); 3796 for (ServerName server : tInfo.deployedOn) { 3797 sb.append(" " + server.toString()); 3798 } 3799 errors.print(sb.toString()); 3800 } 3801 } 3802 getErrorReporter( final Configuration conf)3803 static ErrorReporter getErrorReporter( 3804 final Configuration conf) throws ClassNotFoundException { 3805 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class); 3806 return ReflectionUtils.newInstance(reporter, conf); 3807 } 3808 3809 public interface ErrorReporter { 3810 enum ERROR_CODE { 3811 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META, 3812 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED, 3813 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE, 3814 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, 3815 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, 3816 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE, 3817 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR 3818 } 3819 void clear(); 3820 void report(String message); 3821 void reportError(String message); 3822 void reportError(ERROR_CODE errorCode, String message); 3823 void reportError(ERROR_CODE errorCode, String message, TableInfo table); 3824 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info); 3825 void reportError( 3826 ERROR_CODE errorCode, 3827 String message, 3828 TableInfo table, 3829 HbckInfo info1, 3830 HbckInfo info2 3831 ); 3832 int summarize(); 3833 void detail(String details); 3834 ArrayList<ERROR_CODE> getErrorList(); 3835 void progress(); 3836 void print(String message); 3837 void resetErrors(); 3838 boolean tableHasErrors(TableInfo table); 3839 } 3840 3841 static class PrintingErrorReporter implements ErrorReporter { 3842 public int errorCount = 0; 3843 private int showProgress; 3844 // How frequently calls to progress() will create output 3845 private static final int progressThreshold = 100; 3846 3847 Set<TableInfo> errorTables = new HashSet<TableInfo>(); 3848 3849 // for use by unit tests to verify which errors were discovered 3850 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>(); 3851 3852 @Override clear()3853 public void clear() { 3854 errorTables.clear(); 3855 errorList.clear(); 3856 errorCount = 0; 3857 } 3858 3859 @Override reportError(ERROR_CODE errorCode, String message)3860 public synchronized void reportError(ERROR_CODE errorCode, String message) { 3861 if (errorCode == ERROR_CODE.WRONG_USAGE) { 3862 System.err.println(message); 3863 return; 3864 } 3865 3866 errorList.add(errorCode); 3867 if (!getSUMMARY()) { 3868 System.out.println("ERROR: " + message); 3869 } 3870 errorCount++; 3871 showProgress = 0; 3872 } 3873 3874 @Override reportError(ERROR_CODE errorCode, String message, TableInfo table)3875 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) { 3876 errorTables.add(table); 3877 reportError(errorCode, message); 3878 } 3879 3880 @Override reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info)3881 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table, 3882 HbckInfo info) { 3883 errorTables.add(table); 3884 String reference = "(region " + info.getRegionNameAsString() + ")"; 3885 reportError(errorCode, reference + " " + message); 3886 } 3887 3888 @Override reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2)3889 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table, 3890 HbckInfo info1, HbckInfo info2) { 3891 errorTables.add(table); 3892 String reference = "(regions " + info1.getRegionNameAsString() 3893 + " and " + info2.getRegionNameAsString() + ")"; 3894 reportError(errorCode, reference + " " + message); 3895 } 3896 3897 @Override reportError(String message)3898 public synchronized void reportError(String message) { 3899 reportError(ERROR_CODE.UNKNOWN, message); 3900 } 3901 3902 /** 3903 * Report error information, but do not increment the error count. Intended for cases 3904 * where the actual error would have been reported previously. 3905 * @param message 3906 */ 3907 @Override report(String message)3908 public synchronized void report(String message) { 3909 if (!getSUMMARY()) { 3910 System.out.println("ERROR: " + message); 3911 } 3912 showProgress = 0; 3913 } 3914 3915 @Override summarize()3916 public synchronized int summarize() { 3917 System.out.println(Integer.toString(errorCount) + 3918 " inconsistencies detected."); 3919 if (errorCount == 0) { 3920 System.out.println("Status: OK"); 3921 return 0; 3922 } else { 3923 System.out.println("Status: INCONSISTENT"); 3924 return -1; 3925 } 3926 } 3927 3928 @Override getErrorList()3929 public ArrayList<ERROR_CODE> getErrorList() { 3930 return errorList; 3931 } 3932 3933 @Override print(String message)3934 public synchronized void print(String message) { 3935 if (!getSUMMARY()) { 3936 System.out.println(message); 3937 } 3938 } 3939 getSUMMARY()3940 private synchronized static boolean getSUMMARY() { 3941 return SUMMARY; 3942 } 3943 3944 @Override tableHasErrors(TableInfo table)3945 public boolean tableHasErrors(TableInfo table) { 3946 return errorTables.contains(table); 3947 } 3948 3949 @Override resetErrors()3950 public void resetErrors() { 3951 errorCount = 0; 3952 } 3953 3954 @Override detail(String message)3955 public synchronized void detail(String message) { 3956 if (details) { 3957 System.out.println(message); 3958 } 3959 showProgress = 0; 3960 } 3961 3962 @Override progress()3963 public synchronized void progress() { 3964 if (showProgress++ == progressThreshold) { 3965 if (!getSUMMARY()) { 3966 System.out.print("."); 3967 } 3968 showProgress = 0; 3969 } 3970 } 3971 } 3972 3973 /** 3974 * Contact a region server and get all information from it 3975 */ 3976 static class WorkItemRegion implements Callable<Void> { 3977 private HBaseFsck hbck; 3978 private ServerName rsinfo; 3979 private ErrorReporter errors; 3980 private HConnection connection; 3981 WorkItemRegion(HBaseFsck hbck, ServerName info, ErrorReporter errors, HConnection connection)3982 WorkItemRegion(HBaseFsck hbck, ServerName info, 3983 ErrorReporter errors, HConnection connection) { 3984 this.hbck = hbck; 3985 this.rsinfo = info; 3986 this.errors = errors; 3987 this.connection = connection; 3988 } 3989 3990 @Override call()3991 public synchronized Void call() throws IOException { 3992 errors.progress(); 3993 try { 3994 BlockingInterface server = connection.getAdmin(rsinfo); 3995 3996 // list all online regions from this region server 3997 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server); 3998 regions = filterRegions(regions); 3999 4000 if (details) { 4001 errors.detail("RegionServer: " + rsinfo.getServerName() + 4002 " number of regions: " + regions.size()); 4003 for (HRegionInfo rinfo: regions) { 4004 errors.detail(" " + rinfo.getRegionNameAsString() + 4005 " id: " + rinfo.getRegionId() + 4006 " encoded_name: " + rinfo.getEncodedName() + 4007 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) + 4008 " end: " + Bytes.toStringBinary(rinfo.getEndKey())); 4009 } 4010 } 4011 4012 // check to see if the existence of this region matches the region in META 4013 for (HRegionInfo r:regions) { 4014 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName()); 4015 hbi.addServer(r, rsinfo); 4016 } 4017 } catch (IOException e) { // unable to connect to the region server. 4018 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() + 4019 " Unable to fetch region information. " + e); 4020 throw e; 4021 } 4022 return null; 4023 } 4024 filterRegions(List<HRegionInfo> regions)4025 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) { 4026 List<HRegionInfo> ret = Lists.newArrayList(); 4027 for (HRegionInfo hri : regions) { 4028 if (hri.isMetaTable() || (!hbck.checkMetaOnly 4029 && hbck.isTableIncluded(hri.getTable()))) { 4030 ret.add(hri); 4031 } 4032 } 4033 return ret; 4034 } 4035 } 4036 4037 /** 4038 * Contact hdfs and get all information about specified table directory into 4039 * regioninfo list. 4040 */ 4041 static class WorkItemHdfsDir implements Callable<Void> { 4042 private HBaseFsck hbck; 4043 private FileStatus tableDir; 4044 private ErrorReporter errors; 4045 private FileSystem fs; 4046 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, FileStatus status)4047 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, 4048 FileStatus status) { 4049 this.hbck = hbck; 4050 this.fs = fs; 4051 this.tableDir = status; 4052 this.errors = errors; 4053 } 4054 4055 @Override call()4056 public synchronized Void call() throws IOException { 4057 try { 4058 // level 2: <HBASE_DIR>/<table>/* 4059 FileStatus[] regionDirs = fs.listStatus(tableDir.getPath()); 4060 for (FileStatus regionDir : regionDirs) { 4061 errors.progress(); 4062 String encodedName = regionDir.getPath().getName(); 4063 // ignore directories that aren't hexadecimal 4064 if (!encodedName.toLowerCase().matches("[0-9a-f]+")) { 4065 continue; 4066 } 4067 4068 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath()); 4069 HbckInfo hbi = hbck.getOrCreateInfo(encodedName); 4070 HdfsEntry he = new HdfsEntry(); 4071 synchronized (hbi) { 4072 if (hbi.getHdfsRegionDir() != null) { 4073 errors.print("Directory " + encodedName + " duplicate??" + 4074 hbi.getHdfsRegionDir()); 4075 } 4076 4077 he.hdfsRegionDir = regionDir.getPath(); 4078 he.hdfsRegionDirModTime = regionDir.getModificationTime(); 4079 Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE); 4080 he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile); 4081 // we add to orphan list when we attempt to read .regioninfo 4082 4083 // Set a flag if this region contains only edits 4084 // This is special case if a region is left after split 4085 he.hdfsOnlyEdits = true; 4086 FileStatus[] subDirs = fs.listStatus(regionDir.getPath()); 4087 Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath()); 4088 for (FileStatus subDir : subDirs) { 4089 errors.progress(); 4090 String sdName = subDir.getPath().getName(); 4091 if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) { 4092 he.hdfsOnlyEdits = false; 4093 break; 4094 } 4095 } 4096 hbi.hdfsEntry = he; 4097 } 4098 } 4099 } catch (IOException e) { 4100 // unable to connect to the region server. 4101 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: " 4102 + tableDir.getPath().getName() 4103 + " Unable to fetch region information. " + e); 4104 throw e; 4105 } 4106 return null; 4107 } 4108 } 4109 4110 /** 4111 * Contact hdfs and get all information about specified table directory into 4112 * regioninfo list. 4113 */ 4114 static class WorkItemHdfsRegionInfo implements Callable<Void> { 4115 private HbckInfo hbi; 4116 private HBaseFsck hbck; 4117 private ErrorReporter errors; 4118 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors)4119 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) { 4120 this.hbi = hbi; 4121 this.hbck = hbck; 4122 this.errors = errors; 4123 } 4124 4125 @Override call()4126 public synchronized Void call() throws IOException { 4127 // only load entries that haven't been loaded yet. 4128 if (hbi.getHdfsHRI() == null) { 4129 try { 4130 errors.progress(); 4131 hbck.loadHdfsRegioninfo(hbi); 4132 } catch (IOException ioe) { 4133 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table " 4134 + hbi.getTableName() + " in hdfs dir " 4135 + hbi.getHdfsRegionDir() 4136 + "! It may be an invalid format or version file. Treating as " 4137 + "an orphaned regiondir."; 4138 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg); 4139 try { 4140 hbck.debugLsr(hbi.getHdfsRegionDir()); 4141 } catch (IOException ioe2) { 4142 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2); 4143 throw ioe2; 4144 } 4145 hbck.orphanHdfsDirs.add(hbi); 4146 throw ioe; 4147 } 4148 } 4149 return null; 4150 } 4151 }; 4152 4153 /** 4154 * Display the full report from fsck. This displays all live and dead region 4155 * servers, and all known regions. 4156 */ setDisplayFullReport()4157 public static void setDisplayFullReport() { 4158 details = true; 4159 } 4160 4161 /** 4162 * Set exclusive mode. 4163 */ setForceExclusive()4164 public static void setForceExclusive() { 4165 forceExclusive = true; 4166 } 4167 4168 /** 4169 * Only one instance of hbck can modify HBase at a time. 4170 */ isExclusive()4171 public boolean isExclusive() { 4172 return fixAny || forceExclusive; 4173 } 4174 4175 /** 4176 * Disable the load balancer. 4177 */ setDisableBalancer()4178 public static void setDisableBalancer() { 4179 disableBalancer = true; 4180 } 4181 4182 /** 4183 * The balancer should be disabled if we are modifying HBase. 4184 * It can be disabled if you want to prevent region movement from causing 4185 * false positives. 4186 */ shouldDisableBalancer()4187 public boolean shouldDisableBalancer() { 4188 return fixAny || disableBalancer; 4189 } 4190 4191 /** 4192 * Set summary mode. 4193 * Print only summary of the tables and status (OK or INCONSISTENT) 4194 */ setSummary()4195 synchronized static void setSummary() { 4196 SUMMARY = true; 4197 } 4198 4199 /** 4200 * Set hbase:meta check mode. 4201 * Print only info about hbase:meta table deployment/state 4202 */ setCheckMetaOnly()4203 void setCheckMetaOnly() { 4204 checkMetaOnly = true; 4205 } 4206 4207 /** 4208 * Set region boundaries check mode. 4209 */ setRegionBoundariesCheck()4210 void setRegionBoundariesCheck() { 4211 checkRegionBoundaries = true; 4212 } 4213 4214 /** 4215 * Set table locks fix mode. 4216 * Delete table locks held for a long time 4217 */ setFixTableLocks(boolean shouldFix)4218 public void setFixTableLocks(boolean shouldFix) { 4219 fixTableLocks = shouldFix; 4220 fixAny |= shouldFix; 4221 } 4222 4223 /** 4224 * Set orphaned table ZNodes fix mode. 4225 * Set the table state to disable in the orphaned table ZNode. 4226 */ setFixTableZNodes(boolean shouldFix)4227 public void setFixTableZNodes(boolean shouldFix) { 4228 fixTableZNodes = shouldFix; 4229 fixAny |= shouldFix; 4230 } 4231 4232 /** 4233 * Check if we should rerun fsck again. This checks if we've tried to 4234 * fix something and we should rerun fsck tool again. 4235 * Display the full report from fsck. This displays all live and dead 4236 * region servers, and all known regions. 4237 */ setShouldRerun()4238 void setShouldRerun() { 4239 rerun = true; 4240 } 4241 shouldRerun()4242 boolean shouldRerun() { 4243 return rerun; 4244 } 4245 4246 /** 4247 * Fix inconsistencies found by fsck. This should try to fix errors (if any) 4248 * found by fsck utility. 4249 */ setFixAssignments(boolean shouldFix)4250 public void setFixAssignments(boolean shouldFix) { 4251 fixAssignments = shouldFix; 4252 fixAny |= shouldFix; 4253 } 4254 shouldFixAssignments()4255 boolean shouldFixAssignments() { 4256 return fixAssignments; 4257 } 4258 setFixMeta(boolean shouldFix)4259 public void setFixMeta(boolean shouldFix) { 4260 fixMeta = shouldFix; 4261 fixAny |= shouldFix; 4262 } 4263 shouldFixMeta()4264 boolean shouldFixMeta() { 4265 return fixMeta; 4266 } 4267 setFixEmptyMetaCells(boolean shouldFix)4268 public void setFixEmptyMetaCells(boolean shouldFix) { 4269 fixEmptyMetaCells = shouldFix; 4270 fixAny |= shouldFix; 4271 } 4272 shouldFixEmptyMetaCells()4273 boolean shouldFixEmptyMetaCells() { 4274 return fixEmptyMetaCells; 4275 } 4276 setCheckHdfs(boolean checking)4277 public void setCheckHdfs(boolean checking) { 4278 checkHdfs = checking; 4279 } 4280 shouldCheckHdfs()4281 boolean shouldCheckHdfs() { 4282 return checkHdfs; 4283 } 4284 setFixHdfsHoles(boolean shouldFix)4285 public void setFixHdfsHoles(boolean shouldFix) { 4286 fixHdfsHoles = shouldFix; 4287 fixAny |= shouldFix; 4288 } 4289 shouldFixHdfsHoles()4290 boolean shouldFixHdfsHoles() { 4291 return fixHdfsHoles; 4292 } 4293 setFixTableOrphans(boolean shouldFix)4294 public void setFixTableOrphans(boolean shouldFix) { 4295 fixTableOrphans = shouldFix; 4296 fixAny |= shouldFix; 4297 } 4298 shouldFixTableOrphans()4299 boolean shouldFixTableOrphans() { 4300 return fixTableOrphans; 4301 } 4302 setFixHdfsOverlaps(boolean shouldFix)4303 public void setFixHdfsOverlaps(boolean shouldFix) { 4304 fixHdfsOverlaps = shouldFix; 4305 fixAny |= shouldFix; 4306 } 4307 shouldFixHdfsOverlaps()4308 boolean shouldFixHdfsOverlaps() { 4309 return fixHdfsOverlaps; 4310 } 4311 setFixHdfsOrphans(boolean shouldFix)4312 public void setFixHdfsOrphans(boolean shouldFix) { 4313 fixHdfsOrphans = shouldFix; 4314 fixAny |= shouldFix; 4315 } 4316 shouldFixHdfsOrphans()4317 boolean shouldFixHdfsOrphans() { 4318 return fixHdfsOrphans; 4319 } 4320 setFixVersionFile(boolean shouldFix)4321 public void setFixVersionFile(boolean shouldFix) { 4322 fixVersionFile = shouldFix; 4323 fixAny |= shouldFix; 4324 } 4325 shouldFixVersionFile()4326 public boolean shouldFixVersionFile() { 4327 return fixVersionFile; 4328 } 4329 setSidelineBigOverlaps(boolean sbo)4330 public void setSidelineBigOverlaps(boolean sbo) { 4331 this.sidelineBigOverlaps = sbo; 4332 } 4333 shouldSidelineBigOverlaps()4334 public boolean shouldSidelineBigOverlaps() { 4335 return sidelineBigOverlaps; 4336 } 4337 setFixSplitParents(boolean shouldFix)4338 public void setFixSplitParents(boolean shouldFix) { 4339 fixSplitParents = shouldFix; 4340 fixAny |= shouldFix; 4341 } 4342 shouldFixSplitParents()4343 boolean shouldFixSplitParents() { 4344 return fixSplitParents; 4345 } 4346 setFixReferenceFiles(boolean shouldFix)4347 public void setFixReferenceFiles(boolean shouldFix) { 4348 fixReferenceFiles = shouldFix; 4349 fixAny |= shouldFix; 4350 } 4351 shouldFixReferenceFiles()4352 boolean shouldFixReferenceFiles() { 4353 return fixReferenceFiles; 4354 } 4355 shouldIgnorePreCheckPermission()4356 public boolean shouldIgnorePreCheckPermission() { 4357 return !fixAny || ignorePreCheckPermission; 4358 } 4359 setIgnorePreCheckPermission(boolean ignorePreCheckPermission)4360 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) { 4361 this.ignorePreCheckPermission = ignorePreCheckPermission; 4362 } 4363 4364 /** 4365 * @param mm maximum number of regions to merge into a single region. 4366 */ setMaxMerge(int mm)4367 public void setMaxMerge(int mm) { 4368 this.maxMerge = mm; 4369 } 4370 getMaxMerge()4371 public int getMaxMerge() { 4372 return maxMerge; 4373 } 4374 setMaxOverlapsToSideline(int mo)4375 public void setMaxOverlapsToSideline(int mo) { 4376 this.maxOverlapsToSideline = mo; 4377 } 4378 getMaxOverlapsToSideline()4379 public int getMaxOverlapsToSideline() { 4380 return maxOverlapsToSideline; 4381 } 4382 4383 /** 4384 * Only check/fix tables specified by the list, 4385 * Empty list means all tables are included. 4386 */ isTableIncluded(TableName table)4387 boolean isTableIncluded(TableName table) { 4388 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table); 4389 } 4390 includeTable(TableName table)4391 public void includeTable(TableName table) { 4392 tablesIncluded.add(table); 4393 } 4394 getIncludedTables()4395 Set<TableName> getIncludedTables() { 4396 return new HashSet<TableName>(tablesIncluded); 4397 } 4398 4399 /** 4400 * We are interested in only those tables that have not changed their state in 4401 * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag 4402 * @param seconds - the time in seconds 4403 */ setTimeLag(long seconds)4404 public void setTimeLag(long seconds) { 4405 timelag = seconds * 1000; // convert to milliseconds 4406 } 4407 4408 /** 4409 * 4410 * @param sidelineDir - HDFS path to sideline data 4411 */ setSidelineDir(String sidelineDir)4412 public void setSidelineDir(String sidelineDir) { 4413 this.sidelineDir = new Path(sidelineDir); 4414 } 4415 createHFileCorruptionChecker(boolean sidelineCorruptHFiles)4416 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException { 4417 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles); 4418 } 4419 getHFilecorruptionChecker()4420 public HFileCorruptionChecker getHFilecorruptionChecker() { 4421 return hfcc; 4422 } 4423 setHFileCorruptionChecker(HFileCorruptionChecker hfcc)4424 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) { 4425 this.hfcc = hfcc; 4426 } 4427 setRetCode(int code)4428 public void setRetCode(int code) { 4429 this.retcode = code; 4430 } 4431 getRetCode()4432 public int getRetCode() { 4433 return retcode; 4434 } 4435 printUsageAndExit()4436 protected HBaseFsck printUsageAndExit() { 4437 StringWriter sw = new StringWriter(2048); 4438 PrintWriter out = new PrintWriter(sw); 4439 out.println("Usage: fsck [opts] {only tables}"); 4440 out.println(" where [opts] are:"); 4441 out.println(" -help Display help options (this)"); 4442 out.println(" -details Display full report of all regions."); 4443 out.println(" -timelag <timeInSeconds> Process only regions that " + 4444 " have not experienced any metadata updates in the last " + 4445 " <timeInSeconds> seconds."); 4446 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" + 4447 " before checking if the fix worked if run with -fix"); 4448 out.println(" -summary Print only summary of the tables and status."); 4449 out.println(" -metaonly Only check the state of the hbase:meta table."); 4450 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta."); 4451 out.println(" -boundaries Verify that regions boundaries are the same between META and store files."); 4452 out.println(" -exclusive Abort if another hbck is exclusive or fixing."); 4453 out.println(" -disableBalancer Disable the load balancer."); 4454 4455 out.println(""); 4456 out.println(" Metadata Repair options: (expert features, use with caution!)"); 4457 out.println(" -fix Try to fix region assignments. This is for backwards compatiblity"); 4458 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix"); 4459 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good."); 4460 out.println(" -noHdfsChecking Don't load/check region info from HDFS." 4461 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap"); 4462 out.println(" -fixHdfsHoles Try to fix region holes in hdfs."); 4463 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs"); 4464 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)"); 4465 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs."); 4466 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs."); 4467 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)"); 4468 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps"); 4469 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)"); 4470 out.println(" -fixSplitParents Try to force offline split parents to be online."); 4471 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check"); 4472 out.println(" -fixReferenceFiles Try to offline lingering reference store files"); 4473 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region" 4474 + " (empty REGIONINFO_QUALIFIER rows)"); 4475 4476 out.println(""); 4477 out.println(" Datafile Repair options: (expert features, use with caution!)"); 4478 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid"); 4479 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. implies -checkCorruptHFiles"); 4480 4481 out.println(""); 4482 out.println(" Metadata Repair shortcuts"); 4483 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " + 4484 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " + 4485 "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes"); 4486 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles"); 4487 4488 out.println(""); 4489 out.println(" Table lock options"); 4490 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)"); 4491 4492 out.println(""); 4493 out.println(" Table Znode options"); 4494 out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exists"); 4495 4496 out.flush(); 4497 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString()); 4498 4499 setRetCode(-2); 4500 return this; 4501 } 4502 4503 /** 4504 * Main program 4505 * 4506 * @param args 4507 * @throws Exception 4508 */ main(String[] args)4509 public static void main(String[] args) throws Exception { 4510 // create a fsck object 4511 Configuration conf = HBaseConfiguration.create(); 4512 Path hbasedir = FSUtils.getRootDir(conf); 4513 URI defaultFs = hbasedir.getFileSystem(conf).getUri(); 4514 FSUtils.setFsDefault(conf, new Path(defaultFs)); 4515 int ret = ToolRunner.run(new HBaseFsckTool(conf), args); 4516 System.exit(ret); 4517 } 4518 4519 /** 4520 * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line. 4521 */ 4522 static class HBaseFsckTool extends Configured implements Tool { HBaseFsckTool(Configuration conf)4523 HBaseFsckTool(Configuration conf) { super(conf); } 4524 @Override run(String[] args)4525 public int run(String[] args) throws Exception { 4526 HBaseFsck hbck = new HBaseFsck(getConf()); 4527 hbck.exec(hbck.executor, args); 4528 hbck.close(); 4529 return hbck.getRetCode(); 4530 } 4531 }; 4532 4533 exec(ExecutorService exec, String[] args)4534 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException, 4535 ServiceException, InterruptedException { 4536 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN; 4537 4538 boolean checkCorruptHFiles = false; 4539 boolean sidelineCorruptHFiles = false; 4540 4541 // Process command-line args. 4542 for (int i = 0; i < args.length; i++) { 4543 String cmd = args[i]; 4544 if (cmd.equals("-help") || cmd.equals("-h")) { 4545 return printUsageAndExit(); 4546 } else if (cmd.equals("-details")) { 4547 setDisplayFullReport(); 4548 } else if (cmd.equals("-exclusive")) { 4549 setForceExclusive(); 4550 } else if (cmd.equals("-disableBalancer")) { 4551 setDisableBalancer(); 4552 } else if (cmd.equals("-timelag")) { 4553 if (i == args.length - 1) { 4554 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value."); 4555 return printUsageAndExit(); 4556 } 4557 try { 4558 long timelag = Long.parseLong(args[i+1]); 4559 setTimeLag(timelag); 4560 } catch (NumberFormatException e) { 4561 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value."); 4562 return printUsageAndExit(); 4563 } 4564 i++; 4565 } else if (cmd.equals("-sleepBeforeRerun")) { 4566 if (i == args.length - 1) { 4567 errors.reportError(ERROR_CODE.WRONG_USAGE, 4568 "HBaseFsck: -sleepBeforeRerun needs a value."); 4569 return printUsageAndExit(); 4570 } 4571 try { 4572 sleepBeforeRerun = Long.parseLong(args[i+1]); 4573 } catch (NumberFormatException e) { 4574 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value."); 4575 return printUsageAndExit(); 4576 } 4577 i++; 4578 } else if (cmd.equals("-sidelineDir")) { 4579 if (i == args.length - 1) { 4580 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value."); 4581 return printUsageAndExit(); 4582 } 4583 i++; 4584 setSidelineDir(args[i]); 4585 } else if (cmd.equals("-fix")) { 4586 errors.reportError(ERROR_CODE.WRONG_USAGE, 4587 "This option is deprecated, please use -fixAssignments instead."); 4588 setFixAssignments(true); 4589 } else if (cmd.equals("-fixAssignments")) { 4590 setFixAssignments(true); 4591 } else if (cmd.equals("-fixMeta")) { 4592 setFixMeta(true); 4593 } else if (cmd.equals("-noHdfsChecking")) { 4594 setCheckHdfs(false); 4595 } else if (cmd.equals("-fixHdfsHoles")) { 4596 setFixHdfsHoles(true); 4597 } else if (cmd.equals("-fixHdfsOrphans")) { 4598 setFixHdfsOrphans(true); 4599 } else if (cmd.equals("-fixTableOrphans")) { 4600 setFixTableOrphans(true); 4601 } else if (cmd.equals("-fixHdfsOverlaps")) { 4602 setFixHdfsOverlaps(true); 4603 } else if (cmd.equals("-fixVersionFile")) { 4604 setFixVersionFile(true); 4605 } else if (cmd.equals("-sidelineBigOverlaps")) { 4606 setSidelineBigOverlaps(true); 4607 } else if (cmd.equals("-fixSplitParents")) { 4608 setFixSplitParents(true); 4609 } else if (cmd.equals("-ignorePreCheckPermission")) { 4610 setIgnorePreCheckPermission(true); 4611 } else if (cmd.equals("-checkCorruptHFiles")) { 4612 checkCorruptHFiles = true; 4613 } else if (cmd.equals("-sidelineCorruptHFiles")) { 4614 sidelineCorruptHFiles = true; 4615 } else if (cmd.equals("-fixReferenceFiles")) { 4616 setFixReferenceFiles(true); 4617 } else if (cmd.equals("-fixEmptyMetaCells")) { 4618 setFixEmptyMetaCells(true); 4619 } else if (cmd.equals("-repair")) { 4620 // this attempts to merge overlapping hdfs regions, needs testing 4621 // under load 4622 setFixHdfsHoles(true); 4623 setFixHdfsOrphans(true); 4624 setFixMeta(true); 4625 setFixAssignments(true); 4626 setFixHdfsOverlaps(true); 4627 setFixVersionFile(true); 4628 setSidelineBigOverlaps(true); 4629 setFixSplitParents(false); 4630 setCheckHdfs(true); 4631 setFixReferenceFiles(true); 4632 setFixTableLocks(true); 4633 setFixTableZNodes(true); 4634 } else if (cmd.equals("-repairHoles")) { 4635 // this will make all missing hdfs regions available but may lose data 4636 setFixHdfsHoles(true); 4637 setFixHdfsOrphans(false); 4638 setFixMeta(true); 4639 setFixAssignments(true); 4640 setFixHdfsOverlaps(false); 4641 setSidelineBigOverlaps(false); 4642 setFixSplitParents(false); 4643 setCheckHdfs(true); 4644 } else if (cmd.equals("-maxOverlapsToSideline")) { 4645 if (i == args.length - 1) { 4646 errors.reportError(ERROR_CODE.WRONG_USAGE, 4647 "-maxOverlapsToSideline needs a numeric value argument."); 4648 return printUsageAndExit(); 4649 } 4650 try { 4651 int maxOverlapsToSideline = Integer.parseInt(args[i+1]); 4652 setMaxOverlapsToSideline(maxOverlapsToSideline); 4653 } catch (NumberFormatException e) { 4654 errors.reportError(ERROR_CODE.WRONG_USAGE, 4655 "-maxOverlapsToSideline needs a numeric value argument."); 4656 return printUsageAndExit(); 4657 } 4658 i++; 4659 } else if (cmd.equals("-maxMerge")) { 4660 if (i == args.length - 1) { 4661 errors.reportError(ERROR_CODE.WRONG_USAGE, 4662 "-maxMerge needs a numeric value argument."); 4663 return printUsageAndExit(); 4664 } 4665 try { 4666 int maxMerge = Integer.parseInt(args[i+1]); 4667 setMaxMerge(maxMerge); 4668 } catch (NumberFormatException e) { 4669 errors.reportError(ERROR_CODE.WRONG_USAGE, 4670 "-maxMerge needs a numeric value argument."); 4671 return printUsageAndExit(); 4672 } 4673 i++; 4674 } else if (cmd.equals("-summary")) { 4675 setSummary(); 4676 } else if (cmd.equals("-metaonly")) { 4677 setCheckMetaOnly(); 4678 } else if (cmd.equals("-boundaries")) { 4679 setRegionBoundariesCheck(); 4680 } else if (cmd.equals("-fixTableLocks")) { 4681 setFixTableLocks(true); 4682 } else if (cmd.equals("-fixOrphanedTableZnodes")) { 4683 setFixTableZNodes(true); 4684 } else if (cmd.startsWith("-")) { 4685 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd); 4686 return printUsageAndExit(); 4687 } else { 4688 includeTable(TableName.valueOf(cmd)); 4689 errors.print("Allow checking/fixes for table: " + cmd); 4690 } 4691 } 4692 4693 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " ")); 4694 4695 // pre-check current user has FS write permission or not 4696 try { 4697 preCheckPermission(); 4698 } catch (AccessDeniedException ace) { 4699 Runtime.getRuntime().exit(-1); 4700 } catch (IOException ioe) { 4701 Runtime.getRuntime().exit(-1); 4702 } 4703 4704 // do the real work of hbck 4705 connect(); 4706 4707 try { 4708 // if corrupt file mode is on, first fix them since they may be opened later 4709 if (checkCorruptHFiles || sidelineCorruptHFiles) { 4710 LOG.info("Checking all hfiles for corruption"); 4711 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles); 4712 setHFileCorruptionChecker(hfcc); // so we can get result 4713 Collection<TableName> tables = getIncludedTables(); 4714 Collection<Path> tableDirs = new ArrayList<Path>(); 4715 Path rootdir = FSUtils.getRootDir(getConf()); 4716 if (tables.size() > 0) { 4717 for (TableName t : tables) { 4718 tableDirs.add(FSUtils.getTableDir(rootdir, t)); 4719 } 4720 } else { 4721 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir); 4722 } 4723 hfcc.checkTables(tableDirs); 4724 hfcc.report(errors); 4725 } 4726 4727 // check and fix table integrity, region consistency. 4728 int code = onlineHbck(); 4729 setRetCode(code); 4730 // If we have changed the HBase state it is better to run hbck again 4731 // to see if we haven't broken something else in the process. 4732 // We run it only once more because otherwise we can easily fall into 4733 // an infinite loop. 4734 if (shouldRerun()) { 4735 try { 4736 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix..."); 4737 Thread.sleep(sleepBeforeRerun); 4738 } catch (InterruptedException ie) { 4739 LOG.warn("Interrupted while sleeping"); 4740 return this; 4741 } 4742 // Just report 4743 setFixAssignments(false); 4744 setFixMeta(false); 4745 setFixHdfsHoles(false); 4746 setFixHdfsOverlaps(false); 4747 setFixVersionFile(false); 4748 setFixTableOrphans(false); 4749 errors.resetErrors(); 4750 code = onlineHbck(); 4751 setRetCode(code); 4752 } 4753 } finally { 4754 IOUtils.closeQuietly(this); 4755 } 4756 return this; 4757 } 4758 4759 /** 4760 * ls -r for debugging purposes 4761 */ debugLsr(Path p)4762 void debugLsr(Path p) throws IOException { 4763 debugLsr(getConf(), p, errors); 4764 } 4765 4766 /** 4767 * ls -r for debugging purposes 4768 */ debugLsr(Configuration conf, Path p)4769 public static void debugLsr(Configuration conf, 4770 Path p) throws IOException { 4771 debugLsr(conf, p, new PrintingErrorReporter()); 4772 } 4773 4774 /** 4775 * ls -r for debugging purposes 4776 */ debugLsr(Configuration conf, Path p, ErrorReporter errors)4777 public static void debugLsr(Configuration conf, 4778 Path p, ErrorReporter errors) throws IOException { 4779 if (!LOG.isDebugEnabled() || p == null) { 4780 return; 4781 } 4782 FileSystem fs = p.getFileSystem(conf); 4783 4784 if (!fs.exists(p)) { 4785 // nothing 4786 return; 4787 } 4788 errors.print(p.toString()); 4789 4790 if (fs.isFile(p)) { 4791 return; 4792 } 4793 4794 if (fs.getFileStatus(p).isDirectory()) { 4795 FileStatus[] fss= fs.listStatus(p); 4796 for (FileStatus status : fss) { 4797 debugLsr(conf, status.getPath(), errors); 4798 } 4799 } 4800 } 4801 } 4802