1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.master; 20 21 import static org.junit.Assert.assertEquals; 22 import static org.junit.Assert.assertFalse; 23 import static org.junit.Assert.assertNotNull; 24 import static org.junit.Assert.assertTrue; 25 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import java.util.Iterator; 29 import java.util.List; 30 import java.util.Set; 31 import java.util.TreeSet; 32 33 import org.apache.commons.logging.Log; 34 import org.apache.commons.logging.LogFactory; 35 import org.apache.hadoop.conf.Configuration; 36 import org.apache.hadoop.fs.FileSystem; 37 import org.apache.hadoop.fs.Path; 38 import org.apache.hadoop.hbase.Abortable; 39 import org.apache.hadoop.hbase.ClusterStatus; 40 import org.apache.hadoop.hbase.HBaseConfiguration; 41 import org.apache.hadoop.hbase.HBaseTestingUtility; 42 import org.apache.hadoop.hbase.HColumnDescriptor; 43 import org.apache.hadoop.hbase.HConstants; 44 import org.apache.hadoop.hbase.HRegionInfo; 45 import org.apache.hadoop.hbase.HTableDescriptor; 46 import org.apache.hadoop.hbase.testclassification.LargeTests; 47 import org.apache.hadoop.hbase.MetaTableAccessor; 48 import 
org.apache.hadoop.hbase.MiniHBaseCluster; 49 import org.apache.hadoop.hbase.RegionTransition; 50 import org.apache.hadoop.hbase.ServerName; 51 import org.apache.hadoop.hbase.TableName; 52 import org.apache.hadoop.hbase.TableStateManager; 53 import org.apache.hadoop.hbase.client.RegionLocator; 54 import org.apache.hadoop.hbase.client.Table; 55 import org.apache.hadoop.hbase.executor.EventType; 56 import org.apache.hadoop.hbase.master.RegionState.State; 57 import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 58 import org.apache.hadoop.hbase.protobuf.RequestConverter; 59 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; 60 import org.apache.hadoop.hbase.regionserver.HRegion; 61 import org.apache.hadoop.hbase.regionserver.HRegionServer; 62 import org.apache.hadoop.hbase.regionserver.Region; 63 import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl; 64 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; 65 import org.apache.hadoop.hbase.util.Bytes; 66 import org.apache.hadoop.hbase.util.FSTableDescriptors; 67 import org.apache.hadoop.hbase.util.FSUtils; 68 import org.apache.hadoop.hbase.util.JVMClusterUtil; 69 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; 70 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 71 import org.apache.hadoop.hbase.util.Threads; 72 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 73 import org.apache.hadoop.hbase.zookeeper.ZKAssign; 74 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager; 75 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; 76 import org.apache.zookeeper.data.Stat; 77 import org.junit.Ignore; 78 import org.junit.Test; 79 import org.junit.experimental.categories.Category; 80 81 @Category(LargeTests.class) 82 public class TestMasterFailover { 83 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class); 84 85 /** 86 * Complex test of master failover that tests as many permutations of the 87 
* different possible states that regions in transition could be in within ZK. 88 * <p> 89 * This tests the proper handling of these states by the failed-over master 90 * and includes a thorough testing of the timeout code as well. 91 * <p> 92 * Starts with a single master and three regionservers. 93 * <p> 94 * Creates two tables, enabledTable and disabledTable, each containing 5 95 * regions. The disabledTable is then disabled. 96 * <p> 97 * After reaching steady-state, the master is killed. We then mock several 98 * states in ZK. 99 * <p> 100 * After mocking them, we will startup a new master which should become the 101 * active master and also detect that it is a failover. The primary test 102 * passing condition will be that all regions of the enabled table are 103 * assigned and all the regions of the disabled table are not assigned. 104 * <p> 105 * The different scenarios to be tested are below: 106 * <p> 107 * <b>ZK State: OFFLINE</b> 108 * <p>A node can get into OFFLINE state if</p> 109 * <ul> 110 * <li>An RS fails to open a region, so it reverts the state back to OFFLINE 111 * <li>The Master is assigning the region to a RS before it sends RPC 112 * </ul> 113 * <p>We will mock the scenarios</p> 114 * <ul> 115 * <li>Master has assigned an enabled region but RS failed so a region is 116 * not assigned anywhere and is sitting in ZK as OFFLINE</li> 117 * <li>This seems to cover both cases?</li> 118 * </ul> 119 * <p> 120 * <b>ZK State: CLOSING</b> 121 * <p>A node can get into CLOSING state if</p> 122 * <ul> 123 * <li>An RS has begun to close a region 124 * </ul> 125 * <p>We will mock the scenarios</p> 126 * <ul> 127 * <li>Region of enabled table was being closed but did not complete 128 * <li>Region of disabled table was being closed but did not complete 129 * </ul> 130 * <p> 131 * <b>ZK State: CLOSED</b> 132 * <p>A node can get into CLOSED state if</p> 133 * <ul> 134 * <li>An RS has completed closing a region but not acknowledged by master yet 135 * </ul> 136 * 
<p>We will mock the scenarios</p>
   * <ul>
   * <li>Region of a table that should be enabled was closed on an RS
   * <li>Region of a table that should be disabled was closed on an RS
   * </ul>
   * <p>
   * <b>ZK State: OPENING</b>
   * <p>A node can get into OPENING state if</p>
   * <ul>
   * <li>An RS has begun to open a region
   * </ul>
   * <p>We will mock the scenarios</p>
   * <ul>
   * <li>RS was opening a region of enabled table but never finishes
   * </ul>
   * <p>
   * <b>ZK State: OPENED</b>
   * <p>A node can get into OPENED state if</p>
   * <ul>
   * <li>An RS has finished opening a region but not acknowledged by master yet
   * </ul>
   * <p>We will mock the scenarios</p>
   * <ul>
   * <li>Region of a table that should be enabled was opened on an RS
   * <li>Region of a table that should be disabled was opened on an RS
   * </ul>
   * @throws Exception
   */
  @Test (timeout=240000)
  public void testMasterFailoverWithMockedRIT() throws Exception {

    final int NUM_MASTERS = 1;
    final int NUM_RS = 3;

    // Create config to use for this cluster; ZK-based assignment must be on
    // because this test mocks region-in-transition state directly in ZK.
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean("hbase.assignment.usezk", true);

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    // Create a ZKW to use in the test
    ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());

    // only one master thread, let's wait for it to be initialized
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());

    // disable load balancing on this master so the balancer cannot move the
    // carefully placed regions while states are being mocked
    master.balanceSwitch(false);

    // create two tables in META, each with 10 regions
    byte [] FAMILY = Bytes.toBytes("family");
    byte [][] SPLIT_KEYS = new byte [][] {
        new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
        Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
        Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
        Bytes.toBytes("iii"), Bytes.toBytes("jjj")
    };

    byte [] enabledTable = Bytes.toBytes("enabledTable");
    HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
    htdEnabled.addFamily(new HColumnDescriptor(FAMILY));

    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = FSUtils.getRootDir(conf);
    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
    // Write the .tableinfo
    fstd.createTableDescriptor(htdEnabled);

    // First region of the enabled table; created directly on the filesystem
    // (regions are inserted into META below without going through the master)
    HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
    createRegion(hriEnabled, rootdir, conf, htdEnabled);

    List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
      TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);

    TableName disabledTable = TableName.valueOf("disabledTable");
    HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
    htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
    // Write the .tableinfo
    fstd.createTableDescriptor(htdDisabled);
    HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
    createRegion(hriDisabled, rootdir, conf, htdDisabled);
    List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
      TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);

    // A third, normally-created table whose two regions will later be put
    // into a mocked MERGING state
    TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
    TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});

    log("Regions in hbase:meta and namespace have been created");

    // at this point we only expect 4 regions to be assigned out
    // (catalogs and namespace, + 2 merging regions)
    assertEquals(4, cluster.countServedRegions());

    // Move merging regions to the same region server (a merge requires both
    // parents to be co-located)
    AssignmentManager am = master.getAssignmentManager();
    RegionStates regionStates = am.getRegionStates();
    List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
    assertEquals(2, mergingRegions.size());
    HRegionInfo a = mergingRegions.get(0);
    HRegionInfo b = mergingRegions.get(1);
    HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
    ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
    ServerName serverB = regionStates.getRegionServerOfRegion(b);
    if (!serverB.equals(mergingServer)) {
      RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
      am.balance(plan);
      assertTrue(am.waitForAssignment(b));
    }

    // Let's just assign everything to first RS
    HRegionServer hrs = cluster.getRegionServer(0);
    ServerName serverName = hrs.getServerName();
    // kept aside so it can be re-used below for the CLOSING scenario
    HRegionInfo closingRegion = enabledRegions.remove(0);
    // we'll need some regions to already be assigned out properly on live RS
    List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(closingRegion);

    List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    disabledAndAssignedRegions.add(disabledRegions.remove(0));
    disabledAndAssignedRegions.add(disabledRegions.remove(0));

    // now actually assign them; an explicit plan pins each region to the
    // first RS so later ZK mocking can name a known server
    for (HRegionInfo hri : enabledAndAssignedRegions) {
      master.assignmentManager.addPlan(hri.getEncodedName(),
          new RegionPlan(hri, null, serverName));
      master.assignRegion(hri);
    }

    for (HRegionInfo hri : disabledAndAssignedRegions) {
      master.assignmentManager.addPlan(hri.getEncodedName(),
          new RegionPlan(hri, null, serverName));
      master.assignRegion(hri);
    }

    // wait for no more RIT
    log("Waiting for assignment to finish");
    ZKAssign.blockUntilNoRIT(zkw);
    log("Assignment completed");

    // Stop the master so the mocked ZK states below are what the failed-over
    // master sees on startup
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    /*
     * Now, let's start mocking up some weird states as described in the method
     * javadoc.
     */

    List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
    List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();

    log("Beginning to mock scenarios");

    // Disable the disabledTable in ZK
    TableStateManager zktable = new ZKTableStateManager(zkw);
    zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);

    /*
     * ZK = OFFLINE
     */

    // Region that should be assigned but is not and is in ZK as OFFLINE
    // Cause: This can happen if the master crashed after creating the znode but before sending the
    // request to the region server
    HRegionInfo region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);

    /*
     * ZK = CLOSING
     */
    // Cause: Same as offline.
    regionsThatShouldBeOnline.add(closingRegion);
    ZKAssign.createNodeClosing(zkw, closingRegion, serverName);

    /*
     * ZK = CLOSED
     */

    // Region of enabled table closed but not ack'd
    // Cause: Master was down while the region server updated the ZK status.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    int version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    // Region of disabled table closed but not ack'd
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    /*
     * ZK = OPENED
     */

    // Region of enabled table was opened on RS
    // Cause: as offline
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
    // poll ZK until the RS has transitioned the znode to RS_ZK_REGION_OPENED
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        break;
      }
      Thread.sleep(100);
    }

    // Region of disabled table was opened on RS
    // Cause: Master failed while updating the status for this region server.
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
    // same OPENED-znode wait as above
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        break;
      }
      Thread.sleep(100);
    }

    /*
     * ZK = MERGING
     */

    // Regions of table of merging regions
    // Cause: Master was down while merging was going on
    hrs.getCoordinatedStateManager().
      getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);

    /*
     * ZK = NONE
     */

    /*
     * DONE MOCKING
     */

    log("Done mocking data up in ZK");

    // Start up a new master; it should detect the failover and process the
    // mocked RIT znodes
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // Get new region states since master restarted
    regionStates = master.getAssignmentManager().getRegionStates();
    // Merging region should remain merging
    assertTrue(regionStates.isRegionInState(a, State.MERGING));
    assertTrue(regionStates.isRegionInState(b, State.MERGING));
    assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
    // Now remove the faked merging znode, merging regions should be
    // offlined automatically, otherwise it is a bug in AM.
    ZKAssign.deleteNodeFailSilent(zkw, newRegion);

    // Failover should be completed, now wait for no RIT
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);
    log("No more RIT in ZK, now doing final test verification");

    // Grab all the regions that are online across RSs
    Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
    for (JVMClusterUtil.RegionServerThread rst :
        cluster.getRegionServerThreads()) {
      onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
        rst.getRegionServer().getRSRpcServices()));
    }

    // Now, everything that should be online should be online
    for (HRegionInfo hri : regionsThatShouldBeOnline) {
      assertTrue(onlineRegions.contains(hri));
    }

    // Everything that should be offline should not be online
    for (HRegionInfo hri : regionsThatShouldBeOffline) {
      if (onlineRegions.contains(hri)) {
        LOG.debug(hri);
      }
      assertFalse(onlineRegions.contains(hri));
    }

    log("Done with verification, all passed, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Complex test of master failover that tests as many permutations of the
   * different possible states that regions in transition could be in within ZK
   * pointing to an RS that has died while no master is around to process it.
   * <p>
   * This tests the proper handling of these states by the failed-over master
   * and includes a thorough testing of the timeout code as well.
   * <p>
   * Starts with a single master and two regionservers.
   * <p>
   * Creates two tables, enabledTable and disabledTable, each containing 5
   * regions. The disabledTable is then disabled.
   * <p>
   * After reaching steady-state, the master is killed. We then mock several
   * states in ZK. And one of the RS will be killed.
   * <p>
   * After mocking them and killing an RS, we will startup a new master which
   * should become the active master and also detect that it is a failover. The
   * primary test passing condition will be that all regions of the enabled
   * table are assigned and all the regions of the disabled table are not
   * assigned.
466 * <p> 467 * The different scenarios to be tested are below: 468 * <p> 469 * <b>ZK State: CLOSING</b> 470 * <p>A node can get into CLOSING state if</p> 471 * <ul> 472 * <li>An RS has begun to close a region 473 * </ul> 474 * <p>We will mock the scenarios</p> 475 * <ul> 476 * <li>Region was being closed but the RS died before finishing the close 477 * </ul> 478 * <b>ZK State: OPENED</b> 479 * <p>A node can get into OPENED state if</p> 480 * <ul> 481 * <li>An RS has finished opening a region but not acknowledged by master yet 482 * </ul> 483 * <p>We will mock the scenarios</p> 484 * <ul> 485 * <li>Region of a table that should be enabled was opened by a now-dead RS 486 * <li>Region of a table that should be disabled was opened by a now-dead RS 487 * </ul> 488 * <p> 489 * <b>ZK State: NONE</b> 490 * <p>A region could not have a transition node if</p> 491 * <ul> 492 * <li>The server hosting the region died and no master processed it 493 * </ul> 494 * <p>We will mock the scenarios</p> 495 * <ul> 496 * <li>Region of enabled table was on a dead RS that was not yet processed 497 * <li>Region of disabled table was on a dead RS that was not yet processed 498 * </ul> 499 * @throws Exception 500 */ 501 @Test (timeout=180000) testMasterFailoverWithMockedRITOnDeadRS()502 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception { 503 504 final int NUM_MASTERS = 1; 505 final int NUM_RS = 2; 506 507 // Create and start the cluster 508 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 509 Configuration conf = TEST_UTIL.getConfiguration(); 510 conf.setBoolean("hbase.assignment.usezk", true); 511 512 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); 513 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2); 514 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); 515 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 516 log("Cluster started"); 517 518 // Create a ZKW to use in the test 519 ZooKeeperWatcher zkw = new 
ZooKeeperWatcher(TEST_UTIL.getConfiguration(), 520 "unittest", new Abortable() { 521 522 @Override 523 public void abort(String why, Throwable e) { 524 LOG.error("Fatal ZK Error: " + why, e); 525 org.junit.Assert.assertFalse("Fatal ZK error", true); 526 } 527 528 @Override 529 public boolean isAborted() { 530 return false; 531 } 532 533 }); 534 535 // get all the master threads 536 List<MasterThread> masterThreads = cluster.getMasterThreads(); 537 assertEquals(1, masterThreads.size()); 538 539 // only one master thread, let's wait for it to be initialized 540 assertTrue(cluster.waitForActiveAndReadyMaster()); 541 HMaster master = masterThreads.get(0).getMaster(); 542 assertTrue(master.isActiveMaster()); 543 assertTrue(master.isInitialized()); 544 545 // disable load balancing on this master 546 master.balanceSwitch(false); 547 548 // create two tables in META, each with 30 regions 549 byte [] FAMILY = Bytes.toBytes("family"); 550 byte[][] SPLIT_KEYS = 551 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30); 552 553 byte [] enabledTable = Bytes.toBytes("enabledTable"); 554 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable)); 555 htdEnabled.addFamily(new HColumnDescriptor(FAMILY)); 556 FileSystem filesystem = FileSystem.get(conf); 557 Path rootdir = FSUtils.getRootDir(conf); 558 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir); 559 // Write the .tableinfo 560 fstd.createTableDescriptor(htdEnabled); 561 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), 562 null, null); 563 createRegion(hriEnabled, rootdir, conf, htdEnabled); 564 565 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta( 566 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS); 567 568 TableName disabledTable = 569 TableName.valueOf("disabledTable"); 570 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable); 571 htdDisabled.addFamily(new HColumnDescriptor(FAMILY)); 
572 // Write the .tableinfo 573 fstd.createTableDescriptor(htdDisabled); 574 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null); 575 createRegion(hriDisabled, rootdir, conf, htdDisabled); 576 577 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta( 578 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS); 579 580 log("Regions in hbase:meta and Namespace have been created"); 581 582 // at this point we only expect 2 regions to be assigned out (catalogs and namespace ) 583 assertEquals(2, cluster.countServedRegions()); 584 585 // The first RS will stay online 586 List<RegionServerThread> regionservers = 587 cluster.getRegionServerThreads(); 588 HRegionServer hrs = regionservers.get(0).getRegionServer(); 589 590 // The second RS is going to be hard-killed 591 RegionServerThread hrsDeadThread = regionservers.get(1); 592 HRegionServer hrsDead = hrsDeadThread.getRegionServer(); 593 ServerName deadServerName = hrsDead.getServerName(); 594 595 // we'll need some regions to already be assigned out properly on live RS 596 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>(); 597 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6)); 598 enabledRegions.removeAll(enabledAndAssignedRegions); 599 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>(); 600 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6)); 601 disabledRegions.removeAll(disabledAndAssignedRegions); 602 603 // now actually assign them 604 for (HRegionInfo hri : enabledAndAssignedRegions) { 605 master.assignmentManager.addPlan(hri.getEncodedName(), 606 new RegionPlan(hri, null, hrs.getServerName())); 607 master.assignRegion(hri); 608 } 609 for (HRegionInfo hri : disabledAndAssignedRegions) { 610 master.assignmentManager.addPlan(hri.getEncodedName(), 611 new RegionPlan(hri, null, hrs.getServerName())); 612 master.assignRegion(hri); 613 } 614 615 log("Waiting for assignment to finish"); 616 
ZKAssign.blockUntilNoRIT(zkw); 617 master.assignmentManager.waitUntilNoRegionsInTransition(60000); 618 log("Assignment completed"); 619 620 assertTrue(" Table must be enabled.", master.getAssignmentManager() 621 .getTableStateManager().isTableState(TableName.valueOf("enabledTable"), 622 ZooKeeperProtos.Table.State.ENABLED)); 623 // we also need regions assigned out on the dead server 624 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>(); 625 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6)); 626 enabledRegions.removeAll(enabledAndOnDeadRegions); 627 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>(); 628 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6)); 629 disabledRegions.removeAll(disabledAndOnDeadRegions); 630 631 // set region plan to server to be killed and trigger assign 632 for (HRegionInfo hri : enabledAndOnDeadRegions) { 633 master.assignmentManager.addPlan(hri.getEncodedName(), 634 new RegionPlan(hri, null, deadServerName)); 635 master.assignRegion(hri); 636 } 637 for (HRegionInfo hri : disabledAndOnDeadRegions) { 638 master.assignmentManager.addPlan(hri.getEncodedName(), 639 new RegionPlan(hri, null, deadServerName)); 640 master.assignRegion(hri); 641 } 642 643 // wait for no more RIT 644 log("Waiting for assignment to finish"); 645 ZKAssign.blockUntilNoRIT(zkw); 646 master.assignmentManager.waitUntilNoRegionsInTransition(60000); 647 log("Assignment completed"); 648 649 // Due to master.assignRegion(hri) could fail to assign a region to a specified RS 650 // therefore, we need make sure that regions are in the expected RS 651 verifyRegionLocation(hrs, enabledAndAssignedRegions); 652 verifyRegionLocation(hrs, disabledAndAssignedRegions); 653 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions); 654 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions); 655 656 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.", 657 enabledAndAssignedRegions.size() >= 2); 658 
assertTrue(" Didn't get enough regions of disalbedTable on live rs.", 659 disabledAndAssignedRegions.size() >= 2); 660 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.", 661 enabledAndOnDeadRegions.size() >= 2); 662 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.", 663 disabledAndOnDeadRegions.size() >= 2); 664 665 // Stop the master 666 log("Aborting master"); 667 cluster.abortMaster(0); 668 cluster.waitOnMaster(0); 669 log("Master has aborted"); 670 671 /* 672 * Now, let's start mocking up some weird states as described in the method 673 * javadoc. 674 */ 675 676 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>(); 677 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>(); 678 679 log("Beginning to mock scenarios"); 680 681 // Disable the disabledTable in ZK 682 TableStateManager zktable = new ZKTableStateManager(zkw); 683 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED); 684 685 assertTrue(" The enabled table should be identified on master fail over.", 686 zktable.isTableState(TableName.valueOf("enabledTable"), 687 ZooKeeperProtos.Table.State.ENABLED)); 688 689 /* 690 * ZK = CLOSING 691 */ 692 693 // Region of enabled table being closed on dead RS but not finished 694 HRegionInfo region = enabledAndOnDeadRegions.remove(0); 695 regionsThatShouldBeOnline.add(region); 696 ZKAssign.createNodeClosing(zkw, region, deadServerName); 697 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" + 698 region + "\n\n"); 699 700 // Region of disabled table being closed on dead RS but not finished 701 region = disabledAndOnDeadRegions.remove(0); 702 regionsThatShouldBeOffline.add(region); 703 ZKAssign.createNodeClosing(zkw, region, deadServerName); 704 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" + 705 region + "\n\n"); 706 707 /* 708 * ZK = CLOSED 709 */ 710 711 // Region of enabled on dead server gets closed but not ack'd by master 712 
    // Region of enabled table closed on the dead RS but the close was never
    // acknowledged by the master: should come back online after failover.
    region = enabledAndOnDeadRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
    ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
    LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
        region + "\n\n");

    // Region of disabled table on dead server gets closed but not ack'd by
    // master: should stay offline after failover.
    region = disabledAndOnDeadRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
    ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
    LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
        region + "\n\n");

    /*
     * ZK = OPENING
     */

    // RS was opening a region of enabled table then died
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, deadServerName);
    ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
    LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
        region + "\n\n");

    // RS was opening a region of disabled table then died
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, deadServerName);
    ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
    LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
        region + "\n\n");

    /*
     * ZK = OPENED
     */

    // Region of enabled table was opened on dead RS; poll ZK until the RS
    // reports the OPENED transition so the znode is in a known state.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, deadServerName);
    ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
      hrsDead.getServerName(), region);
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        break;
      }
      Thread.sleep(100);
    }
    LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + region + "\n\n");

    // Region of disabled table was opened on dead RS
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, deadServerName);
    ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
      hrsDead.getServerName(), region);
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        break;
      }
      Thread.sleep(100);
    }
    LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + region + "\n\n");

    /*
     * ZK = NONE
     */

    // Region of enabled table was open at steady-state on dead RS: open it,
    // then delete the OPENED znode so no RIT node remains for it in ZK.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, deadServerName);
    ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
      hrsDead.getServerName(), region);
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
        LOG.debug("DELETED " + rt);
        break;
      }
      Thread.sleep(100);
    }
    LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
        + "\n" + region + "\n\n");

    // Region of disabled table was open at steady-state on dead RS
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, deadServerName);
    ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
      hrsDead.getServerName(), region);
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
        break;
      }
      Thread.sleep(100);
    }
    LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
        + "\n" + region + "\n\n");

    /*
     * DONE MOCKING
     */

    log("Done mocking data up in ZK");

    // Kill the RS that had a hard death
    log("Killing RS " + deadServerName);
    hrsDead.abort("Killing for unit test");
    log("RS " + deadServerName + " killed");

    // Start up a new master. Wait until regionserver is completely down
    // before starting new master because of hbase-4511.
    while (hrsDeadThread.isAlive()) {
      Threads.sleep(10);
    }
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    assertTrue(cluster.waitForActiveAndReadyMaster());
    log("Master is ready");

    // Wait until SSH (ServerShutdownHandler) processing completed for the
    // dead server before checking assignments.
    while (master.getServerManager().areDeadServersInProgress()) {
      Thread.sleep(10);
    }

    // Failover should be completed, now wait for no RIT
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);
    log("No more RIT in ZK");
    long now = System.currentTimeMillis();
    long maxTime = 120000;
    boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
    if (!done) {
      // Dump the remaining regions-in-transition to aid diagnosis before the
      // elapsed-time assertion below fails.
      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
      LOG.info("rit=" + regionStates.getRegionsInTransition());
    }
    long elapsed = System.currentTimeMillis() - now;
    assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
      elapsed < maxTime);
    log("No more RIT in RIT map, doing final test verification");

    // Grab all the regions that are online across RSs
    Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
    now = System.currentTimeMillis();
    maxTime = 30000;
    for (JVMClusterUtil.RegionServerThread rst :
        cluster.getRegionServerThreads()) {
      try {
        HRegionServer rs = rst.getRegionServer();
        while (!rs.getRegionsInTransitionInRS().isEmpty()) {
          elapsed = System.currentTimeMillis() - now;
          assertTrue("Test timed out in getting online regions", elapsed < maxTime);
          if (rs.isAborted() || rs.isStopped()) {
            // This region server is stopped, skip it.
            break;
          }
          Thread.sleep(100);
        }
        onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
      } catch (RegionServerStoppedException e) {
        // Best-effort: a region server may legitimately be gone at this point.
        LOG.info("Got RegionServerStoppedException", e);
      }
    }

    // Now, everything that should be online should be online
    for (HRegionInfo hri : regionsThatShouldBeOnline) {
      assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
        onlineRegions.contains(hri));
    }

    // Everything that should be offline should not be online
    for (HRegionInfo hri : regionsThatShouldBeOffline) {
      assertFalse(onlineRegions.contains(hri));
    }

    log("Done with verification, all passed, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Verify regions are on the expected region server.
   * <p>
   * NOTE(review): despite the name, this method asserts nothing; it mutates
   * the passed {@code regions} list in place, removing every region that is
   * NOT currently online on {@code hrs}. Presumably callers inspect the
   * pruned list afterwards — confirm against call sites.
   */
  private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
      throws IOException {
    List<HRegionInfo> tmpOnlineRegions =
      ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
    Iterator<HRegionInfo> itr = regions.iterator();
    while (itr.hasNext()) {
      HRegionInfo tmp = itr.next();
      if (!tmpOnlineRegions.contains(tmp)) {
        itr.remove();
      }
    }
  }

  /**
   * Create (and immediately close) a region on the filesystem for {@code hri}.
   * The region is closed before returning — see the comment below.
   */
  HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
      final HTableDescriptor htd)
  throws IOException {
    HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
    // The above call to create a region will create a WAL file. Each
    // log file create will also create a running thread to do syncing.
    // We need to close out this log else we will have a running thread trying
    // to sync the file system continuously which is ugly when dfs is taken
    // away at the end of the test.
    HRegion.closeHRegion(r);
    return r;
  }

  // TODO: Next test to add is with testing permutations of the RIT or the RS
  // killed are hosting ROOT and hbase:meta regions.

  /** Log a message bracketed by blank lines so it stands out in test output. */
  private void log(String string) {
    LOG.info("\n\n" + string + " \n\n");
  }

  /**
   * Masters should be able to fail over while hbase:meta's znode claims the
   * region is in OPENED state on a server that is no longer around.
   */
  @Test (timeout=180000)
  public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
      throws Exception {
    LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
    final int NUM_MASTERS = 1;
    final int NUM_RS = 2;

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setInt("hbase.master.info.port", -1);
    conf.setBoolean("hbase.assignment.usezk", true);

    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();

    // Find regionserver carrying meta.
    // Note: each probed regionserver is aborted BEFORE the meta check, so the
    // loop aborts servers one by one and stops right after aborting the one
    // that carried meta — leaving meta without a live host for the restart.
    List<RegionServerThread> regionServerThreads =
      cluster.getRegionServerThreads();
    Region metaRegion = null;
    HRegionServer metaRegionServer = null;
    for (RegionServerThread regionServerThread : regionServerThreads) {
      HRegionServer regionServer = regionServerThread.getRegionServer();
      metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
      regionServer.abort("");
      if (null != metaRegion) {
        metaRegionServer = regionServer;
        break;
      }
    }

    TEST_UTIL.shutdownMiniHBaseCluster();

    // Create a ZKW to use in the test; force meta's znode into OPENED state
    // pointing at the (now dead) meta server.
    ZooKeeperWatcher zkw =
      HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
          metaRegion, metaRegionServer.getServerName());

    LOG.info("Staring cluster for second time");
    TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);

    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
    while (!master.isInitialized()) {
      Thread.sleep(100);
    }
    // Failover should be completed, now wait for no RIT
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);

    zkw.close();
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * This tests a RIT in offline state will get re-assigned after a master restart
   */
  @Test(timeout=240000)
  public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
    final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
    final int NUM_MASTERS = 1;
    final int NUM_RS = 2;

    // Create config to use for this cluster
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean("hbase.assignment.usezk", true);

    // Start the cluster
    final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    log("Cluster started");

    TEST_UTIL.createTable(table, Bytes.toBytes("family"));
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
    ServerName serverName = regionStates.getRegionServerOfRegion(hri);
    TEST_UTIL.assertRegionOnServer(hri, serverName, 200);

    // Pick any online server other than the one currently hosting the region
    // to use as the destination in the mocked OFFLINE RIT node below.
    ServerName dstName = null;
    for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
      if (!tmpServer.equals(serverName)) {
        dstName = tmpServer;
        break;
      }
    }
    // find a different server
    assertTrue(dstName != null);
    // shutdown HBase cluster
    TEST_UTIL.shutdownMiniHBaseCluster();
    // create a RIT node in offline state
    ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
    ZKAssign.createNodeOffline(zkw, hri, dstName);
    Stat stat = new Stat();
    byte[] data =
        ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
    assertTrue(data != null);
    RegionTransition rt = RegionTransition.parseFrom(data);
    assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);

    LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
        + " and dst server=" + dstName);

    // start HBase cluster
    TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);

    // Wait for a fully initialized master that is done processing dead servers.
    while (true) {
      master = TEST_UTIL.getHBaseCluster().getMaster();
      if (master != null && master.isInitialized()) {
        ServerManager serverManager = master.getServerManager();
        if (!serverManager.areDeadServersInProgress()) {
          break;
        }
      }
      Thread.sleep(200);
    }

    // verify the region is assigned
    master = TEST_UTIL.getHBaseCluster().getMaster();
    master.getAssignmentManager().waitForAssignment(hri);
    regionStates = master.getAssignmentManager().getRegionStates();
    RegionState newState = regionStates.getRegionState(hri);
    assertTrue(newState.isOpened());
  }

  /**
   * Simple test of master failover.
   * <p>
   * Starts with three masters.  Kills a backup master.  Then kills the active
   * master.  Ensures the final master becomes active and we can still contact
   * the cluster.
   * @throws Exception
   */
  @Test (timeout=240000)
  public void testSimpleMasterFailover() throws Exception {

    final int NUM_MASTERS = 3;
    final int NUM_RS = 3;

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();

    // wait for each to come online
    for (MasterThread mt : masterThreads) {
      assertTrue(mt.isAlive());
    }

    // verify only one is the active master and we have right number
    int numActive = 0;
    int activeIndex = -1;
    ServerName activeName = null;
    HMaster active = null;
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        numActive++;
        activeIndex = i;
        active = masterThreads.get(activeIndex).getMaster();
        activeName = active.getServerName();
      }
    }
    assertEquals(1, numActive);
    assertEquals(NUM_MASTERS, masterThreads.size());
    LOG.info("Active master " + activeName);

    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    ClusterStatus status = active.getClusterStatus();
    assertTrue(status.getMaster().equals(activeName));
    assertEquals(2, status.getBackupMastersSize());
    assertEquals(2, status.getBackupMasters().size());

    // attempt to stop one of the inactive masters
    int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
    HMaster master = cluster.getMaster(backupIndex);
    LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
    cluster.stopMaster(backupIndex, false);
    cluster.waitOnMaster(backupIndex);

    // Verify still one active master and it's the same
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
        activeIndex = i;
        active = masterThreads.get(activeIndex).getMaster();
      }
    }
    // NOTE(review): numActive is not recomputed in the loop above, so this
    // assertion re-checks the count taken before the backup master was
    // stopped rather than the current number of active masters.
    assertEquals(1, numActive);
    assertEquals(2, masterThreads.size());
    int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
    LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
    assertEquals(3, rsCount);

    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    status = active.getClusterStatus();
    assertTrue(status.getMaster().equals(activeName));
    assertEquals(1, status.getBackupMastersSize());
    assertEquals(1, status.getBackupMasters().size());

    // kill the active master
    LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
    cluster.stopMaster(activeIndex, false);
    cluster.waitOnMaster(activeIndex);

    // wait for an active master to show up and be ready
    assertTrue(cluster.waitForActiveAndReadyMaster());

    LOG.debug("\n\nVerifying backup master is now active\n");
    // should only have one master now
    assertEquals(1, masterThreads.size());

    // and he should be active
    active = masterThreads.get(0).getMaster();
    assertNotNull(active);
    status = active.getClusterStatus();
    ServerName mastername = status.getMaster();
    assertTrue(mastername.equals(active.getServerName()));
    assertTrue(active.isActiveMaster());
    assertEquals(0, status.getBackupMastersSize());
    assertEquals(0, status.getBackupMasters().size());
    int rss = status.getServersSize();
    LOG.info("Active master " + mastername.getServerName() + " managing " +
      rss + " region servers");
    assertEquals(3, rss);

    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Test region in pending_open/close and failed_open/close when master failover
   */
  @Test (timeout=180000)
  @SuppressWarnings("deprecation")
  public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
    final int NUM_MASTERS = 1;
    final int NUM_RS = 1;

    // Create config to use for this cluster
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean("hbase.assignment.usezk", false);

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());

    // only one master thread, let's wait for it to be initialized
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());

    // Create a table with a region online
    Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
    onlineTable.close();
    // Create a table in META, so it has a region offline
    HTableDescriptor offlineTable = new HTableDescriptor(
        TableName.valueOf(Bytes.toBytes("offlineTable")));
    offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));

    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = FSUtils.getRootDir(conf);
    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
    fstd.createTableDescriptor(offlineTable);

    HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(hriOffline, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);

    log("Regions in hbase:meta and namespace have been created");

    // at this point we only expect 3 regions to be assigned out
    // (catalogs and namespace, + 1 online region)
    assertEquals(3, cluster.countServedRegions());
    HRegionInfo hriOnline = null;
    try (RegionLocator locator =
        TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
      hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
    }
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();

    // Put the online region in pending_close. It is actually already opened.
    // This is to simulate that the region close RPC is not sent out before failover
    RegionState oldState = regionStates.getRegionState(hriOnline);
    RegionState newState = new RegionState(
        hriOnline, State.PENDING_CLOSE, oldState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    // Put the offline region in pending_open. It is actually not opened yet.
    // This is to simulate that the region open RPC is not sent out before failover.
    // Note: newState.getServerName() here reads the server off the PENDING_CLOSE
    // state built just above, i.e. the online region's hosting server.
    oldState = new RegionState(hriOffline, State.OFFLINE);
    newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedClose, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);

    // Simulate a region that failed to close before the master went down.
    oldState = new RegionState(failedClose, State.PENDING_CLOSE);
    newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedOpen, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);

    // Simulate a region transitioning to failed open when the region server reports the
    // transition as FAILED_OPEN
    oldState = new RegionState(failedOpen, State.PENDING_OPEN);
    newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);

    // Simulate a region transitioning to failed open when the master couldn't find a plan for
    // the region (hence the null server name)
    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    // Stop the master
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // Wait till no region in transition any more
    master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);

    // Get new region states since master restarted
    regionStates = master.getAssignmentManager().getRegionStates();

    // All mocked states (pending_open/close, failed_open/close) should have
    // been resolved by the new master and the regions brought online.
    assertTrue(regionStates.isRegionOnline(hriOffline));
    assertTrue(regionStates.isRegionOnline(hriOnline));
    assertTrue(regionStates.isRegionOnline(failedClose));
    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
    assertTrue(regionStates.isRegionOnline(failedOpen));

    log("Done with verification, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Test meta in transition when master failover
   */
  @Test(timeout = 180000)
  public void testMetaInTransitionWhenMasterFailover() throws Exception {
    final int NUM_MASTERS = 1;
    final int NUM_RS = 1;

    // Start the cluster
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean("hbase.assignment.usezk", false);
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    log("Moving meta off the master");
    HMaster activeMaster = cluster.getMaster();
    HRegionServer rs = cluster.getRegionServer(0);
    ServerName metaServerName = cluster.getLiveRegionServerThreads()
        .get(0).getRegionServer().getServerName();
    activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
      Bytes.toBytes(metaServerName.getServerName()));
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals("Meta should be assigned on expected regionserver",
      metaServerName, activeMaster.getMetaTableLocator()
        .getMetaRegionLocation(activeMaster.getZooKeeper()));

    // Now kill master, meta should remain on rs, where we placed it before.
    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // meta should remain where it was
    RegionState metaState =
      MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getState(), State.OPEN);

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // ensure meta is still deployed on RS
    metaState =
      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getState(), State.OPEN);

    // Update meta state as PENDING_OPEN, then kill master
    // that simulates, that RS successfully deployed, but
    // RPC was lost right before failure.
    // region server should expire (how it can be verified?)
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
      rs.getServerName(), State.PENDING_OPEN);
    // Take meta off the RS so the new master really has to (re)open it.
    Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    rs.removeFromOnlineRegions(meta, null);
    ((HRegion)meta).close();

    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

    metaState =
      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getState(), State.OPEN);

    // Update meta state as PENDING_CLOSE, then kill master
    // that simulates, that RS successfully deployed, but
    // RPC was lost right before failure.
    // region server should expire (how it can be verified?)
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
      rs.getServerName(), State.PENDING_CLOSE);

    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // Close meta on the RS while no master is up; the next master must notice
    // and re-assign.
    rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
      rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

    rs.getRSRpcServices().closeRegion(
      null,
      RequestConverter.buildCloseRegionRequest(rs.getServerName(),
        HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));

    // Set a dummy server to check if master reassigns meta on restart
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
      ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);

    log("Aborting master");
    activeMaster.stop("test-kill");

    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
}