1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.master; 19 20 import static org.junit.Assert.assertEquals; 21 import static org.junit.Assert.assertFalse; 22 import static org.junit.Assert.assertNotEquals; 23 import static org.junit.Assert.assertNotNull; 24 import static org.junit.Assert.assertNull; 25 import static org.junit.Assert.assertTrue; 26 import static org.junit.Assert.fail; 27 28 import java.io.IOException; 29 import java.util.ArrayList; 30 import java.util.List; 31 import java.util.Map; 32 import java.util.Set; 33 import java.util.concurrent.atomic.AtomicBoolean; 34 import java.util.concurrent.atomic.AtomicInteger; 35 36 import org.apache.hadoop.conf.Configuration; 37 import org.apache.hadoop.fs.FileSystem; 38 import org.apache.hadoop.fs.Path; 39 import org.apache.hadoop.hbase.CoordinatedStateManager; 40 import org.apache.hadoop.hbase.HBaseTestingUtility; 41 import org.apache.hadoop.hbase.HColumnDescriptor; 42 import org.apache.hadoop.hbase.HConstants; 43 import org.apache.hadoop.hbase.HRegionInfo; 44 import org.apache.hadoop.hbase.HTableDescriptor; 45 import org.apache.hadoop.hbase.testclassification.MediumTests; 46 import org.apache.hadoop.hbase.MetaTableAccessor; 47 import org.apache.hadoop.hbase.MiniHBaseCluster; 48 import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; 49 import org.apache.hadoop.hbase.ServerLoad; 50 import org.apache.hadoop.hbase.ServerName; 51 import org.apache.hadoop.hbase.TableName; 52 import org.apache.hadoop.hbase.UnknownRegionException; 53 import org.apache.hadoop.hbase.Waiter; 54 import org.apache.hadoop.hbase.client.Admin; 55 import org.apache.hadoop.hbase.client.HBaseAdmin; 56 import org.apache.hadoop.hbase.client.HTable; 57 import org.apache.hadoop.hbase.client.Result; 58 import org.apache.hadoop.hbase.client.Table; 59 import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager; 60 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; 61 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; 62 import org.apache.hadoop.hbase.coprocessor.ObserverContext; 63 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; 64 import org.apache.hadoop.hbase.coprocessor.RegionObserver; 65 import org.apache.hadoop.hbase.executor.EventType; 66 import org.apache.hadoop.hbase.master.RegionState.State; 67 import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer; 68 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 69 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; 70 import org.apache.hadoop.hbase.regionserver.HRegionServer; 71 import org.apache.hadoop.hbase.util.Bytes; 72 import org.apache.hadoop.hbase.util.ConfigUtil; 73 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 74 import org.apache.hadoop.hbase.util.FSUtils; 75 import org.apache.hadoop.hbase.util.JVMClusterUtil; 76 import org.apache.hadoop.hbase.util.Threads; 77 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 78 import org.apache.hadoop.hbase.zookeeper.ZKAssign; 79 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; 80 import org.apache.zookeeper.KeeperException; 81 import org.junit.AfterClass; 82 import org.junit.BeforeClass; 83 import org.junit.Test; 84 import org.junit.experimental.categories.Category; 85 86 87 /** 88 * This tests AssignmentManager with a testing cluster. 89 */ 90 @Category(MediumTests.class) 91 @SuppressWarnings("deprecation") 92 public class TestAssignmentManagerOnCluster { 93 private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); 94 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 95 final static Configuration conf = TEST_UTIL.getConfiguration(); 96 private static HBaseAdmin admin; 97 setupOnce()98 static void setupOnce() throws Exception { 99 // Using the our load balancer to control region plans 100 conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, 101 MyLoadBalancer.class, LoadBalancer.class); 102 conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, 103 MyRegionObserver.class, RegionObserver.class); 104 // Reduce the maximum attempts to speed up the test 105 conf.setInt("hbase.assignment.maximum.attempts", 3); 106 // Put meta on master to avoid meta server shutdown handling 107 conf.set("hbase.balancer.tablesOnMaster", "hbase:meta"); 108 conf.setInt("hbase.master.maximum.ping.server.attempts", 3); 109 conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1); 110 111 TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class); 112 admin = TEST_UTIL.getHBaseAdmin(); 113 } 114 115 @BeforeClass setUpBeforeClass()116 public static void setUpBeforeClass() throws Exception { 117 // Use ZK for region assignment 118 conf.setBoolean("hbase.assignment.usezk", true); 119 setupOnce(); 120 } 121 122 @AfterClass tearDownAfterClass()123 public static void tearDownAfterClass() throws Exception { 124 TEST_UTIL.shutdownMiniCluster(); 125 } 126 127 /** 128 * This tests restarting meta regionserver 129 */ 130 @Test (timeout=180000) testRestartMetaRegionServer()131 public void testRestartMetaRegionServer() throws Exception { 132 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 133 boolean stoppedARegionServer = false; 134 try { 135 HMaster master = cluster.getMaster(); 136 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 137 ServerName metaServerName = regionStates.getRegionServerOfRegion( 138 HRegionInfo.FIRST_META_REGIONINFO); 139 if (master.getServerName().equals(metaServerName) || metaServerName == null 140 || !metaServerName.equals(cluster.getServerHoldingMeta())) { 141 // Move meta off master 142 metaServerName = cluster.getLiveRegionServerThreads() 143 .get(0).getRegionServer().getServerName(); 144 master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), 145 Bytes.toBytes(metaServerName.getServerName())); 146 master.assignmentManager.waitUntilNoRegionsInTransition(60000); 147 } 148 RegionState metaState = 149 MetaTableLocator.getMetaRegionState(master.getZooKeeper()); 150 assertEquals("Meta should be not in transition", 151 metaState.getState(), RegionState.State.OPEN); 152 assertNotEquals("Meta should be moved off master", 153 metaServerName, master.getServerName()); 154 cluster.killRegionServer(metaServerName); 155 stoppedARegionServer = true; 156 cluster.waitForRegionServerToStop(metaServerName, 60000); 157 // Wait for SSH to finish 158 final ServerManager serverManager = master.getServerManager(); 159 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { 160 @Override 161 public boolean evaluate() throws Exception { 162 return !serverManager.areDeadServersInProgress(); 163 } 164 }); 165 166 // Now, make sure meta is assigned 167 assertTrue("Meta should be assigned", 168 regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO)); 169 // Now, make sure meta is registered in zk 170 metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); 171 assertEquals("Meta should be not in transition", 172 metaState.getState(), RegionState.State.OPEN); 173 assertEquals("Meta should be assigned", metaState.getServerName(), 174 regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO)); 175 assertNotEquals("Meta should be assigned on a different server", 176 metaState.getServerName(), metaServerName); 177 } finally { 178 if (stoppedARegionServer) { 179 cluster.startRegionServer(); 180 } 181 } 182 } 183 184 /** 185 * This tests region assignment 186 */ 187 @Test (timeout=60000) testAssignRegion()188 public void testAssignRegion() throws Exception { 189 String table = "testAssignRegion"; 190 try { 191 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 192 desc.addFamily(new HColumnDescriptor(FAMILY)); 193 admin.createTable(desc); 194 195 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 196 HRegionInfo hri = new HRegionInfo( 197 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 198 MetaTableAccessor.addRegionToMeta(meta, hri); 199 200 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 201 master.assignRegion(hri); 202 AssignmentManager am = master.getAssignmentManager(); 203 am.waitForAssignment(hri); 204 205 RegionStates regionStates = am.getRegionStates(); 206 ServerName serverName = regionStates.getRegionServerOfRegion(hri); 207 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 208 209 // Region is assigned now. Let's assign it again. 210 // Master should not abort, and region should be assigned. 211 RegionState oldState = regionStates.getRegionState(hri); 212 TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName()); 213 master.getAssignmentManager().waitForAssignment(hri); 214 RegionState newState = regionStates.getRegionState(hri); 215 assertTrue(newState.isOpened() 216 && newState.getStamp() != oldState.getStamp()); 217 } finally { 218 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 219 } 220 } 221 222 // Simulate a scenario where the AssignCallable and SSH are trying to assign a region 223 @Test (timeout=60000) testAssignRegionBySSH()224 public void testAssignRegionBySSH() throws Exception { 225 if (!conf.getBoolean("hbase.assignment.usezk", true)) { 226 return; 227 } 228 String table = "testAssignRegionBySSH"; 229 MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster(); 230 try { 231 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 232 desc.addFamily(new HColumnDescriptor(FAMILY)); 233 admin.createTable(desc); 234 235 HTable meta = new HTable(conf, TableName.META_TABLE_NAME); 236 HRegionInfo hri = new HRegionInfo( 237 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 238 MetaTableAccessor.addRegionToMeta(meta, hri); 239 // Add some dummy server for the region entry 240 MetaTableAccessor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getConnection(), hri, 241 ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0, -1); 242 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 243 int i = TEST_UTIL.getHBaseCluster().getServerWithMeta(); 244 HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0); 245 // Choose a server other than meta to kill 246 ServerName controlledServer = rs.getServerName(); 247 master.enableSSH(false); 248 TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer); 249 TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1); 250 AssignmentManager am = master.getAssignmentManager(); 251 252 // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state, 253 // but not in transition and the server is the dead 'controlledServer' 254 regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null); 255 am.assign(hri, true, true); 256 // Region should remain OFFLINE and go to transition 257 assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState()); 258 assertTrue (regionStates.isRegionInTransition(hri)); 259 260 master.enableSSH(true); 261 am.waitForAssignment(hri); 262 assertTrue (regionStates.getRegionState(hri).isOpened()); 263 ServerName serverName = regionStates.getRegionServerOfRegion(hri); 264 TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000); 265 } finally { 266 if (master != null) { 267 master.enableSSH(true); 268 } 269 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 270 TEST_UTIL.getHBaseCluster().startRegionServer(); 271 } 272 } 273 274 /** 275 * This tests region assignment on a simulated restarted server 276 */ 277 @Test (timeout=120000) testAssignRegionOnRestartedServer()278 public void testAssignRegionOnRestartedServer() throws Exception { 279 String table = "testAssignRegionOnRestartedServer"; 280 TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20); 281 TEST_UTIL.getMiniHBaseCluster().stopMaster(0); 282 TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect 283 284 ServerName deadServer = null; 285 HMaster master = null; 286 try { 287 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 288 desc.addFamily(new HColumnDescriptor(FAMILY)); 289 admin.createTable(desc); 290 291 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 292 final HRegionInfo hri = new HRegionInfo( 293 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 294 MetaTableAccessor.addRegionToMeta(meta, hri); 295 296 master = TEST_UTIL.getHBaseCluster().getMaster(); 297 Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet(); 298 assertFalse("There should be some servers online", onlineServers.isEmpty()); 299 300 // Use the first server as the destination server 301 ServerName destServer = onlineServers.iterator().next(); 302 303 // Created faked dead server 304 deadServer = ServerName.valueOf(destServer.getHostname(), 305 destServer.getPort(), destServer.getStartcode() - 100L); 306 master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD); 307 308 final AssignmentManager am = master.getAssignmentManager(); 309 RegionPlan plan = new RegionPlan(hri, null, deadServer); 310 am.addPlan(hri.getEncodedName(), plan); 311 master.assignRegion(hri); 312 313 int version = ZKAssign.transitionNode(master.getZooKeeper(), hri, 314 destServer, EventType.M_ZK_REGION_OFFLINE, 315 EventType.RS_ZK_REGION_OPENING, 0); 316 assertEquals("TansitionNode should fail", -1, version); 317 318 TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() { 319 @Override 320 public boolean evaluate() throws Exception { 321 return ! am.getRegionStates().isRegionInTransition(hri); 322 } 323 }); 324 325 assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri)); 326 } finally { 327 if (deadServer != null) { 328 master.serverManager.expireServer(deadServer); 329 } 330 331 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 332 333 // reset the value for other tests 334 TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3); 335 ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName(); 336 TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName); 337 TEST_UTIL.getMiniHBaseCluster().startMaster(); 338 // Wait till master is active and is initialized 339 while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null || 340 !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) { 341 Threads.sleep(1); 342 } 343 } 344 } 345 346 /** 347 * This tests offlining a region 348 */ 349 @Test (timeout=60000) testOfflineRegion()350 public void testOfflineRegion() throws Exception { 351 TableName table = 352 TableName.valueOf("testOfflineRegion"); 353 try { 354 HRegionInfo hri = createTableAndGetOneRegion(table); 355 356 RegionStates regionStates = TEST_UTIL.getHBaseCluster(). 357 getMaster().getAssignmentManager().getRegionStates(); 358 ServerName serverName = regionStates.getRegionServerOfRegion(hri); 359 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 360 admin.offline(hri.getRegionName()); 361 362 long timeoutTime = System.currentTimeMillis() + 800; 363 while (true) { 364 if (regionStates.getRegionByStateOfTable(table) 365 .get(RegionState.State.OFFLINE).contains(hri)) 366 break; 367 long now = System.currentTimeMillis(); 368 if (now > timeoutTime) { 369 fail("Failed to offline the region in time"); 370 break; 371 } 372 Thread.sleep(10); 373 } 374 RegionState regionState = regionStates.getRegionState(hri); 375 assertTrue(regionState.isOffline()); 376 } finally { 377 TEST_UTIL.deleteTable(table); 378 } 379 } 380 381 /** 382 * This tests moving a region 383 */ 384 @Test (timeout=50000) testMoveRegion()385 public void testMoveRegion() throws Exception { 386 TableName table = 387 TableName.valueOf("testMoveRegion"); 388 try { 389 HRegionInfo hri = createTableAndGetOneRegion(table); 390 391 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 392 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 393 ServerName serverName = regionStates.getRegionServerOfRegion(hri); 394 ServerManager serverManager = master.getServerManager(); 395 ServerName destServerName = null; 396 List<JVMClusterUtil.RegionServerThread> regionServers = 397 TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads(); 398 for (JVMClusterUtil.RegionServerThread regionServer: regionServers) { 399 HRegionServer destServer = regionServer.getRegionServer(); 400 destServerName = destServer.getServerName(); 401 if (!destServerName.equals(serverName) 402 && serverManager.isServerOnline(destServerName)) { 403 break; 404 } 405 } 406 assertTrue(destServerName != null 407 && !destServerName.equals(serverName)); 408 TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(), 409 Bytes.toBytes(destServerName.getServerName())); 410 411 long timeoutTime = System.currentTimeMillis() + 30000; 412 while (true) { 413 ServerName sn = regionStates.getRegionServerOfRegion(hri); 414 if (sn != null && sn.equals(destServerName)) { 415 TEST_UTIL.assertRegionOnServer(hri, sn, 6000); 416 break; 417 } 418 long now = System.currentTimeMillis(); 419 if (now > timeoutTime) { 420 fail("Failed to move the region in time: " 421 + regionStates.getRegionState(hri)); 422 } 423 regionStates.waitForUpdate(50); 424 } 425 426 } finally { 427 TEST_UTIL.deleteTable(table); 428 } 429 } 430 431 /** 432 * If a table is deleted, we should not be able to move it anymore. 433 * Otherwise, the region will be brought back. 434 * @throws Exception 435 */ 436 @Test (timeout=50000) testMoveRegionOfDeletedTable()437 public void testMoveRegionOfDeletedTable() throws Exception { 438 TableName table = 439 TableName.valueOf("testMoveRegionOfDeletedTable"); 440 Admin admin = TEST_UTIL.getHBaseAdmin(); 441 try { 442 HRegionInfo hri = createTableAndGetOneRegion(table); 443 444 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 445 AssignmentManager am = master.getAssignmentManager(); 446 RegionStates regionStates = am.getRegionStates(); 447 ServerName serverName = regionStates.getRegionServerOfRegion(hri); 448 ServerName destServerName = null; 449 for (int i = 0; i < 3; i++) { 450 HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i); 451 if (!destServer.getServerName().equals(serverName)) { 452 destServerName = destServer.getServerName(); 453 break; 454 } 455 } 456 assertTrue(destServerName != null 457 && !destServerName.equals(serverName)); 458 459 TEST_UTIL.deleteTable(table); 460 461 try { 462 admin.move(hri.getEncodedNameAsBytes(), 463 Bytes.toBytes(destServerName.getServerName())); 464 fail("We should not find the region"); 465 } catch (IOException ioe) { 466 assertTrue(ioe instanceof UnknownRegionException); 467 } 468 469 am.balance(new RegionPlan(hri, serverName, destServerName)); 470 assertFalse("The region should not be in transition", 471 regionStates.isRegionInTransition(hri)); 472 } finally { 473 if (admin.tableExists(table)) { 474 TEST_UTIL.deleteTable(table); 475 } 476 } 477 } 478 createTableAndGetOneRegion( final TableName tableName)479 HRegionInfo createTableAndGetOneRegion( 480 final TableName tableName) throws IOException, InterruptedException { 481 HTableDescriptor desc = new HTableDescriptor(tableName); 482 desc.addFamily(new HColumnDescriptor(FAMILY)); 483 admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5); 484 485 // wait till the table is assigned 486 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 487 long timeoutTime = System.currentTimeMillis() + 1000; 488 while (true) { 489 List<HRegionInfo> regions = master.getAssignmentManager(). 490 getRegionStates().getRegionsOfTable(tableName); 491 if (regions.size() > 3) { 492 return regions.get(2); 493 } 494 long now = System.currentTimeMillis(); 495 if (now > timeoutTime) { 496 fail("Could not find an online region"); 497 } 498 Thread.sleep(10); 499 } 500 } 501 502 /** 503 * This test should not be flaky. If it is flaky, it means something 504 * wrong with AssignmentManager which should be reported and fixed 505 * 506 * This tests forcefully assign a region while it's closing and re-assigned. 507 */ 508 @Test (timeout=60000) testForceAssignWhileClosing()509 public void testForceAssignWhileClosing() throws Exception { 510 String table = "testForceAssignWhileClosing"; 511 try { 512 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 513 desc.addFamily(new HColumnDescriptor(FAMILY)); 514 admin.createTable(desc); 515 516 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 517 HRegionInfo hri = new HRegionInfo( 518 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 519 MetaTableAccessor.addRegionToMeta(meta, hri); 520 521 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 522 master.assignRegion(hri); 523 AssignmentManager am = master.getAssignmentManager(); 524 assertTrue(am.waitForAssignment(hri)); 525 526 ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri); 527 TEST_UTIL.assertRegionOnServer(hri, sn, 6000); 528 MyRegionObserver.preCloseEnabled.set(true); 529 am.unassign(hri); 530 RegionState state = am.getRegionStates().getRegionState(hri); 531 assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); 532 533 MyRegionObserver.preCloseEnabled.set(false); 534 am.unassign(hri, true); 535 536 // region is closing now, will be re-assigned automatically. 537 // now, let's forcefully assign it again. it should be 538 // assigned properly and no double-assignment 539 am.assign(hri, true, true); 540 541 // let's check if it's assigned after it's out of transition 542 am.waitOnRegionToClearRegionsInTransition(hri); 543 assertTrue(am.waitForAssignment(hri)); 544 545 ServerName serverName = master.getAssignmentManager(). 546 getRegionStates().getRegionServerOfRegion(hri); 547 TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200); 548 } finally { 549 MyRegionObserver.preCloseEnabled.set(false); 550 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 551 } 552 } 553 554 /** 555 * This tests region close failed 556 */ 557 @Test (timeout=60000) testCloseFailed()558 public void testCloseFailed() throws Exception { 559 String table = "testCloseFailed"; 560 try { 561 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 562 desc.addFamily(new HColumnDescriptor(FAMILY)); 563 admin.createTable(desc); 564 565 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 566 HRegionInfo hri = new HRegionInfo( 567 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 568 MetaTableAccessor.addRegionToMeta(meta, hri); 569 570 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 571 master.assignRegion(hri); 572 AssignmentManager am = master.getAssignmentManager(); 573 assertTrue(am.waitForAssignment(hri)); 574 ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri); 575 TEST_UTIL.assertRegionOnServer(hri, sn, 6000); 576 577 MyRegionObserver.preCloseEnabled.set(true); 578 am.unassign(hri); 579 RegionState state = am.getRegionStates().getRegionState(hri); 580 assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); 581 582 MyRegionObserver.preCloseEnabled.set(false); 583 am.unassign(hri, true); 584 585 // region may still be assigned now since it's closing, 586 // let's check if it's assigned after it's out of transition 587 am.waitOnRegionToClearRegionsInTransition(hri); 588 589 // region should be closed and re-assigned 590 assertTrue(am.waitForAssignment(hri)); 591 ServerName serverName = master.getAssignmentManager(). 592 getRegionStates().getRegionServerOfRegion(hri); 593 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 594 } finally { 595 MyRegionObserver.preCloseEnabled.set(false); 596 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 597 } 598 } 599 600 /** 601 * This tests region open failed 602 */ 603 @Test (timeout=60000) testOpenFailed()604 public void testOpenFailed() throws Exception { 605 String table = "testOpenFailed"; 606 try { 607 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 608 desc.addFamily(new HColumnDescriptor(FAMILY)); 609 admin.createTable(desc); 610 611 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 612 HRegionInfo hri = new HRegionInfo( 613 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 614 MetaTableAccessor.addRegionToMeta(meta, hri); 615 616 MyLoadBalancer.controledRegion = hri.getEncodedName(); 617 618 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 619 master.assignRegion(hri); 620 AssignmentManager am = master.getAssignmentManager(); 621 assertFalse(am.waitForAssignment(hri)); 622 623 RegionState state = am.getRegionStates().getRegionState(hri); 624 assertEquals(RegionState.State.FAILED_OPEN, state.getState()); 625 // Failed to open since no plan, so it's on no server 626 assertNull(state.getServerName()); 627 628 MyLoadBalancer.controledRegion = null; 629 master.assignRegion(hri); 630 assertTrue(am.waitForAssignment(hri)); 631 632 ServerName serverName = master.getAssignmentManager(). 633 getRegionStates().getRegionServerOfRegion(hri); 634 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 635 } finally { 636 MyLoadBalancer.controledRegion = null; 637 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 638 } 639 } 640 641 /** 642 * This tests region open failure which is not recoverable 643 */ 644 @Test (timeout=60000) testOpenFailedUnrecoverable()645 public void testOpenFailedUnrecoverable() throws Exception { 646 TableName table = 647 TableName.valueOf("testOpenFailedUnrecoverable"); 648 try { 649 HTableDescriptor desc = new HTableDescriptor(table); 650 desc.addFamily(new HColumnDescriptor(FAMILY)); 651 admin.createTable(desc); 652 653 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 654 HRegionInfo hri = new HRegionInfo( 655 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 656 MetaTableAccessor.addRegionToMeta(meta, hri); 657 658 FileSystem fs = FileSystem.get(conf); 659 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table); 660 Path regionDir = new Path(tableDir, hri.getEncodedName()); 661 // create a file named the same as the region dir to 662 // mess up with region opening 663 fs.create(regionDir, true); 664 665 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 666 master.assignRegion(hri); 667 AssignmentManager am = master.getAssignmentManager(); 668 assertFalse(am.waitForAssignment(hri)); 669 670 RegionState state = am.getRegionStates().getRegionState(hri); 671 assertEquals(RegionState.State.FAILED_OPEN, state.getState()); 672 // Failed to open due to file system issue. Region state should 673 // carry the opening region server so that we can force close it 674 // later on before opening it again. See HBASE-9092. 675 assertNotNull(state.getServerName()); 676 677 // remove the blocking file, so that region can be opened 678 fs.delete(regionDir, true); 679 master.assignRegion(hri); 680 assertTrue(am.waitForAssignment(hri)); 681 682 ServerName serverName = master.getAssignmentManager(). 683 getRegionStates().getRegionServerOfRegion(hri); 684 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 685 } finally { 686 TEST_UTIL.deleteTable(table); 687 } 688 } 689 690 @Test (timeout=60000) testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState()691 public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception { 692 final TableName table = 693 TableName.valueOf 694 ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState"); 695 AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 696 HRegionInfo hri = null; 697 ServerName serverName = null; 698 try { 699 hri = createTableAndGetOneRegion(table); 700 serverName = am.getRegionStates().getRegionServerOfRegion(hri); 701 ServerName destServerName = null; 702 HRegionServer destServer = null; 703 for (int i = 0; i < 3; i++) { 704 destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i); 705 if (!destServer.getServerName().equals(serverName)) { 706 destServerName = destServer.getServerName(); 707 break; 708 } 709 } 710 am.regionOffline(hri); 711 ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper(); 712 am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName); 713 if (ConfigUtil.useZKForAssignment(conf)) { 714 ZKAssign.createNodeOffline(zkw, hri, destServerName); 715 ZKAssign.transitionNodeOpening(zkw, hri, destServerName); 716 717 // Wait till the event is processed and the region is in transition 718 long timeoutTime = System.currentTimeMillis() + 20000; 719 while (!am.getRegionStates().isRegionInTransition(hri)) { 720 assertTrue("Failed to process ZK opening event in time", 721 System.currentTimeMillis() < timeoutTime); 722 Thread.sleep(100); 723 } 724 } 725 726 am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLING); 727 List<HRegionInfo> toAssignRegions = am.cleanOutCrashedServerReferences(destServerName); 728 assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty()); 729 assertTrue("Regions to be assigned should be empty.", am.getRegionStates() 730 .getRegionState(hri).isOffline()); 731 } finally { 732 if (hri != null && serverName != null) { 733 am.regionOnline(hri, serverName); 734 } 735 am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLED); 736 TEST_UTIL.deleteTable(table); 737 } 738 } 739 740 /** 741 * This tests region close hanging 742 */ 743 @Test (timeout=60000) testCloseHang()744 public void testCloseHang() throws Exception { 745 String table = "testCloseHang"; 746 try { 747 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 748 desc.addFamily(new HColumnDescriptor(FAMILY)); 749 admin.createTable(desc); 750 751 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 752 HRegionInfo hri = new HRegionInfo( 753 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 754 MetaTableAccessor.addRegionToMeta(meta, hri); 755 756 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 757 master.assignRegion(hri); 758 AssignmentManager am = master.getAssignmentManager(); 759 assertTrue(am.waitForAssignment(hri)); 760 ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri); 761 TEST_UTIL.assertRegionOnServer(hri, sn, 6000); 762 763 MyRegionObserver.postCloseEnabled.set(true); 764 am.unassign(hri); 765 // Now region should pending_close or closing 766 // Unassign it again forcefully so that we can trigger already 767 // in transition exception. This test is to make sure this scenario 768 // is handled properly. 769 am.server.getConfiguration().setLong( 770 AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000); 771 am.unassign(hri, true); 772 RegionState state = am.getRegionStates().getRegionState(hri); 773 assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); 774 775 // Let region closing move ahead. The region should be closed 776 // properly and re-assigned automatically 777 MyRegionObserver.postCloseEnabled.set(false); 778 779 // region may still be assigned now since it's closing, 780 // let's check if it's assigned after it's out of transition 781 am.waitOnRegionToClearRegionsInTransition(hri); 782 783 // region should be closed and re-assigned 784 assertTrue(am.waitForAssignment(hri)); 785 ServerName serverName = master.getAssignmentManager(). 786 getRegionStates().getRegionServerOfRegion(hri); 787 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 788 } finally { 789 MyRegionObserver.postCloseEnabled.set(false); 790 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 791 } 792 } 793 794 /** 795 * This tests region close racing with open 796 */ 797 @Test (timeout=60000) testOpenCloseRacing()798 public void testOpenCloseRacing() throws Exception { 799 String table = "testOpenCloseRacing"; 800 try { 801 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 802 desc.addFamily(new HColumnDescriptor(FAMILY)); 803 admin.createTable(desc); 804 805 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 806 HRegionInfo hri = new HRegionInfo( 807 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 808 MetaTableAccessor.addRegionToMeta(meta, hri); 809 meta.close(); 810 811 MyRegionObserver.postOpenEnabled.set(true); 812 MyRegionObserver.postOpenCalled = false; 813 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 814 // Region will be opened, but it won't complete 815 master.assignRegion(hri); 816 long end = EnvironmentEdgeManager.currentTime() + 20000; 817 // Wait till postOpen is called 818 while (!MyRegionObserver.postOpenCalled ) { 819 assertFalse("Timed out waiting for postOpen to be called", 820 EnvironmentEdgeManager.currentTime() > end); 821 Thread.sleep(300); 822 } 823 824 AssignmentManager am = master.getAssignmentManager(); 825 // Now let's unassign it, it should do nothing 826 am.unassign(hri); 827 RegionState state = am.getRegionStates().getRegionState(hri); 828 ServerName oldServerName = state.getServerName(); 829 assertTrue(state.isPendingOpenOrOpening() && oldServerName != null); 830 831 // Now the region is stuck in opening 832 // Let's forcefully re-assign it to trigger closing/opening 833 // racing. This test is to make sure this scenario 834 // is handled properly. 835 ServerName destServerName = null; 836 int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size(); 837 for (int i = 0; i < numRS; i++) { 838 HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i); 839 if (!destServer.getServerName().equals(oldServerName)) { 840 destServerName = destServer.getServerName(); 841 break; 842 } 843 } 844 assertNotNull(destServerName); 845 assertFalse("Region should be assigned on a new region server", 846 oldServerName.equals(destServerName)); 847 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(); 848 regions.add(hri); 849 am.assign(destServerName, regions); 850 851 // let region open continue 852 MyRegionObserver.postOpenEnabled.set(false); 853 854 // let's check if it's assigned after it's out of transition 855 am.waitOnRegionToClearRegionsInTransition(hri); 856 assertTrue(am.waitForAssignment(hri)); 857 858 ServerName serverName = master.getAssignmentManager(). 859 getRegionStates().getRegionServerOfRegion(hri); 860 TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000); 861 } finally { 862 MyRegionObserver.postOpenEnabled.set(false); 863 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 864 } 865 } 866 867 /** 868 * Test force unassign/assign a region hosted on a dead server 869 */ 870 @Test (timeout=60000) testAssignRacingWithSSH()871 public void testAssignRacingWithSSH() throws Exception { 872 String table = "testAssignRacingWithSSH"; 873 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 874 MyMaster master = null; 875 try { 876 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 877 desc.addFamily(new HColumnDescriptor(FAMILY)); 878 admin.createTable(desc); 879 880 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 881 HRegionInfo hri = new HRegionInfo( 882 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 883 MetaTableAccessor.addRegionToMeta(meta, hri); 884 885 // Assign the region 886 master = (MyMaster)cluster.getMaster(); 887 master.assignRegion(hri); 888 889 // Hold SSH before killing the hosting server 890 master.enableSSH(false); 891 892 AssignmentManager am = master.getAssignmentManager(); 893 RegionStates regionStates = am.getRegionStates(); 894 ServerName metaServer = regionStates.getRegionServerOfRegion( 895 HRegionInfo.FIRST_META_REGIONINFO); 896 while (true) { 897 assertTrue(am.waitForAssignment(hri)); 898 RegionState state = regionStates.getRegionState(hri); 899 ServerName oldServerName = state.getServerName(); 900 if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) { 901 // Kill the hosting server, which doesn't have meta on it. 902 cluster.killRegionServer(oldServerName); 903 cluster.waitForRegionServerToStop(oldServerName, -1); 904 break; 905 } 906 int i = cluster.getServerWithMeta(); 907 HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0); 908 oldServerName = rs.getServerName(); 909 master.move(hri.getEncodedNameAsBytes(), 910 Bytes.toBytes(oldServerName.getServerName())); 911 } 912 913 // You can't assign a dead region before SSH 914 am.assign(hri, true, true); 915 RegionState state = regionStates.getRegionState(hri); 916 assertTrue(state.isFailedClose()); 917 918 // You can't unassign a dead region before SSH either 919 am.unassign(hri, true); 920 assertTrue(state.isFailedClose()); 921 922 // Enable SSH so that log can be split 923 master.enableSSH(true); 924 925 // let's check if it's assigned after it's out of transition. 926 // no need to assign it manually, SSH should do it 927 am.waitOnRegionToClearRegionsInTransition(hri); 928 assertTrue(am.waitForAssignment(hri)); 929 930 ServerName serverName = master.getAssignmentManager(). 931 getRegionStates().getRegionServerOfRegion(hri); 932 TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000); 933 } finally { 934 if (master != null) { 935 master.enableSSH(true); 936 } 937 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 938 cluster.startRegionServer(); 939 } 940 } 941 942 /** 943 * Test force unassign/assign a region of a disabled table 944 */ 945 @Test (timeout=60000) testAssignDisabledRegion()946 public void testAssignDisabledRegion() throws Exception { 947 TableName table = TableName.valueOf("testAssignDisabledRegion"); 948 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 949 MyMaster master = null; 950 try { 951 HTableDescriptor desc = new HTableDescriptor(table); 952 desc.addFamily(new HColumnDescriptor(FAMILY)); 953 admin.createTable(desc); 954 955 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 956 HRegionInfo hri = new HRegionInfo( 957 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 958 MetaTableAccessor.addRegionToMeta(meta, hri); 959 960 // Assign the region 961 master = (MyMaster)cluster.getMaster(); 962 master.assignRegion(hri); 963 AssignmentManager am = master.getAssignmentManager(); 964 RegionStates regionStates = am.getRegionStates(); 965 assertTrue(am.waitForAssignment(hri)); 966 967 // Disable the table 968 admin.disableTable(table); 969 assertTrue(regionStates.isRegionOffline(hri)); 970 971 // You can't assign a disabled region 972 am.assign(hri, true, true); 973 assertTrue(regionStates.isRegionOffline(hri)); 974 975 // You can't unassign a disabled region either 976 am.unassign(hri, true); 977 assertTrue(regionStates.isRegionOffline(hri)); 978 } finally { 979 TEST_UTIL.deleteTable(table); 980 } 981 } 982 983 /** 984 * Test offlined region is assigned by SSH 985 */ 986 @Test (timeout=60000) testAssignOfflinedRegionBySSH()987 public void testAssignOfflinedRegionBySSH() throws Exception { 988 String table = "testAssignOfflinedRegionBySSH"; 989 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 990 MyMaster master = null; 991 try { 992 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 993 desc.addFamily(new HColumnDescriptor(FAMILY)); 994 admin.createTable(desc); 995 996 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 997 HRegionInfo hri = new HRegionInfo( 998 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 999 MetaTableAccessor.addRegionToMeta(meta, hri); 1000 1001 // Assign the region 1002 master = (MyMaster)cluster.getMaster(); 1003 master.assignRegion(hri); 1004 1005 AssignmentManager am = master.getAssignmentManager(); 1006 RegionStates regionStates = am.getRegionStates(); 1007 ServerName metaServer = regionStates.getRegionServerOfRegion( 1008 HRegionInfo.FIRST_META_REGIONINFO); 1009 ServerName oldServerName = null; 1010 while (true) { 1011 assertTrue(am.waitForAssignment(hri)); 1012 RegionState state = regionStates.getRegionState(hri); 1013 oldServerName = state.getServerName(); 1014 if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) { 1015 // Mark the hosting server aborted, but don't actually kill it. 1016 // It doesn't have meta on it. 1017 MyRegionServer.abortedServer = oldServerName; 1018 break; 1019 } 1020 int i = cluster.getServerWithMeta(); 1021 HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0); 1022 oldServerName = rs.getServerName(); 1023 master.move(hri.getEncodedNameAsBytes(), 1024 Bytes.toBytes(oldServerName.getServerName())); 1025 } 1026 1027 // Make sure the region is assigned on the dead server 1028 assertTrue(regionStates.isRegionOnline(hri)); 1029 assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); 1030 1031 // Kill the hosting server, which doesn't have meta on it. 1032 cluster.killRegionServer(oldServerName); 1033 cluster.waitForRegionServerToStop(oldServerName, -1); 1034 1035 ServerManager serverManager = master.getServerManager(); 1036 while (!serverManager.isServerDead(oldServerName) 1037 || serverManager.getDeadServers().areDeadServersInProgress()) { 1038 Thread.sleep(100); 1039 } 1040 1041 // Let's check if it's assigned after it's out of transition. 1042 // no need to assign it manually, SSH should do it 1043 am.waitOnRegionToClearRegionsInTransition(hri); 1044 assertTrue(am.waitForAssignment(hri)); 1045 1046 ServerName serverName = master.getAssignmentManager(). 1047 getRegionStates().getRegionServerOfRegion(hri); 1048 TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200); 1049 } finally { 1050 MyRegionServer.abortedServer = null; 1051 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 1052 cluster.startRegionServer(); 1053 } 1054 } 1055 1056 /** 1057 * Test SSH waiting for extra region server for assignment 1058 */ 1059 @Test (timeout=300000) testSSHWaitForServerToAssignRegion()1060 public void testSSHWaitForServerToAssignRegion() throws Exception { 1061 TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion"); 1062 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 1063 boolean startAServer = false; 1064 try { 1065 HTableDescriptor desc = new HTableDescriptor(table); 1066 desc.addFamily(new HColumnDescriptor(FAMILY)); 1067 admin.createTable(desc); 1068 1069 HMaster master = cluster.getMaster(); 1070 final ServerManager serverManager = master.getServerManager(); 1071 MyLoadBalancer.countRegionServers = Integer.valueOf( 1072 serverManager.countOfRegionServers()); 1073 HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table); 1074 assertNotNull("First region should be assigned", rs); 1075 final ServerName serverName = rs.getServerName(); 1076 // Wait till SSH tried to assign regions a several times 1077 int counter = MyLoadBalancer.counter.get() + 5; 1078 cluster.killRegionServer(serverName); 1079 startAServer = true; 1080 cluster.waitForRegionServerToStop(serverName, -1); 1081 while (counter > MyLoadBalancer.counter.get()) { 1082 Thread.sleep(1000); 1083 } 1084 cluster.startRegionServer(); 1085 startAServer = false; 1086 // Wait till the dead server is processed by SSH 1087 TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() { 1088 @Override 1089 public boolean evaluate() throws Exception { 1090 return serverManager.isServerDead(serverName) 1091 && !serverManager.areDeadServersInProgress(); 1092 } 1093 }); 1094 TEST_UTIL.waitUntilAllRegionsAssigned(table, 300000); 1095 1096 rs = TEST_UTIL.getRSForFirstRegionInTable(table); 1097 assertTrue("First region should be re-assigned to a different server", 1098 rs != null && !serverName.equals(rs.getServerName())); 1099 } finally { 1100 MyLoadBalancer.countRegionServers = null; 1101 TEST_UTIL.deleteTable(table); 1102 if (startAServer) { 1103 cluster.startRegionServer(); 1104 } 1105 } 1106 } 1107 1108 /** 1109 * Test disabled region is ignored by SSH 1110 */ 1111 @Test (timeout=60000) testAssignDisabledRegionBySSH()1112 public void testAssignDisabledRegionBySSH() throws Exception { 1113 String table = "testAssignDisabledRegionBySSH"; 1114 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 1115 MyMaster master = null; 1116 try { 1117 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 1118 desc.addFamily(new HColumnDescriptor(FAMILY)); 1119 admin.createTable(desc); 1120 1121 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 1122 HRegionInfo hri = new HRegionInfo( 1123 desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 1124 MetaTableAccessor.addRegionToMeta(meta, hri); 1125 1126 // Assign the region 1127 master = (MyMaster)cluster.getMaster(); 1128 master.assignRegion(hri); 1129 1130 AssignmentManager am = master.getAssignmentManager(); 1131 RegionStates regionStates = am.getRegionStates(); 1132 ServerName metaServer = regionStates.getRegionServerOfRegion( 1133 HRegionInfo.FIRST_META_REGIONINFO); 1134 ServerName oldServerName = null; 1135 while (true) { 1136 assertTrue(am.waitForAssignment(hri)); 1137 RegionState state = regionStates.getRegionState(hri); 1138 oldServerName = state.getServerName(); 1139 if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) { 1140 // Mark the hosting server aborted, but don't actually kill it. 1141 // It doesn't have meta on it. 1142 MyRegionServer.abortedServer = oldServerName; 1143 break; 1144 } 1145 int i = cluster.getServerWithMeta(); 1146 HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0); 1147 oldServerName = rs.getServerName(); 1148 master.move(hri.getEncodedNameAsBytes(), 1149 Bytes.toBytes(oldServerName.getServerName())); 1150 } 1151 1152 // Make sure the region is assigned on the dead server 1153 assertTrue(regionStates.isRegionOnline(hri)); 1154 assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); 1155 1156 // Disable the table now. 1157 master.disableTable(hri.getTable(), HConstants.NO_NONCE, HConstants.NO_NONCE); 1158 1159 // Kill the hosting server, which doesn't have meta on it. 1160 cluster.killRegionServer(oldServerName); 1161 cluster.waitForRegionServerToStop(oldServerName, -1); 1162 1163 ServerManager serverManager = master.getServerManager(); 1164 while (!serverManager.isServerDead(oldServerName) 1165 || serverManager.getDeadServers().areDeadServersInProgress()) { 1166 Thread.sleep(100); 1167 } 1168 1169 // Wait till no more RIT, the region should be offline. 1170 am.waitUntilNoRegionsInTransition(60000); 1171 assertTrue(regionStates.isRegionOffline(hri)); 1172 } finally { 1173 MyRegionServer.abortedServer = null; 1174 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 1175 cluster.startRegionServer(); 1176 } 1177 } 1178 1179 /** 1180 * Test that region state transition call is idempotent 1181 */ 1182 @Test(timeout = 60000) testReportRegionStateTransition()1183 public void testReportRegionStateTransition() throws Exception { 1184 String table = "testReportRegionStateTransition"; 1185 try { 1186 MyRegionServer.simulateRetry = true; 1187 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table)); 1188 desc.addFamily(new HColumnDescriptor(FAMILY)); 1189 admin.createTable(desc); 1190 Table meta = new HTable(conf, TableName.META_TABLE_NAME); 1191 HRegionInfo hri = 1192 new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); 1193 MetaTableAccessor.addRegionToMeta(meta, hri); 1194 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 1195 master.assignRegion(hri); 1196 AssignmentManager am = master.getAssignmentManager(); 1197 am.waitForAssignment(hri); 1198 RegionStates regionStates = am.getRegionStates(); 1199 ServerName serverName = regionStates.getRegionServerOfRegion(hri); 1200 // Assert the the region is actually open on the server 1201 TEST_UTIL.assertRegionOnServer(hri, serverName, 6000); 1202 // Closing region should just work fine 1203 admin.disableTable(TableName.valueOf(table)); 1204 assertTrue(regionStates.isRegionOffline(hri)); 1205 List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName); 1206 assertTrue(!regions.contains(hri)); 1207 } finally { 1208 MyRegionServer.simulateRetry = false; 1209 TEST_UTIL.deleteTable(Bytes.toBytes(table)); 1210 } 1211 } 1212 1213 /** 1214 * Test concurrent updates to meta when meta is not on master 1215 * @throws Exception 1216 */ 1217 @Test(timeout = 30000) testUpdatesRemoteMeta()1218 public void testUpdatesRemoteMeta() throws Exception { 1219 // Not for zk less assignment 1220 if (conf.getBoolean("hbase.assignment.usezk", true)) { 1221 return; 1222 } 1223 conf.setInt("hbase.regionstatestore.meta.connection", 3); 1224 final RegionStateStore rss = 1225 new RegionStateStore(new MyRegionServer(conf, new ZkCoordinatedStateManager())); 1226 rss.start(); 1227 // Create 10 threads and make each do 10 puts related to region state update 1228 Thread[] th = new Thread[10]; 1229 List<String> nameList = new ArrayList<String>(); 1230 List<TableName> tableNameList = new ArrayList<TableName>(); 1231 for (int i = 0; i < th.length; i++) { 1232 th[i] = new Thread() { 1233 @Override 1234 public void run() { 1235 HRegionInfo[] hri = new HRegionInfo[10]; 1236 ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234); 1237 for (int i = 0; i < 10; i++) { 1238 hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i)); 1239 RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName); 1240 RegionState oldState = 1241 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName); 1242 rss.updateRegionState(1, newState, oldState); 1243 } 1244 } 1245 }; 1246 th[i].start(); 1247 nameList.add(th[i].getName()); 1248 } 1249 for (int i = 0; i < th.length; i++) { 1250 th[i].join(); 1251 } 1252 // Add all the expected table names in meta to tableNameList 1253 for (String name : nameList) { 1254 for (int i = 0; i < 10; i++) { 1255 tableNameList.add(TableName.valueOf(name + "_" + i)); 1256 } 1257 } 1258 List<Result> metaRows = MetaTableAccessor.fullScanOfMeta(admin.getConnection()); 1259 int count = 0; 1260 // Check all 100 rows are in meta 1261 for (Result result : metaRows) { 1262 if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) { 1263 count++; 1264 if (count == 100) { 1265 break; 1266 } 1267 } 1268 } 1269 assertTrue(count == 100); 1270 rss.stop(); 1271 } 1272 1273 static class MyLoadBalancer extends StochasticLoadBalancer { 1274 // For this region, if specified, always assign to nowhere 1275 static volatile String controledRegion = null; 1276 1277 static volatile Integer countRegionServers = null; 1278 static AtomicInteger counter = new AtomicInteger(0); 1279 1280 @Override randomAssignment(HRegionInfo regionInfo, List<ServerName> servers)1281 public ServerName randomAssignment(HRegionInfo regionInfo, 1282 List<ServerName> servers) { 1283 if (regionInfo.getEncodedName().equals(controledRegion)) { 1284 return null; 1285 } 1286 return super.randomAssignment(regionInfo, servers); 1287 } 1288 1289 @Override roundRobinAssignment( List<HRegionInfo> regions, List<ServerName> servers)1290 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment( 1291 List<HRegionInfo> regions, List<ServerName> servers) { 1292 if (countRegionServers != null && services != null) { 1293 int regionServers = services.getServerManager().countOfRegionServers(); 1294 if (regionServers < countRegionServers.intValue()) { 1295 // Let's wait till more region servers join in. 1296 // Before that, fail region assignments. 1297 counter.incrementAndGet(); 1298 return null; 1299 } 1300 } 1301 return super.roundRobinAssignment(regions, servers); 1302 } 1303 } 1304 1305 public static class MyMaster extends HMaster { 1306 AtomicBoolean enabled = new AtomicBoolean(true); 1307 MyMaster(Configuration conf, CoordinatedStateManager cp)1308 public MyMaster(Configuration conf, CoordinatedStateManager cp) 1309 throws IOException, KeeperException, 1310 InterruptedException { 1311 super(conf, cp); 1312 } 1313 1314 @Override isServerCrashProcessingEnabled()1315 public boolean isServerCrashProcessingEnabled() { 1316 return enabled.get() && super.isServerCrashProcessingEnabled(); 1317 } 1318 enableSSH(boolean enabled)1319 public void enableSSH(boolean enabled) { 1320 this.enabled.set(enabled); 1321 if (enabled) { 1322 serverManager.processQueuedDeadServers(); 1323 } 1324 } 1325 } 1326 1327 public static class MyRegionServer extends MiniHBaseClusterRegionServer { 1328 static volatile ServerName abortedServer = null; 1329 static volatile boolean simulateRetry = false; 1330 MyRegionServer(Configuration conf, CoordinatedStateManager cp)1331 public MyRegionServer(Configuration conf, CoordinatedStateManager cp) 1332 throws IOException, KeeperException, 1333 InterruptedException { 1334 super(conf, cp); 1335 } 1336 1337 @Override reportRegionStateTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris)1338 public boolean reportRegionStateTransition(TransitionCode code, long openSeqNum, 1339 HRegionInfo... hris) { 1340 if (simulateRetry) { 1341 // Simulate retry by calling the method twice 1342 super.reportRegionStateTransition(code, openSeqNum, hris); 1343 return super.reportRegionStateTransition(code, openSeqNum, hris); 1344 } 1345 return super.reportRegionStateTransition(code, openSeqNum, hris); 1346 } 1347 1348 @Override isAborted()1349 public boolean isAborted() { 1350 return getServerName().equals(abortedServer) || super.isAborted(); 1351 } 1352 } 1353 1354 public static class MyRegionObserver extends BaseRegionObserver { 1355 // If enabled, fail all preClose calls 1356 static AtomicBoolean preCloseEnabled = new AtomicBoolean(false); 1357 1358 // If enabled, stall postClose calls 1359 static AtomicBoolean postCloseEnabled = new AtomicBoolean(false); 1360 1361 // If enabled, stall postOpen calls 1362 static AtomicBoolean postOpenEnabled = new AtomicBoolean(false); 1363 1364 // A flag to track if postOpen is called 1365 static volatile boolean postOpenCalled = false; 1366 1367 @Override preClose(ObserverContext<RegionCoprocessorEnvironment> c, boolean abortRequested)1368 public void preClose(ObserverContext<RegionCoprocessorEnvironment> c, 1369 boolean abortRequested) throws IOException { 1370 if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor"); 1371 } 1372 1373 @Override postClose(ObserverContext<RegionCoprocessorEnvironment> c, boolean abortRequested)1374 public void postClose(ObserverContext<RegionCoprocessorEnvironment> c, 1375 boolean abortRequested) { 1376 stallOnFlag(postCloseEnabled); 1377 } 1378 1379 @Override postOpen(ObserverContext<RegionCoprocessorEnvironment> c)1380 public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) { 1381 postOpenCalled = true; 1382 stallOnFlag(postOpenEnabled); 1383 } 1384 stallOnFlag(final AtomicBoolean flag)1385 private void stallOnFlag(final AtomicBoolean flag) { 1386 try { 1387 // If enabled, stall 1388 while (flag.get()) { 1389 Thread.sleep(1000); 1390 } 1391 } catch (InterruptedException ie) { 1392 Thread.currentThread().interrupt(); 1393 } 1394 } 1395 } 1396 } 1397