1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 package org.apache.hadoop.hbase.master;
19 
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertFalse;
22 import static org.junit.Assert.assertNotEquals;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNull;
25 import static org.junit.Assert.assertTrue;
26 import static org.junit.Assert.fail;
27 
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Set;
33 import java.util.concurrent.atomic.AtomicBoolean;
34 import java.util.concurrent.atomic.AtomicInteger;
35 
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.FileSystem;
38 import org.apache.hadoop.fs.Path;
39 import org.apache.hadoop.hbase.CoordinatedStateManager;
40 import org.apache.hadoop.hbase.HBaseTestingUtility;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.HConstants;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.testclassification.MediumTests;
46 import org.apache.hadoop.hbase.MetaTableAccessor;
47 import org.apache.hadoop.hbase.MiniHBaseCluster;
48 import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
49 import org.apache.hadoop.hbase.ServerLoad;
50 import org.apache.hadoop.hbase.ServerName;
51 import org.apache.hadoop.hbase.TableName;
52 import org.apache.hadoop.hbase.UnknownRegionException;
53 import org.apache.hadoop.hbase.Waiter;
54 import org.apache.hadoop.hbase.client.Admin;
55 import org.apache.hadoop.hbase.client.HBaseAdmin;
56 import org.apache.hadoop.hbase.client.HTable;
57 import org.apache.hadoop.hbase.client.Result;
58 import org.apache.hadoop.hbase.client.Table;
59 import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
60 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
61 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
62 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
63 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
64 import org.apache.hadoop.hbase.coprocessor.RegionObserver;
65 import org.apache.hadoop.hbase.executor.EventType;
66 import org.apache.hadoop.hbase.master.RegionState.State;
67 import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
68 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
69 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
70 import org.apache.hadoop.hbase.regionserver.HRegionServer;
71 import org.apache.hadoop.hbase.util.Bytes;
72 import org.apache.hadoop.hbase.util.ConfigUtil;
73 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
74 import org.apache.hadoop.hbase.util.FSUtils;
75 import org.apache.hadoop.hbase.util.JVMClusterUtil;
76 import org.apache.hadoop.hbase.util.Threads;
77 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
78 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
79 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
80 import org.apache.zookeeper.KeeperException;
81 import org.junit.AfterClass;
82 import org.junit.BeforeClass;
83 import org.junit.Test;
84 import org.junit.experimental.categories.Category;
85 
86 
87 /**
88  * This tests AssignmentManager with a testing cluster.
89  */
90 @Category(MediumTests.class)
91 @SuppressWarnings("deprecation")
92 public class TestAssignmentManagerOnCluster {
93   private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
94   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
95   final static Configuration conf = TEST_UTIL.getConfiguration();
96   private static HBaseAdmin admin;
97 
setupOnce()98   static void setupOnce() throws Exception {
99     // Using the our load balancer to control region plans
100     conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
101       MyLoadBalancer.class, LoadBalancer.class);
102     conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
103       MyRegionObserver.class, RegionObserver.class);
104     // Reduce the maximum attempts to speed up the test
105     conf.setInt("hbase.assignment.maximum.attempts", 3);
106     // Put meta on master to avoid meta server shutdown handling
107     conf.set("hbase.balancer.tablesOnMaster", "hbase:meta");
108     conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
109     conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
110 
111     TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
112     admin = TEST_UTIL.getHBaseAdmin();
113   }
114 
115   @BeforeClass
setUpBeforeClass()116   public static void setUpBeforeClass() throws Exception {
117     // Use ZK for region assignment
118     conf.setBoolean("hbase.assignment.usezk", true);
119     setupOnce();
120   }
121 
122   @AfterClass
tearDownAfterClass()123   public static void tearDownAfterClass() throws Exception {
124     TEST_UTIL.shutdownMiniCluster();
125   }
126 
127   /**
128    * This tests restarting meta regionserver
129    */
130   @Test (timeout=180000)
testRestartMetaRegionServer()131   public void testRestartMetaRegionServer() throws Exception {
132     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
133     boolean stoppedARegionServer = false;
134     try {
135       HMaster master = cluster.getMaster();
136       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
137       ServerName metaServerName = regionStates.getRegionServerOfRegion(
138         HRegionInfo.FIRST_META_REGIONINFO);
139       if (master.getServerName().equals(metaServerName) || metaServerName == null
140           || !metaServerName.equals(cluster.getServerHoldingMeta())) {
141         // Move meta off master
142         metaServerName = cluster.getLiveRegionServerThreads()
143           .get(0).getRegionServer().getServerName();
144         master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
145           Bytes.toBytes(metaServerName.getServerName()));
146         master.assignmentManager.waitUntilNoRegionsInTransition(60000);
147       }
148       RegionState metaState =
149           MetaTableLocator.getMetaRegionState(master.getZooKeeper());
150         assertEquals("Meta should be not in transition",
151             metaState.getState(), RegionState.State.OPEN);
152       assertNotEquals("Meta should be moved off master",
153         metaServerName, master.getServerName());
154       cluster.killRegionServer(metaServerName);
155       stoppedARegionServer = true;
156       cluster.waitForRegionServerToStop(metaServerName, 60000);
157       // Wait for SSH to finish
158       final ServerManager serverManager = master.getServerManager();
159       TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
160         @Override
161         public boolean evaluate() throws Exception {
162           return !serverManager.areDeadServersInProgress();
163         }
164       });
165 
166       // Now, make sure meta is assigned
167       assertTrue("Meta should be assigned",
168         regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
169       // Now, make sure meta is registered in zk
170       metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
171       assertEquals("Meta should be not in transition",
172           metaState.getState(), RegionState.State.OPEN);
173       assertEquals("Meta should be assigned", metaState.getServerName(),
174         regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
175       assertNotEquals("Meta should be assigned on a different server",
176         metaState.getServerName(), metaServerName);
177     } finally {
178       if (stoppedARegionServer) {
179         cluster.startRegionServer();
180       }
181     }
182   }
183 
184   /**
185    * This tests region assignment
186    */
187   @Test (timeout=60000)
testAssignRegion()188   public void testAssignRegion() throws Exception {
189     String table = "testAssignRegion";
190     try {
191       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
192       desc.addFamily(new HColumnDescriptor(FAMILY));
193       admin.createTable(desc);
194 
195       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
196       HRegionInfo hri = new HRegionInfo(
197         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
198       MetaTableAccessor.addRegionToMeta(meta, hri);
199 
200       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
201       master.assignRegion(hri);
202       AssignmentManager am = master.getAssignmentManager();
203       am.waitForAssignment(hri);
204 
205       RegionStates regionStates = am.getRegionStates();
206       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
207       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
208 
209       // Region is assigned now. Let's assign it again.
210       // Master should not abort, and region should be assigned.
211       RegionState oldState = regionStates.getRegionState(hri);
212       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
213       master.getAssignmentManager().waitForAssignment(hri);
214       RegionState newState = regionStates.getRegionState(hri);
215       assertTrue(newState.isOpened()
216         && newState.getStamp() != oldState.getStamp());
217     } finally {
218       TEST_UTIL.deleteTable(Bytes.toBytes(table));
219     }
220   }
221 
222   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
223   @Test (timeout=60000)
testAssignRegionBySSH()224   public void testAssignRegionBySSH() throws Exception {
225     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
226       return;
227     }
228     String table = "testAssignRegionBySSH";
229     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
230     try {
231       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
232       desc.addFamily(new HColumnDescriptor(FAMILY));
233       admin.createTable(desc);
234 
235       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
236       HRegionInfo hri = new HRegionInfo(
237         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
238       MetaTableAccessor.addRegionToMeta(meta, hri);
239       // Add some dummy server for the region entry
240       MetaTableAccessor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getConnection(), hri,
241         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0, -1);
242       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
243       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
244       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
245       // Choose a server other than meta to kill
246       ServerName controlledServer = rs.getServerName();
247       master.enableSSH(false);
248       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
249       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
250       AssignmentManager am = master.getAssignmentManager();
251 
252       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
253       // but not in transition and the server is the dead 'controlledServer'
254       regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null);
255       am.assign(hri, true, true);
256       // Region should remain OFFLINE and go to transition
257       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
258       assertTrue (regionStates.isRegionInTransition(hri));
259 
260       master.enableSSH(true);
261       am.waitForAssignment(hri);
262       assertTrue (regionStates.getRegionState(hri).isOpened());
263       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
264       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
265     } finally {
266       if (master != null) {
267         master.enableSSH(true);
268       }
269       TEST_UTIL.deleteTable(Bytes.toBytes(table));
270       TEST_UTIL.getHBaseCluster().startRegionServer();
271     }
272   }
273 
274   /**
275    * This tests region assignment on a simulated restarted server
276    */
277   @Test (timeout=120000)
testAssignRegionOnRestartedServer()278   public void testAssignRegionOnRestartedServer() throws Exception {
279     String table = "testAssignRegionOnRestartedServer";
280     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
281     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
282     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
283 
284     ServerName deadServer = null;
285     HMaster master = null;
286     try {
287       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
288       desc.addFamily(new HColumnDescriptor(FAMILY));
289       admin.createTable(desc);
290 
291       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
292       final HRegionInfo hri = new HRegionInfo(
293         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
294       MetaTableAccessor.addRegionToMeta(meta, hri);
295 
296       master = TEST_UTIL.getHBaseCluster().getMaster();
297       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
298       assertFalse("There should be some servers online", onlineServers.isEmpty());
299 
300       // Use the first server as the destination server
301       ServerName destServer = onlineServers.iterator().next();
302 
303       // Created faked dead server
304       deadServer = ServerName.valueOf(destServer.getHostname(),
305           destServer.getPort(), destServer.getStartcode() - 100L);
306       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
307 
308       final AssignmentManager am = master.getAssignmentManager();
309       RegionPlan plan = new RegionPlan(hri, null, deadServer);
310       am.addPlan(hri.getEncodedName(), plan);
311       master.assignRegion(hri);
312 
313       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
314         destServer, EventType.M_ZK_REGION_OFFLINE,
315         EventType.RS_ZK_REGION_OPENING, 0);
316       assertEquals("TansitionNode should fail", -1, version);
317 
318       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
319         @Override
320         public boolean evaluate() throws Exception {
321           return ! am.getRegionStates().isRegionInTransition(hri);
322         }
323       });
324 
325     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
326     } finally {
327       if (deadServer != null) {
328         master.serverManager.expireServer(deadServer);
329       }
330 
331       TEST_UTIL.deleteTable(Bytes.toBytes(table));
332 
333       // reset the value for other tests
334       TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3);
335       ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
336       TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName);
337       TEST_UTIL.getMiniHBaseCluster().startMaster();
338       // Wait till master is active and is initialized
339       while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null ||
340           !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
341         Threads.sleep(1);
342       }
343     }
344   }
345 
346   /**
347    * This tests offlining a region
348    */
349   @Test (timeout=60000)
testOfflineRegion()350   public void testOfflineRegion() throws Exception {
351     TableName table =
352         TableName.valueOf("testOfflineRegion");
353     try {
354       HRegionInfo hri = createTableAndGetOneRegion(table);
355 
356       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
357         getMaster().getAssignmentManager().getRegionStates();
358       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
359       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
360       admin.offline(hri.getRegionName());
361 
362       long timeoutTime = System.currentTimeMillis() + 800;
363       while (true) {
364         if (regionStates.getRegionByStateOfTable(table)
365             .get(RegionState.State.OFFLINE).contains(hri))
366           break;
367         long now = System.currentTimeMillis();
368         if (now > timeoutTime) {
369           fail("Failed to offline the region in time");
370           break;
371         }
372         Thread.sleep(10);
373       }
374       RegionState regionState = regionStates.getRegionState(hri);
375       assertTrue(regionState.isOffline());
376     } finally {
377       TEST_UTIL.deleteTable(table);
378     }
379   }
380 
381   /**
382    * This tests moving a region
383    */
384   @Test (timeout=50000)
testMoveRegion()385   public void testMoveRegion() throws Exception {
386     TableName table =
387         TableName.valueOf("testMoveRegion");
388     try {
389       HRegionInfo hri = createTableAndGetOneRegion(table);
390 
391       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
392       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
393       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
394       ServerManager serverManager = master.getServerManager();
395       ServerName destServerName = null;
396       List<JVMClusterUtil.RegionServerThread> regionServers =
397         TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads();
398       for (JVMClusterUtil.RegionServerThread regionServer: regionServers) {
399         HRegionServer destServer = regionServer.getRegionServer();
400         destServerName = destServer.getServerName();
401         if (!destServerName.equals(serverName)
402             && serverManager.isServerOnline(destServerName)) {
403           break;
404         }
405       }
406       assertTrue(destServerName != null
407         && !destServerName.equals(serverName));
408       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
409         Bytes.toBytes(destServerName.getServerName()));
410 
411       long timeoutTime = System.currentTimeMillis() + 30000;
412       while (true) {
413         ServerName sn = regionStates.getRegionServerOfRegion(hri);
414         if (sn != null && sn.equals(destServerName)) {
415           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
416           break;
417         }
418         long now = System.currentTimeMillis();
419         if (now > timeoutTime) {
420           fail("Failed to move the region in time: "
421             + regionStates.getRegionState(hri));
422         }
423         regionStates.waitForUpdate(50);
424       }
425 
426     } finally {
427       TEST_UTIL.deleteTable(table);
428     }
429   }
430 
431   /**
432    * If a table is deleted, we should not be able to move it anymore.
433    * Otherwise, the region will be brought back.
434    * @throws Exception
435    */
436   @Test (timeout=50000)
testMoveRegionOfDeletedTable()437   public void testMoveRegionOfDeletedTable() throws Exception {
438     TableName table =
439         TableName.valueOf("testMoveRegionOfDeletedTable");
440     Admin admin = TEST_UTIL.getHBaseAdmin();
441     try {
442       HRegionInfo hri = createTableAndGetOneRegion(table);
443 
444       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
445       AssignmentManager am = master.getAssignmentManager();
446       RegionStates regionStates = am.getRegionStates();
447       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
448       ServerName destServerName = null;
449       for (int i = 0; i < 3; i++) {
450         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
451         if (!destServer.getServerName().equals(serverName)) {
452           destServerName = destServer.getServerName();
453           break;
454         }
455       }
456       assertTrue(destServerName != null
457         && !destServerName.equals(serverName));
458 
459       TEST_UTIL.deleteTable(table);
460 
461       try {
462         admin.move(hri.getEncodedNameAsBytes(),
463           Bytes.toBytes(destServerName.getServerName()));
464         fail("We should not find the region");
465       } catch (IOException ioe) {
466         assertTrue(ioe instanceof UnknownRegionException);
467       }
468 
469       am.balance(new RegionPlan(hri, serverName, destServerName));
470       assertFalse("The region should not be in transition",
471         regionStates.isRegionInTransition(hri));
472     } finally {
473       if (admin.tableExists(table)) {
474         TEST_UTIL.deleteTable(table);
475       }
476     }
477   }
478 
createTableAndGetOneRegion( final TableName tableName)479   HRegionInfo createTableAndGetOneRegion(
480       final TableName tableName) throws IOException, InterruptedException {
481     HTableDescriptor desc = new HTableDescriptor(tableName);
482     desc.addFamily(new HColumnDescriptor(FAMILY));
483     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
484 
485     // wait till the table is assigned
486     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
487     long timeoutTime = System.currentTimeMillis() + 1000;
488     while (true) {
489       List<HRegionInfo> regions = master.getAssignmentManager().
490         getRegionStates().getRegionsOfTable(tableName);
491       if (regions.size() > 3) {
492         return regions.get(2);
493       }
494       long now = System.currentTimeMillis();
495       if (now > timeoutTime) {
496         fail("Could not find an online region");
497       }
498       Thread.sleep(10);
499     }
500   }
501 
502   /**
503    * This test should not be flaky. If it is flaky, it means something
504    * wrong with AssignmentManager which should be reported and fixed
505    *
506    * This tests forcefully assign a region while it's closing and re-assigned.
507    */
508   @Test (timeout=60000)
testForceAssignWhileClosing()509   public void testForceAssignWhileClosing() throws Exception {
510     String table = "testForceAssignWhileClosing";
511     try {
512       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
513       desc.addFamily(new HColumnDescriptor(FAMILY));
514       admin.createTable(desc);
515 
516       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
517       HRegionInfo hri = new HRegionInfo(
518         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
519       MetaTableAccessor.addRegionToMeta(meta, hri);
520 
521       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
522       master.assignRegion(hri);
523       AssignmentManager am = master.getAssignmentManager();
524       assertTrue(am.waitForAssignment(hri));
525 
526       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
527       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
528       MyRegionObserver.preCloseEnabled.set(true);
529       am.unassign(hri);
530       RegionState state = am.getRegionStates().getRegionState(hri);
531       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
532 
533       MyRegionObserver.preCloseEnabled.set(false);
534       am.unassign(hri, true);
535 
536       // region is closing now, will be re-assigned automatically.
537       // now, let's forcefully assign it again. it should be
538       // assigned properly and no double-assignment
539       am.assign(hri, true, true);
540 
541       // let's check if it's assigned after it's out of transition
542       am.waitOnRegionToClearRegionsInTransition(hri);
543       assertTrue(am.waitForAssignment(hri));
544 
545       ServerName serverName = master.getAssignmentManager().
546         getRegionStates().getRegionServerOfRegion(hri);
547       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
548     } finally {
549       MyRegionObserver.preCloseEnabled.set(false);
550       TEST_UTIL.deleteTable(Bytes.toBytes(table));
551     }
552   }
553 
554   /**
555    * This tests region close failed
556    */
557   @Test (timeout=60000)
testCloseFailed()558   public void testCloseFailed() throws Exception {
559     String table = "testCloseFailed";
560     try {
561       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
562       desc.addFamily(new HColumnDescriptor(FAMILY));
563       admin.createTable(desc);
564 
565       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
566       HRegionInfo hri = new HRegionInfo(
567         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
568       MetaTableAccessor.addRegionToMeta(meta, hri);
569 
570       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
571       master.assignRegion(hri);
572       AssignmentManager am = master.getAssignmentManager();
573       assertTrue(am.waitForAssignment(hri));
574       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
575       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
576 
577       MyRegionObserver.preCloseEnabled.set(true);
578       am.unassign(hri);
579       RegionState state = am.getRegionStates().getRegionState(hri);
580       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
581 
582       MyRegionObserver.preCloseEnabled.set(false);
583       am.unassign(hri, true);
584 
585       // region may still be assigned now since it's closing,
586       // let's check if it's assigned after it's out of transition
587       am.waitOnRegionToClearRegionsInTransition(hri);
588 
589       // region should be closed and re-assigned
590       assertTrue(am.waitForAssignment(hri));
591       ServerName serverName = master.getAssignmentManager().
592         getRegionStates().getRegionServerOfRegion(hri);
593       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
594     } finally {
595       MyRegionObserver.preCloseEnabled.set(false);
596       TEST_UTIL.deleteTable(Bytes.toBytes(table));
597     }
598   }
599 
600   /**
601    * This tests region open failed
602    */
603   @Test (timeout=60000)
testOpenFailed()604   public void testOpenFailed() throws Exception {
605     String table = "testOpenFailed";
606     try {
607       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
608       desc.addFamily(new HColumnDescriptor(FAMILY));
609       admin.createTable(desc);
610 
611       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
612       HRegionInfo hri = new HRegionInfo(
613         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
614       MetaTableAccessor.addRegionToMeta(meta, hri);
615 
616       MyLoadBalancer.controledRegion = hri.getEncodedName();
617 
618       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
619       master.assignRegion(hri);
620       AssignmentManager am = master.getAssignmentManager();
621       assertFalse(am.waitForAssignment(hri));
622 
623       RegionState state = am.getRegionStates().getRegionState(hri);
624       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
625       // Failed to open since no plan, so it's on no server
626       assertNull(state.getServerName());
627 
628       MyLoadBalancer.controledRegion = null;
629       master.assignRegion(hri);
630       assertTrue(am.waitForAssignment(hri));
631 
632       ServerName serverName = master.getAssignmentManager().
633         getRegionStates().getRegionServerOfRegion(hri);
634       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
635     } finally {
636       MyLoadBalancer.controledRegion = null;
637       TEST_UTIL.deleteTable(Bytes.toBytes(table));
638     }
639   }
640 
641   /**
642    * This tests region open failure which is not recoverable
643    */
644   @Test (timeout=60000)
testOpenFailedUnrecoverable()645   public void testOpenFailedUnrecoverable() throws Exception {
646     TableName table =
647         TableName.valueOf("testOpenFailedUnrecoverable");
648     try {
649       HTableDescriptor desc = new HTableDescriptor(table);
650       desc.addFamily(new HColumnDescriptor(FAMILY));
651       admin.createTable(desc);
652 
653       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
654       HRegionInfo hri = new HRegionInfo(
655         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
656       MetaTableAccessor.addRegionToMeta(meta, hri);
657 
658       FileSystem fs = FileSystem.get(conf);
659       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
660       Path regionDir = new Path(tableDir, hri.getEncodedName());
661       // create a file named the same as the region dir to
662       // mess up with region opening
663       fs.create(regionDir, true);
664 
665       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
666       master.assignRegion(hri);
667       AssignmentManager am = master.getAssignmentManager();
668       assertFalse(am.waitForAssignment(hri));
669 
670       RegionState state = am.getRegionStates().getRegionState(hri);
671       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
672       // Failed to open due to file system issue. Region state should
673       // carry the opening region server so that we can force close it
674       // later on before opening it again. See HBASE-9092.
675       assertNotNull(state.getServerName());
676 
677       // remove the blocking file, so that region can be opened
678       fs.delete(regionDir, true);
679       master.assignRegion(hri);
680       assertTrue(am.waitForAssignment(hri));
681 
682       ServerName serverName = master.getAssignmentManager().
683         getRegionStates().getRegionServerOfRegion(hri);
684       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
685     } finally {
686       TEST_UTIL.deleteTable(table);
687     }
688   }
689 
690   @Test (timeout=60000)
testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState()691   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
692     final TableName table =
693         TableName.valueOf
694             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
695     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
696     HRegionInfo hri = null;
697     ServerName serverName = null;
698     try {
699       hri = createTableAndGetOneRegion(table);
700       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
701       ServerName destServerName = null;
702       HRegionServer destServer = null;
703       for (int i = 0; i < 3; i++) {
704         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
705         if (!destServer.getServerName().equals(serverName)) {
706           destServerName = destServer.getServerName();
707           break;
708         }
709       }
710       am.regionOffline(hri);
711       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
712       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
713       if (ConfigUtil.useZKForAssignment(conf)) {
714         ZKAssign.createNodeOffline(zkw, hri, destServerName);
715         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
716 
717         // Wait till the event is processed and the region is in transition
718         long timeoutTime = System.currentTimeMillis() + 20000;
719         while (!am.getRegionStates().isRegionInTransition(hri)) {
720           assertTrue("Failed to process ZK opening event in time",
721             System.currentTimeMillis() < timeoutTime);
722           Thread.sleep(100);
723         }
724       }
725 
726       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLING);
727       List<HRegionInfo> toAssignRegions = am.cleanOutCrashedServerReferences(destServerName);
728       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
729       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
730           .getRegionState(hri).isOffline());
731     } finally {
732       if (hri != null && serverName != null) {
733         am.regionOnline(hri, serverName);
734       }
735       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLED);
736       TEST_UTIL.deleteTable(table);
737     }
738   }
739 
740   /**
741    * This tests region close hanging
742    */
743   @Test (timeout=60000)
testCloseHang()744   public void testCloseHang() throws Exception {
745     String table = "testCloseHang";
746     try {
747       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
748       desc.addFamily(new HColumnDescriptor(FAMILY));
749       admin.createTable(desc);
750 
751       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
752       HRegionInfo hri = new HRegionInfo(
753         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
754       MetaTableAccessor.addRegionToMeta(meta, hri);
755 
756       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
757       master.assignRegion(hri);
758       AssignmentManager am = master.getAssignmentManager();
759       assertTrue(am.waitForAssignment(hri));
760       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
761       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
762 
763       MyRegionObserver.postCloseEnabled.set(true);
764       am.unassign(hri);
765       // Now region should pending_close or closing
766       // Unassign it again forcefully so that we can trigger already
767       // in transition exception. This test is to make sure this scenario
768       // is handled properly.
769       am.server.getConfiguration().setLong(
770         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
771       am.unassign(hri, true);
772       RegionState state = am.getRegionStates().getRegionState(hri);
773       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
774 
775       // Let region closing move ahead. The region should be closed
776       // properly and re-assigned automatically
777       MyRegionObserver.postCloseEnabled.set(false);
778 
779       // region may still be assigned now since it's closing,
780       // let's check if it's assigned after it's out of transition
781       am.waitOnRegionToClearRegionsInTransition(hri);
782 
783       // region should be closed and re-assigned
784       assertTrue(am.waitForAssignment(hri));
785       ServerName serverName = master.getAssignmentManager().
786         getRegionStates().getRegionServerOfRegion(hri);
787       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
788     } finally {
789       MyRegionObserver.postCloseEnabled.set(false);
790       TEST_UTIL.deleteTable(Bytes.toBytes(table));
791     }
792   }
793 
794   /**
795    * This tests region close racing with open
796    */
797   @Test (timeout=60000)
testOpenCloseRacing()798   public void testOpenCloseRacing() throws Exception {
799     String table = "testOpenCloseRacing";
800     try {
801       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
802       desc.addFamily(new HColumnDescriptor(FAMILY));
803       admin.createTable(desc);
804 
805       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
806       HRegionInfo hri = new HRegionInfo(
807         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
808       MetaTableAccessor.addRegionToMeta(meta, hri);
809       meta.close();
810 
811       MyRegionObserver.postOpenEnabled.set(true);
812       MyRegionObserver.postOpenCalled = false;
813       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
814       // Region will be opened, but it won't complete
815       master.assignRegion(hri);
816       long end = EnvironmentEdgeManager.currentTime() + 20000;
817       // Wait till postOpen is called
818       while (!MyRegionObserver.postOpenCalled ) {
819         assertFalse("Timed out waiting for postOpen to be called",
820           EnvironmentEdgeManager.currentTime() > end);
821         Thread.sleep(300);
822       }
823 
824       AssignmentManager am = master.getAssignmentManager();
825       // Now let's unassign it, it should do nothing
826       am.unassign(hri);
827       RegionState state = am.getRegionStates().getRegionState(hri);
828       ServerName oldServerName = state.getServerName();
829       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
830 
831       // Now the region is stuck in opening
832       // Let's forcefully re-assign it to trigger closing/opening
833       // racing. This test is to make sure this scenario
834       // is handled properly.
835       ServerName destServerName = null;
836       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
837       for (int i = 0; i < numRS; i++) {
838         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
839         if (!destServer.getServerName().equals(oldServerName)) {
840           destServerName = destServer.getServerName();
841           break;
842         }
843       }
844       assertNotNull(destServerName);
845       assertFalse("Region should be assigned on a new region server",
846         oldServerName.equals(destServerName));
847       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
848       regions.add(hri);
849       am.assign(destServerName, regions);
850 
851       // let region open continue
852       MyRegionObserver.postOpenEnabled.set(false);
853 
854       // let's check if it's assigned after it's out of transition
855       am.waitOnRegionToClearRegionsInTransition(hri);
856       assertTrue(am.waitForAssignment(hri));
857 
858       ServerName serverName = master.getAssignmentManager().
859         getRegionStates().getRegionServerOfRegion(hri);
860       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
861     } finally {
862       MyRegionObserver.postOpenEnabled.set(false);
863       TEST_UTIL.deleteTable(Bytes.toBytes(table));
864     }
865   }
866 
867   /**
868    * Test force unassign/assign a region hosted on a dead server
869    */
870   @Test (timeout=60000)
testAssignRacingWithSSH()871   public void testAssignRacingWithSSH() throws Exception {
872     String table = "testAssignRacingWithSSH";
873     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
874     MyMaster master = null;
875     try {
876       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
877       desc.addFamily(new HColumnDescriptor(FAMILY));
878       admin.createTable(desc);
879 
880       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
881       HRegionInfo hri = new HRegionInfo(
882         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
883       MetaTableAccessor.addRegionToMeta(meta, hri);
884 
885       // Assign the region
886       master = (MyMaster)cluster.getMaster();
887       master.assignRegion(hri);
888 
889       // Hold SSH before killing the hosting server
890       master.enableSSH(false);
891 
892       AssignmentManager am = master.getAssignmentManager();
893       RegionStates regionStates = am.getRegionStates();
894       ServerName metaServer = regionStates.getRegionServerOfRegion(
895         HRegionInfo.FIRST_META_REGIONINFO);
896       while (true) {
897         assertTrue(am.waitForAssignment(hri));
898         RegionState state = regionStates.getRegionState(hri);
899         ServerName oldServerName = state.getServerName();
900         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
901           // Kill the hosting server, which doesn't have meta on it.
902           cluster.killRegionServer(oldServerName);
903           cluster.waitForRegionServerToStop(oldServerName, -1);
904           break;
905         }
906         int i = cluster.getServerWithMeta();
907         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
908         oldServerName = rs.getServerName();
909         master.move(hri.getEncodedNameAsBytes(),
910           Bytes.toBytes(oldServerName.getServerName()));
911       }
912 
913       // You can't assign a dead region before SSH
914       am.assign(hri, true, true);
915       RegionState state = regionStates.getRegionState(hri);
916       assertTrue(state.isFailedClose());
917 
918       // You can't unassign a dead region before SSH either
919       am.unassign(hri, true);
920       assertTrue(state.isFailedClose());
921 
922       // Enable SSH so that log can be split
923       master.enableSSH(true);
924 
925       // let's check if it's assigned after it's out of transition.
926       // no need to assign it manually, SSH should do it
927       am.waitOnRegionToClearRegionsInTransition(hri);
928       assertTrue(am.waitForAssignment(hri));
929 
930       ServerName serverName = master.getAssignmentManager().
931         getRegionStates().getRegionServerOfRegion(hri);
932       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
933     } finally {
934       if (master != null) {
935         master.enableSSH(true);
936       }
937       TEST_UTIL.deleteTable(Bytes.toBytes(table));
938       cluster.startRegionServer();
939     }
940   }
941 
942   /**
943    * Test force unassign/assign a region of a disabled table
944    */
945   @Test (timeout=60000)
testAssignDisabledRegion()946   public void testAssignDisabledRegion() throws Exception {
947     TableName table = TableName.valueOf("testAssignDisabledRegion");
948     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
949     MyMaster master = null;
950     try {
951       HTableDescriptor desc = new HTableDescriptor(table);
952       desc.addFamily(new HColumnDescriptor(FAMILY));
953       admin.createTable(desc);
954 
955       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
956       HRegionInfo hri = new HRegionInfo(
957         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
958       MetaTableAccessor.addRegionToMeta(meta, hri);
959 
960       // Assign the region
961       master = (MyMaster)cluster.getMaster();
962       master.assignRegion(hri);
963       AssignmentManager am = master.getAssignmentManager();
964       RegionStates regionStates = am.getRegionStates();
965       assertTrue(am.waitForAssignment(hri));
966 
967       // Disable the table
968       admin.disableTable(table);
969       assertTrue(regionStates.isRegionOffline(hri));
970 
971       // You can't assign a disabled region
972       am.assign(hri, true, true);
973       assertTrue(regionStates.isRegionOffline(hri));
974 
975       // You can't unassign a disabled region either
976       am.unassign(hri, true);
977       assertTrue(regionStates.isRegionOffline(hri));
978     } finally {
979       TEST_UTIL.deleteTable(table);
980     }
981   }
982 
983   /**
984    * Test offlined region is assigned by SSH
985    */
986   @Test (timeout=60000)
testAssignOfflinedRegionBySSH()987   public void testAssignOfflinedRegionBySSH() throws Exception {
988     String table = "testAssignOfflinedRegionBySSH";
989     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
990     MyMaster master = null;
991     try {
992       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
993       desc.addFamily(new HColumnDescriptor(FAMILY));
994       admin.createTable(desc);
995 
996       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
997       HRegionInfo hri = new HRegionInfo(
998         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
999       MetaTableAccessor.addRegionToMeta(meta, hri);
1000 
1001       // Assign the region
1002       master = (MyMaster)cluster.getMaster();
1003       master.assignRegion(hri);
1004 
1005       AssignmentManager am = master.getAssignmentManager();
1006       RegionStates regionStates = am.getRegionStates();
1007       ServerName metaServer = regionStates.getRegionServerOfRegion(
1008         HRegionInfo.FIRST_META_REGIONINFO);
1009       ServerName oldServerName = null;
1010       while (true) {
1011         assertTrue(am.waitForAssignment(hri));
1012         RegionState state = regionStates.getRegionState(hri);
1013         oldServerName = state.getServerName();
1014         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1015           // Mark the hosting server aborted, but don't actually kill it.
1016           // It doesn't have meta on it.
1017           MyRegionServer.abortedServer = oldServerName;
1018           break;
1019         }
1020         int i = cluster.getServerWithMeta();
1021         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1022         oldServerName = rs.getServerName();
1023         master.move(hri.getEncodedNameAsBytes(),
1024           Bytes.toBytes(oldServerName.getServerName()));
1025       }
1026 
1027       // Make sure the region is assigned on the dead server
1028       assertTrue(regionStates.isRegionOnline(hri));
1029       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1030 
1031       // Kill the hosting server, which doesn't have meta on it.
1032       cluster.killRegionServer(oldServerName);
1033       cluster.waitForRegionServerToStop(oldServerName, -1);
1034 
1035       ServerManager serverManager = master.getServerManager();
1036       while (!serverManager.isServerDead(oldServerName)
1037           || serverManager.getDeadServers().areDeadServersInProgress()) {
1038         Thread.sleep(100);
1039       }
1040 
1041       // Let's check if it's assigned after it's out of transition.
1042       // no need to assign it manually, SSH should do it
1043       am.waitOnRegionToClearRegionsInTransition(hri);
1044       assertTrue(am.waitForAssignment(hri));
1045 
1046       ServerName serverName = master.getAssignmentManager().
1047         getRegionStates().getRegionServerOfRegion(hri);
1048       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
1049     } finally {
1050       MyRegionServer.abortedServer = null;
1051       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1052       cluster.startRegionServer();
1053     }
1054   }
1055 
1056   /**
1057    * Test SSH waiting for extra region server for assignment
1058    */
1059   @Test (timeout=300000)
testSSHWaitForServerToAssignRegion()1060   public void testSSHWaitForServerToAssignRegion() throws Exception {
1061     TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion");
1062     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1063     boolean startAServer = false;
1064     try {
1065       HTableDescriptor desc = new HTableDescriptor(table);
1066       desc.addFamily(new HColumnDescriptor(FAMILY));
1067       admin.createTable(desc);
1068 
1069       HMaster master = cluster.getMaster();
1070       final ServerManager serverManager = master.getServerManager();
1071       MyLoadBalancer.countRegionServers = Integer.valueOf(
1072         serverManager.countOfRegionServers());
1073       HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1074       assertNotNull("First region should be assigned", rs);
1075       final ServerName serverName = rs.getServerName();
1076       // Wait till SSH tried to assign regions a several times
1077       int counter = MyLoadBalancer.counter.get() + 5;
1078       cluster.killRegionServer(serverName);
1079       startAServer = true;
1080       cluster.waitForRegionServerToStop(serverName, -1);
1081       while (counter > MyLoadBalancer.counter.get()) {
1082         Thread.sleep(1000);
1083       }
1084       cluster.startRegionServer();
1085       startAServer = false;
1086       // Wait till the dead server is processed by SSH
1087       TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() {
1088         @Override
1089         public boolean evaluate() throws Exception {
1090           return serverManager.isServerDead(serverName)
1091             && !serverManager.areDeadServersInProgress();
1092         }
1093       });
1094       TEST_UTIL.waitUntilAllRegionsAssigned(table, 300000);
1095 
1096       rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1097       assertTrue("First region should be re-assigned to a different server",
1098         rs != null && !serverName.equals(rs.getServerName()));
1099     } finally {
1100       MyLoadBalancer.countRegionServers = null;
1101       TEST_UTIL.deleteTable(table);
1102       if (startAServer) {
1103         cluster.startRegionServer();
1104       }
1105     }
1106   }
1107 
1108   /**
1109    * Test disabled region is ignored by SSH
1110    */
1111   @Test (timeout=60000)
testAssignDisabledRegionBySSH()1112   public void testAssignDisabledRegionBySSH() throws Exception {
1113     String table = "testAssignDisabledRegionBySSH";
1114     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1115     MyMaster master = null;
1116     try {
1117       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1118       desc.addFamily(new HColumnDescriptor(FAMILY));
1119       admin.createTable(desc);
1120 
1121       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1122       HRegionInfo hri = new HRegionInfo(
1123         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1124       MetaTableAccessor.addRegionToMeta(meta, hri);
1125 
1126       // Assign the region
1127       master = (MyMaster)cluster.getMaster();
1128       master.assignRegion(hri);
1129 
1130       AssignmentManager am = master.getAssignmentManager();
1131       RegionStates regionStates = am.getRegionStates();
1132       ServerName metaServer = regionStates.getRegionServerOfRegion(
1133         HRegionInfo.FIRST_META_REGIONINFO);
1134       ServerName oldServerName = null;
1135       while (true) {
1136         assertTrue(am.waitForAssignment(hri));
1137         RegionState state = regionStates.getRegionState(hri);
1138         oldServerName = state.getServerName();
1139         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1140           // Mark the hosting server aborted, but don't actually kill it.
1141           // It doesn't have meta on it.
1142           MyRegionServer.abortedServer = oldServerName;
1143           break;
1144         }
1145         int i = cluster.getServerWithMeta();
1146         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1147         oldServerName = rs.getServerName();
1148         master.move(hri.getEncodedNameAsBytes(),
1149           Bytes.toBytes(oldServerName.getServerName()));
1150       }
1151 
1152       // Make sure the region is assigned on the dead server
1153       assertTrue(regionStates.isRegionOnline(hri));
1154       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1155 
1156       // Disable the table now.
1157       master.disableTable(hri.getTable(), HConstants.NO_NONCE, HConstants.NO_NONCE);
1158 
1159       // Kill the hosting server, which doesn't have meta on it.
1160       cluster.killRegionServer(oldServerName);
1161       cluster.waitForRegionServerToStop(oldServerName, -1);
1162 
1163       ServerManager serverManager = master.getServerManager();
1164       while (!serverManager.isServerDead(oldServerName)
1165           || serverManager.getDeadServers().areDeadServersInProgress()) {
1166         Thread.sleep(100);
1167       }
1168 
1169       // Wait till no more RIT, the region should be offline.
1170       am.waitUntilNoRegionsInTransition(60000);
1171       assertTrue(regionStates.isRegionOffline(hri));
1172     } finally {
1173       MyRegionServer.abortedServer = null;
1174       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1175       cluster.startRegionServer();
1176     }
1177   }
1178 
1179   /**
1180    * Test that region state transition call is idempotent
1181    */
1182   @Test(timeout = 60000)
testReportRegionStateTransition()1183   public void testReportRegionStateTransition() throws Exception {
1184     String table = "testReportRegionStateTransition";
1185     try {
1186       MyRegionServer.simulateRetry = true;
1187       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1188       desc.addFamily(new HColumnDescriptor(FAMILY));
1189       admin.createTable(desc);
1190       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1191       HRegionInfo hri =
1192           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1193       MetaTableAccessor.addRegionToMeta(meta, hri);
1194       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1195       master.assignRegion(hri);
1196       AssignmentManager am = master.getAssignmentManager();
1197       am.waitForAssignment(hri);
1198       RegionStates regionStates = am.getRegionStates();
1199       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1200       // Assert the the region is actually open on the server
1201       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
1202       // Closing region should just work fine
1203       admin.disableTable(TableName.valueOf(table));
1204       assertTrue(regionStates.isRegionOffline(hri));
1205       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
1206       assertTrue(!regions.contains(hri));
1207     } finally {
1208       MyRegionServer.simulateRetry = false;
1209       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1210     }
1211   }
1212 
1213   /**
1214    * Test concurrent updates to meta when meta is not on master
1215    * @throws Exception
1216    */
1217   @Test(timeout = 30000)
testUpdatesRemoteMeta()1218   public void testUpdatesRemoteMeta() throws Exception {
1219     // Not for zk less assignment
1220     if (conf.getBoolean("hbase.assignment.usezk", true)) {
1221       return;
1222     }
1223     conf.setInt("hbase.regionstatestore.meta.connection", 3);
1224     final RegionStateStore rss =
1225         new RegionStateStore(new MyRegionServer(conf, new ZkCoordinatedStateManager()));
1226     rss.start();
1227     // Create 10 threads and make each do 10 puts related to region state update
1228     Thread[] th = new Thread[10];
1229     List<String> nameList = new ArrayList<String>();
1230     List<TableName> tableNameList = new ArrayList<TableName>();
1231     for (int i = 0; i < th.length; i++) {
1232       th[i] = new Thread() {
1233         @Override
1234         public void run() {
1235           HRegionInfo[] hri = new HRegionInfo[10];
1236           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
1237           for (int i = 0; i < 10; i++) {
1238             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
1239             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
1240             RegionState oldState =
1241                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
1242             rss.updateRegionState(1, newState, oldState);
1243           }
1244         }
1245       };
1246       th[i].start();
1247       nameList.add(th[i].getName());
1248     }
1249     for (int i = 0; i < th.length; i++) {
1250       th[i].join();
1251     }
1252     // Add all the expected table names in meta to tableNameList
1253     for (String name : nameList) {
1254       for (int i = 0; i < 10; i++) {
1255         tableNameList.add(TableName.valueOf(name + "_" + i));
1256       }
1257     }
1258     List<Result> metaRows = MetaTableAccessor.fullScanOfMeta(admin.getConnection());
1259     int count = 0;
1260     // Check all 100 rows are in meta
1261     for (Result result : metaRows) {
1262       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
1263         count++;
1264         if (count == 100) {
1265           break;
1266         }
1267       }
1268     }
1269     assertTrue(count == 100);
1270     rss.stop();
1271   }
1272 
1273   static class MyLoadBalancer extends StochasticLoadBalancer {
1274     // For this region, if specified, always assign to nowhere
1275     static volatile String controledRegion = null;
1276 
1277     static volatile Integer countRegionServers = null;
1278     static AtomicInteger counter = new AtomicInteger(0);
1279 
1280     @Override
randomAssignment(HRegionInfo regionInfo, List<ServerName> servers)1281     public ServerName randomAssignment(HRegionInfo regionInfo,
1282         List<ServerName> servers) {
1283       if (regionInfo.getEncodedName().equals(controledRegion)) {
1284         return null;
1285       }
1286       return super.randomAssignment(regionInfo, servers);
1287     }
1288 
1289     @Override
roundRobinAssignment( List<HRegionInfo> regions, List<ServerName> servers)1290     public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
1291         List<HRegionInfo> regions, List<ServerName> servers) {
1292       if (countRegionServers != null && services != null) {
1293         int regionServers = services.getServerManager().countOfRegionServers();
1294         if (regionServers < countRegionServers.intValue()) {
1295           // Let's wait till more region servers join in.
1296           // Before that, fail region assignments.
1297           counter.incrementAndGet();
1298           return null;
1299         }
1300       }
1301       return super.roundRobinAssignment(regions, servers);
1302     }
1303   }
1304 
1305   public static class MyMaster extends HMaster {
1306     AtomicBoolean enabled = new AtomicBoolean(true);
1307 
MyMaster(Configuration conf, CoordinatedStateManager cp)1308     public MyMaster(Configuration conf, CoordinatedStateManager cp)
1309       throws IOException, KeeperException,
1310         InterruptedException {
1311       super(conf, cp);
1312     }
1313 
1314     @Override
isServerCrashProcessingEnabled()1315     public boolean isServerCrashProcessingEnabled() {
1316       return enabled.get() && super.isServerCrashProcessingEnabled();
1317     }
1318 
enableSSH(boolean enabled)1319     public void enableSSH(boolean enabled) {
1320       this.enabled.set(enabled);
1321       if (enabled) {
1322         serverManager.processQueuedDeadServers();
1323       }
1324     }
1325   }
1326 
1327   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1328     static volatile ServerName abortedServer = null;
1329     static volatile boolean simulateRetry = false;
1330 
MyRegionServer(Configuration conf, CoordinatedStateManager cp)1331     public MyRegionServer(Configuration conf, CoordinatedStateManager cp)
1332       throws IOException, KeeperException,
1333         InterruptedException {
1334       super(conf, cp);
1335     }
1336 
1337     @Override
reportRegionStateTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris)1338     public boolean reportRegionStateTransition(TransitionCode code, long openSeqNum,
1339         HRegionInfo... hris) {
1340       if (simulateRetry) {
1341         // Simulate retry by calling the method twice
1342         super.reportRegionStateTransition(code, openSeqNum, hris);
1343         return super.reportRegionStateTransition(code, openSeqNum, hris);
1344       }
1345       return super.reportRegionStateTransition(code, openSeqNum, hris);
1346     }
1347 
1348     @Override
isAborted()1349     public boolean isAborted() {
1350       return getServerName().equals(abortedServer) || super.isAborted();
1351     }
1352   }
1353 
1354   public static class MyRegionObserver extends BaseRegionObserver {
1355     // If enabled, fail all preClose calls
1356     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1357 
1358     // If enabled, stall postClose calls
1359     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1360 
1361     // If enabled, stall postOpen calls
1362     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1363 
1364     // A flag to track if postOpen is called
1365     static volatile boolean postOpenCalled = false;
1366 
1367     @Override
preClose(ObserverContext<RegionCoprocessorEnvironment> c, boolean abortRequested)1368     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1369         boolean abortRequested) throws IOException {
1370       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1371     }
1372 
1373     @Override
postClose(ObserverContext<RegionCoprocessorEnvironment> c, boolean abortRequested)1374     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1375         boolean abortRequested) {
1376       stallOnFlag(postCloseEnabled);
1377     }
1378 
1379     @Override
postOpen(ObserverContext<RegionCoprocessorEnvironment> c)1380     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1381       postOpenCalled = true;
1382       stallOnFlag(postOpenEnabled);
1383     }
1384 
stallOnFlag(final AtomicBoolean flag)1385     private void stallOnFlag(final AtomicBoolean flag) {
1386       try {
1387         // If enabled, stall
1388         while (flag.get()) {
1389           Thread.sleep(1000);
1390         }
1391       } catch (InterruptedException ie) {
1392         Thread.currentThread().interrupt();
1393       }
1394     }
1395   }
1396 }
1397