1 /**
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.apache.hadoop.hbase.master;
20 
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25 
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32 
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.MetaTableAccessor;
48 import org.apache.hadoop.hbase.MiniHBaseCluster;
49 import org.apache.hadoop.hbase.RegionTransition;
50 import org.apache.hadoop.hbase.ServerName;
51 import org.apache.hadoop.hbase.TableName;
52 import org.apache.hadoop.hbase.TableStateManager;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.Table;
55 import org.apache.hadoop.hbase.executor.EventType;
56 import org.apache.hadoop.hbase.master.RegionState.State;
57 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58 import org.apache.hadoop.hbase.protobuf.RequestConverter;
59 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60 import org.apache.hadoop.hbase.regionserver.HRegion;
61 import org.apache.hadoop.hbase.regionserver.HRegionServer;
62 import org.apache.hadoop.hbase.regionserver.Region;
63 import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
64 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
65 import org.apache.hadoop.hbase.util.Bytes;
66 import org.apache.hadoop.hbase.util.FSTableDescriptors;
67 import org.apache.hadoop.hbase.util.FSUtils;
68 import org.apache.hadoop.hbase.util.JVMClusterUtil;
69 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
70 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
71 import org.apache.hadoop.hbase.util.Threads;
72 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
73 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
74 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
75 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
76 import org.apache.zookeeper.data.Stat;
77 import org.junit.Ignore;
78 import org.junit.Test;
79 import org.junit.experimental.categories.Category;
80 
81 @Category(LargeTests.class)
82 public class TestMasterFailover {
83   private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
84 
85   /**
   * Complex test of master failover that tests as many permutations as possible of the
   * different states that regions in transition can be in within ZK.
88    * <p>
89    * This tests the proper handling of these states by the failed-over master
90    * and includes a thorough testing of the timeout code as well.
91    * <p>
92    * Starts with a single master and three regionservers.
93    * <p>
   * Creates two tables, enabledTable and disabledTable, each containing 11
   * regions.  The disabledTable is then disabled.
96    * <p>
97    * After reaching steady-state, the master is killed.  We then mock several
98    * states in ZK.
99    * <p>
100    * After mocking them, we will startup a new master which should become the
101    * active master and also detect that it is a failover.  The primary test
102    * passing condition will be that all regions of the enabled table are
103    * assigned and all the regions of the disabled table are not assigned.
104    * <p>
105    * The different scenarios to be tested are below:
106    * <p>
107    * <b>ZK State:  OFFLINE</b>
108    * <p>A node can get into OFFLINE state if</p>
109    * <ul>
110    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
111    * <li>The Master is assigning the region to a RS before it sends RPC
112    * </ul>
113    * <p>We will mock the scenarios</p>
114    * <ul>
115    * <li>Master has assigned an enabled region but RS failed so a region is
116    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
   * <li>This single mocked OFFLINE node covers both of the causes above</li>
118    * </ul>
119    * <p>
120    * <b>ZK State:  CLOSING</b>
121    * <p>A node can get into CLOSING state if</p>
122    * <ul>
123    * <li>An RS has begun to close a region
124    * </ul>
125    * <p>We will mock the scenarios</p>
126    * <ul>
127    * <li>Region of enabled table was being closed but did not complete
128    * <li>Region of disabled table was being closed but did not complete
129    * </ul>
130    * <p>
131    * <b>ZK State:  CLOSED</b>
132    * <p>A node can get into CLOSED state if</p>
133    * <ul>
134    * <li>An RS has completed closing a region but not acknowledged by master yet
135    * </ul>
136    * <p>We will mock the scenarios</p>
137    * <ul>
138    * <li>Region of a table that should be enabled was closed on an RS
139    * <li>Region of a table that should be disabled was closed on an RS
140    * </ul>
141    * <p>
142    * <b>ZK State:  OPENING</b>
143    * <p>A node can get into OPENING state if</p>
144    * <ul>
145    * <li>An RS has begun to open a region
146    * </ul>
147    * <p>We will mock the scenarios</p>
148    * <ul>
149    * <li>RS was opening a region of enabled table but never finishes
150    * </ul>
151    * <p>
152    * <b>ZK State:  OPENED</b>
153    * <p>A node can get into OPENED state if</p>
154    * <ul>
155    * <li>An RS has finished opening a region but not acknowledged by master yet
156    * </ul>
157    * <p>We will mock the scenarios</p>
158    * <ul>
159    * <li>Region of a table that should be enabled was opened on an RS
160    * <li>Region of a table that should be disabled was opened on an RS
161    * </ul>
162    * @throws Exception
163    */
164   @Test (timeout=240000)
  public void testMasterFailoverWithMockedRIT() throws Exception {
166 
167     final int NUM_MASTERS = 1;
168     final int NUM_RS = 3;
169 
170     // Create config to use for this cluster
171     Configuration conf = HBaseConfiguration.create();
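    // Use ZooKeeper-based assignment so that region-in-transition state lives in ZK;
    // that is the state this test mocks up after killing the master.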
172     conf.setBoolean("hbase.assignment.usezk", true);
173 
174     // Start the cluster
175     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
176     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
177     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
178     log("Cluster started");
179 
180     // Create a ZKW to use in the test
181     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
182 
183     // get all the master threads
184     List<MasterThread> masterThreads = cluster.getMasterThreads();
185     assertEquals(1, masterThreads.size());
186 
187     // only one master thread, let's wait for it to be initialized
188     assertTrue(cluster.waitForActiveAndReadyMaster());
189     HMaster master = masterThreads.get(0).getMaster();
190     assertTrue(master.isActiveMaster());
191     assertTrue(master.isInitialized());
192 
193     // disable load balancing on this master
194     master.balanceSwitch(false);
195 
    // create two tables in META, each with 11 regions
197     byte [] FAMILY = Bytes.toBytes("family");
198     byte [][] SPLIT_KEYS = new byte [][] {
199         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
200         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
201         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
202         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
203     };
204 
205     byte [] enabledTable = Bytes.toBytes("enabledTable");
206     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
207     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
208 
209     FileSystem filesystem = FileSystem.get(conf);
210     Path rootdir = FSUtils.getRootDir(conf);
211     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
212     // Write the .tableinfo
213     fstd.createTableDescriptor(htdEnabled);
214 
215     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
216     createRegion(hriEnabled, rootdir, conf, htdEnabled);
217 
218     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
219         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
220 
221     TableName disabledTable = TableName.valueOf("disabledTable");
222     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
223     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
224     // Write the .tableinfo
225     fstd.createTableDescriptor(htdDisabled);
226     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
227     createRegion(hriDisabled, rootdir, conf, htdDisabled);
228     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
229         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
230 
231     TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
232     TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
233 
234     log("Regions in hbase:meta and namespace have been created");
235 
    // at this point we only expect 4 regions to be assigned out
    // (hbase:meta and namespace, plus the 2 merging regions)
238     assertEquals(4, cluster.countServedRegions());
239 
240     // Move merging regions to the same region server
241     AssignmentManager am = master.getAssignmentManager();
242     RegionStates regionStates = am.getRegionStates();
243     List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
244     assertEquals(2, mergingRegions.size());
245     HRegionInfo a = mergingRegions.get(0);
246     HRegionInfo b = mergingRegions.get(1);
247     HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
248     ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
249     ServerName serverB = regionStates.getRegionServerOfRegion(b);
250     if (!serverB.equals(mergingServer)) {
251       RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
252       am.balance(plan);
253       assertTrue(am.waitForAssignment(b));
254     }
255 
256     // Let's just assign everything to first RS
257     HRegionServer hrs = cluster.getRegionServer(0);
258     ServerName serverName = hrs.getServerName();
259     HRegionInfo closingRegion = enabledRegions.remove(0);
260     // we'll need some regions to already be assigned out properly on live RS
261     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
262     enabledAndAssignedRegions.add(enabledRegions.remove(0));
263     enabledAndAssignedRegions.add(enabledRegions.remove(0));
264     enabledAndAssignedRegions.add(closingRegion);
265 
266     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
267     disabledAndAssignedRegions.add(disabledRegions.remove(0));
268     disabledAndAssignedRegions.add(disabledRegions.remove(0));
269 
270     // now actually assign them
271     for (HRegionInfo hri : enabledAndAssignedRegions) {
272       master.assignmentManager.addPlan(hri.getEncodedName(),
273           new RegionPlan(hri, null, serverName));
274       master.assignRegion(hri);
275     }
276 
277     for (HRegionInfo hri : disabledAndAssignedRegions) {
278       master.assignmentManager.addPlan(hri.getEncodedName(),
279           new RegionPlan(hri, null, serverName));
280       master.assignRegion(hri);
281     }
282 
283     // wait for no more RIT
284     log("Waiting for assignment to finish");
285     ZKAssign.blockUntilNoRIT(zkw);
286     log("Assignment completed");
287 
288     // Stop the master
289     log("Aborting master");
290     cluster.abortMaster(0);
291     cluster.waitOnMaster(0);
292     log("Master has aborted");
293 
294     /*
295      * Now, let's start mocking up some weird states as described in the method
296      * javadoc.
297      */
298 
299     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
300     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
301 
302     log("Beginning to mock scenarios");
303 
304     // Disable the disabledTable in ZK
305     TableStateManager zktable = new ZKTableStateManager(zkw);
306     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
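    // The failed-over master should read this DISABLED state and leave the disabled
    // table's regions unassigned.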
307 
308     /*
309      *  ZK = OFFLINE
310      */
311 
312     // Region that should be assigned but is not and is in ZK as OFFLINE
313     // Cause: This can happen if the master crashed after creating the znode but before sending the
314     //  request to the region server
315     HRegionInfo region = enabledRegions.remove(0);
316     regionsThatShouldBeOnline.add(region);
317     ZKAssign.createNodeOffline(zkw, region, serverName);
318 
319     /*
320      * ZK = CLOSING
321      */
322     // Cause: Same as offline.
323     regionsThatShouldBeOnline.add(closingRegion);
324     ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
325 
326     /*
327      * ZK = CLOSED
328      */
329 
330     // Region of enabled table closed but not ack
    // Cause: Master was down while the region server updated the ZK status.
332     region = enabledRegions.remove(0);
333     regionsThatShouldBeOnline.add(region);
334     int version = ZKAssign.createNodeClosing(zkw, region, serverName);
335     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
336 
337     // Region of disabled table closed but not ack
338     region = disabledRegions.remove(0);
339     regionsThatShouldBeOffline.add(region);
340     version = ZKAssign.createNodeClosing(zkw, region, serverName);
341     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
342 
343     /*
344      * ZK = OPENED
345      */
346 
347     // Region of enabled table was opened on RS
    // Cause: same as the OFFLINE case above
349     region = enabledRegions.remove(0);
350     regionsThatShouldBeOnline.add(region);
351     ZKAssign.createNodeOffline(zkw, region, serverName);
352     ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
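    // Poll the znode until the region server has transitioned it to RS_ZK_REGION_OPENED,
    // leaving an unacknowledged OPENED node for the new master to find.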
353     while (true) {
354       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
355       RegionTransition rt = RegionTransition.parseFrom(bytes);
356       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
357         break;
358       }
359       Thread.sleep(100);
360     }
361 
    // Region of disabled table was opened on RS
363     // Cause: Master failed while updating the status for this region server.
364     region = disabledRegions.remove(0);
365     regionsThatShouldBeOffline.add(region);
366     ZKAssign.createNodeOffline(zkw, region, serverName);
367     ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
368     while (true) {
369       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
370       RegionTransition rt = RegionTransition.parseFrom(bytes);
371       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
372         break;
373       }
374       Thread.sleep(100);
375     }
376 
377     /*
378      * ZK = MERGING
379      */
380 
381     // Regions of table of merging regions
382     // Cause: Master was down while merging was going on
383     hrs.getCoordinatedStateManager().
384       getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
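    // This creates the region-merge transaction znode with no master around to act on it;
    // the new master is expected to recover the MERGING state on failover (verified below).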
385 
386     /*
387      * ZK = NONE
388      */
389 
390     /*
391      * DONE MOCKING
392      */
393 
394     log("Done mocking data up in ZK");
395 
396     // Start up a new master
397     log("Starting up a new master");
398     master = cluster.startMaster().getMaster();
399     log("Waiting for master to be ready");
400     cluster.waitForActiveAndReadyMaster();
401     log("Master is ready");
402 
403     // Get new region states since master restarted
404     regionStates = master.getAssignmentManager().getRegionStates();
405     // Merging region should remain merging
406     assertTrue(regionStates.isRegionInState(a, State.MERGING));
407     assertTrue(regionStates.isRegionInState(b, State.MERGING));
408     assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
409     // Now remove the faked merging znode, merging regions should be
410     // offlined automatically, otherwise it is a bug in AM.
411     ZKAssign.deleteNodeFailSilent(zkw, newRegion);
412 
413     // Failover should be completed, now wait for no RIT
414     log("Waiting for no more RIT");
415     ZKAssign.blockUntilNoRIT(zkw);
416     log("No more RIT in ZK, now doing final test verification");
417 
418     // Grab all the regions that are online across RSs
419     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
420     for (JVMClusterUtil.RegionServerThread rst :
421       cluster.getRegionServerThreads()) {
422       onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
423         rst.getRegionServer().getRSRpcServices()));
424     }
425 
426     // Now, everything that should be online should be online
427     for (HRegionInfo hri : regionsThatShouldBeOnline) {
428       assertTrue(onlineRegions.contains(hri));
429     }
430 
431     // Everything that should be offline should not be online
432     for (HRegionInfo hri : regionsThatShouldBeOffline) {
433       if (onlineRegions.contains(hri)) {
434        LOG.debug(hri);
435       }
436       assertFalse(onlineRegions.contains(hri));
437     }
438 
439     log("Done with verification, all passed, shutting down cluster");
440 
441     // Done, shutdown the cluster
442     TEST_UTIL.shutdownMiniCluster();
443   }
444 
445   /**
   * Complex test of master failover that tests as many permutations as possible of the
   * different states that regions in transition can be in within ZK, pointing to an RS
   * that has died while no master is around to process it.
449    * <p>
450    * This tests the proper handling of these states by the failed-over master
451    * and includes a thorough testing of the timeout code as well.
452    * <p>
453    * Starts with a single master and two regionservers.
454    * <p>
   * Creates two tables, enabledTable and disabledTable, each containing 30
   * regions.  The disabledTable is then disabled.
457    * <p>
   * After reaching steady-state, the master is killed.  We then mock several
   * states in ZK, and one of the region servers is killed.
460    * <p>
461    * After mocking them and killing an RS, we will startup a new master which
462    * should become the active master and also detect that it is a failover.  The
463    * primary test passing condition will be that all regions of the enabled
464    * table are assigned and all the regions of the disabled table are not
465    * assigned.
466    * <p>
467    * The different scenarios to be tested are below:
468    * <p>
469    * <b>ZK State:  CLOSING</b>
470    * <p>A node can get into CLOSING state if</p>
471    * <ul>
472    * <li>An RS has begun to close a region
473    * </ul>
474    * <p>We will mock the scenarios</p>
475    * <ul>
476    * <li>Region was being closed but the RS died before finishing the close
477    * </ul>
478    * <b>ZK State:  OPENED</b>
479    * <p>A node can get into OPENED state if</p>
480    * <ul>
481    * <li>An RS has finished opening a region but not acknowledged by master yet
482    * </ul>
483    * <p>We will mock the scenarios</p>
484    * <ul>
485    * <li>Region of a table that should be enabled was opened by a now-dead RS
486    * <li>Region of a table that should be disabled was opened by a now-dead RS
487    * </ul>
488    * <p>
489    * <b>ZK State:  NONE</b>
490    * <p>A region could not have a transition node if</p>
491    * <ul>
492    * <li>The server hosting the region died and no master processed it
493    * </ul>
494    * <p>We will mock the scenarios</p>
495    * <ul>
496    * <li>Region of enabled table was on a dead RS that was not yet processed
497    * <li>Region of disabled table was on a dead RS that was not yet processed
498    * </ul>
499    * @throws Exception
500    */
501   @Test (timeout=180000)
  public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
503 
504     final int NUM_MASTERS = 1;
505     final int NUM_RS = 2;
506 
507     // Create and start the cluster
508     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
509     Configuration conf = TEST_UTIL.getConfiguration();
510     conf.setBoolean("hbase.assignment.usezk", true);
511 
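    // Let the master finish startup once a single region server has checked in,
    // presumably so the failed-over master can come up while the second RS is dead.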
512     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
513     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
514     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
515     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
516     log("Cluster started");
517 
518     // Create a ZKW to use in the test
519     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
520         "unittest", new Abortable() {
521 
522           @Override
523           public void abort(String why, Throwable e) {
524             LOG.error("Fatal ZK Error: " + why, e);
            org.junit.Assert.fail("Fatal ZK error");
526           }
527 
528           @Override
529           public boolean isAborted() {
530             return false;
531           }
532 
533     });
534 
535     // get all the master threads
536     List<MasterThread> masterThreads = cluster.getMasterThreads();
537     assertEquals(1, masterThreads.size());
538 
539     // only one master thread, let's wait for it to be initialized
540     assertTrue(cluster.waitForActiveAndReadyMaster());
541     HMaster master = masterThreads.get(0).getMaster();
542     assertTrue(master.isActiveMaster());
543     assertTrue(master.isInitialized());
544 
545     // disable load balancing on this master
546     master.balanceSwitch(false);
547 
548     // create two tables in META, each with 30 regions
549     byte [] FAMILY = Bytes.toBytes("family");
550     byte[][] SPLIT_KEYS =
551         TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
552 
553     byte [] enabledTable = Bytes.toBytes("enabledTable");
554     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
555     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
556     FileSystem filesystem = FileSystem.get(conf);
557     Path rootdir = FSUtils.getRootDir(conf);
558     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
559     // Write the .tableinfo
560     fstd.createTableDescriptor(htdEnabled);
561     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
562         null, null);
563     createRegion(hriEnabled, rootdir, conf, htdEnabled);
564 
565     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
566         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
567 
568     TableName disabledTable =
569         TableName.valueOf("disabledTable");
570     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
571     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
572     // Write the .tableinfo
573     fstd.createTableDescriptor(htdDisabled);
574     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
575     createRegion(hriDisabled, rootdir, conf, htdDisabled);
576 
577     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
578         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
579 
580     log("Regions in hbase:meta and Namespace have been created");
581 
    // at this point we only expect 2 regions to be assigned out (hbase:meta and namespace)
583     assertEquals(2, cluster.countServedRegions());
584 
585     // The first RS will stay online
586     List<RegionServerThread> regionservers =
587       cluster.getRegionServerThreads();
588     HRegionServer hrs = regionservers.get(0).getRegionServer();
589 
590     // The second RS is going to be hard-killed
591     RegionServerThread hrsDeadThread = regionservers.get(1);
592     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
593     ServerName deadServerName = hrsDead.getServerName();
594 
595     // we'll need some regions to already be assigned out properly on live RS
596     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
597     enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
598     enabledRegions.removeAll(enabledAndAssignedRegions);
599     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
600     disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
601     disabledRegions.removeAll(disabledAndAssignedRegions);
602 
603     // now actually assign them
604     for (HRegionInfo hri : enabledAndAssignedRegions) {
605       master.assignmentManager.addPlan(hri.getEncodedName(),
606           new RegionPlan(hri, null, hrs.getServerName()));
607       master.assignRegion(hri);
608     }
609     for (HRegionInfo hri : disabledAndAssignedRegions) {
610       master.assignmentManager.addPlan(hri.getEncodedName(),
611           new RegionPlan(hri, null, hrs.getServerName()));
612       master.assignRegion(hri);
613     }
614 
615     log("Waiting for assignment to finish");
616     ZKAssign.blockUntilNoRIT(zkw);
617     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
618     log("Assignment completed");
619 
620     assertTrue(" Table must be enabled.", master.getAssignmentManager()
621         .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
622         ZooKeeperProtos.Table.State.ENABLED));
623     // we also need regions assigned out on the dead server
624     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
625     enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
626     enabledRegions.removeAll(enabledAndOnDeadRegions);
627     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
628     disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
629     disabledRegions.removeAll(disabledAndOnDeadRegions);
630 
631     // set region plan to server to be killed and trigger assign
632     for (HRegionInfo hri : enabledAndOnDeadRegions) {
633       master.assignmentManager.addPlan(hri.getEncodedName(),
634           new RegionPlan(hri, null, deadServerName));
635       master.assignRegion(hri);
636     }
637     for (HRegionInfo hri : disabledAndOnDeadRegions) {
638       master.assignmentManager.addPlan(hri.getEncodedName(),
639           new RegionPlan(hri, null, deadServerName));
640       master.assignRegion(hri);
641     }
642 
643     // wait for no more RIT
644     log("Waiting for assignment to finish");
645     ZKAssign.blockUntilNoRIT(zkw);
646     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
647     log("Assignment completed");
648 
    // Because master.assignRegion(hri) may fail to assign a region to the specified RS,
    // prune each list down to the regions that actually ended up on the expected RS
651     verifyRegionLocation(hrs, enabledAndAssignedRegions);
652     verifyRegionLocation(hrs, disabledAndAssignedRegions);
653     verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
654     verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
655 
656     assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
657       enabledAndAssignedRegions.size() >= 2);
658     assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
659       disabledAndAssignedRegions.size() >= 2);
660     assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
661       enabledAndOnDeadRegions.size() >= 2);
662     assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
663       disabledAndOnDeadRegions.size() >= 2);
664 
665     // Stop the master
666     log("Aborting master");
667     cluster.abortMaster(0);
668     cluster.waitOnMaster(0);
669     log("Master has aborted");
670 
671     /*
672      * Now, let's start mocking up some weird states as described in the method
673      * javadoc.
674      */
675 
676     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
677     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
678 
679     log("Beginning to mock scenarios");
680 
681     // Disable the disabledTable in ZK
682     TableStateManager zktable = new ZKTableStateManager(zkw);
683     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
684 
685     assertTrue(" The enabled table should be identified on master fail over.",
686         zktable.isTableState(TableName.valueOf("enabledTable"),
687           ZooKeeperProtos.Table.State.ENABLED));
688 
689     /*
690      * ZK = CLOSING
691      */
692 
693     // Region of enabled table being closed on dead RS but not finished
694     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
695     regionsThatShouldBeOnline.add(region);
696     ZKAssign.createNodeClosing(zkw, region, deadServerName);
697     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
698         region + "\n\n");
699 
700     // Region of disabled table being closed on dead RS but not finished
701     region = disabledAndOnDeadRegions.remove(0);
702     regionsThatShouldBeOffline.add(region);
703     ZKAssign.createNodeClosing(zkw, region, deadServerName);
704     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
705         region + "\n\n");
706 
707     /*
708      * ZK = CLOSED
709      */
710 
711     // Region of enabled on dead server gets closed but not ack'd by master
712     region = enabledAndOnDeadRegions.remove(0);
713     regionsThatShouldBeOnline.add(region);
714     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
715     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
716     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
717         region + "\n\n");
718 
719     // Region of disabled on dead server gets closed but not ack'd by master
720     region = disabledAndOnDeadRegions.remove(0);
721     regionsThatShouldBeOffline.add(region);
722     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
723     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
724     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
725         region + "\n\n");
726 
727     /*
728      * ZK = OPENING
729      */
730 
731     // RS was opening a region of enabled table then died
732     region = enabledRegions.remove(0);
733     regionsThatShouldBeOnline.add(region);
734     ZKAssign.createNodeOffline(zkw, region, deadServerName);
735     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
736     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
737         region + "\n\n");
738 
739     // RS was opening a region of disabled table then died
740     region = disabledRegions.remove(0);
741     regionsThatShouldBeOffline.add(region);
742     ZKAssign.createNodeOffline(zkw, region, deadServerName);
743     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
744     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
745         region + "\n\n");
746 
747     /*
748      * ZK = OPENED
749      */
750 
751     // Region of enabled table was opened on dead RS
752     region = enabledRegions.remove(0);
753     regionsThatShouldBeOnline.add(region);
754     ZKAssign.createNodeOffline(zkw, region, deadServerName);
755     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
756       hrsDead.getServerName(), region);
757     while (true) {
758       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
759       RegionTransition rt = RegionTransition.parseFrom(bytes);
760       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
761         break;
762       }
763       Thread.sleep(100);
764     }
765     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + region + "\n\n");
766 
767     // Region of disabled table was opened on dead RS
768     region = disabledRegions.remove(0);
769     regionsThatShouldBeOffline.add(region);
770     ZKAssign.createNodeOffline(zkw, region, deadServerName);
771     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
772       hrsDead.getServerName(), region);
773     while (true) {
774       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
775       RegionTransition rt = RegionTransition.parseFrom(bytes);
776       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
777         break;
778       }
779       Thread.sleep(100);
780     }
781     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + region + "\n\n");
782 
783     /*
784      * ZK = NONE
785      */
786 
787     // Region of enabled table was open at steady-state on dead RS
788     region = enabledRegions.remove(0);
789     regionsThatShouldBeOnline.add(region);
790     ZKAssign.createNodeOffline(zkw, region, deadServerName);
791     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
792       hrsDead.getServerName(), region);
793     while (true) {
794       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
795       RegionTransition rt = RegionTransition.parseFrom(bytes);
796       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
797         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
798         LOG.debug("DELETED " + rt);
799         break;
800       }
801       Thread.sleep(100);
802     }
803     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
804         + "\n" + region + "\n\n");
805 
806     // Region of disabled table was open at steady-state on dead RS
807     region = disabledRegions.remove(0);
808     regionsThatShouldBeOffline.add(region);
809     ZKAssign.createNodeOffline(zkw, region, deadServerName);
810     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
811       hrsDead.getServerName(), region);
812     while (true) {
813       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
814       RegionTransition rt = RegionTransition.parseFrom(bytes);
815       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
816         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
817         break;
818       }
819       Thread.sleep(100);
820     }
821     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
822       + "\n" + region + "\n\n");
823 
824     /*
825      * DONE MOCKING
826      */
827 
828     log("Done mocking data up in ZK");
829 
830     // Kill the RS that had a hard death
831     log("Killing RS " + deadServerName);
832     hrsDead.abort("Killing for unit test");
833     log("RS " + deadServerName + " killed");
834 
835     // Start up a new master.  Wait until regionserver is completely down
836     // before starting new master because of hbase-4511.
837     while (hrsDeadThread.isAlive()) {
838       Threads.sleep(10);
839     }
840     log("Starting up a new master");
841     master = cluster.startMaster().getMaster();
842     log("Waiting for master to be ready");
843     assertTrue(cluster.waitForActiveAndReadyMaster());
844     log("Master is ready");
845 
846     // Wait until SSH processing completed for dead server.
847     while (master.getServerManager().areDeadServersInProgress()) {
848       Thread.sleep(10);
849     }
850 
851     // Failover should be completed, now wait for no RIT
852     log("Waiting for no more RIT");
853     ZKAssign.blockUntilNoRIT(zkw);
854     log("No more RIT in ZK");
855     long now = System.currentTimeMillis();
856     long maxTime = 120000;
857     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
858     if (!done) {
859       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
860       LOG.info("rit=" + regionStates.getRegionsInTransition());
861     }
862     long elapsed = System.currentTimeMillis() - now;
863     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
864       elapsed < maxTime);
865     log("No more RIT in RIT map, doing final test verification");
866 
867     // Grab all the regions that are online across RSs
868     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
869     now = System.currentTimeMillis();
870     maxTime = 30000;
871     for (JVMClusterUtil.RegionServerThread rst :
872         cluster.getRegionServerThreads()) {
873       try {
874         HRegionServer rs = rst.getRegionServer();
875         while (!rs.getRegionsInTransitionInRS().isEmpty()) {
876           elapsed = System.currentTimeMillis() - now;
877           assertTrue("Test timed out in getting online regions", elapsed < maxTime);
878           if (rs.isAborted() || rs.isStopped()) {
879             // This region server is stopped, skip it.
880             break;
881           }
882           Thread.sleep(100);
883         }
884         onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
885       } catch (RegionServerStoppedException e) {
886         LOG.info("Got RegionServerStoppedException", e);
887       }
888     }
889 
890     // Now, everything that should be online should be online
891     for (HRegionInfo hri : regionsThatShouldBeOnline) {
892       assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
893         onlineRegions.contains(hri));
894     }
895 
896     // Everything that should be offline should not be online
897     for (HRegionInfo hri : regionsThatShouldBeOffline) {
898       assertFalse(onlineRegions.contains(hri));
899     }
900 
901     log("Done with verification, all passed, shutting down cluster");
902 
903     // Done, shutdown the cluster
904     TEST_UTIL.shutdownMiniCluster();
905   }
906 
907   /**
   * Remove from the passed list any regions that are not actually online on the given
   * region server, so callers can assert on how many regions ended up where expected
909    */
  private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
911       throws IOException {
912     List<HRegionInfo> tmpOnlineRegions =
913       ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
914     Iterator<HRegionInfo> itr = regions.iterator();
915     while (itr.hasNext()) {
916       HRegionInfo tmp = itr.next();
917       if (!tmpOnlineRegions.contains(tmp)) {
918         itr.remove();
919       }
920     }
921   }
922 
  HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
924       final HTableDescriptor htd)
925   throws IOException {
926     HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
    // The above call to create a region will create a WAL file.  Each WAL file
    // created also starts a thread that keeps syncing it.  We need to close out
    // the region (and its WAL) here, else a thread keeps trying to sync the
    // file system continuously, which is ugly when DFS is taken away at the
    // end of the test.
932     HRegion.closeHRegion(r);
933     return r;
934   }
935 
  // TODO: Next test to add should cover the permutations where the RIT, or the RS
  //       that is killed, is hosting the ROOT or hbase:meta region.
938 
  private void log(String string) {
940     LOG.info("\n\n" + string + " \n\n");
941   }
942 
943   @Test (timeout=180000)
  public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
945       throws Exception {
946     LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
947     final int NUM_MASTERS = 1;
948     final int NUM_RS = 2;
949 
950     // Start the cluster
951     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
952     Configuration conf = TEST_UTIL.getConfiguration();
953     conf.setInt("hbase.master.info.port", -1);
954     conf.setBoolean("hbase.assignment.usezk", true);
955 
956     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
957     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
958 
959     // Find regionserver carrying meta.
960     List<RegionServerThread> regionServerThreads =
961       cluster.getRegionServerThreads();
962     Region metaRegion = null;
963     HRegionServer metaRegionServer = null;
964     for (RegionServerThread regionServerThread : regionServerThreads) {
965       HRegionServer regionServer = regionServerThread.getRegionServer();
966       metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
967       regionServer.abort("");
968       if (null != metaRegion) {
969         metaRegionServer = regionServer;
970         break;
971       }
972     }
973 
974     TEST_UTIL.shutdownMiniHBaseCluster();
975 
976     // Create a ZKW to use in the test
977     ZooKeeperWatcher zkw =
978       HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
979           metaRegion, metaRegionServer.getServerName());
980 
981     LOG.info("Staring cluster for second time");
982     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
983 
984     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
985     while (!master.isInitialized()) {
986       Thread.sleep(100);
987     }
988     // Failover should be completed, now wait for no RIT
989     log("Waiting for no more RIT");
990     ZKAssign.blockUntilNoRIT(zkw);
991 
992     zkw.close();
993     // Stop the cluster
994     TEST_UTIL.shutdownMiniCluster();
995   }
996 
997   /**
   * This tests that a region in transition left in the OFFLINE state gets reassigned
   * after a master restart
999    */
1000   @Test(timeout=240000)
  public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1002     final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1003     final int NUM_MASTERS = 1;
1004     final int NUM_RS = 2;
1005 
1006     // Create config to use for this cluster
1007     Configuration conf = HBaseConfiguration.create();
1008     conf.setBoolean("hbase.assignment.usezk", true);
1009 
1010     // Start the cluster
1011     final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1012     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1013     log("Cluster started");
1014 
1015     TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1016     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1017     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1018     HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1019     ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1020     TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1021 
1022     ServerName dstName = null;
1023     for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1024       if (!tmpServer.equals(serverName)) {
1025         dstName = tmpServer;
1026         break;
1027       }
1028     }
    // make sure we found a different server to host the offline RIT node
    assertNotNull(dstName);
1031     // shutdown HBase cluster
1032     TEST_UTIL.shutdownMiniHBaseCluster();
1033     // create a RIT node in offline state
1034     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1035     ZKAssign.createNodeOffline(zkw, hri, dstName);
1036     Stat stat = new Stat();
1037     byte[] data =
1038         ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
    assertNotNull(data);
    RegionTransition rt = RegionTransition.parseFrom(data);
    assertEquals(EventType.M_ZK_REGION_OFFLINE, rt.getEventType());
1042 
1043     LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1044         + " and dst server=" + dstName);
1045 
1046     // start HBase cluster
1047     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1048 
1049     while (true) {
1050       master = TEST_UTIL.getHBaseCluster().getMaster();
1051       if (master != null && master.isInitialized()) {
1052         ServerManager serverManager = master.getServerManager();
1053         if (!serverManager.areDeadServersInProgress()) {
1054           break;
1055         }
1056       }
1057       Thread.sleep(200);
1058     }
1059 
1060     // verify the region is assigned
1061     master = TEST_UTIL.getHBaseCluster().getMaster();
1062     master.getAssignmentManager().waitForAssignment(hri);
1063     regionStates = master.getAssignmentManager().getRegionStates();
1064     RegionState newState = regionStates.getRegionState(hri);
1065     assertTrue(newState.isOpened());
1066   }
1067 
1068  /**
1069    * Simple test of master failover.
1070    * <p>
1071    * Starts with three masters.  Kills a backup master.  Then kills the active
1072    * master.  Ensures the final master becomes active and we can still contact
1073    * the cluster.
1074    * @throws Exception
1075    */
1076   @Test (timeout=240000)
  public void testSimpleMasterFailover() throws Exception {
1078 
1079     final int NUM_MASTERS = 3;
1080     final int NUM_RS = 3;
1081 
1082     // Start the cluster
1083     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1084 
1085     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1086     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1087 
1088     // get all the master threads
1089     List<MasterThread> masterThreads = cluster.getMasterThreads();
1090 
1091     // wait for each to come online
1092     for (MasterThread mt : masterThreads) {
1093       assertTrue(mt.isAlive());
1094     }
1095 
1096     // verify only one is the active master and we have right number
1097     int numActive = 0;
1098     int activeIndex = -1;
1099     ServerName activeName = null;
1100     HMaster active = null;
1101     for (int i = 0; i < masterThreads.size(); i++) {
1102       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1103         numActive++;
1104         activeIndex = i;
1105         active = masterThreads.get(activeIndex).getMaster();
1106         activeName = active.getServerName();
1107       }
1108     }
1109     assertEquals(1, numActive);
1110     assertEquals(NUM_MASTERS, masterThreads.size());
1111     LOG.info("Active master " + activeName);
1112 
1113     // Check that ClusterStatus reports the correct active and backup masters
1114     assertNotNull(active);
1115     ClusterStatus status = active.getClusterStatus();
1116     assertTrue(status.getMaster().equals(activeName));
1117     assertEquals(2, status.getBackupMastersSize());
1118     assertEquals(2, status.getBackupMasters().size());
1119 
1120     // attempt to stop one of the inactive masters
1121     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1122     HMaster master = cluster.getMaster(backupIndex);
1123     LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1124     cluster.stopMaster(backupIndex, false);
1125     cluster.waitOnMaster(backupIndex);
1126 
1127     // Verify still one active master and it's the same
1128     for (int i = 0; i < masterThreads.size(); i++) {
1129       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1130         assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1131         activeIndex = i;
1132         active = masterThreads.get(activeIndex).getMaster();
1133       }
1134     }
1135     assertEquals(1, numActive);
1136     assertEquals(2, masterThreads.size());
1137     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1138     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
1139     assertEquals(3, rsCount);
1140 
1141     // Check that ClusterStatus reports the correct active and backup masters
1142     assertNotNull(active);
1143     status = active.getClusterStatus();
1144     assertTrue(status.getMaster().equals(activeName));
1145     assertEquals(1, status.getBackupMastersSize());
1146     assertEquals(1, status.getBackupMasters().size());
1147 
1148     // kill the active master
1149     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1150     cluster.stopMaster(activeIndex, false);
1151     cluster.waitOnMaster(activeIndex);
1152 
1153     // wait for an active master to show up and be ready
1154     assertTrue(cluster.waitForActiveAndReadyMaster());
1155 
1156     LOG.debug("\n\nVerifying backup master is now active\n");
1157     // should only have one master now
1158     assertEquals(1, masterThreads.size());
1159 
    // and it should be the active one
1161     active = masterThreads.get(0).getMaster();
1162     assertNotNull(active);
1163     status = active.getClusterStatus();
1164     ServerName mastername = status.getMaster();
1165     assertTrue(mastername.equals(active.getServerName()));
1166     assertTrue(active.isActiveMaster());
1167     assertEquals(0, status.getBackupMastersSize());
1168     assertEquals(0, status.getBackupMasters().size());
1169     int rss = status.getServersSize();
1170     LOG.info("Active master " + mastername.getServerName() + " managing " +
1171       rss +  " region servers");
1172     assertEquals(3, rss);
1173 
1174     // Stop the cluster
1175     TEST_UTIL.shutdownMiniCluster();
1176   }
1177 
1178   /**
   * Test that regions in PENDING_OPEN/PENDING_CLOSE and FAILED_OPEN/FAILED_CLOSE states are
   * handled correctly when the master fails over
1180    */
1181   @Test (timeout=180000)
1182   @SuppressWarnings("deprecation")
  public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1184     final int NUM_MASTERS = 1;
1185     final int NUM_RS = 1;
1186 
1187     // Create config to use for this cluster
1188     Configuration conf = HBaseConfiguration.create();
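    // ZK-less region assignment: region state is tracked through the RegionStateStore
    // rather than in ZK znodes, which is what this test manipulates below.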
1189     conf.setBoolean("hbase.assignment.usezk", false);
1190 
1191     // Start the cluster
1192     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1193     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1194     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1195     log("Cluster started");
1196 
1197     // get all the master threads
1198     List<MasterThread> masterThreads = cluster.getMasterThreads();
1199     assertEquals(1, masterThreads.size());
1200 
1201     // only one master thread, let's wait for it to be initialized
1202     assertTrue(cluster.waitForActiveAndReadyMaster());
1203     HMaster master = masterThreads.get(0).getMaster();
1204     assertTrue(master.isActiveMaster());
1205     assertTrue(master.isInitialized());
1206 
1207     // Create a table with a region online
1208     Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1209     onlineTable.close();
1210     // Create a table in META, so it has a region offline
1211     HTableDescriptor offlineTable = new HTableDescriptor(
1212       TableName.valueOf(Bytes.toBytes("offlineTable")));
1213     offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1214 
1215     FileSystem filesystem = FileSystem.get(conf);
1216     Path rootdir = FSUtils.getRootDir(conf);
1217     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1218     fstd.createTableDescriptor(offlineTable);
1219 
1220     HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1221     createRegion(hriOffline, rootdir, conf, offlineTable);
1222     MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1223 
1224     log("Regions in hbase:meta and namespace have been created");
1225 
1226     // at this point we only expect 3 regions to be assigned out
1227     // (catalogs and namespace, + 1 online region)
1228     assertEquals(3, cluster.countServedRegions());
1229     HRegionInfo hriOnline = null;
1230     try (RegionLocator locator =
1231         TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1232       hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1233     }
1234     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1235     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
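    // Write transient region states directly into the region state store to simulate
    // open/close transitions that were still in flight when the master died.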
1236 
1237     // Put the online region in pending_close. It is actually already opened.
1238     // This is to simulate that the region close RPC is not sent out before failover
1239     RegionState oldState = regionStates.getRegionState(hriOnline);
1240     RegionState newState = new RegionState(
1241       hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1242     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1243 
1244     // Put the offline region in pending_open. It is actually not opened yet.
1245     // This is to simulate that the region open RPC is not sent out before failover
1246     oldState = new RegionState(hriOffline, State.OFFLINE);
1247     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1248     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1249 
1250     HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1251     createRegion(failedClose, rootdir, conf, offlineTable);
1252     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1253 
1254     oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1255     newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1256     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1257 
1258 
1259     HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1260     createRegion(failedOpen, rootdir, conf, offlineTable);
1261     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1262 
1263     // Simulate a region transitioning to failed open when the region server reports the
1264     // transition as FAILED_OPEN
1265     oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1266     newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1267     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1268 
1269     HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1270     createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1271     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1272 
1273     // Simulate a region transitioning to failed open when the master couldn't find a plan for
1274     // the region
1275     oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1276     newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1277     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1278 
1279 
1280 
    // Stop the master
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // Wait till no region is in transition any more
    master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
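    // On startup the new master runs its failover processing; the expectation
    // is that every region recorded in hbase:meta as being in transition
    // (pending or failed open/close) is brought back online before this
    // wait returns.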

    // Get new region states since master restarted
    regionStates = master.getAssignmentManager().getRegionStates();

    // All of the regions that were left in transition (pending_open,
    // pending_close, failed_open, failed_close) should now be online
    assertTrue(regionStates.isRegionOnline(hriOffline));
    assertTrue(regionStates.isRegionOnline(hriOnline));
    assertTrue(regionStates.isRegionOnline(failedClose));
    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
    assertTrue(regionStates.isRegionOnline(failedOpen));

    log("Done with verification, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Test that an hbase:meta region in transition is handled correctly across a master failover
   */
  @Test(timeout = 180000)
  public void testMetaInTransitionWhenMasterFailover() throws Exception {
    final int NUM_MASTERS = 1;
    final int NUM_RS = 1;

    // Start the cluster
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean("hbase.assignment.usezk", false);
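    // With "hbase.assignment.usezk" disabled, region transitions are driven by
    // RPC reports to the master rather than ZooKeeper znodes; the meta region's
    // location, however, is still published in ZooKeeper.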
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    log("Moving meta off the master");
    HMaster activeMaster = cluster.getMaster();
    HRegionServer rs = cluster.getRegionServer(0);
    ServerName metaServerName = cluster.getLiveRegionServerThreads()
      .get(0).getRegionServer().getServerName();
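    // Ask the master to move hbase:meta onto the chosen region server; move()
    // takes the encoded region name and the destination server name as bytes.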
    activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
      Bytes.toBytes(metaServerName.getServerName()));
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals("Meta should be assigned on expected regionserver",
      metaServerName, activeMaster.getMetaTableLocator()
        .getMetaRegionLocation(activeMaster.getZooKeeper()));

    // Now kill master, meta should remain on rs, where we placed it before.
    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // meta should remain where it was
    RegionState metaState =
      MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getState(), State.OPEN);

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // ensure meta is still deployed on RS
    metaState =
      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getState(), State.OPEN);

    // Mark the meta location in ZK as PENDING_OPEN on the RS, close the region
    // on that RS, and then kill the master. This simulates a failover while an
    // open RPC is still outstanding: ZooKeeper claims meta is PENDING_OPEN on
    // the RS, but no server is actually serving it, so the new master has to
    // finish the assignment.
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
      rs.getServerName(), State.PENDING_OPEN);
    Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    rs.removeFromOnlineRegions(meta, null);
    ((HRegion)meta).close();
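    // At this point meta is closed on the RS while ZooKeeper still reports it
    // as PENDING_OPEN there.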

    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

    metaState =
      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
      metaState.getState(), State.OPEN);

    // Mark the meta location in ZK as PENDING_CLOSE, then kill the master.
    // This simulates a failover while a close RPC is still outstanding: the
    // master intended to close meta but failed over before the close completed.
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
      rs.getServerName(), State.PENDING_CLOSE);

    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

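    // Close meta directly on the RS (the master is already down), so the new
    // master finds meta marked PENDING_CLOSE with no server actually serving it.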
    rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
      rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

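    // Close meta on the RS once more so nothing is actually serving it; combined
    // with the bogus location set below, the restarted master should notice the
    // stale location and reassign meta.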
    rs.getRSRpcServices().closeRegion(
      null,
      RequestConverter.buildCloseRegionRequest(rs.getServerName(),
        HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));

    // Set a dummy server to check if master reassigns meta on restart
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
      ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);

    log("Aborting master");
    activeMaster.stop("test-kill");

    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
}