1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 package org.apache.hadoop.hbase.zookeeper;
19 
20 import java.io.EOFException;
21 import java.io.IOException;
22 import java.net.ConnectException;
23 import java.net.NoRouteToHostException;
24 import java.net.SocketException;
25 import java.net.SocketTimeoutException;
26 import java.rmi.UnknownHostException;
27 import java.util.ArrayList;
28 import java.util.List;
29 
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.conf.Configuration;
33 import org.apache.hadoop.hbase.HConstants;
34 import org.apache.hadoop.hbase.HRegionInfo;
35 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.classification.InterfaceAudience;
38 import org.apache.hadoop.hbase.client.ClusterConnection;
39 import org.apache.hadoop.hbase.client.Connection;
40 import org.apache.hadoop.hbase.client.HConnection;
41 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
42 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
43 import org.apache.hadoop.hbase.exceptions.DeserializationException;
44 import org.apache.hadoop.hbase.ipc.FailedServerException;
45 import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
46 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
47 import org.apache.hadoop.hbase.master.RegionState;
48 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
49 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
50 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
51 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
52 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
53 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
54 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
55 import org.apache.hadoop.hbase.util.Bytes;
56 import org.apache.hadoop.hbase.util.Pair;
57 import org.apache.hadoop.ipc.RemoteException;
58 import org.apache.zookeeper.KeeperException;
59 
60 import com.google.common.base.Stopwatch;
61 import com.google.protobuf.InvalidProtocolBufferException;
62 
/**
 * Utility class to perform operations (get/wait for/verify/set/delete) on the znode in
 * ZooKeeper which keeps the hbase:meta region server location.
 *
 * Stateless class with a bunch of static methods. Doesn't manage resources passed in
 * (e.g. HConnection, ZooKeeperWatcher etc).
 *
 * Meta region location is set by <code>RegionServerServices</code>.
 * This class doesn't use ZK watchers, rather accesses ZK directly.
 *
 * This class is stateless. The only reason it's not made a non-instantiable util class
 * with a collection of static methods is that it'd be rather hard to mock properly in tests.
 *
 * TODO: rewrite using RPC calls to master to find out about hbase:meta.
 */
78 @InterfaceAudience.Private
79 public class MetaTableLocator {
80   private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
81 
82   // only needed to allow non-timeout infinite waits to stop when cluster shuts down
83   private volatile boolean stopped = false;
84 
85   /**
86    * Checks if the meta region location is available.
87    * @return true if meta region location is available, false if not
88    */
isLocationAvailable(ZooKeeperWatcher zkw)89   public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
90     return getMetaRegionLocation(zkw) != null;
91   }
92 
93   /**
94    * @param zkw ZooKeeper watcher to be used
95    * @return meta table regions and their locations.
96    */
getMetaRegionsAndLocations(ZooKeeperWatcher zkw)97   public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
98     return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
99   }
100 
101   /**
102    *
103    * @param zkw
104    * @param replicaId
105    * @return meta table regions and their locations.
106    */
getMetaRegionsAndLocations(ZooKeeperWatcher zkw, int replicaId)107   public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
108       int replicaId) {
109     ServerName serverName = getMetaRegionLocation(zkw, replicaId);
110     List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
111     list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
112         HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
113     return list;
114   }
115 
116   /**
117    * @param zkw ZooKeeper watcher to be used
118    * @return List of meta regions
119    */
getMetaRegions(ZooKeeperWatcher zkw)120   public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
121     return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
122   }
123 
124   /**
125    *
126    * @param zkw
127    * @param replicaId
128    * @return List of meta regions
129    */
getMetaRegions(ZooKeeperWatcher zkw, int replicaId)130   public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
131     List<Pair<HRegionInfo, ServerName>> result;
132     result = getMetaRegionsAndLocations(zkw, replicaId);
133     return getListOfHRegionInfos(result);
134   }
135 
getListOfHRegionInfos( final List<Pair<HRegionInfo, ServerName>> pairs)136   private List<HRegionInfo> getListOfHRegionInfos(
137       final List<Pair<HRegionInfo, ServerName>> pairs) {
138     if (pairs == null || pairs.isEmpty()) return null;
139     List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
140     for (Pair<HRegionInfo, ServerName> pair: pairs) {
141       result.add(pair.getFirst());
142     }
143     return result;
144   }
145 
146   /**
147    * Gets the meta region location, if available.  Does not block.
148    * @param zkw zookeeper connection to use
149    * @return server name or null if we failed to get the data.
150    */
getMetaRegionLocation(final ZooKeeperWatcher zkw)151   public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
152     try {
153       RegionState state = getMetaRegionState(zkw);
154       return state.isOpened() ? state.getServerName() : null;
155     } catch (KeeperException ke) {
156       return null;
157     }
158   }
159 
160   /**
161    * Gets the meta region location, if available.  Does not block.
162    * @param zkw
163    * @param replicaId
164    * @return server name
165    */
getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId)166   public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
167     try {
168       RegionState state = getMetaRegionState(zkw, replicaId);
169       return state.isOpened() ? state.getServerName() : null;
170     } catch (KeeperException ke) {
171       return null;
172     }
173   }
174 
175   /**
176    * Gets the meta region location, if available, and waits for up to the
177    * specified timeout if not immediately available.
178    * Given the zookeeper notification could be delayed, we will try to
179    * get the latest data.
180    * @param zkw
181    * @param timeout maximum time to wait, in millis
182    * @return server name for server hosting meta region formatted as per
183    * {@link ServerName}, or null if none available
184    * @throws InterruptedException if interrupted while waiting
185    * @throws NotAllMetaRegionsOnlineException
186    */
waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)187   public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
188   throws InterruptedException, NotAllMetaRegionsOnlineException {
189     return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
190   }
191 
192   /**
193    * Gets the meta region location, if available, and waits for up to the
194    * specified timeout if not immediately available.
195    * Given the zookeeper notification could be delayed, we will try to
196    * get the latest data.
197    * @param zkw
198    * @param replicaId
199    * @param timeout maximum time to wait, in millis
200    * @return server name for server hosting meta region formatted as per
201    * {@link ServerName}, or null if none available
202    * @throws InterruptedException
203    * @throws NotAllMetaRegionsOnlineException
204    */
waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)205   public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
206   throws InterruptedException, NotAllMetaRegionsOnlineException {
207     try {
208       if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
209         String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
210             + "There could be a mismatch with the one configured in the master.";
211         LOG.error(errorMsg);
212         throw new IllegalArgumentException(errorMsg);
213       }
214     } catch (KeeperException e) {
215       throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
216     }
217     ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
218 
219     if (sn == null) {
220       throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
221     }
222 
223     return sn;
224   }
225 
226   /**
227    * Waits indefinitely for availability of <code>hbase:meta</code>.  Used during
228    * cluster startup.  Does not verify meta, just that something has been
229    * set up in zk.
230    * @see #waitMetaRegionLocation(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher, long)
231    * @throws InterruptedException if interrupted while waiting
232    */
waitMetaRegionLocation(ZooKeeperWatcher zkw)233   public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
234     Stopwatch stopwatch = new Stopwatch().start();
235     while (!stopped) {
236       try {
237         if (waitMetaRegionLocation(zkw, 100) != null) break;
238         long sleepTime = stopwatch.elapsedMillis();
239         // +1 in case sleepTime=0
240         if ((sleepTime + 1) % 10000 == 0) {
241           LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
242         }
243       } catch (NotAllMetaRegionsOnlineException e) {
244         if (LOG.isTraceEnabled()) {
245           LOG.trace("hbase:meta still not available, sleeping and retrying." +
246             " Reason: " + e.getMessage());
247         }
248       }
249     }
250   }
251 
252   /**
253    * Verify <code>hbase:meta</code> is deployed and accessible.
254    * @param hConnection
255    * @param zkw
256    * @param timeout How long to wait on zk for meta address (passed through to
257    * the internal call to {@link #getMetaServerConnection}.
258    * @return True if the <code>hbase:meta</code> location is healthy.
259    * @throws java.io.IOException
260    * @throws InterruptedException
261    */
verifyMetaRegionLocation(HConnection hConnection, ZooKeeperWatcher zkw, final long timeout)262   public boolean verifyMetaRegionLocation(HConnection hConnection,
263       ZooKeeperWatcher zkw, final long timeout)
264   throws InterruptedException, IOException {
265     return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
266   }
267 
268   /**
269    * Verify <code>hbase:meta</code> is deployed and accessible.
270    * @param hConnection
271    * @param zkw
272    * @param timeout How long to wait on zk for meta address (passed through to
273    * @param replicaId
274    * @return True if the <code>hbase:meta</code> location is healthy.
275    * @throws InterruptedException
276    * @throws IOException
277    */
verifyMetaRegionLocation(HConnection hConnection, ZooKeeperWatcher zkw, final long timeout, int replicaId)278   public boolean verifyMetaRegionLocation(HConnection hConnection,
279       ZooKeeperWatcher zkw, final long timeout, int replicaId)
280   throws InterruptedException, IOException {
281     AdminProtos.AdminService.BlockingInterface service = null;
282     try {
283       service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
284     } catch (NotAllMetaRegionsOnlineException e) {
285       // Pass
286     } catch (ServerNotRunningYetException e) {
287       // Pass -- remote server is not up so can't be carrying root
288     } catch (UnknownHostException e) {
289       // Pass -- server name doesn't resolve so it can't be assigned anything.
290     } catch (RegionServerStoppedException e) {
291       // Pass -- server name sends us to a server that is dying or already dead.
292     }
293     return (service != null) && verifyRegionLocation(hConnection, service,
294             getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
295                 HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
296   }
297 
298   /**
299    * Verify we can connect to <code>hostingServer</code> and that its carrying
300    * <code>regionName</code>.
301    * @param hostingServer Interface to the server hosting <code>regionName</code>
302    * @param address The servername that goes with the <code>metaServer</code>
303    * Interface.  Used logging.
304    * @param regionName The regionname we are interested in.
305    * @return True if we were able to verify the region located at other side of
306    * the Interface.
307    * @throws IOException
308    */
309   // TODO: We should be able to get the ServerName from the AdminProtocol
310   // rather than have to pass it in.  Its made awkward by the fact that the
311   // HRI is likely a proxy against remote server so the getServerName needs
312   // to be fixed to go to a local method or to a cache before we can do this.
verifyRegionLocation(final Connection connection, AdminService.BlockingInterface hostingServer, final ServerName address, final byte [] regionName)313   private boolean verifyRegionLocation(final Connection connection,
314       AdminService.BlockingInterface hostingServer, final ServerName address,
315       final byte [] regionName)
316   throws IOException {
317     if (hostingServer == null) {
318       LOG.info("Passed hostingServer is null");
319       return false;
320     }
321     Throwable t;
322     PayloadCarryingRpcController controller = null;
323     if (connection instanceof ClusterConnection) {
324       controller = ((ClusterConnection) connection).getRpcControllerFactory().newController();
325     }
326     try {
327       // Try and get regioninfo from the hosting server.
328       return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
329     } catch (ConnectException e) {
330       t = e;
331     } catch (RetriesExhaustedException e) {
332       t = e;
333     } catch (RemoteException e) {
334       IOException ioe = e.unwrapRemoteException();
335       t = ioe;
336     } catch (IOException e) {
337       Throwable cause = e.getCause();
338       if (cause != null && cause instanceof EOFException) {
339         t = cause;
340       } else if (cause != null && cause.getMessage() != null
341           && cause.getMessage().contains("Connection reset")) {
342         t = cause;
343       } else {
344         t = e;
345       }
346     }
347     LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
348       " at address=" + address + ", exception=" + t.getMessage());
349     return false;
350   }
351 
352   /**
353    * Gets a connection to the server hosting meta, as reported by ZooKeeper,
354    * waiting up to the specified timeout for availability.
355    * <p>WARNING: Does not retry.  Use an {@link org.apache.hadoop.hbase.client.HTable} instead.
356    * @param hConnection
357    * @param zkw
358    * @param timeout How long to wait on meta location
359    * @param replicaId
360    * @return connection to server hosting meta
361    * @throws InterruptedException
362    * @throws NotAllMetaRegionsOnlineException if timed out waiting
363    * @throws IOException
364    */
getMetaServerConnection(HConnection hConnection, ZooKeeperWatcher zkw, long timeout, int replicaId)365   private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
366       ZooKeeperWatcher zkw, long timeout, int replicaId)
367   throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
368     return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
369   }
370 
371   /**
372    * @param sn ServerName to get a connection against.
373    * @return The AdminProtocol we got when we connected to <code>sn</code>
374    * May have come from cache, may not be good, may have been setup by this
375    * invocation, or may be null.
376    * @throws IOException
377    */
378   @SuppressWarnings("deprecation")
getCachedConnection(HConnection hConnection, ServerName sn)379   private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
380     ServerName sn)
381   throws IOException {
382     if (sn == null) {
383       return null;
384     }
385     AdminService.BlockingInterface service = null;
386     try {
387       service = hConnection.getAdmin(sn);
388     } catch (RetriesExhaustedException e) {
389       if (e.getCause() != null && e.getCause() instanceof ConnectException) {
390         // Catch this; presume it means the cached connection has gone bad.
391       } else {
392         throw e;
393       }
394     } catch (SocketTimeoutException e) {
395       LOG.debug("Timed out connecting to " + sn);
396     } catch (NoRouteToHostException e) {
397       LOG.debug("Connecting to " + sn, e);
398     } catch (SocketException e) {
399       LOG.debug("Exception connecting to " + sn);
400     } catch (UnknownHostException e) {
401       LOG.debug("Unknown host exception connecting to  " + sn);
402     } catch (FailedServerException e) {
403       if (LOG.isDebugEnabled()) {
404         LOG.debug("Server " + sn + " is in failed server list.");
405       }
406     } catch (IOException ioe) {
407       Throwable cause = ioe.getCause();
408       if (ioe instanceof ConnectException) {
409         // Catch. Connect refused.
410       } else if (cause != null && cause instanceof EOFException) {
411         // Catch. Other end disconnected us.
412       } else if (cause != null && cause.getMessage() != null &&
413         cause.getMessage().toLowerCase().contains("connection reset")) {
414         // Catch. Connection reset.
415       } else {
416         throw ioe;
417       }
418 
419     }
420     return service;
421   }
422 
423   /**
424    * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
425    * specified server address.
426    * @param zookeeper zookeeper reference
427    * @param serverName The server hosting <code>hbase:meta</code>
428    * @param state The region transition state
429    * @throws KeeperException unexpected zookeeper exception
430    */
setMetaLocation(ZooKeeperWatcher zookeeper, ServerName serverName, RegionState.State state)431   public static void setMetaLocation(ZooKeeperWatcher zookeeper,
432       ServerName serverName, RegionState.State state) throws KeeperException {
433     setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
434   }
435 
436   /**
437    * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
438    * specified server address.
439    * @param zookeeper
440    * @param serverName
441    * @param replicaId
442    * @param state
443    * @throws KeeperException
444    */
setMetaLocation(ZooKeeperWatcher zookeeper, ServerName serverName, int replicaId, RegionState.State state)445   public static void setMetaLocation(ZooKeeperWatcher zookeeper,
446       ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
447     LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
448     // Make the MetaRegionServer pb and then get its bytes and save this as
449     // the znode content.
450     MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
451       .setServer(ProtobufUtil.toServerName(serverName))
452       .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
453       .setState(state.convert()).build();
454     byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
455     try {
456       ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
457     } catch(KeeperException.NoNodeException nne) {
458       if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
459         LOG.debug("META region location doesn't exist, create it");
460       } else {
461         LOG.debug("META region location doesn't exist for replicaId " + replicaId +
462             ", create it");
463       }
464       ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
465     }
466   }
467 
468   /**
469    * Load the meta region state from the meta server ZNode.
470    */
getMetaRegionState(ZooKeeperWatcher zkw)471   public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
472     return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
473   }
474 
475   /**
476    * Load the meta region state from the meta server ZNode.
477    * @param zkw
478    * @param replicaId
479    * @return regionstate
480    * @throws KeeperException
481    */
getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)482   public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
483       throws KeeperException {
484     RegionState.State state = RegionState.State.OPEN;
485     ServerName serverName = null;
486     try {
487       byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
488       if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
489         try {
490           int prefixLen = ProtobufUtil.lengthOfPBMagic();
491           ZooKeeperProtos.MetaRegionServer rl =
492             ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
493               (data, prefixLen, data.length - prefixLen);
494           if (rl.hasState()) {
495             state = RegionState.State.convert(rl.getState());
496           }
497           HBaseProtos.ServerName sn = rl.getServer();
498           serverName = ServerName.valueOf(
499             sn.getHostName(), sn.getPort(), sn.getStartCode());
500         } catch (InvalidProtocolBufferException e) {
501           throw new DeserializationException("Unable to parse meta region location");
502         }
503       } else {
504         // old style of meta region location?
505         serverName = ServerName.parseFrom(data);
506       }
507     } catch (DeserializationException e) {
508       throw ZKUtil.convert(e);
509     } catch (InterruptedException e) {
510       Thread.currentThread().interrupt();
511     }
512     if (serverName == null) {
513       state = RegionState.State.OFFLINE;
514     }
515     return new RegionState(
516         RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
517       state, serverName);
518   }
519 
520   /**
521    * Deletes the location of <code>hbase:meta</code> in ZooKeeper.
522    * @param zookeeper zookeeper reference
523    * @throws KeeperException unexpected zookeeper exception
524    */
deleteMetaLocation(ZooKeeperWatcher zookeeper)525   public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
526   throws KeeperException {
527     deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
528   }
529 
deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)530   public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
531   throws KeeperException {
532     if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
533       LOG.info("Deleting hbase:meta region location in ZooKeeper");
534     } else {
535       LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
536     }
537     try {
538       // Just delete the node.  Don't need any watches.
539       ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
540     } catch(KeeperException.NoNodeException nne) {
541       // Has already been deleted
542     }
543   }
544   /**
545    * Wait until the primary meta region is available. Get the secondary
546    * locations as well but don't block for those.
547    * @param zkw
548    * @param timeout
549    * @param conf
550    * @return ServerName or null if we timed out.
551    * @throws InterruptedException
552    */
blockUntilAvailable(final ZooKeeperWatcher zkw, final long timeout, Configuration conf)553   public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
554       final long timeout, Configuration conf)
555           throws InterruptedException {
556     int numReplicasConfigured = 1;
557     try {
558       List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
559       numReplicasConfigured = metaReplicaNodes.size();
560     } catch (KeeperException e) {
561       LOG.warn("Got ZK exception " + e);
562     }
563     List<ServerName> servers = new ArrayList<ServerName>(numReplicasConfigured);
564     ServerName server = blockUntilAvailable(zkw, timeout);
565     if (server == null) return null;
566     servers.add(server);
567 
568     for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
569       // return all replica locations for the meta
570       servers.add(getMetaRegionLocation(zkw, replicaId));
571     }
572     return servers;
573   }
574 
575   /**
576    * Wait until the meta region is available and is not in transition.
577    * @param zkw zookeeper connection to use
578    * @param timeout maximum time to wait, in millis
579    * @return ServerName or null if we timed out.
580    * @throws InterruptedException
581    */
blockUntilAvailable(final ZooKeeperWatcher zkw, final long timeout)582   public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
583       final long timeout)
584   throws InterruptedException {
585     return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
586   }
587 
588   /**
589    * Wait until the meta region is available and is not in transition.
590    * @param zkw
591    * @param replicaId
592    * @param timeout
593    * @return ServerName or null if we timed out.
594    * @throws InterruptedException
595    */
blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId, final long timeout)596   public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
597       final long timeout)
598   throws InterruptedException {
599     if (timeout < 0) throw new IllegalArgumentException();
600     if (zkw == null) throw new IllegalArgumentException();
601     Stopwatch sw = new Stopwatch().start();
602     ServerName sn = null;
603     try {
604       while (true) {
605         sn = getMetaRegionLocation(zkw, replicaId);
606         if (sn != null || sw.elapsedMillis()
607             > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
608           break;
609         }
610         Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
611       }
612     } finally {
613       sw.stop();
614     }
615     return sn;
616   }
617 
618   /**
619    * Stop working.
620    * Interrupts any ongoing waits.
621    */
stop()622   public void stop() {
623     if (!stopped) {
624       LOG.debug("Stopping MetaTableLocator");
625       stopped = true;
626     }
627   }
628 }
629