/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.zookeeper;

import java.io.EOFException;
import java.io.IOException;
import java.net.ConnectException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.ipc.FailedServerException;
import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.KeeperException;

import com.google.common.base.Stopwatch;
import com.google.protobuf.InvalidProtocolBufferException;

/**
 * Utility class to perform operations (get/wait for/verify/set/delete) on the znode in
 * ZooKeeper which keeps the hbase:meta region server location.
 *
 * Doesn't manage resources passed in (e.g. HConnection, ZooKeeperWatcher etc).
 *
 * Meta region location is set by <code>RegionServerServices</code>.
 * This class doesn't use ZK watchers, rather accesses ZK directly.
 *
 * This class is stateless apart from the {@link #stop()} flag that ends the unbounded wait.
 * The only reason it's not made a non-instantiable util class with a collection of static
 * methods is that it'd be rather hard to mock properly in tests.
 *
 * TODO: rewrite using RPC calls to master to find out about hbase:meta.
 */
@InterfaceAudience.Private
public class MetaTableLocator {
  private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);

  /** Minimum time, in millis, between two "still waiting for meta" warnings. */
  private static final long WAIT_WARN_INTERVAL_MS = 10000;

  // only needed to allow non-timeout infinite waits to stop when cluster shuts down
  private volatile boolean stopped = false;

  /**
   * Checks if the meta region location is available.
   * @param zkw ZooKeeper watcher to be used
   * @return true if meta region location is available, false if not
   */
  public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
    return getMetaRegionLocation(zkw) != null;
  }

  /**
   * @param zkw ZooKeeper watcher to be used
   * @return meta table regions and their locations, for the default replica.
   */
  public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
    return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  /**
   * @param zkw ZooKeeper watcher to be used
   * @param replicaId replica of the meta region to look up
   * @return single-element list pairing the meta region info for <code>replicaId</code> with
   *   its location; the location is whatever {@link #getMetaRegionLocation(ZooKeeperWatcher, int)}
   *   returned and may therefore be null.
   */
  public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
      int replicaId) {
    ServerName serverName = getMetaRegionLocation(zkw, replicaId);
    List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
    list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
        HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
    return list;
  }

  /**
   * @param zkw ZooKeeper watcher to be used
   * @return List of meta regions, for the default replica
   */
  public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
    return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  /**
   * @param zkw ZooKeeper watcher to be used
   * @param replicaId replica of the meta region to look up
   * @return List of meta regions
   */
  public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
    List<Pair<HRegionInfo, ServerName>> result;
    result = getMetaRegionsAndLocations(zkw, replicaId);
    return getListOfHRegionInfos(result);
  }

  /**
   * Extracts the region infos (first element) of each pair.
   * @param pairs region/location pairs
   * @return the region infos in the same order, or null if <code>pairs</code> is null or empty
   */
  private List<HRegionInfo> getListOfHRegionInfos(
      final List<Pair<HRegionInfo, ServerName>> pairs) {
    if (pairs == null || pairs.isEmpty()) return null;
    List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
    for (Pair<HRegionInfo, ServerName> pair: pairs) {
      result.add(pair.getFirst());
    }
    return result;
  }

  /**
   * Gets the meta region location of the default replica, if available. Does not block.
   * @param zkw zookeeper connection to use
   * @return server name or null if we failed to get the data.
   */
  public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
    return getMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  /**
   * Gets the meta region location, if available. Does not block.
   * @param zkw zookeeper connection to use
   * @param replicaId replica of the meta region to look up
   * @return server name, or null if the recorded state is not opened or reading ZooKeeper
   *   failed with a {@link KeeperException}
   */
  public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
    try {
      RegionState state = getMetaRegionState(zkw, replicaId);
      return state.isOpened() ? state.getServerName() : null;
    } catch (KeeperException ke) {
      return null;
    }
  }

  /**
   * Gets the meta region location, if available, and waits for up to the
   * specified timeout if not immediately available.
   * Given the zookeeper notification could be delayed, we will try to
   * get the latest data.
   * @param zkw zookeeper connection to use
   * @param timeout maximum time to wait, in millis
   * @return server name for server hosting meta region formatted as per
   * {@link ServerName}; never null
   * @throws InterruptedException if interrupted while waiting
   * @throws NotAllMetaRegionsOnlineException if no location was found within the timeout
   */
  public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException {
    return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
  }

  /**
   * Gets the meta region location, if available, and waits for up to the
   * specified timeout if not immediately available.
   * Given the zookeeper notification could be delayed, we will try to
   * get the latest data.
   * @param zkw zookeeper connection to use
   * @param replicaId replica of the meta region to look up
   * @param timeout maximum time to wait, in millis
   * @return server name for server hosting meta region formatted as per
   * {@link ServerName}; never null
   * @throws InterruptedException if interrupted while waiting
   * @throws NotAllMetaRegionsOnlineException if no location was found within the timeout
   * @throws IllegalArgumentException if the base znode does not exist
   * @throws IllegalStateException if checking the base znode failed with a KeeperException
   */
  public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException {
    try {
      if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
        String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
            + "There could be a mismatch with the one configured in the master.";
        LOG.error(errorMsg);
        throw new IllegalArgumentException(errorMsg);
      }
    } catch (KeeperException e) {
      throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
    }
    ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);

    if (sn == null) {
      throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
    }

    return sn;
  }

  /**
   * Waits indefinitely for availability of <code>hbase:meta</code>.  Used during
   * cluster startup.  Does not verify meta, just that something has been
   * set up in zk. Returns early, without a location, once {@link #stop()} has been called.
   * @param zkw zookeeper connection to use
   * @see #waitMetaRegionLocation(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher, long)
   * @throws InterruptedException if interrupted while waiting
   */
  public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
    Stopwatch stopwatch = new Stopwatch().start();
    long nextWarnAt = WAIT_WARN_INTERVAL_MS;
    while (!stopped) {
      try {
        if (waitMetaRegionLocation(zkw, 100) != null) break;
      } catch (NotAllMetaRegionsOnlineException e) {
        if (LOG.isTraceEnabled()) {
          LOG.trace("hbase:meta still not available, sleeping and retrying." +
            " Reason: " + e.getMessage());
        }
      }
      // The timed wait signals a timeout by throwing, so the progress warning has to be
      // evaluated after the try/catch; inside the try it would never be reached.
      long waited = stopwatch.elapsedMillis();
      if (waited >= nextWarnAt) {
        LOG.warn("Have been waiting for meta to be assigned for " + waited + "ms");
        nextWarnAt = waited + WAIT_WARN_INTERVAL_MS;
      }
    }
  }

  /**
   * Verify <code>hbase:meta</code> is deployed and accessible.
   * @param hConnection connection used to reach the hosting server
   * @param zkw zookeeper connection to use
   * @param timeout How long to wait on zk for meta address (passed through to
   * the internal call to {@link #getMetaServerConnection}).
   * @return True if the <code>hbase:meta</code> location is healthy.
   * @throws java.io.IOException
   * @throws InterruptedException
   */
  public boolean verifyMetaRegionLocation(HConnection hConnection,
      ZooKeeperWatcher zkw, final long timeout)
  throws InterruptedException, IOException {
    return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  /**
   * Verify <code>hbase:meta</code> is deployed and accessible.
   * @param hConnection connection used to reach the hosting server
   * @param zkw zookeeper connection to use
   * @param timeout How long to wait on zk for meta address (passed through to
   * the internal call to {@link #getMetaServerConnection}).
   * @param replicaId replica of the meta region to verify
   * @return True if the <code>hbase:meta</code> location is healthy.
   * @throws InterruptedException
   * @throws IOException
   */
  public boolean verifyMetaRegionLocation(HConnection hConnection,
      ZooKeeperWatcher zkw, final long timeout, int replicaId)
  throws InterruptedException, IOException {
    AdminProtos.AdminService.BlockingInterface service = null;
    try {
      service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
    } catch (NotAllMetaRegionsOnlineException e) {
      // Pass
    } catch (ServerNotRunningYetException e) {
      // Pass -- remote server is not up so can't be carrying root
    } catch (UnknownHostException e) {
      // Pass -- server name doesn't resolve so it can't be assigned anything.
    } catch (RegionServerStoppedException e) {
      // Pass -- server name sends us to a server that is dying or already dead.
    }
    return (service != null) && verifyRegionLocation(hConnection, service,
            getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
                HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
  }

  /**
   * Verify we can connect to <code>hostingServer</code> and that its carrying
   * <code>regionName</code>.
   * @param connection connection the RPC controller is obtained from when it is a
   * {@link ClusterConnection}; otherwise a null controller is used
   * @param hostingServer Interface to the server hosting <code>regionName</code>
   * @param address The servername that goes with the <code>hostingServer</code>
   * Interface.  Used for logging only.
   * @param regionName The regionname we are interested in.
   * @return True if we were able to verify the region located at other side of
   * the Interface.
   * @throws IOException
   */
  // TODO: We should be able to get the ServerName from the AdminProtocol
  // rather than have to pass it in.  Its made awkward by the fact that the
  // HRI is likely a proxy against remote server so the getServerName needs
  // to be fixed to go to a local method or to a cache before we can do this.
  private boolean verifyRegionLocation(final Connection connection,
      AdminService.BlockingInterface hostingServer, final ServerName address,
      final byte [] regionName)
  throws IOException {
    if (hostingServer == null) {
      LOG.info("Passed hostingServer is null");
      return false;
    }
    Throwable t;
    PayloadCarryingRpcController controller = null;
    if (connection instanceof ClusterConnection) {
      controller = ((ClusterConnection) connection).getRpcControllerFactory().newController();
    }
    try {
      // Try and get regioninfo from the hosting server.
      return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
    } catch (ConnectException e) {
      t = e;
    } catch (RetriesExhaustedException e) {
      t = e;
    } catch (RemoteException e) {
      IOException ioe = e.unwrapRemoteException();
      t = ioe;
    } catch (IOException e) {
      // Report the underlying cause when it is the interesting part (peer hung up on us).
      Throwable cause = e.getCause();
      if (cause instanceof EOFException) {
        t = cause;
      } else if (cause != null && cause.getMessage() != null
          && cause.getMessage().contains("Connection reset")) {
        t = cause;
      } else {
        t = e;
      }
    }
    LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
      " at address=" + address + ", exception=" + t.getMessage());
    return false;
  }

  /**
   * Gets a connection to the server hosting meta, as reported by ZooKeeper,
   * waiting up to the specified timeout for availability.
   * <p>WARNING: Does not retry.  Use an {@link org.apache.hadoop.hbase.client.HTable} instead.
   * @param hConnection connection used to reach the hosting server
   * @param zkw zookeeper connection to use
   * @param timeout How long to wait on meta location
   * @param replicaId replica of the meta region to connect to
   * @return connection to server hosting meta
   * @throws InterruptedException
   * @throws NotAllMetaRegionsOnlineException if timed out waiting
   * @throws IOException
   */
  private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
      ZooKeeperWatcher zkw, long timeout, int replicaId)
  throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
    return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
  }

  /**
   * @param hConnection connection to obtain the admin interface from
   * @param sn ServerName to get a connection against.
   * @return The AdminProtocol we got when we connected to <code>sn</code>
   * May have come from cache, may not be good, may have been setup by this
   * invocation, or may be null.
   * @throws IOException for failures other than the connectivity problems handled here
   */
  @SuppressWarnings("deprecation")
  private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
    ServerName sn)
  throws IOException {
    if (sn == null) {
      return null;
    }
    AdminService.BlockingInterface service = null;
    try {
      service = hConnection.getAdmin(sn);
    } catch (RetriesExhaustedException e) {
      if (!(e.getCause() instanceof ConnectException)) {
        throw e;
      }
      // Otherwise swallow; presume it means the cached connection has gone bad.
    } catch (SocketTimeoutException e) {
      LOG.debug("Timed out connecting to " + sn);
    } catch (NoRouteToHostException e) {
      LOG.debug("Connecting to " + sn, e);
    } catch (SocketException e) {
      // Also covers ConnectException (connection refused), which is a SocketException.
      LOG.debug("Exception connecting to " + sn, e);
    } catch (UnknownHostException e) {
      LOG.debug("Unknown host exception connecting to " + sn);
    } catch (FailedServerException e) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Server " + sn + " is in failed server list.");
      }
    } catch (IOException ioe) {
      Throwable cause = ioe.getCause();
      if (cause instanceof EOFException) {
        // Catch. Other end disconnected us.
      } else if (cause != null && cause.getMessage() != null &&
        cause.getMessage().toLowerCase(Locale.ROOT).contains("connection reset")) {
        // Catch. Connection reset.
      } else {
        throw ioe;
      }

    }
    return service;
  }

  /**
   * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
   * specified server address.
   * @param zookeeper zookeeper reference
   * @param serverName The server hosting <code>hbase:meta</code>
   * @param state The region transition state
   * @throws KeeperException unexpected zookeeper exception
   */
  public static void setMetaLocation(ZooKeeperWatcher zookeeper,
      ServerName serverName, RegionState.State state) throws KeeperException {
    setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
  }

  /**
   * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
   * specified server address. The znode is created if it does not exist yet.
   * @param zookeeper zookeeper reference
   * @param serverName The server hosting <code>hbase:meta</code>
   * @param replicaId replica of the meta region whose znode is written
   * @param state The region transition state
   * @throws KeeperException unexpected zookeeper exception
   */
  public static void setMetaLocation(ZooKeeperWatcher zookeeper,
      ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
    LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
    // Make the MetaRegionServer pb and then get its bytes and save this as
    // the znode content.
    MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
      .setServer(ProtobufUtil.toServerName(serverName))
      .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
      .setState(state.convert()).build();
    byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
    try {
      ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
    } catch(KeeperException.NoNodeException nne) {
      if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
        LOG.debug("META region location doesn't exist, create it");
      } else {
        LOG.debug("META region location doesn't exist for replicaId " + replicaId +
            ", create it");
      }
      ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
    }
  }

  /**
   * Load the meta region state of the default replica from the meta server ZNode.
   * @param zkw zookeeper connection to use
   * @return regionstate
   * @throws KeeperException
   */
  public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
    return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  /**
   * Load the meta region state from the meta server ZNode.
   * The state defaults to OPEN when the znode content carries none, and is OFFLINE when no
   * server name could be read. If the read is interrupted, the interrupt flag is restored
   * and an OFFLINE state is returned.
   * @param zkw zookeeper connection to use
   * @param replicaId replica of the meta region whose znode is read
   * @return regionstate
   * @throws KeeperException on ZooKeeper failure, or if the znode content cannot be parsed
   */
  public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
      throws KeeperException {
    RegionState.State state = RegionState.State.OPEN;
    ServerName serverName = null;
    try {
      byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
      if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
        try {
          int prefixLen = ProtobufUtil.lengthOfPBMagic();
          ZooKeeperProtos.MetaRegionServer rl =
            ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
              (data, prefixLen, data.length - prefixLen);
          if (rl.hasState()) {
            state = RegionState.State.convert(rl.getState());
          }
          HBaseProtos.ServerName sn = rl.getServer();
          serverName = ServerName.valueOf(
            sn.getHostName(), sn.getPort(), sn.getStartCode());
        } catch (InvalidProtocolBufferException e) {
          // Keep the protobuf failure as the cause so the parse error stays diagnosable.
          throw new DeserializationException("Unable to parse meta region location", e);
        }
      } else if (data != null && data.length > 0) {
        // old style of meta region location?
        serverName = ServerName.parseFrom(data);
      }
      // else: nothing stored in the znode; serverName stays null and we report OFFLINE.
    } catch (DeserializationException e) {
      throw ZKUtil.convert(e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
    if (serverName == null) {
      state = RegionState.State.OFFLINE;
    }
    return new RegionState(
        RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
      state, serverName);
  }

  /**
   * Deletes the location of <code>hbase:meta</code> in ZooKeeper.
   * @param zookeeper zookeeper reference
   * @throws KeeperException unexpected zookeeper exception
   */
  public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
  throws KeeperException {
    deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  /**
   * Deletes the location of the given <code>hbase:meta</code> replica in ZooKeeper.
   * A znode that is already gone is not treated as an error.
   * @param zookeeper zookeeper reference
   * @param replicaId replica of the meta region whose znode is deleted
   * @throws KeeperException unexpected zookeeper exception
   */
  public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
  throws KeeperException {
    if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
      LOG.info("Deleting hbase:meta region location in ZooKeeper");
    } else {
      LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
    }
    try {
      // Just delete the node.  Don't need any watches.
      ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
    } catch(KeeperException.NoNodeException nne) {
      // Has already been deleted
    }
  }

  /**
   * Wait until the primary meta region is available. Get the secondary
   * locations as well but don't block for those.
   * @param zkw zookeeper connection to use
   * @param timeout maximum time to wait for the primary, in millis
   * @param conf not used by this method
   * @return locations indexed by replica id (entries for secondary replicas may be null),
   *   or null if we timed out waiting for the primary.
   * @throws InterruptedException
   */
  public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
      final long timeout, Configuration conf)
          throws InterruptedException {
    // Falls back to a single replica when the replica znodes cannot be listed.
    int numReplicasConfigured = 1;
    try {
      List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
      numReplicasConfigured = metaReplicaNodes.size();
    } catch (KeeperException e) {
      LOG.warn("Got ZK exception " + e);
    }
    List<ServerName> servers = new ArrayList<ServerName>(numReplicasConfigured);
    ServerName server = blockUntilAvailable(zkw, timeout);
    if (server == null) return null;
    servers.add(server);

    for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
      // return all replica locations for the meta
      servers.add(getMetaRegionLocation(zkw, replicaId));
    }
    return servers;
  }

  /**
   * Wait until the meta region is available and is not in transition.
   * @param zkw zookeeper connection to use
   * @param timeout maximum time to wait, in millis
   * @return ServerName or null if we timed out.
   * @throws InterruptedException
   */
  public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
      final long timeout)
  throws InterruptedException {
    return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
  }

  /**
   * Wait until the meta region is available and is not in transition.
   * Polls the location, sleeping {@link HConstants#SOCKET_RETRY_WAIT_MS} between attempts,
   * and gives up once another sleep would exceed the timeout.
   * @param zkw zookeeper connection to use
   * @param replicaId replica of the meta region to wait for
   * @param timeout maximum time to wait, in millis; must not be negative
   * @return ServerName or null if we timed out.
   * @throws InterruptedException if interrupted while sleeping between attempts
   * @throws IllegalArgumentException if <code>timeout</code> is negative or
   *   <code>zkw</code> is null
   */
  public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
      final long timeout)
  throws InterruptedException {
    if (timeout < 0) {
      throw new IllegalArgumentException("timeout must not be negative: " + timeout);
    }
    if (zkw == null) {
      throw new IllegalArgumentException("zkw must not be null");
    }
    Stopwatch sw = new Stopwatch().start();
    ServerName sn = null;
    try {
      while (true) {
        sn = getMetaRegionLocation(zkw, replicaId);
        if (sn != null || sw.elapsedMillis()
            > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
          break;
        }
        Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
      }
    } finally {
      sw.stop();
    }
    return sn;
  }

  /**
   * Stop working.
   * Makes the unbounded {@link #waitMetaRegionLocation(ZooKeeperWatcher)} loop exit at its
   * next iteration.
   */
  public void stop() {
    if (!stopped) {
      LOG.debug("Stopping MetaTableLocator");
      stopped = true;
    }
  }
}