1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.zookeeper; 20 21 import java.util.List; 22 23 import org.apache.commons.logging.Log; 24 import org.apache.commons.logging.LogFactory; 25 import org.apache.hadoop.hbase.classification.InterfaceAudience; 26 import org.apache.hadoop.hbase.HConstants; 27 import org.apache.hadoop.hbase.HRegionInfo; 28 import org.apache.hadoop.hbase.RegionTransition; 29 import org.apache.hadoop.hbase.ServerName; 30 import org.apache.hadoop.hbase.exceptions.DeserializationException; 31 import org.apache.hadoop.hbase.executor.EventType; 32 import org.apache.zookeeper.AsyncCallback; 33 import org.apache.zookeeper.KeeperException; 34 import org.apache.zookeeper.KeeperException.Code; 35 import org.apache.zookeeper.data.Stat; 36 37 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be 38 // about zk and bytes only. 39 40 /** 41 * Utility class for doing region assignment in ZooKeeper. This class extends 42 * stuff done in {@link ZKUtil} to cover specific assignment operations. 43 * <p> 44 * Contains only static methods and constants. 45 * <p> 46 * Used by both the Master and RegionServer. 
47 * <p> 48 * All valid transitions outlined below: 49 * <p> 50 * <b>MASTER</b> 51 * <ol> 52 * <li> 53 * Master creates an unassigned node as OFFLINE. 54 * - Cluster startup and table enabling. 55 * </li> 56 * <li> 57 * Master forces an existing unassigned node to OFFLINE. 58 * - RegionServer failure. 59 * - Allows transitions from all states to OFFLINE. 60 * </li> 61 * <li> 62 * Master deletes an unassigned node that was in a OPENED state. 63 * - Normal region transitions. Besides cluster startup, no other deletions 64 * of unassigned nodes is allowed. 65 * </li> 66 * <li> 67 * Master deletes all unassigned nodes regardless of state. 68 * - Cluster startup before any assignment happens. 69 * </li> 70 * </ol> 71 * <p> 72 * <b>REGIONSERVER</b> 73 * <ol> 74 * <li> 75 * RegionServer creates an unassigned node as CLOSING. 76 * - All region closes will do this in response to a CLOSE RPC from Master. 77 * - A node can never be transitioned to CLOSING, only created. 78 * </li> 79 * <li> 80 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 81 * - Normal region closes. CAS operation. 82 * </li> 83 * <li> 84 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 85 * - All region opens will do this in response to an OPEN RPC from the Master. 86 * - Normal region opens. CAS operation. 87 * </li> 88 * <li> 89 * RegionServer transitions an unassigned node from OPENING to OPENED. 90 * - Normal region opens. CAS operation. 91 * </li> 92 * </ol> 93 */ 94 @InterfaceAudience.Private 95 public class ZKAssign { 96 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 97 98 /** 99 * Gets the full path node name for the unassigned node for the specified 100 * region. 
101 * @param zkw zk reference 102 * @param regionName region name 103 * @return full path node name 104 */ getNodeName(ZooKeeperWatcher zkw, String regionName)105 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 106 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 107 } 108 109 /** 110 * Gets the region name from the full path node name of an unassigned node. 111 * @param path full zk path 112 * @return region name 113 */ getRegionName(ZooKeeperWatcher zkw, String path)114 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 115 return path.substring(zkw.assignmentZNode.length()+1); 116 } 117 118 // Master methods 119 120 /** 121 * Creates a new unassigned node in the OFFLINE state for the specified region. 122 * 123 * <p>Does not transition nodes from other states. If a node already exists 124 * for this region, a {@link org.apache.zookeeper.KeeperException.NodeExistsException} 125 * will be thrown. 126 * 127 * <p>Sets a watcher on the unassigned region node if the method is successful. 128 * 129 * <p>This method should only be used during cluster startup and the enabling 130 * of a table. 
131 * 132 * @param zkw zk reference 133 * @param region region to be created as offline 134 * @param serverName server transition will happen on 135 * @throws KeeperException if unexpected zookeeper exception 136 * @throws KeeperException.NodeExistsException if node already exists 137 */ createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)138 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 139 ServerName serverName) 140 throws KeeperException, KeeperException.NodeExistsException { 141 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 142 } 143 createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, final EventType event)144 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 145 ServerName serverName, final EventType event) 146 throws KeeperException, KeeperException.NodeExistsException { 147 LOG.debug(zkw.prefix("Creating unassigned node " + 148 region.getEncodedName() + " in OFFLINE state")); 149 RegionTransition rt = 150 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName); 151 String node = getNodeName(zkw, region.getEncodedName()); 152 ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 153 } 154 155 /** 156 * Creates an unassigned node in the OFFLINE state for the specified region. 157 * <p> 158 * Runs asynchronously. Depends on no pre-existing znode. 159 * 160 * <p>Sets a watcher on the unassigned region node. 
161 * 162 * @param zkw zk reference 163 * @param region region to be created as offline 164 * @param serverName server transition will happen on 165 * @param cb 166 * @param ctx 167 * @throws KeeperException if unexpected zookeeper exception 168 * @throws KeeperException.NodeExistsException if node already exists 169 */ asyncCreateNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, final AsyncCallback.StringCallback cb, final Object ctx)170 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 171 HRegionInfo region, ServerName serverName, 172 final AsyncCallback.StringCallback cb, final Object ctx) 173 throws KeeperException { 174 LOG.debug(zkw.prefix("Async create of unassigned node " + 175 region.getEncodedName() + " with OFFLINE state")); 176 RegionTransition rt = 177 RegionTransition.createRegionTransition( 178 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 179 String node = getNodeName(zkw, region.getEncodedName()); 180 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx); 181 } 182 183 /** 184 * Creates or force updates an unassigned node to the OFFLINE state for the 185 * specified region. 186 * <p> 187 * Attempts to create the node but if it exists will force it to transition to 188 * and OFFLINE state. 189 * 190 * <p>Sets a watcher on the unassigned region node if the method is 191 * successful. 192 * 193 * <p>This method should be used when assigning a region. 194 * 195 * @param zkw zk reference 196 * @param region region to be created as offline 197 * @param serverName server transition will happen on 198 * @return the version of the znode created in OFFLINE state, -1 if 199 * unsuccessful. 
200 * @throws KeeperException if unexpected zookeeper exception 201 * @throws KeeperException.NodeExistsException if node already exists 202 */ createOrForceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)203 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw, 204 HRegionInfo region, ServerName serverName) throws KeeperException { 205 LOG.debug(zkw.prefix("Creating (or updating) unassigned node " + 206 region.getEncodedName() + " with OFFLINE state")); 207 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE, 208 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 209 byte [] data = rt.toByteArray(); 210 String node = getNodeName(zkw, region.getEncodedName()); 211 zkw.sync(node); 212 int version = ZKUtil.checkExists(zkw, node); 213 if (version == -1) { 214 return ZKUtil.createAndWatch(zkw, node, data); 215 } else { 216 boolean setData = false; 217 try { 218 setData = ZKUtil.setData(zkw, node, data, version); 219 // Setdata throws KeeperException which aborts the Master. So we are 220 // catching it here. 221 // If just before setting the znode to OFFLINE if the RS has made any 222 // change to the 223 // znode state then we need to return -1. 224 } catch (KeeperException kpe) { 225 LOG.info("Version mismatch while setting the node to OFFLINE state."); 226 return -1; 227 } 228 if (!setData) { 229 return -1; 230 } else { 231 // We successfully forced to OFFLINE, reset watch and handle if 232 // the state changed in between our set and the watch 233 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); 234 rt = getRegionTransition(bytes); 235 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) { 236 // state changed, need to process 237 return -1; 238 } 239 } 240 } 241 return version + 1; 242 } 243 244 /** 245 * Deletes an existing unassigned node that is in the OPENED state for the 246 * specified region. 
247 * 248 * <p>If a node does not already exist for this region, a 249 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 250 * 251 * <p>No watcher is set whether this succeeds or not. 252 * 253 * <p>Returns false if the node was not in the proper state but did exist. 254 * 255 * <p>This method is used during normal region transitions when a region 256 * finishes successfully opening. This is the Master acknowledging completion 257 * of the specified regions transition. 258 * 259 * @param zkw zk reference 260 * @param encodedRegionName opened region to be deleted from zk 261 * @param sn the expected region transition target server name 262 * @throws KeeperException if unexpected zookeeper exception 263 * @throws KeeperException.NoNodeException if node does not exist 264 */ deleteOpenedNode(ZooKeeperWatcher zkw, String encodedRegionName, ServerName sn)265 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 266 String encodedRegionName, ServerName sn) 267 throws KeeperException, KeeperException.NoNodeException { 268 return deleteNode(zkw, encodedRegionName, 269 EventType.RS_ZK_REGION_OPENED, sn); 270 } 271 272 /** 273 * Deletes an existing unassigned node that is in the OFFLINE state for the 274 * specified region. 275 * 276 * <p>If a node does not already exist for this region, a 277 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 278 * 279 * <p>No watcher is set whether this succeeds or not. 280 * 281 * <p>Returns false if the node was not in the proper state but did exist. 282 * 283 * <p>This method is used during master failover when the regions on an RS 284 * that has died are all set to OFFLINE before being processed. 
285 * 286 * @param zkw zk reference 287 * @param encodedRegionName closed region to be deleted from zk 288 * @param sn the expected region transition target server name 289 * @throws KeeperException if unexpected zookeeper exception 290 * @throws KeeperException.NoNodeException if node does not exist 291 */ deleteOfflineNode(ZooKeeperWatcher zkw, String encodedRegionName, ServerName sn)292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 293 String encodedRegionName, ServerName sn) 294 throws KeeperException, KeeperException.NoNodeException { 295 return deleteNode(zkw, encodedRegionName, 296 EventType.M_ZK_REGION_OFFLINE, sn); 297 } 298 299 /** 300 * Deletes an existing unassigned node that is in the CLOSED state for the 301 * specified region. 302 * 303 * <p>If a node does not already exist for this region, a 304 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 305 * 306 * <p>No watcher is set whether this succeeds or not. 307 * 308 * <p>Returns false if the node was not in the proper state but did exist. 309 * 310 * <p>This method is used during table disables when a region finishes 311 * successfully closing. This is the Master acknowledging completion 312 * of the specified regions transition to being closed. 
313 * 314 * @param zkw zk reference 315 * @param encodedRegionName closed region to be deleted from zk 316 * @param sn the expected region transition target server name 317 * @throws KeeperException if unexpected zookeeper exception 318 * @throws KeeperException.NoNodeException if node does not exist 319 */ deleteClosedNode(ZooKeeperWatcher zkw, String encodedRegionName, ServerName sn)320 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 321 String encodedRegionName, ServerName sn) 322 throws KeeperException, KeeperException.NoNodeException { 323 return deleteNode(zkw, encodedRegionName, 324 EventType.RS_ZK_REGION_CLOSED, sn); 325 } 326 327 /** 328 * Deletes an existing unassigned node that is in the CLOSING state for the 329 * specified region. 330 * 331 * <p>If a node does not already exist for this region, a 332 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 333 * 334 * <p>No watcher is set whether this succeeds or not. 335 * 336 * <p>Returns false if the node was not in the proper state but did exist. 337 * 338 * <p>This method is used during table disables when a region finishes 339 * successfully closing. This is the Master acknowledging completion 340 * of the specified regions transition to being closed. 
341 * 342 * @param zkw zk reference 343 * @param region closing region to be deleted from zk 344 * @param sn the expected region transition target server name 345 * @throws KeeperException if unexpected zookeeper exception 346 * @throws KeeperException.NoNodeException if node does not exist 347 */ deleteClosingNode(ZooKeeperWatcher zkw, HRegionInfo region, ServerName sn)348 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 349 HRegionInfo region, ServerName sn) 350 throws KeeperException, KeeperException.NoNodeException { 351 String encodedRegionName = region.getEncodedName(); 352 return deleteNode(zkw, encodedRegionName, 353 EventType.M_ZK_REGION_CLOSING, sn); 354 } 355 356 /** 357 * Deletes an existing unassigned node that is in the specified state for the 358 * specified region. 359 * 360 * <p>If a node does not already exist for this region, a 361 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 362 * 363 * <p>No watcher is set whether this succeeds or not. 364 * 365 * <p>Returns false if the node was not in the proper state but did exist. 366 * 367 * <p>This method is used when a region finishes opening/closing. 368 * The Master acknowledges completion 369 * of the specified regions transition to being closed/opened. 
370 * 371 * @param zkw zk reference 372 * @param encodedRegionName region to be deleted from zk 373 * @param expectedState state region must be in for delete to complete 374 * @param sn the expected region transition target server name 375 * @throws KeeperException if unexpected zookeeper exception 376 * @throws KeeperException.NoNodeException if node does not exist 377 */ deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, EventType expectedState, ServerName sn)378 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 379 EventType expectedState, ServerName sn) 380 throws KeeperException, KeeperException.NoNodeException { 381 return deleteNode(zkw, encodedRegionName, expectedState, sn, -1); 382 } 383 384 /** 385 * Deletes an existing unassigned node that is in the specified state for the 386 * specified region. 387 * 388 * <p>If a node does not already exist for this region, a 389 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 390 * 391 * <p>No watcher is set whether this succeeds or not. 392 * 393 * <p>Returns false if the node was not in the proper state but did exist. 394 * 395 * <p>This method is used when a region finishes opening/closing. 396 * The Master acknowledges completion 397 * of the specified regions transition to being closed/opened. 398 * 399 * @param zkw zk reference 400 * @param encodedRegionName region to be deleted from zk 401 * @param expectedState state region must be in for delete to complete 402 * @param expectedVersion of the znode that is to be deleted. 
403 * If expectedVersion need not be compared while deleting the znode 404 * pass -1 405 * @throws KeeperException if unexpected zookeeper exception 406 * @throws KeeperException.NoNodeException if node does not exist 407 */ deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, EventType expectedState, int expectedVersion)408 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 409 EventType expectedState, int expectedVersion) 410 throws KeeperException, KeeperException.NoNodeException { 411 return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion); 412 } 413 414 /** 415 * Deletes an existing unassigned node that is in the specified state for the 416 * specified region. 417 * 418 * <p>If a node does not already exist for this region, a 419 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 420 * 421 * <p>No watcher is set whether this succeeds or not. 422 * 423 * <p>Returns false if the node was not in the proper state but did exist. 424 * 425 * <p>This method is used when a region finishes opening/closing. 426 * The Master acknowledges completion 427 * of the specified regions transition to being closed/opened. 428 * 429 * @param zkw zk reference 430 * @param encodedRegionName region to be deleted from zk 431 * @param expectedState state region must be in for delete to complete 432 * @param serverName the expected region transition target server name 433 * @param expectedVersion of the znode that is to be deleted. 
434 * If expectedVersion need not be compared while deleting the znode 435 * pass -1 436 * @throws KeeperException if unexpected zookeeper exception 437 * @throws KeeperException.NoNodeException if node does not exist 438 */ deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, EventType expectedState, ServerName serverName, int expectedVersion)439 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 440 EventType expectedState, ServerName serverName, int expectedVersion) 441 throws KeeperException, KeeperException.NoNodeException { 442 if (LOG.isTraceEnabled()) { 443 LOG.trace(zkw.prefix("Deleting existing unassigned " + 444 "node " + encodedRegionName + " in expected state " + expectedState)); 445 } 446 String node = getNodeName(zkw, encodedRegionName); 447 zkw.sync(node); 448 Stat stat = new Stat(); 449 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 450 if (bytes == null) { 451 // If it came back null, node does not exist. 452 throw KeeperException.create(Code.NONODE); 453 } 454 RegionTransition rt = getRegionTransition(bytes); 455 EventType et = rt.getEventType(); 456 if (!et.equals(expectedState)) { 457 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " + 458 expectedState + " state but node is in " + et + " state")); 459 return false; 460 } 461 // Verify the server transition happens on is not changed 462 if (serverName != null && !rt.getServerName().equals(serverName)) { 463 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName 464 + " with target " + serverName + " but node has " + rt.getServerName())); 465 return false; 466 } 467 if (expectedVersion != -1 468 && stat.getVersion() != expectedVersion) { 469 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" + 470 " the expected one. 
Got a version mismatch"); 471 return false; 472 } 473 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 474 LOG.warn(zkw.prefix("Attempting to delete " + 475 "unassigned node " + encodedRegionName + " in " + expectedState + 476 " state but after verifying state, we got a version mismatch")); 477 return false; 478 } 479 LOG.debug(zkw.prefix("Deleted unassigned node " + 480 encodedRegionName + " in expected state " + expectedState)); 481 return true; 482 } 483 484 /** 485 * Deletes all unassigned nodes regardless of their state. 486 * 487 * <p>No watchers are set. 488 * 489 * <p>This method is used by the Master during cluster startup to clear out 490 * any existing state from other cluster runs. 491 * 492 * @param zkw zk reference 493 * @throws KeeperException if unexpected zookeeper exception 494 */ deleteAllNodes(ZooKeeperWatcher zkw)495 public static void deleteAllNodes(ZooKeeperWatcher zkw) 496 throws KeeperException { 497 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 498 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 499 } 500 501 /** 502 * Creates a new unassigned node in the CLOSING state for the specified 503 * region. 504 * 505 * <p>Does not transition nodes from any states. If a node already exists 506 * for this region, a {@link org.apache.zookeeper.KeeperException.NodeExistsException} 507 * will be thrown. 508 * 509 * <p>If creation is successful, returns the version number of the CLOSING 510 * node created. 511 * 512 * <p>Set a watch. 513 * 514 * <p>This method should only be used by a Master when initiating a 515 * close of a region before sending a close request to the region server. 
516 * 517 * @param zkw zk reference 518 * @param region region to be created as closing 519 * @param serverName server transition will happen on 520 * @return version of node after transition, -1 if unsuccessful transition 521 * @throws KeeperException if unexpected zookeeper exception 522 * @throws KeeperException.NodeExistsException if node already exists 523 */ createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)524 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 525 ServerName serverName) 526 throws KeeperException, KeeperException.NodeExistsException { 527 LOG.debug(zkw.prefix("Creating unassigned node " + 528 region.getEncodedName() + " in a CLOSING state")); 529 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING, 530 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 531 String node = getNodeName(zkw, region.getEncodedName()); 532 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 533 } 534 535 // RegionServer methods 536 537 /** 538 * Transitions an existing unassigned node for the specified region which is 539 * currently in the CLOSING state to be in the CLOSED state. 540 * 541 * <p>Does not transition nodes from other states. If for some reason the 542 * node could not be transitioned, the method returns -1. If the transition 543 * is successful, the version of the node after transition is returned. 544 * 545 * <p>This method can fail and return false for three different reasons: 546 * <ul><li>Unassigned node for this region does not exist</li> 547 * <li>Unassigned node for this region is not in CLOSING state</li> 548 * <li>After verifying CLOSING state, update fails because of wrong version 549 * (someone else already transitioned the node)</li> 550 * </ul> 551 * 552 * <p>Does not set any watches. 
553 * 554 * <p>This method should only be used by a RegionServer when initiating a 555 * close of a region after receiving a CLOSE RPC from the Master. 556 * 557 * @param zkw zk reference 558 * @param region region to be transitioned to closed 559 * @param serverName server transition happens on 560 * @return version of node after transition, -1 if unsuccessful transition 561 * @throws KeeperException if unexpected zookeeper exception 562 */ transitionNodeClosed(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, int expectedVersion)563 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 564 HRegionInfo region, ServerName serverName, int expectedVersion) 565 throws KeeperException { 566 return transitionNode(zkw, region, serverName, 567 EventType.M_ZK_REGION_CLOSING, 568 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 569 } 570 571 /** 572 * Transitions an existing unassigned node for the specified region which is 573 * currently in the OFFLINE state to be in the OPENING state. 574 * 575 * <p>Does not transition nodes from other states. If for some reason the 576 * node could not be transitioned, the method returns -1. If the transition 577 * is successful, the version of the node written as OPENING is returned. 578 * 579 * <p>This method can fail and return -1 for three different reasons: 580 * <ul><li>Unassigned node for this region does not exist</li> 581 * <li>Unassigned node for this region is not in OFFLINE state</li> 582 * <li>After verifying OFFLINE state, update fails because of wrong version 583 * (someone else already transitioned the node)</li> 584 * </ul> 585 * 586 * <p>Does not set any watches. 587 * 588 * <p>This method should only be used by a RegionServer when initiating an 589 * open of a region after receiving an OPEN RPC from the Master. 
590 * 591 * @param zkw zk reference 592 * @param region region to be transitioned to opening 593 * @param serverName server transition happens on 594 * @return version of node after transition, -1 if unsuccessful transition 595 * @throws KeeperException if unexpected zookeeper exception 596 */ transitionNodeOpening(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)597 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 598 HRegionInfo region, ServerName serverName) 599 throws KeeperException { 600 return transitionNodeOpening(zkw, region, serverName, 601 EventType.M_ZK_REGION_OFFLINE); 602 } 603 transitionNodeOpening(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, final EventType beginState)604 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 605 HRegionInfo region, ServerName serverName, final EventType beginState) 606 throws KeeperException { 607 return transitionNode(zkw, region, serverName, beginState, 608 EventType.RS_ZK_REGION_OPENING, -1); 609 } 610 611 /** 612 * Confirm an existing unassigned node for the specified region which is 613 * currently in the OPENING state to be still in the OPENING state on 614 * the specified server. 615 * 616 * <p>If for some reason the check fails, the method returns -1. Otherwise, 617 * the version of the node (same as the expected version) is returned. 618 * 619 * <p>This method can fail and return -1 for three different reasons: 620 * <ul><li>Unassigned node for this region does not exist</li> 621 * <li>Unassigned node for this region is not in OPENING state</li> 622 * <li>After verifying OPENING state, the server name or the version of the 623 * doesn't match)</li> 624 * </ul> 625 * 626 * <p>Does not set any watches. 627 * 628 * <p>This method should only be used by a RegionServer when initiating an 629 * open of a region after receiving an OPEN RPC from the Master. 
630 * 631 * @param zkw zk reference 632 * @param region region to be transitioned to opening 633 * @param serverName server transition happens on 634 * @return version of node after transition, -1 if unsuccessful transition 635 * @throws KeeperException if unexpected zookeeper exception 636 */ confirmNodeOpening(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, int expectedVersion)637 public static int confirmNodeOpening(ZooKeeperWatcher zkw, 638 HRegionInfo region, ServerName serverName, int expectedVersion) 639 throws KeeperException { 640 641 String encoded = region.getEncodedName(); 642 if(LOG.isDebugEnabled()) { 643 LOG.debug(zkw.prefix("Attempting to retransition opening state of node " + 644 HRegionInfo.prettyPrint(encoded))); 645 } 646 647 String node = getNodeName(zkw, encoded); 648 zkw.sync(node); 649 650 // Read existing data of the node 651 Stat stat = new Stat(); 652 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 653 if (existingBytes == null) { 654 // Node no longer exists. Return -1. It means unsuccessful transition. 655 return -1; 656 } 657 RegionTransition rt = getRegionTransition(existingBytes); 658 659 // Verify it is the expected version 660 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 661 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " + 662 "unassigned node for " + encoded + " failed, " + 663 "the node existed but was version " + stat.getVersion() + 664 " not the expected version " + expectedVersion)); 665 return -1; 666 } 667 668 // Verify it is in expected state 669 EventType et = rt.getEventType(); 670 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) { 671 String existingServer = (rt.getServerName() == null) 672 ? 
"<unknown>" : rt.getServerName().toString(); 673 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for " 674 + encoded + " failed, the node existed but was in the state " + et + 675 " set by the server " + existingServer)); 676 return -1; 677 } 678 679 return expectedVersion; 680 } 681 682 /** 683 * Transitions an existing unassigned node for the specified region which is 684 * currently in the OPENING state to be in the OPENED state. 685 * 686 * <p>Does not transition nodes from other states. If for some reason the 687 * node could not be transitioned, the method returns -1. If the transition 688 * is successful, the version of the node after transition is returned. 689 * 690 * <p>This method can fail and return false for three different reasons: 691 * <ul><li>Unassigned node for this region does not exist</li> 692 * <li>Unassigned node for this region is not in OPENING state</li> 693 * <li>After verifying OPENING state, update fails because of wrong version 694 * (this should never actually happen since an RS only does this transition 695 * following a transition to OPENING. if two RS are conflicting, one would 696 * fail the original transition to OPENING and not this transition)</li> 697 * </ul> 698 * 699 * <p>Does not set any watches. 700 * 701 * <p>This method should only be used by a RegionServer when completing the 702 * open of a region. 
703 * 704 * @param zkw zk reference 705 * @param region region to be transitioned to opened 706 * @param serverName server transition happens on 707 * @return version of node after transition, -1 if unsuccessful transition 708 * @throws KeeperException if unexpected zookeeper exception 709 */ transitionNodeOpened(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, int expectedVersion)710 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 711 HRegionInfo region, ServerName serverName, int expectedVersion) 712 throws KeeperException { 713 return transitionNode(zkw, region, serverName, 714 EventType.RS_ZK_REGION_OPENING, 715 EventType.RS_ZK_REGION_OPENED, expectedVersion); 716 } 717 718 /** 719 * 720 * @param zkw zk reference 721 * @param region region to be closed 722 * @param expectedVersion expected version of the znode 723 * @return true if the znode exists, has the right version and the right state. False otherwise. 724 * @throws KeeperException 725 */ checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region, int expectedVersion)726 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region, 727 int expectedVersion) throws KeeperException { 728 729 final String encoded = getNodeName(zkw, region.getEncodedName()); 730 zkw.sync(encoded); 731 732 // Read existing data of the node 733 Stat stat = new Stat(); 734 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat); 735 736 if (existingBytes == null) { 737 LOG.warn(zkw.prefix("Attempt to check the " + 738 "closing node for " + encoded + 739 ". The node does not exist")); 740 return false; 741 } 742 743 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 744 LOG.warn(zkw.prefix("Attempt to check the " + 745 "closing node for " + encoded + 746 ". 
The node existed but was version " + stat.getVersion() + 747 " not the expected version " + expectedVersion)); 748 return false; 749 } 750 751 RegionTransition rt = getRegionTransition(existingBytes); 752 753 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) { 754 LOG.warn(zkw.prefix("Attempt to check the " + 755 "closing node for " + encoded + 756 ". The node existed but was in an unexpected state: " + rt.getEventType())); 757 return false; 758 } 759 760 return true; 761 } 762 763 /** 764 * Method that actually performs unassigned node transitions. 765 * 766 * <p>Attempts to transition the unassigned node for the specified region 767 * from the expected state to the state in the specified transition data. 768 * 769 * <p>Method first reads existing data and verifies it is in the expected 770 * state. If the node does not exist or the node is not in the expected 771 * state, the method returns -1. If the transition is successful, the 772 * version number of the node following the transition is returned. 773 * 774 * <p>If the read state is what is expected, it attempts to write the new 775 * state and data into the node. When doing this, it includes the expected 776 * version (determined when the existing state was verified) to ensure that 777 * only one transition is successful. If there is a version mismatch, the 778 * method returns -1. 779 * 780 * <p>If the write is successful, no watch is set and the method returns true. 
781 * 782 * @param zkw zk reference 783 * @param region region to be transitioned to opened 784 * @param serverName server transition happens on 785 * @param endState state to transition node to if all checks pass 786 * @param beginState state the node must currently be in to do transition 787 * @param expectedVersion expected version of data before modification, or -1 788 * @return version of node after transition, -1 if unsuccessful transition 789 * @throws KeeperException if unexpected zookeeper exception 790 */ transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, EventType beginState, EventType endState, int expectedVersion)791 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 792 ServerName serverName, EventType beginState, EventType endState, 793 int expectedVersion) 794 throws KeeperException { 795 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null); 796 } 797 798 transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName, EventType beginState, EventType endState, int expectedVersion, final byte [] payload)799 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 800 ServerName serverName, EventType beginState, EventType endState, 801 int expectedVersion, final byte [] payload) 802 throws KeeperException { 803 String encoded = region.getEncodedName(); 804 if(LOG.isDebugEnabled()) { 805 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) + 806 " from " + beginState.toString() + " to " + endState.toString())); 807 } 808 809 String node = getNodeName(zkw, encoded); 810 zkw.sync(node); 811 812 // Read existing data of the node 813 Stat stat = new Stat(); 814 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 815 if (existingBytes == null) { 816 // Node no longer exists. Return -1. It means unsuccessful transition. 
817 return -1; 818 } 819 820 // Verify it is the expected version 821 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 822 LOG.warn(zkw.prefix("Attempt to transition the " + 823 "unassigned node for " + encoded + 824 " from " + beginState + " to " + endState + " failed, " + 825 "the node existed but was version " + stat.getVersion() + 826 " not the expected version " + expectedVersion)); 827 return -1; 828 } 829 830 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE) 831 && endState.equals(EventType.RS_ZK_REGION_OPENING) 832 && expectedVersion == -1 && stat.getVersion() != 0) { 833 // the below check ensures that double assignment doesnot happen. 834 // When the node is created for the first time then the expected version 835 // that is passed will be -1 and the version in znode will be 0. 836 // In all other cases the version in znode will be > 0. 837 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for " 838 + encoded + " from " + beginState + " to " + endState + " failed, " 839 + "the node existed but was version " + stat.getVersion() 840 + " not the expected version " + expectedVersion)); 841 return -1; 842 } 843 844 RegionTransition rt = getRegionTransition(existingBytes); 845 846 // Verify the server transition happens on is not changed 847 if (!rt.getServerName().equals(serverName)) { 848 LOG.warn(zkw.prefix("Attempt to transition the " + 849 "unassigned node for " + encoded + 850 " from " + beginState + " to " + endState + " failed, " + 851 "the server that tried to transition was " + serverName + 852 " not the expected " + rt.getServerName())); 853 return -1; 854 } 855 856 // Verify it is in expected state 857 EventType et = rt.getEventType(); 858 if (!et.equals(beginState)) { 859 String existingServer = (rt.getServerName() == null) 860 ? 
"<unknown>" : rt.getServerName().toString(); 861 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded 862 + " from " + beginState + " to " + endState + " failed, the node existed but" 863 + " was in the state " + et + " set by the server " + existingServer)); 864 return -1; 865 } 866 867 // Write new data, ensuring data has not changed since we last read it 868 try { 869 rt = RegionTransition.createRegionTransition( 870 endState, region.getRegionName(), serverName, payload); 871 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) { 872 LOG.warn(zkw.prefix("Attempt to transition the " + 873 "unassigned node for " + encoded + 874 " from " + beginState + " to " + endState + " failed, " + 875 "the node existed and was in the expected state but then when " + 876 "setting data we got a version mismatch")); 877 return -1; 878 } 879 if(LOG.isDebugEnabled()) { 880 LOG.debug(zkw.prefix("Transitioned node " + encoded + 881 " from " + beginState + " to " + endState)); 882 } 883 return stat.getVersion() + 1; 884 } catch (KeeperException.NoNodeException nne) { 885 LOG.warn(zkw.prefix("Attempt to transition the " + 886 "unassigned node for " + encoded + 887 " from " + beginState + " to " + endState + " failed, " + 888 "the node existed and was in the expected state but then when " + 889 "setting data it no longer existed")); 890 return -1; 891 } 892 } 893 getRegionTransition(final byte [] bytes)894 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException { 895 try { 896 return RegionTransition.parseFrom(bytes); 897 } catch (DeserializationException e) { 898 // Convert to a zk exception for now. Otherwise have to change API 899 throw ZKUtil.convert(e); 900 } 901 } 902 903 /** 904 * Gets the current data in the unassigned node for the specified region name 905 * or fully-qualified path. 906 * 907 * <p>Returns null if the region does not currently have a node. 
908 * 909 * <p>Sets a watch on the node if the node exists. 910 * 911 * @param zkw zk reference 912 * @param pathOrRegionName fully-specified path or region name 913 * @return znode content 914 * @throws KeeperException if unexpected zookeeper exception 915 */ getData(ZooKeeperWatcher zkw, String pathOrRegionName)916 public static byte [] getData(ZooKeeperWatcher zkw, 917 String pathOrRegionName) 918 throws KeeperException { 919 String node = getPath(zkw, pathOrRegionName); 920 return ZKUtil.getDataAndWatch(zkw, node); 921 } 922 923 /** 924 * Gets the current data in the unassigned node for the specified region name 925 * or fully-qualified path. 926 * 927 * <p>Returns null if the region does not currently have a node. 928 * 929 * <p>Sets a watch on the node if the node exists. 930 * 931 * @param zkw zk reference 932 * @param pathOrRegionName fully-specified path or region name 933 * @param stat object to populate the version. 934 * @return znode content 935 * @throws KeeperException if unexpected zookeeper exception 936 */ getDataAndWatch(ZooKeeperWatcher zkw, String pathOrRegionName, Stat stat)937 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw, 938 String pathOrRegionName, Stat stat) 939 throws KeeperException { 940 String node = getPath(zkw, pathOrRegionName); 941 return ZKUtil.getDataAndWatch(zkw, node, stat); 942 } 943 944 /** 945 * Gets the current data in the unassigned node for the specified region name 946 * or fully-qualified path. 947 * 948 * <p>Returns null if the region does not currently have a node. 949 * 950 * <p>Does not set a watch. 
951 * 952 * @param zkw zk reference 953 * @param pathOrRegionName fully-specified path or region name 954 * @param stat object to store node info into on getData call 955 * @return znode content 956 * @throws KeeperException if unexpected zookeeper exception 957 */ getDataNoWatch(ZooKeeperWatcher zkw, String pathOrRegionName, Stat stat)958 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw, 959 String pathOrRegionName, Stat stat) 960 throws KeeperException { 961 String node = getPath(zkw, pathOrRegionName); 962 return ZKUtil.getDataNoWatch(zkw, node, stat); 963 } 964 965 /** 966 * @param zkw 967 * @param pathOrRegionName 968 * @return Path to znode 969 */ getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName)970 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) { 971 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName); 972 } 973 974 /** 975 * Get the version of the specified znode 976 * @param zkw zk reference 977 * @param region region's info 978 * @return the version of the znode, -1 if it doesn't exist 979 * @throws KeeperException 980 */ getVersion(ZooKeeperWatcher zkw, HRegionInfo region)981 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region) 982 throws KeeperException { 983 String znode = getNodeName(zkw, region.getEncodedName()); 984 return ZKUtil.checkExists(zkw, znode); 985 } 986 987 /** 988 * Delete the assignment node regardless of its current state. 989 * <p> 990 * Fail silent even if the node does not exist at all. 
991 * @param watcher 992 * @param regionInfo 993 * @throws KeeperException 994 */ deleteNodeFailSilent(ZooKeeperWatcher watcher, HRegionInfo regionInfo)995 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 996 HRegionInfo regionInfo) 997 throws KeeperException { 998 String node = getNodeName(watcher, regionInfo.getEncodedName()); 999 ZKUtil.deleteNodeFailSilent(watcher, node); 1000 } 1001 1002 /** 1003 * Blocks until there are no node in regions in transition. 1004 * <p> 1005 * Used in testing only. 1006 * @param zkw zk reference 1007 * @throws KeeperException 1008 * @throws InterruptedException 1009 */ blockUntilNoRIT(ZooKeeperWatcher zkw)1010 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 1011 throws KeeperException, InterruptedException { 1012 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 1013 List<String> znodes = 1014 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 1015 if (znodes != null && !znodes.isEmpty()) { 1016 LOG.debug("Waiting on RIT: " + znodes); 1017 } 1018 Thread.sleep(100); 1019 } 1020 } 1021 1022 /** 1023 * Blocks until there is at least one node in regions in transition. 1024 * <p> 1025 * Used in testing only. 1026 * @param zkw zk reference 1027 * @throws KeeperException 1028 * @throws InterruptedException 1029 */ blockUntilRIT(ZooKeeperWatcher zkw)1030 public static void blockUntilRIT(ZooKeeperWatcher zkw) 1031 throws KeeperException, InterruptedException { 1032 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 1033 List<String> znodes = 1034 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 1035 if (znodes == null || znodes.isEmpty()) { 1036 LOG.debug("No RIT in ZK"); 1037 } 1038 Thread.sleep(100); 1039 } 1040 } 1041 1042 /** 1043 * Presume bytes are serialized unassigned data structure 1044 * @param znodeBytes 1045 * @return String of the deserialized znode bytes. 
*/
  static String toString(final byte[] znodeBytes) {
    // This method should not exist.  Used by ZKUtil stringifying RegionTransition.  Have the
    // method in here so RegionTransition does not leak into ZKUtil.
    try {
      return RegionTransition.parseFrom(znodeBytes).toString();
    } catch (DeserializationException ignored) {
      // Best effort only: bytes that cannot be parsed are rendered as an empty string.
      return "";
    }
  }
}